; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX90A %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX940 %s

; Shuffle lowering tests: each function builds a <4 x i16> result from one or
; two <3 x i16> sources and stores it through a global pointer argument. Every
; source is the low three elements of a <4 x i16> value produced by an
; inline-asm "def" with a "=v" constraint. Function names end in the shuffle
; mask, one field per result lane, with `u` marking a poison lane; mask values
; 0-2 index the first shuffle operand and 3-5 index the second.
;
; The check lines are generated from llc output; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.

; Mask is entirely poison, so the expected code contains no store.
define void @v_shuffle_v4i16_v3i16__u_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX9-LABEL: v_shuffle_v4i16_v3i16__u_u_u_u:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> poison
  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
  ret void
}

; Mask <0, u, u, u>: lane 0 = %vec0[0]; the other lanes are poison.
define void @v_shuffle_v4i16_v3i16__0_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i16_v3i16__0_u_u_u:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    v_mov_b32_e32 v2, 0
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:1]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i16_v3i16__0_u_u_u:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:1]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v4i16_v3i16__0_u_u_u:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    v_mov_b32_e32 v2, 0
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:1]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
  ret void
}

; Mask <1, u, u, u>: lane 0 = %vec0[1]; the other lanes are poison.
define void @v_shuffle_v4i16_v3i16__1_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i16_v3i16__1_u_u_u:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:1]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_mov_b32_e32 v2, 0
; GFX900-NEXT:    v_alignbit_b32 v0, s4, v0, 16
; GFX900-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i16_v3i16__1_u_u_u:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:1]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
; GFX90A-NEXT:    v_alignbit_b32 v0, s4, v0, 16
; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v4i16_v3i16__1_u_u_u:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:1]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    v_mov_b32_e32 v2, 0
; GFX940-NEXT:    v_alignbit_b32 v0, s0, v0, 16
; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
  ret void
}

; Mask <2, u, u, u>: lane 0 = %vec0[2]; the other lanes are poison.
define void @v_shuffle_v4i16_v3i16__2_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i16_v3i16__2_u_u_u:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:1]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_mov_b32_e32 v2, 0
; GFX900-NEXT:    v_mov_b32_e32 v0, v1
; GFX900-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i16_v3i16__2_u_u_u:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:1]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
; GFX90A-NEXT:    v_mov_b32_e32 v0, v1
; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v4i16_v3i16__2_u_u_u:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:1]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    v_mov_b32_e32 v2, 0
; GFX940-NEXT:    v_mov_b32_e32 v0, v1
; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> <i32 2, i32 poison, i32 poison, i32 poison>
  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
  ret void
}

; Mask <3, u, u, u>: index 3 selects from the second shuffle operand, which is
; poison here, so the expected code contains no store.
define void @v_shuffle_v4i16_v3i16__3_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX9-LABEL: v_shuffle_v4i16_v3i16__3_u_u_u:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> <i32 3, i32 poison, i32 poison, i32 poison>
  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
  ret void
}

; Mask <4, u, u, u>: lane 0 = %vec1[1]; the other lanes are poison.
define void @v_shuffle_v4i16_v3i16__4_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i16_v3i16__4_u_u_u:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:1]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_mov_b32_e32 v2, 0
; GFX900-NEXT:    v_alignbit_b32 v0, s4, v0, 16
; GFX900-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i16_v3i16__4_u_u_u:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:1]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
; GFX90A-NEXT:    v_alignbit_b32 v0, s4, v0, 16
; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v4i16_v3i16__4_u_u_u:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:1]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    v_mov_b32_e32 v2, 0
; GFX940-NEXT:    v_alignbit_b32 v0, s0, v0, 16
; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 4, i32 poison, i32 poison, i32 poison>
  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
  ret void
}

; Mask <5, u, u, u>: lane 0 = %vec1[2]; the other lanes are poison.
define void @v_shuffle_v4i16_v3i16__5_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_u_u_u:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:1]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_mov_b32_e32 v2, 0
; GFX900-NEXT:    v_mov_b32_e32 v0, v1
; GFX900-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_u_u_u:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:1]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
; GFX90A-NEXT:    v_mov_b32_e32 v0, v1
; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_u_u_u:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:1]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    v_mov_b32_e32 v2, 0
; GFX940-NEXT:    v_mov_b32_e32 v0, v1
; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 poison, i32 poison, i32 poison>
  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
  ret void
}

; Mask <5, 0, u, u>: lane 0 = %vec1[2], lane 1 = %vec0[0].
define void @v_shuffle_v4i16_v3i16__5_0_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_0_u_u:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:1]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
; GFX900-NEXT:    v_mov_b32_e32 v3, 0
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[1:2]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_perm_b32 v0, v0, v2, s4
; GFX900-NEXT:    global_store_dwordx2 v3, v[0:1], s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_0_u_u:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:1]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[2:3]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_perm_b32 v0, v0, v3, s4
; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_0_u_u:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:1]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
; GFX940-NEXT:    v_mov_b32_e32 v4, 0
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[2:3]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_nop 0
; GFX940-NEXT:    v_perm_b32 v0, v0, v3, s2
; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 0, i32 poison, i32 poison>
  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
  ret void
}

; Mask <5, 1, u, u>: lane 0 = %vec1[2], lane 1 = %vec0[1].
define void @v_shuffle_v4i16_v3i16__5_1_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_1_u_u:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:1]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_mov_b32 s4, 0xffff
; GFX900-NEXT:    v_mov_b32_e32 v3, 0
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[1:2]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_bfi_b32 v0, s4, v2, v0
; GFX900-NEXT:    global_store_dwordx2 v3, v[0:1], s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_1_u_u:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:1]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_mov_b32 s4, 0xffff
; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[2:3]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_bfi_b32 v0, s4, v3, v0
; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_1_u_u:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:1]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_mov_b32 s2, 0xffff
; GFX940-NEXT:    v_mov_b32_e32 v4, 0
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[2:3]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_nop 0
; GFX940-NEXT:    v_bfi_b32 v0, s2, v3, v0
; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 1, i32 poison, i32 poison>
  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
  ret void
}

; Mask <5, 2, u, u>: lane 0 = %vec1[2], lane 1 = %vec0[2].
define void @v_shuffle_v4i16_v3i16__5_2_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_2_u_u:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:1]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
; GFX900-NEXT:    v_mov_b32_e32 v4, 0
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[2:3]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_perm_b32 v0, v1, v3, s4
; GFX900-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_2_u_u:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:1]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[2:3]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_perm_b32 v0, v1, v3, s4
; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_2_u_u:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:1]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
; GFX940-NEXT:    v_mov_b32_e32 v4, 0
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[2:3]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_nop 0
; GFX940-NEXT:    v_perm_b32 v0, v1, v3, s2
; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 2, i32 poison, i32 poison>
  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
  ret void
}

; Mask <5, 3, u, u>: lane 0 = %vec1[2], lane 1 = %vec1[0].
define void @v_shuffle_v4i16_v3i16__5_3_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_3_u_u:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:1]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
; GFX900-NEXT:    v_mov_b32_e32 v2, 0
; GFX900-NEXT:    v_perm_b32 v0, v0, v1, s4
; GFX900-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_3_u_u:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:1]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
; GFX90A-NEXT:    v_perm_b32 v0, v0, v1, s4
; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_3_u_u:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:1]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
; GFX940-NEXT:    v_mov_b32_e32 v2, 0
; GFX940-NEXT:    v_perm_b32 v0, v0, v1, s2
; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 3, i32 poison, i32 poison>
  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
  ret void
}

; Mask <5, 4, u, u>: lane 0 = %vec1[2], lane 1 = %vec1[1].
define void @v_shuffle_v4i16_v3i16__5_4_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_4_u_u:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:1]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_mov_b32 s4, 0xffff
; GFX900-NEXT:    v_mov_b32_e32 v2, 0
; GFX900-NEXT:    v_bfi_b32 v0, s4, v1, v0
; GFX900-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_4_u_u:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:1]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_mov_b32 s4, 0xffff
; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
; GFX90A-NEXT:    v_bfi_b32 v0, s4, v1, v0
; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_4_u_u:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:1]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_mov_b32 s2, 0xffff
; GFX940-NEXT:    v_mov_b32_e32 v2, 0
; GFX940-NEXT:    v_bfi_b32 v0, s2, v1, v0
; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 4, i32 poison, i32 poison>
  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
  ret void
}

; Mask <5, 5, u, u>: lanes 0 and 1 = %vec1[2].
define void @v_shuffle_v4i16_v3i16__5_5_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_5_u_u:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:1]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
; GFX900-NEXT:    v_mov_b32_e32 v2, 0
; GFX900-NEXT:    v_perm_b32 v0, v1, v1, s4
; GFX900-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_5_u_u:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:1]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
; GFX90A-NEXT:    v_perm_b32 v0, v1, v1, s4
; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_u_u:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:1]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
; GFX940-NEXT:    v_mov_b32_e32 v2, 0
; GFX940-NEXT:    v_perm_b32 v0, v1, v1, s2
; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 poison, i32 poison>
  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
  ret void
}

; Mask <5, 5, 0, u>: lanes 0 and 1 = %vec1[2], lane 2 = %vec0[0].
define void @v_shuffle_v4i16_v3i16__5_5_0_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_5_0_u:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:1]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[1:2]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
; GFX900-NEXT:    v_mov_b32_e32 v3, 0
; GFX900-NEXT:    v_perm_b32 v1, v2, v2, s4
; GFX900-NEXT:    v_mov_b32_e32 v2, v0
; GFX900-NEXT:    global_store_dwordx2 v3, v[1:2], s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_5_0_u:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[2:3]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:1]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_perm_b32 v2, v3, v3, s4
; GFX90A-NEXT:    v_mov_b32_e32 v3, v0
; GFX90A-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_0_u:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[2:3]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
; GFX940-NEXT:    v_mov_b32_e32 v4, 0
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:1]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    v_perm_b32 v2, v3, v3, s2
; GFX940-NEXT:    v_mov_b32_e32 v3, v0
; GFX940-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 0, i32 poison>
  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
  ret void
}

; Mask <5, 5, 1, u>: lanes 0 and 1 = %vec1[2], lane 2 = %vec0[1].
define void @v_shuffle_v4i16_v3i16__5_5_1_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_5_1_u:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:1]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[1:2]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_alignbit_b32 v1, s4, v0, 16
; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
; GFX900-NEXT:    v_mov_b32_e32 v3, 0
; GFX900-NEXT:    v_perm_b32 v0, v2, v2, s4
; GFX900-NEXT:    global_store_dwordx2 v3, v[0:1], s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_5_1_u:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:1]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_alignbit_b32 v1, s4, v0, 16
; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[2:3]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_perm_b32 v0, v3, v3, s4
; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_1_u:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:1]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
; GFX940-NEXT:    v_mov_b32_e32 v4, 0
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[2:3]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    v_alignbit_b32 v1, s0, v0, 16
; GFX940-NEXT:    v_perm_b32 v0, v3, v3, s2
; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 1, i32 poison>
  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
  ret void
}

; Mask <5, 5, 2, u>: lanes 0 and 1 = %vec1[2], lane 2 = %vec0[2].
define void @v_shuffle_v4i16_v3i16__5_5_2_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_5_2_u:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:1]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
; GFX900-NEXT:    v_mov_b32_e32 v4, 0
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[2:3]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_perm_b32 v0, v3, v3, s4
; GFX900-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_5_2_u:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:1]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[2:3]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_perm_b32 v0, v3, v3, s4
; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_2_u:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:1]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
; GFX940-NEXT:    v_mov_b32_e32 v4, 0
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[2:3]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_nop 0
; GFX940-NEXT:    v_perm_b32 v0, v3, v3, s2
; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 2, i32 poison>
  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
  ret void
}

; Mask <5, 5, 3, u>: lanes 0 and 1 = %vec1[2], lane 2 = %vec1[0].
define void @v_shuffle_v4i16_v3i16__5_5_3_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_5_3_u:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:1]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
; GFX900-NEXT:    v_mov_b32_e32 v3, 0
; GFX900-NEXT:    v_perm_b32 v1, v1, v1, s4
; GFX900-NEXT:    v_mov_b32_e32 v2, v0
; GFX900-NEXT:    global_store_dwordx2 v3, v[1:2], s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_5_3_u:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:1]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_perm_b32 v2, v1, v1, s4
; GFX90A-NEXT:    v_mov_b32_e32 v3, v0
; GFX90A-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_3_u:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
; GFX940-NEXT:    v_mov_b32_e32 v4, 0
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:1]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_nop 0
; GFX940-NEXT:    v_perm_b32 v2, v1, v1, s2
; GFX940-NEXT:    v_mov_b32_e32 v3, v0
; GFX940-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 3, i32 poison>
  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
  ret void
}

; Mask <5, 5, 4, u>: lanes 0 and 1 = %vec1[2], lane 2 = %vec1[1].
define void @v_shuffle_v4i16_v3i16__5_5_4_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_5_4_u:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:1]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_alignbit_b32 v2, s4, v0, 16
; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
; GFX900-NEXT:    v_mov_b32_e32 v3, 0
; GFX900-NEXT:    v_perm_b32 v1, v1, v1, s4
; GFX900-NEXT:    global_store_dwordx2 v3, v[1:2], s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_5_4_u:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:1]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_alignbit_b32 v3, s4, v0, 16
; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
; GFX90A-NEXT:    v_perm_b32 v2, v1, v1, s4
; GFX90A-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_4_u:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
; GFX940-NEXT:    v_mov_b32_e32 v4, 0
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:1]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_nop 0
; GFX940-NEXT:    v_alignbit_b32 v3, s0, v0, 16
; GFX940-NEXT:    v_perm_b32 v2, v1, v1, s2
; GFX940-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 4, i32 poison>
  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
  ret void
}

; Mask <5, 5, 5, u>: lanes 0, 1 and 2 = %vec1[2].
define void @v_shuffle_v4i16_v3i16__5_5_5_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_5_5_u:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:1]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
; GFX900-NEXT:    v_mov_b32_e32 v2, 0
; GFX900-NEXT:    v_perm_b32 v0, v1, v1, s4
; GFX900-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_5_5_u:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:1]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
; GFX90A-NEXT:    v_perm_b32 v0, v1, v1, s4
; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_5_u:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:1]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
; GFX940-NEXT:    v_mov_b32_e32 v2, 0
; GFX940-NEXT:    v_perm_b32 v0, v1, v1, s2
; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 5, i32 poison>
  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
  ret void
}

define void @v_shuffle_v4i16_v3i16__5_5_5_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_5_5_0:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:1]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[1:2]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
; GFX900-NEXT:    v_mov_b32_e32 v3, 0
; GFX900-NEXT:    v_perm_b32 v1, v0, v2, s4
; GFX900-NEXT:    v_perm_b32 v0, v2, v2, s4
; GFX900-NEXT:    global_store_dwordx2 v3, v[0:1], s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_5_5_0:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:1]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_mov_b32
s4, 0x5040100 920; GFX90A-NEXT: v_mov_b32_e32 v4, 0 921; GFX90A-NEXT: ;;#ASMSTART 922; GFX90A-NEXT: ; def v[2:3] 923; GFX90A-NEXT: ;;#ASMEND 924; GFX90A-NEXT: v_perm_b32 v1, v0, v3, s4 925; GFX90A-NEXT: v_perm_b32 v0, v3, v3, s4 926; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 927; GFX90A-NEXT: s_waitcnt vmcnt(0) 928; GFX90A-NEXT: s_setpc_b64 s[30:31] 929; 930; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_5_0: 931; GFX940: ; %bb.0: 932; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 933; GFX940-NEXT: ;;#ASMSTART 934; GFX940-NEXT: ; def v[0:1] 935; GFX940-NEXT: ;;#ASMEND 936; GFX940-NEXT: s_mov_b32 s2, 0x5040100 937; GFX940-NEXT: v_mov_b32_e32 v4, 0 938; GFX940-NEXT: ;;#ASMSTART 939; GFX940-NEXT: ; def v[2:3] 940; GFX940-NEXT: ;;#ASMEND 941; GFX940-NEXT: s_nop 0 942; GFX940-NEXT: v_perm_b32 v1, v0, v3, s2 943; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 944; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 945; GFX940-NEXT: s_waitcnt vmcnt(0) 946; GFX940-NEXT: s_setpc_b64 s[30:31] 947 %vec0 = call <4 x i16> asm "; def $0", "=v"() 948 %vec1 = call <4 x i16> asm "; def $0", "=v"() 949 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 950 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 951 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 5, i32 0> 952 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 953 ret void 954} 955 956define void @v_shuffle_v4i16_v3i16__5_5_5_1(ptr addrspace(1) inreg %ptr) { 957; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_5_5_1: 958; GFX900: ; %bb.0: 959; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 960; GFX900-NEXT: ;;#ASMSTART 961; GFX900-NEXT: ; def v[0:1] 962; GFX900-NEXT: ;;#ASMEND 963; GFX900-NEXT: ;;#ASMSTART 964; GFX900-NEXT: ; def v[1:2] 965; GFX900-NEXT: ;;#ASMEND 966; GFX900-NEXT: s_mov_b32 s4, 0xffff 967; GFX900-NEXT: v_bfi_b32 v1, s4, v2, v0 968; GFX900-NEXT: s_mov_b32 s4, 
0x5040100 969; GFX900-NEXT: v_mov_b32_e32 v3, 0 970; GFX900-NEXT: v_perm_b32 v0, v2, v2, s4 971; GFX900-NEXT: global_store_dwordx2 v3, v[0:1], s[16:17] 972; GFX900-NEXT: s_waitcnt vmcnt(0) 973; GFX900-NEXT: s_setpc_b64 s[30:31] 974; 975; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_5_5_1: 976; GFX90A: ; %bb.0: 977; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 978; GFX90A-NEXT: ;;#ASMSTART 979; GFX90A-NEXT: ; def v[0:1] 980; GFX90A-NEXT: ;;#ASMEND 981; GFX90A-NEXT: s_mov_b32 s4, 0xffff 982; GFX90A-NEXT: ;;#ASMSTART 983; GFX90A-NEXT: ; def v[2:3] 984; GFX90A-NEXT: ;;#ASMEND 985; GFX90A-NEXT: v_bfi_b32 v1, s4, v3, v0 986; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 987; GFX90A-NEXT: v_mov_b32_e32 v4, 0 988; GFX90A-NEXT: v_perm_b32 v0, v3, v3, s4 989; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 990; GFX90A-NEXT: s_waitcnt vmcnt(0) 991; GFX90A-NEXT: s_setpc_b64 s[30:31] 992; 993; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_5_1: 994; GFX940: ; %bb.0: 995; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 996; GFX940-NEXT: ;;#ASMSTART 997; GFX940-NEXT: ; def v[0:1] 998; GFX940-NEXT: ;;#ASMEND 999; GFX940-NEXT: s_mov_b32 s2, 0xffff 1000; GFX940-NEXT: ;;#ASMSTART 1001; GFX940-NEXT: ; def v[2:3] 1002; GFX940-NEXT: ;;#ASMEND 1003; GFX940-NEXT: v_mov_b32_e32 v4, 0 1004; GFX940-NEXT: v_bfi_b32 v1, s2, v3, v0 1005; GFX940-NEXT: s_mov_b32 s2, 0x5040100 1006; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 1007; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 1008; GFX940-NEXT: s_waitcnt vmcnt(0) 1009; GFX940-NEXT: s_setpc_b64 s[30:31] 1010 %vec0 = call <4 x i16> asm "; def $0", "=v"() 1011 %vec1 = call <4 x i16> asm "; def $0", "=v"() 1012 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 1013 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 1014 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 5, i32 1> 1015 store <4 x i16> %shuf, ptr 
addrspace(1) %ptr, align 8 1016 ret void 1017} 1018 1019define void @v_shuffle_v4i16_v3i16__5_5_5_2(ptr addrspace(1) inreg %ptr) { 1020; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_5_5_2: 1021; GFX900: ; %bb.0: 1022; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1023; GFX900-NEXT: ;;#ASMSTART 1024; GFX900-NEXT: ; def v[0:1] 1025; GFX900-NEXT: ;;#ASMEND 1026; GFX900-NEXT: s_mov_b32 s4, 0x5040100 1027; GFX900-NEXT: v_mov_b32_e32 v4, 0 1028; GFX900-NEXT: ;;#ASMSTART 1029; GFX900-NEXT: ; def v[2:3] 1030; GFX900-NEXT: ;;#ASMEND 1031; GFX900-NEXT: v_perm_b32 v1, v1, v3, s4 1032; GFX900-NEXT: v_perm_b32 v0, v3, v3, s4 1033; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 1034; GFX900-NEXT: s_waitcnt vmcnt(0) 1035; GFX900-NEXT: s_setpc_b64 s[30:31] 1036; 1037; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_5_5_2: 1038; GFX90A: ; %bb.0: 1039; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1040; GFX90A-NEXT: ;;#ASMSTART 1041; GFX90A-NEXT: ; def v[0:1] 1042; GFX90A-NEXT: ;;#ASMEND 1043; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 1044; GFX90A-NEXT: v_mov_b32_e32 v4, 0 1045; GFX90A-NEXT: ;;#ASMSTART 1046; GFX90A-NEXT: ; def v[2:3] 1047; GFX90A-NEXT: ;;#ASMEND 1048; GFX90A-NEXT: v_perm_b32 v1, v1, v3, s4 1049; GFX90A-NEXT: v_perm_b32 v0, v3, v3, s4 1050; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 1051; GFX90A-NEXT: s_waitcnt vmcnt(0) 1052; GFX90A-NEXT: s_setpc_b64 s[30:31] 1053; 1054; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_5_2: 1055; GFX940: ; %bb.0: 1056; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1057; GFX940-NEXT: ;;#ASMSTART 1058; GFX940-NEXT: ; def v[0:1] 1059; GFX940-NEXT: ;;#ASMEND 1060; GFX940-NEXT: s_mov_b32 s2, 0x5040100 1061; GFX940-NEXT: v_mov_b32_e32 v4, 0 1062; GFX940-NEXT: ;;#ASMSTART 1063; GFX940-NEXT: ; def v[2:3] 1064; GFX940-NEXT: ;;#ASMEND 1065; GFX940-NEXT: s_nop 0 1066; GFX940-NEXT: v_perm_b32 v1, v1, v3, s2 1067; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 1068; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 1069; 
GFX940-NEXT: s_waitcnt vmcnt(0) 1070; GFX940-NEXT: s_setpc_b64 s[30:31] 1071 %vec0 = call <4 x i16> asm "; def $0", "=v"() 1072 %vec1 = call <4 x i16> asm "; def $0", "=v"() 1073 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 1074 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 1075 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 5, i32 2> 1076 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 1077 ret void 1078} 1079 1080define void @v_shuffle_v4i16_v3i16__5_5_5_3(ptr addrspace(1) inreg %ptr) { 1081; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_5_5_3: 1082; GFX900: ; %bb.0: 1083; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1084; GFX900-NEXT: ;;#ASMSTART 1085; GFX900-NEXT: ; def v[0:1] 1086; GFX900-NEXT: ;;#ASMEND 1087; GFX900-NEXT: s_mov_b32 s4, 0x5040100 1088; GFX900-NEXT: v_mov_b32_e32 v3, 0 1089; GFX900-NEXT: v_perm_b32 v2, v0, v1, s4 1090; GFX900-NEXT: v_perm_b32 v1, v1, v1, s4 1091; GFX900-NEXT: global_store_dwordx2 v3, v[1:2], s[16:17] 1092; GFX900-NEXT: s_waitcnt vmcnt(0) 1093; GFX900-NEXT: s_setpc_b64 s[30:31] 1094; 1095; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_5_5_3: 1096; GFX90A: ; %bb.0: 1097; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1098; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 1099; GFX90A-NEXT: v_mov_b32_e32 v4, 0 1100; GFX90A-NEXT: ;;#ASMSTART 1101; GFX90A-NEXT: ; def v[0:1] 1102; GFX90A-NEXT: ;;#ASMEND 1103; GFX90A-NEXT: v_perm_b32 v3, v0, v1, s4 1104; GFX90A-NEXT: v_perm_b32 v2, v1, v1, s4 1105; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17] 1106; GFX90A-NEXT: s_waitcnt vmcnt(0) 1107; GFX90A-NEXT: s_setpc_b64 s[30:31] 1108; 1109; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_5_3: 1110; GFX940: ; %bb.0: 1111; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1112; GFX940-NEXT: s_mov_b32 s2, 0x5040100 1113; GFX940-NEXT: v_mov_b32_e32 v4, 0 1114; GFX940-NEXT: ;;#ASMSTART 1115; GFX940-NEXT: ; 
def v[0:1] 1116; GFX940-NEXT: ;;#ASMEND 1117; GFX940-NEXT: s_nop 0 1118; GFX940-NEXT: v_perm_b32 v3, v0, v1, s2 1119; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 1120; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 1121; GFX940-NEXT: s_waitcnt vmcnt(0) 1122; GFX940-NEXT: s_setpc_b64 s[30:31] 1123 %vec0 = call <4 x i16> asm "; def $0", "=v"() 1124 %vec1 = call <4 x i16> asm "; def $0", "=v"() 1125 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 1126 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 1127 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 5, i32 3> 1128 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 1129 ret void 1130} 1131 1132define void @v_shuffle_v4i16_v3i16__5_5_5_4(ptr addrspace(1) inreg %ptr) { 1133; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_5_5_4: 1134; GFX900: ; %bb.0: 1135; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1136; GFX900-NEXT: s_mov_b32 s4, 0xffff 1137; GFX900-NEXT: ;;#ASMSTART 1138; GFX900-NEXT: ; def v[0:1] 1139; GFX900-NEXT: ;;#ASMEND 1140; GFX900-NEXT: v_bfi_b32 v2, s4, v1, v0 1141; GFX900-NEXT: s_mov_b32 s4, 0x5040100 1142; GFX900-NEXT: v_mov_b32_e32 v3, 0 1143; GFX900-NEXT: v_perm_b32 v1, v1, v1, s4 1144; GFX900-NEXT: global_store_dwordx2 v3, v[1:2], s[16:17] 1145; GFX900-NEXT: s_waitcnt vmcnt(0) 1146; GFX900-NEXT: s_setpc_b64 s[30:31] 1147; 1148; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_5_5_4: 1149; GFX90A: ; %bb.0: 1150; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1151; GFX90A-NEXT: s_mov_b32 s4, 0xffff 1152; GFX90A-NEXT: ;;#ASMSTART 1153; GFX90A-NEXT: ; def v[0:1] 1154; GFX90A-NEXT: ;;#ASMEND 1155; GFX90A-NEXT: v_bfi_b32 v3, s4, v1, v0 1156; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 1157; GFX90A-NEXT: v_mov_b32_e32 v4, 0 1158; GFX90A-NEXT: v_perm_b32 v2, v1, v1, s4 1159; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17] 1160; GFX90A-NEXT: s_waitcnt vmcnt(0) 1161; 
GFX90A-NEXT: s_setpc_b64 s[30:31] 1162; 1163; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_5_4: 1164; GFX940: ; %bb.0: 1165; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1166; GFX940-NEXT: s_mov_b32 s2, 0xffff 1167; GFX940-NEXT: ;;#ASMSTART 1168; GFX940-NEXT: ; def v[0:1] 1169; GFX940-NEXT: ;;#ASMEND 1170; GFX940-NEXT: v_mov_b32_e32 v4, 0 1171; GFX940-NEXT: v_bfi_b32 v3, s2, v1, v0 1172; GFX940-NEXT: s_mov_b32 s2, 0x5040100 1173; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 1174; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 1175; GFX940-NEXT: s_waitcnt vmcnt(0) 1176; GFX940-NEXT: s_setpc_b64 s[30:31] 1177 %vec0 = call <4 x i16> asm "; def $0", "=v"() 1178 %vec1 = call <4 x i16> asm "; def $0", "=v"() 1179 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 1180 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 1181 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 5, i32 4> 1182 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 1183 ret void 1184} 1185 1186define void @v_shuffle_v4i16_v3i16__5_5_5_5(ptr addrspace(1) inreg %ptr) { 1187; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_5_5_5: 1188; GFX900: ; %bb.0: 1189; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1190; GFX900-NEXT: ;;#ASMSTART 1191; GFX900-NEXT: ; def v[0:1] 1192; GFX900-NEXT: ;;#ASMEND 1193; GFX900-NEXT: s_mov_b32 s4, 0x5040100 1194; GFX900-NEXT: v_perm_b32 v0, v1, v1, s4 1195; GFX900-NEXT: v_mov_b32_e32 v2, 0 1196; GFX900-NEXT: v_mov_b32_e32 v1, v0 1197; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17] 1198; GFX900-NEXT: s_waitcnt vmcnt(0) 1199; GFX900-NEXT: s_setpc_b64 s[30:31] 1200; 1201; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_5_5_5: 1202; GFX90A: ; %bb.0: 1203; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1204; GFX90A-NEXT: ;;#ASMSTART 1205; GFX90A-NEXT: ; def v[0:1] 1206; GFX90A-NEXT: ;;#ASMEND 1207; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 
1208; GFX90A-NEXT: v_perm_b32 v0, v1, v1, s4 1209; GFX90A-NEXT: v_mov_b32_e32 v2, 0 1210; GFX90A-NEXT: v_mov_b32_e32 v1, v0 1211; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17] 1212; GFX90A-NEXT: s_waitcnt vmcnt(0) 1213; GFX90A-NEXT: s_setpc_b64 s[30:31] 1214; 1215; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_5_5: 1216; GFX940: ; %bb.0: 1217; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1218; GFX940-NEXT: ;;#ASMSTART 1219; GFX940-NEXT: ; def v[0:1] 1220; GFX940-NEXT: ;;#ASMEND 1221; GFX940-NEXT: s_mov_b32 s2, 0x5040100 1222; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 1223; GFX940-NEXT: v_mov_b32_e32 v2, 0 1224; GFX940-NEXT: v_mov_b32_e32 v1, v0 1225; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 1226; GFX940-NEXT: s_waitcnt vmcnt(0) 1227; GFX940-NEXT: s_setpc_b64 s[30:31] 1228 %vec0 = call <4 x i16> asm "; def $0", "=v"() 1229 %vec1 = call <4 x i16> asm "; def $0", "=v"() 1230 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 1231 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 1232 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 5, i32 5> 1233 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 1234 ret void 1235} 1236 1237define void @v_shuffle_v4i16_v3i16__u_0_0_0(ptr addrspace(1) inreg %ptr) { 1238; GFX900-LABEL: v_shuffle_v4i16_v3i16__u_0_0_0: 1239; GFX900: ; %bb.0: 1240; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1241; GFX900-NEXT: ;;#ASMSTART 1242; GFX900-NEXT: ; def v[0:1] 1243; GFX900-NEXT: ;;#ASMEND 1244; GFX900-NEXT: s_mov_b32 s4, 0x5040100 1245; GFX900-NEXT: v_mov_b32_e32 v2, 0 1246; GFX900-NEXT: v_perm_b32 v1, v0, v0, s4 1247; GFX900-NEXT: v_lshlrev_b32_e32 v0, 16, v0 1248; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17] 1249; GFX900-NEXT: s_waitcnt vmcnt(0) 1250; GFX900-NEXT: s_setpc_b64 s[30:31] 1251; 1252; GFX90A-LABEL: v_shuffle_v4i16_v3i16__u_0_0_0: 1253; GFX90A: ; %bb.0: 
1254; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1255; GFX90A-NEXT: ;;#ASMSTART 1256; GFX90A-NEXT: ; def v[0:1] 1257; GFX90A-NEXT: ;;#ASMEND 1258; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 1259; GFX90A-NEXT: v_mov_b32_e32 v2, 0 1260; GFX90A-NEXT: v_perm_b32 v1, v0, v0, s4 1261; GFX90A-NEXT: v_lshlrev_b32_e32 v0, 16, v0 1262; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17] 1263; GFX90A-NEXT: s_waitcnt vmcnt(0) 1264; GFX90A-NEXT: s_setpc_b64 s[30:31] 1265; 1266; GFX940-LABEL: v_shuffle_v4i16_v3i16__u_0_0_0: 1267; GFX940: ; %bb.0: 1268; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1269; GFX940-NEXT: ;;#ASMSTART 1270; GFX940-NEXT: ; def v[0:1] 1271; GFX940-NEXT: ;;#ASMEND 1272; GFX940-NEXT: s_mov_b32 s2, 0x5040100 1273; GFX940-NEXT: v_mov_b32_e32 v2, 0 1274; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 1275; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v0 1276; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 1277; GFX940-NEXT: s_waitcnt vmcnt(0) 1278; GFX940-NEXT: s_setpc_b64 s[30:31] 1279 %vec0 = call <4 x i16> asm "; def $0", "=v"() 1280 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 1281 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> <i32 poison, i32 0, i32 0, i32 0> 1282 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 1283 ret void 1284} 1285 1286define void @v_shuffle_v4i16_v3i16__0_0_0_0(ptr addrspace(1) inreg %ptr) { 1287; GFX900-LABEL: v_shuffle_v4i16_v3i16__0_0_0_0: 1288; GFX900: ; %bb.0: 1289; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1290; GFX900-NEXT: ;;#ASMSTART 1291; GFX900-NEXT: ; def v[0:1] 1292; GFX900-NEXT: ;;#ASMEND 1293; GFX900-NEXT: s_mov_b32 s4, 0x5040100 1294; GFX900-NEXT: v_perm_b32 v0, v0, v0, s4 1295; GFX900-NEXT: v_mov_b32_e32 v2, 0 1296; GFX900-NEXT: v_mov_b32_e32 v1, v0 1297; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17] 1298; GFX900-NEXT: s_waitcnt vmcnt(0) 1299; GFX900-NEXT: s_setpc_b64 s[30:31] 1300; 1301; GFX90A-LABEL: 
v_shuffle_v4i16_v3i16__0_0_0_0: 1302; GFX90A: ; %bb.0: 1303; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1304; GFX90A-NEXT: ;;#ASMSTART 1305; GFX90A-NEXT: ; def v[0:1] 1306; GFX90A-NEXT: ;;#ASMEND 1307; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 1308; GFX90A-NEXT: v_perm_b32 v0, v0, v0, s4 1309; GFX90A-NEXT: v_mov_b32_e32 v2, 0 1310; GFX90A-NEXT: v_mov_b32_e32 v1, v0 1311; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17] 1312; GFX90A-NEXT: s_waitcnt vmcnt(0) 1313; GFX90A-NEXT: s_setpc_b64 s[30:31] 1314; 1315; GFX940-LABEL: v_shuffle_v4i16_v3i16__0_0_0_0: 1316; GFX940: ; %bb.0: 1317; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1318; GFX940-NEXT: ;;#ASMSTART 1319; GFX940-NEXT: ; def v[0:1] 1320; GFX940-NEXT: ;;#ASMEND 1321; GFX940-NEXT: s_mov_b32 s2, 0x5040100 1322; GFX940-NEXT: v_perm_b32 v0, v0, v0, s2 1323; GFX940-NEXT: v_mov_b32_e32 v2, 0 1324; GFX940-NEXT: v_mov_b32_e32 v1, v0 1325; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 1326; GFX940-NEXT: s_waitcnt vmcnt(0) 1327; GFX940-NEXT: s_setpc_b64 s[30:31] 1328 %vec0 = call <4 x i16> asm "; def $0", "=v"() 1329 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 1330 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> zeroinitializer 1331 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 1332 ret void 1333} 1334 1335define void @v_shuffle_v4i16_v3i16__1_0_0_0(ptr addrspace(1) inreg %ptr) { 1336; GFX900-LABEL: v_shuffle_v4i16_v3i16__1_0_0_0: 1337; GFX900: ; %bb.0: 1338; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1339; GFX900-NEXT: ;;#ASMSTART 1340; GFX900-NEXT: ; def v[0:1] 1341; GFX900-NEXT: ;;#ASMEND 1342; GFX900-NEXT: s_mov_b32 s4, 0x5040100 1343; GFX900-NEXT: v_mov_b32_e32 v2, 0 1344; GFX900-NEXT: v_perm_b32 v1, v0, v0, s4 1345; GFX900-NEXT: v_alignbit_b32 v0, v0, v0, 16 1346; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17] 1347; GFX900-NEXT: s_waitcnt vmcnt(0) 1348; GFX900-NEXT: s_setpc_b64 
s[30:31] 1349; 1350; GFX90A-LABEL: v_shuffle_v4i16_v3i16__1_0_0_0: 1351; GFX90A: ; %bb.0: 1352; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1353; GFX90A-NEXT: ;;#ASMSTART 1354; GFX90A-NEXT: ; def v[0:1] 1355; GFX90A-NEXT: ;;#ASMEND 1356; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 1357; GFX90A-NEXT: v_mov_b32_e32 v2, 0 1358; GFX90A-NEXT: v_perm_b32 v1, v0, v0, s4 1359; GFX90A-NEXT: v_alignbit_b32 v0, v0, v0, 16 1360; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17] 1361; GFX90A-NEXT: s_waitcnt vmcnt(0) 1362; GFX90A-NEXT: s_setpc_b64 s[30:31] 1363; 1364; GFX940-LABEL: v_shuffle_v4i16_v3i16__1_0_0_0: 1365; GFX940: ; %bb.0: 1366; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1367; GFX940-NEXT: ;;#ASMSTART 1368; GFX940-NEXT: ; def v[0:1] 1369; GFX940-NEXT: ;;#ASMEND 1370; GFX940-NEXT: s_mov_b32 s2, 0x5040100 1371; GFX940-NEXT: v_mov_b32_e32 v2, 0 1372; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 1373; GFX940-NEXT: v_alignbit_b32 v0, v0, v0, 16 1374; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 1375; GFX940-NEXT: s_waitcnt vmcnt(0) 1376; GFX940-NEXT: s_setpc_b64 s[30:31] 1377 %vec0 = call <4 x i16> asm "; def $0", "=v"() 1378 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 1379 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> <i32 1, i32 0, i32 0, i32 0> 1380 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 1381 ret void 1382} 1383 1384define void @v_shuffle_v4i16_v3i16__2_0_0_0(ptr addrspace(1) inreg %ptr) { 1385; GFX900-LABEL: v_shuffle_v4i16_v3i16__2_0_0_0: 1386; GFX900: ; %bb.0: 1387; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1388; GFX900-NEXT: ;;#ASMSTART 1389; GFX900-NEXT: ; def v[0:1] 1390; GFX900-NEXT: ;;#ASMEND 1391; GFX900-NEXT: s_mov_b32 s4, 0x5040100 1392; GFX900-NEXT: v_mov_b32_e32 v3, 0 1393; GFX900-NEXT: v_perm_b32 v1, v0, v1, s4 1394; GFX900-NEXT: v_perm_b32 v2, v0, v0, s4 1395; GFX900-NEXT: global_store_dwordx2 v3, v[1:2], s[16:17] 1396; 
GFX900-NEXT: s_waitcnt vmcnt(0) 1397; GFX900-NEXT: s_setpc_b64 s[30:31] 1398; 1399; GFX90A-LABEL: v_shuffle_v4i16_v3i16__2_0_0_0: 1400; GFX90A: ; %bb.0: 1401; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1402; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 1403; GFX90A-NEXT: v_mov_b32_e32 v4, 0 1404; GFX90A-NEXT: ;;#ASMSTART 1405; GFX90A-NEXT: ; def v[0:1] 1406; GFX90A-NEXT: ;;#ASMEND 1407; GFX90A-NEXT: v_perm_b32 v2, v0, v1, s4 1408; GFX90A-NEXT: v_perm_b32 v3, v0, v0, s4 1409; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17] 1410; GFX90A-NEXT: s_waitcnt vmcnt(0) 1411; GFX90A-NEXT: s_setpc_b64 s[30:31] 1412; 1413; GFX940-LABEL: v_shuffle_v4i16_v3i16__2_0_0_0: 1414; GFX940: ; %bb.0: 1415; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1416; GFX940-NEXT: s_mov_b32 s2, 0x5040100 1417; GFX940-NEXT: v_mov_b32_e32 v4, 0 1418; GFX940-NEXT: ;;#ASMSTART 1419; GFX940-NEXT: ; def v[0:1] 1420; GFX940-NEXT: ;;#ASMEND 1421; GFX940-NEXT: s_nop 0 1422; GFX940-NEXT: v_perm_b32 v2, v0, v1, s2 1423; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 1424; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 1425; GFX940-NEXT: s_waitcnt vmcnt(0) 1426; GFX940-NEXT: s_setpc_b64 s[30:31] 1427 %vec0 = call <4 x i16> asm "; def $0", "=v"() 1428 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 1429 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> <i32 2, i32 0, i32 0, i32 0> 1430 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 1431 ret void 1432} 1433 1434define void @v_shuffle_v4i16_v3i16__3_0_0_0(ptr addrspace(1) inreg %ptr) { 1435; GFX900-LABEL: v_shuffle_v4i16_v3i16__3_0_0_0: 1436; GFX900: ; %bb.0: 1437; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1438; GFX900-NEXT: ;;#ASMSTART 1439; GFX900-NEXT: ; def v[0:1] 1440; GFX900-NEXT: ;;#ASMEND 1441; GFX900-NEXT: s_mov_b32 s4, 0x5040100 1442; GFX900-NEXT: v_mov_b32_e32 v2, 0 1443; GFX900-NEXT: v_perm_b32 v1, v0, v0, s4 1444; GFX900-NEXT: v_lshlrev_b32_e32 
v0, 16, v0 1445; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17] 1446; GFX900-NEXT: s_waitcnt vmcnt(0) 1447; GFX900-NEXT: s_setpc_b64 s[30:31] 1448; 1449; GFX90A-LABEL: v_shuffle_v4i16_v3i16__3_0_0_0: 1450; GFX90A: ; %bb.0: 1451; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1452; GFX90A-NEXT: ;;#ASMSTART 1453; GFX90A-NEXT: ; def v[0:1] 1454; GFX90A-NEXT: ;;#ASMEND 1455; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 1456; GFX90A-NEXT: v_mov_b32_e32 v2, 0 1457; GFX90A-NEXT: v_perm_b32 v1, v0, v0, s4 1458; GFX90A-NEXT: v_lshlrev_b32_e32 v0, 16, v0 1459; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17] 1460; GFX90A-NEXT: s_waitcnt vmcnt(0) 1461; GFX90A-NEXT: s_setpc_b64 s[30:31] 1462; 1463; GFX940-LABEL: v_shuffle_v4i16_v3i16__3_0_0_0: 1464; GFX940: ; %bb.0: 1465; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1466; GFX940-NEXT: ;;#ASMSTART 1467; GFX940-NEXT: ; def v[0:1] 1468; GFX940-NEXT: ;;#ASMEND 1469; GFX940-NEXT: s_mov_b32 s2, 0x5040100 1470; GFX940-NEXT: v_mov_b32_e32 v2, 0 1471; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 1472; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v0 1473; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 1474; GFX940-NEXT: s_waitcnt vmcnt(0) 1475; GFX940-NEXT: s_setpc_b64 s[30:31] 1476 %vec0 = call <4 x i16> asm "; def $0", "=v"() 1477 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 1478 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> <i32 3, i32 0, i32 0, i32 0> 1479 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 1480 ret void 1481} 1482 1483define void @v_shuffle_v4i16_v3i16__4_0_0_0(ptr addrspace(1) inreg %ptr) { 1484; GFX900-LABEL: v_shuffle_v4i16_v3i16__4_0_0_0: 1485; GFX900: ; %bb.0: 1486; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1487; GFX900-NEXT: ;;#ASMSTART 1488; GFX900-NEXT: ; def v[0:1] 1489; GFX900-NEXT: ;;#ASMEND 1490; GFX900-NEXT: ;;#ASMSTART 1491; GFX900-NEXT: ; def v[1:2] 1492; GFX900-NEXT: ;;#ASMEND 1493; 
GFX900-NEXT: s_mov_b32 s4, 0x5040100 1494; GFX900-NEXT: v_mov_b32_e32 v3, 0 1495; GFX900-NEXT: v_perm_b32 v2, v0, v0, s4 1496; GFX900-NEXT: v_alignbit_b32 v1, v0, v1, 16 1497; GFX900-NEXT: global_store_dwordx2 v3, v[1:2], s[16:17] 1498; GFX900-NEXT: s_waitcnt vmcnt(0) 1499; GFX900-NEXT: s_setpc_b64 s[30:31] 1500; 1501; GFX90A-LABEL: v_shuffle_v4i16_v3i16__4_0_0_0: 1502; GFX90A: ; %bb.0: 1503; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1504; GFX90A-NEXT: ;;#ASMSTART 1505; GFX90A-NEXT: ; def v[0:1] 1506; GFX90A-NEXT: ;;#ASMEND 1507; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 1508; GFX90A-NEXT: v_mov_b32_e32 v4, 0 1509; GFX90A-NEXT: ;;#ASMSTART 1510; GFX90A-NEXT: ; def v[2:3] 1511; GFX90A-NEXT: ;;#ASMEND 1512; GFX90A-NEXT: v_perm_b32 v1, v0, v0, s4 1513; GFX90A-NEXT: v_alignbit_b32 v0, v0, v2, 16 1514; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 1515; GFX90A-NEXT: s_waitcnt vmcnt(0) 1516; GFX90A-NEXT: s_setpc_b64 s[30:31] 1517; 1518; GFX940-LABEL: v_shuffle_v4i16_v3i16__4_0_0_0: 1519; GFX940: ; %bb.0: 1520; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1521; GFX940-NEXT: ;;#ASMSTART 1522; GFX940-NEXT: ; def v[0:1] 1523; GFX940-NEXT: ;;#ASMEND 1524; GFX940-NEXT: s_mov_b32 s2, 0x5040100 1525; GFX940-NEXT: v_mov_b32_e32 v4, 0 1526; GFX940-NEXT: ;;#ASMSTART 1527; GFX940-NEXT: ; def v[2:3] 1528; GFX940-NEXT: ;;#ASMEND 1529; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 1530; GFX940-NEXT: v_alignbit_b32 v0, v0, v2, 16 1531; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 1532; GFX940-NEXT: s_waitcnt vmcnt(0) 1533; GFX940-NEXT: s_setpc_b64 s[30:31] 1534 %vec0 = call <4 x i16> asm "; def $0", "=v"() 1535 %vec1 = call <4 x i16> asm "; def $0", "=v"() 1536 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 1537 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 1538 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 4, i32 0, i32 0, i32 
0> 1539 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 1540 ret void 1541} 1542 1543define void @v_shuffle_v4i16_v3i16__5_0_0_0(ptr addrspace(1) inreg %ptr) { 1544; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_0_0_0: 1545; GFX900: ; %bb.0: 1546; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1547; GFX900-NEXT: ;;#ASMSTART 1548; GFX900-NEXT: ; def v[0:1] 1549; GFX900-NEXT: ;;#ASMEND 1550; GFX900-NEXT: ;;#ASMSTART 1551; GFX900-NEXT: ; def v[1:2] 1552; GFX900-NEXT: ;;#ASMEND 1553; GFX900-NEXT: s_mov_b32 s4, 0x5040100 1554; GFX900-NEXT: v_mov_b32_e32 v3, 0 1555; GFX900-NEXT: v_perm_b32 v1, v0, v2, s4 1556; GFX900-NEXT: v_perm_b32 v2, v0, v0, s4 1557; GFX900-NEXT: global_store_dwordx2 v3, v[1:2], s[16:17] 1558; GFX900-NEXT: s_waitcnt vmcnt(0) 1559; GFX900-NEXT: s_setpc_b64 s[30:31] 1560; 1561; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_0_0_0: 1562; GFX90A: ; %bb.0: 1563; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1564; GFX90A-NEXT: ;;#ASMSTART 1565; GFX90A-NEXT: ; def v[2:3] 1566; GFX90A-NEXT: ;;#ASMEND 1567; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 1568; GFX90A-NEXT: v_mov_b32_e32 v4, 0 1569; GFX90A-NEXT: ;;#ASMSTART 1570; GFX90A-NEXT: ; def v[0:1] 1571; GFX90A-NEXT: ;;#ASMEND 1572; GFX90A-NEXT: v_perm_b32 v2, v0, v3, s4 1573; GFX90A-NEXT: v_perm_b32 v3, v0, v0, s4 1574; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17] 1575; GFX90A-NEXT: s_waitcnt vmcnt(0) 1576; GFX90A-NEXT: s_setpc_b64 s[30:31] 1577; 1578; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_0_0_0: 1579; GFX940: ; %bb.0: 1580; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1581; GFX940-NEXT: ;;#ASMSTART 1582; GFX940-NEXT: ; def v[2:3] 1583; GFX940-NEXT: ;;#ASMEND 1584; GFX940-NEXT: s_mov_b32 s2, 0x5040100 1585; GFX940-NEXT: v_mov_b32_e32 v4, 0 1586; GFX940-NEXT: ;;#ASMSTART 1587; GFX940-NEXT: ; def v[0:1] 1588; GFX940-NEXT: ;;#ASMEND 1589; GFX940-NEXT: s_nop 0 1590; GFX940-NEXT: v_perm_b32 v2, v0, v3, s2 1591; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 1592; GFX940-NEXT: global_store_dwordx2 v4, 
v[2:3], s[0:1] sc0 sc1
; GFX940-NEXT: s_waitcnt vmcnt(0)
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 0, i32 0, i32 0>
  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
  ret void
}

; Mask <5, poison, 0, 0>. Indices 0-2 read %extract3 and 3-5 read %extract31,
; so the stored lanes are: %extract31[2], poison, %extract3[0], %extract3[0].
define void @v_shuffle_v4i16_v3i16__5_u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_u_0_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[1:2]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s4, 0x5040100
; GFX900-NEXT: v_mov_b32_e32 v3, 0
; GFX900-NEXT: v_perm_b32 v1, v0, v0, s4
; GFX900-NEXT: v_mov_b32_e32 v0, v2
; GFX900-NEXT: global_store_dwordx2 v3, v[0:1], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_u_0_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:1]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s4, 0x5040100
; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:3]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_perm_b32 v1, v0, v0, s4
; GFX90A-NEXT: v_mov_b32_e32 v0, v3
; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_u_0_0:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def v[0:1]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_mov_b32 s2, 0x5040100
; GFX940-NEXT: v_mov_b32_e32 v4, 0
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def v[2:3]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2
; GFX940-NEXT: v_mov_b32_e32 v0, v3
; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
; GFX940-NEXT: s_waitcnt vmcnt(0)
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 poison, i32 0, i32 0>
  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
  ret void
}

; Mask <5, 1, 0, 0>. Indices 0-2 read %extract3 and 3-5 read %extract31,
; so the stored lanes are: %extract31[2], %extract3[1], %extract3[0], %extract3[0].
define void @v_shuffle_v4i16_v3i16__5_1_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_1_0_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[1:2]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s4, 0xffff
; GFX900-NEXT: v_bfi_b32 v1, s4, v2, v0
; GFX900-NEXT: s_mov_b32 s4, 0x5040100
; GFX900-NEXT: v_mov_b32_e32 v3, 0
; GFX900-NEXT: v_perm_b32 v2, v0, v0, s4
; GFX900-NEXT: global_store_dwordx2 v3, v[1:2], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_1_0_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:3]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s4, 0xffff
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:1]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_bfi_b32 v2, s4, v3, v0
; GFX90A-NEXT: s_mov_b32 s4, 0x5040100
; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: v_perm_b32 v3, v0, v0, s4
; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_1_0_0:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def v[2:3]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_mov_b32 s2, 0xffff
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def v[0:1]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: v_mov_b32_e32 v4, 0
; GFX940-NEXT: v_bfi_b32 v2, s2, v3, v0
; GFX940-NEXT: s_mov_b32 s2, 0x5040100
; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2
; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
; GFX940-NEXT: s_waitcnt vmcnt(0)
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 1, i32 0, i32 0>
  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
  ret void
}

; Mask <5, 2, 0, 0>. Indices 0-2 read %extract3 and 3-5 read %extract31,
; so the stored lanes are: %extract31[2], %extract3[2], %extract3[0], %extract3[0].
define void @v_shuffle_v4i16_v3i16__5_2_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_2_0_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[2:3]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s4, 0x5040100
; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: v_perm_b32 v1, v1, v3, s4
; GFX900-NEXT: v_perm_b32 v2, v0, v0, s4
; GFX900-NEXT: global_store_dwordx2 v4, v[1:2], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_2_0_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:3]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s4, 0x5040100
; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:1]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_perm_b32 v2, v1, v3, s4
; GFX90A-NEXT: v_perm_b32 v3, v0, v0, s4
; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_2_0_0:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def v[2:3]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_mov_b32 s2, 0x5040100
; GFX940-NEXT: v_mov_b32_e32 v4, 0
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def v[0:1]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_nop 0
; GFX940-NEXT: v_perm_b32 v2, v1, v3, s2
; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2
; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
; GFX940-NEXT: s_waitcnt vmcnt(0)
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 2, i32 0, i32 0>
  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
  ret void
}

; Mask <5, 3, 0, 0>. Indices 0-2 read %extract3 and 3-5 read %extract31,
; so the stored lanes are: %extract31[2], %extract31[0], %extract3[0], %extract3[0].
define void @v_shuffle_v4i16_v3i16__5_3_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_3_0_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[1:2]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s4, 0x5040100
; GFX900-NEXT: v_mov_b32_e32 v3, 0
; GFX900-NEXT: v_perm_b32 v1, v1, v2, s4
; GFX900-NEXT: v_perm_b32 v2, v0, v0, s4
; GFX900-NEXT: global_store_dwordx2 v3, v[1:2], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_3_0_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:3]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s4, 0x5040100
; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:1]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_perm_b32 v2, v2, v3, s4
; GFX90A-NEXT: v_perm_b32 v3, v0, v0, s4
; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_3_0_0:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def v[2:3]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_mov_b32 s2, 0x5040100
; GFX940-NEXT: v_mov_b32_e32 v4, 0
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def v[0:1]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: v_perm_b32 v2, v2, v3, s2
; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2
; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
; GFX940-NEXT: s_waitcnt vmcnt(0)
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 3, i32 0, i32 0>
  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
  ret void
}

; Mask <5, 4, 0, 0>. Indices 0-2 read %extract3 and 3-5 read %extract31,
; so the stored lanes are: %extract31[2], %extract31[1], %extract3[0], %extract3[0].
define void @v_shuffle_v4i16_v3i16__5_4_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_4_0_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[1:2]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s4, 0xffff
; GFX900-NEXT: v_bfi_b32 v1, s4, v2, v1
; GFX900-NEXT: s_mov_b32 s4, 0x5040100
; GFX900-NEXT: v_mov_b32_e32 v3, 0
; GFX900-NEXT: v_perm_b32 v2, v0, v0, s4
; GFX900-NEXT: global_store_dwordx2 v3, v[1:2], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_4_0_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:3]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s4, 0xffff
; GFX90A-NEXT: v_bfi_b32 v2, s4, v3, v2
; GFX90A-NEXT: s_mov_b32 s4, 0x5040100
; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:1]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_perm_b32 v3, v0, v0, s4
; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_4_0_0:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def v[2:3]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_mov_b32 s2, 0xffff
; GFX940-NEXT: v_bfi_b32 v2, s2, v3, v2
; GFX940-NEXT: s_mov_b32 s2, 0x5040100
; GFX940-NEXT: v_mov_b32_e32 v4, 0
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def v[0:1]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_nop 0
; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2
; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
; GFX940-NEXT: s_waitcnt vmcnt(0)
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 4, i32 0, i32 0>
  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
  ret void
}

; Mask <5, 5, 0, 0>. Indices 0-2 read %extract3 and 3-5 read %extract31,
; so the stored lanes are: %extract31[2], %extract31[2], %extract3[0], %extract3[0].
define void @v_shuffle_v4i16_v3i16__5_5_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_5_0_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[1:2]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s4, 0x5040100
; GFX900-NEXT: v_mov_b32_e32 v3, 0
; GFX900-NEXT: v_perm_b32 v1, v0, v0, s4
; GFX900-NEXT: v_perm_b32 v0, v2, v2, s4
; GFX900-NEXT: global_store_dwordx2 v3, v[0:1], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_5_0_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:1]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s4, 0x5040100
; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:3]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_perm_b32 v1, v0, v0, s4
; GFX90A-NEXT: v_perm_b32 v0, v3, v3, s4
; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_0_0:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def v[0:1]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_mov_b32 s2, 0x5040100
; GFX940-NEXT: v_mov_b32_e32 v4, 0
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def v[2:3]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2
; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2
; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
; GFX940-NEXT: s_waitcnt vmcnt(0)
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 0, i32 0>
  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
  ret void
}

; Mask <5, 5, poison, 0>. Indices 0-2 read %extract3 and 3-5 read %extract31,
; so the stored lanes are: %extract31[2], %extract31[2], poison, %extract3[0].
define void @v_shuffle_v4i16_v3i16__5_5_u_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_5_u_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[1:2]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s4, 0x5040100
; GFX900-NEXT: v_mov_b32_e32 v3, 0
; GFX900-NEXT: v_perm_b32 v1, v2, v2, s4
; GFX900-NEXT: v_lshlrev_b32_e32 v2, 16, v0
; GFX900-NEXT: global_store_dwordx2 v3, v[1:2], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_5_u_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:3]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s4, 0x5040100
; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:1]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_perm_b32 v2, v3, v3, s4
; GFX90A-NEXT: v_lshlrev_b32_e32 v3, 16, v0
; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_u_0:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def v[2:3]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_mov_b32 s2, 0x5040100
; GFX940-NEXT: v_mov_b32_e32 v4, 0
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def v[0:1]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: v_perm_b32 v2, v3, v3, s2
; GFX940-NEXT: v_lshlrev_b32_e32 v3, 16, v0
; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
; GFX940-NEXT: s_waitcnt vmcnt(0)
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 poison, i32 0>
  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
  ret void
}

; Mask <5, 5, 1, 0>. Indices 0-2 read %extract3 and 3-5 read %extract31,
; so the stored lanes are: %extract31[2], %extract31[2], %extract3[1], %extract3[0].
define void @v_shuffle_v4i16_v3i16__5_5_1_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_5_1_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[1:2]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s4, 0x5040100
; GFX900-NEXT: v_mov_b32_e32 v3, 0
; GFX900-NEXT: v_alignbit_b32 v1, v0, v0, 16
; GFX900-NEXT: v_perm_b32 v0, v2, v2, s4
; GFX900-NEXT: global_store_dwordx2 v3, v[0:1], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_5_1_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:1]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s4, 0x5040100
; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:3]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_alignbit_b32 v1, v0, v0, 16
; GFX90A-NEXT: v_perm_b32 v0, v3, v3, s4
; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_1_0:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def v[0:1]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_mov_b32 s2, 0x5040100
; GFX940-NEXT: v_mov_b32_e32 v4, 0
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def v[2:3]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: v_alignbit_b32 v1, v0, v0, 16
; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2
; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
; GFX940-NEXT: s_waitcnt vmcnt(0)
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 1, i32 0>
  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
  ret void
}

; Mask <5, 5, 2, 0>. Indices 0-2 read %extract3 and 3-5 read %extract31,
; so the stored lanes are: %extract31[2], %extract31[2], %extract3[2], %extract3[0].
define void @v_shuffle_v4i16_v3i16__5_5_2_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_5_2_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s4, 0x5040100
; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[2:3]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_perm_b32 v1, v0, v1, s4
; GFX900-NEXT: v_perm_b32 v0, v3, v3, s4
; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_5_2_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:1]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s4, 0x5040100
; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:3]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_perm_b32 v1, v0, v1, s4
; GFX90A-NEXT: v_perm_b32 v0, v3, v3, s4
; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_2_0:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def v[0:1]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_mov_b32 s2, 0x5040100
; GFX940-NEXT: v_mov_b32_e32 v4, 0
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def v[2:3]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: v_perm_b32 v1, v0, v1, s2
; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2
; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
; GFX940-NEXT: s_waitcnt vmcnt(0)
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 2, i32 0>
  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
  ret void
}

; Mask <5, 5, 3, 0>. Indices 0-2 read %extract3 and 3-5 read %extract31,
; so the stored lanes are: %extract31[2], %extract31[2], %extract31[0], %extract3[0].
define void @v_shuffle_v4i16_v3i16__5_5_3_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_5_3_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[1:2]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s4, 0x5040100
; GFX900-NEXT: v_mov_b32_e32 v3, 0
; GFX900-NEXT: v_perm_b32 v1, v0, v1, s4
; GFX900-NEXT: v_perm_b32 v0, v2, v2, s4
; GFX900-NEXT: global_store_dwordx2 v3, v[0:1], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_5_3_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:1]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s4, 0x5040100
; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:3]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_perm_b32 v1, v0, v2, s4
; GFX90A-NEXT: v_perm_b32 v0, v3, v3, s4
; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_3_0:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def v[0:1]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_mov_b32 s2, 0x5040100
; GFX940-NEXT: v_mov_b32_e32 v4, 0
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def v[2:3]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_nop 0
; GFX940-NEXT: v_perm_b32 v1, v0, v2, s2
; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2
; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
; GFX940-NEXT: s_waitcnt vmcnt(0)
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 3, i32 0>
  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
  ret void
}

; Mask <5, 5, 4, 0>. Indices 0-2 read %extract3 and 3-5 read %extract31,
; so the stored lanes are: %extract31[2], %extract31[2], %extract31[1], %extract3[0].
define void @v_shuffle_v4i16_v3i16__5_5_4_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_5_4_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[1:2]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s4, 0x5040100
; GFX900-NEXT: v_mov_b32_e32 v3, 0
; GFX900-NEXT: v_alignbit_b32 v1, v0, v1, 16
; GFX900-NEXT: v_perm_b32 v0, v2, v2, s4
; GFX900-NEXT: global_store_dwordx2 v3, v[0:1], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_5_4_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:1]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s4, 0x5040100
; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:3]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_alignbit_b32 v1, v0, v2, 16
; GFX90A-NEXT: v_perm_b32 v0, v3, v3, s4
; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_4_0:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def v[0:1]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_mov_b32 s2, 0x5040100
; GFX940-NEXT: v_mov_b32_e32 v4, 0
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def v[2:3]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_nop 0
; GFX940-NEXT: v_alignbit_b32 v1, v0, v2, 16
; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2
; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
; GFX940-NEXT: s_waitcnt vmcnt(0)
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 4, i32 0>
  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
  ret void
}

; Mask <poison, 1, 1, 1> over a single source (the second operand is poison):
; lane 0 is poison and lanes 1-3 are all %extract3[1].
define void @v_shuffle_v4i16_v3i16__u_1_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i16_v3i16__u_1_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s4, 0x7060302
; GFX900-NEXT: v_mov_b32_e32 v2, 0
; GFX900-NEXT: v_perm_b32 v1, v0, v0, s4
; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i16_v3i16__u_1_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:1]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
; GFX90A-NEXT: v_mov_b32_e32 v2, 0
; GFX90A-NEXT: v_perm_b32 v1, v0, v0, s4
; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v4i16_v3i16__u_1_1_1:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def v[0:1]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_mov_b32 s2, 0x7060302
; GFX940-NEXT: v_mov_b32_e32 v2, 0
; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2
; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
; GFX940-NEXT: s_waitcnt vmcnt(0)
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %shuf = shufflevector <3 x i16>
%extract3, <3 x i16> poison, <4 x i32> <i32 poison, i32 1, i32 1, i32 1>
  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
  ret void
}

; Mask <0, 1, 1, 1> over a single source (the second operand is poison):
; lane 0 is %extract3[0] and lanes 1-3 are all %extract3[1].
define void @v_shuffle_v4i16_v3i16__0_1_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i16_v3i16__0_1_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s4, 0x7060302
; GFX900-NEXT: v_mov_b32_e32 v2, 0
; GFX900-NEXT: v_perm_b32 v1, v0, v0, s4
; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i16_v3i16__0_1_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:1]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
; GFX90A-NEXT: v_mov_b32_e32 v2, 0
; GFX90A-NEXT: v_perm_b32 v1, v0, v0, s4
; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v4i16_v3i16__0_1_1_1:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def v[0:1]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_mov_b32 s2, 0x7060302
; GFX940-NEXT: v_mov_b32_e32 v2, 0
; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2
; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
; GFX940-NEXT: s_waitcnt vmcnt(0)
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 1>
  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
  ret void
}

; Mask <1, 1, 1, 1> over a single source (the second operand is poison):
; all four stored lanes are %extract3[1].
define void @v_shuffle_v4i16_v3i16__1_1_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i16_v3i16__1_1_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s4, 0x7060302
; GFX900-NEXT: v_perm_b32 v0, v0, v0, s4
; GFX900-NEXT: v_mov_b32_e32 v2, 0
; GFX900-NEXT: v_mov_b32_e32 v1, v0
; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i16_v3i16__1_1_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:1]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
; GFX90A-NEXT: v_perm_b32 v0, v0, v0, s4
; GFX90A-NEXT: v_mov_b32_e32 v2, 0
; GFX90A-NEXT: v_mov_b32_e32 v1, v0
; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v4i16_v3i16__1_1_1_1:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def v[0:1]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_mov_b32 s2, 0x7060302
; GFX940-NEXT: v_perm_b32 v0, v0, v0, s2
; GFX940-NEXT: v_mov_b32_e32 v2, 0
; GFX940-NEXT: v_mov_b32_e32 v1, v0
; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
; GFX940-NEXT: s_waitcnt vmcnt(0)
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> <i32 1, i32
1, i32 1, i32 1>
  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
  ret void
}

; Mask <2, 1, 1, 1> over a single source (the second operand is poison):
; lane 0 is %extract3[2] and lanes 1-3 are all %extract3[1].
define void @v_shuffle_v4i16_v3i16__2_1_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i16_v3i16__2_1_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s4, 0xffff
; GFX900-NEXT: v_bfi_b32 v1, s4, v1, v0
; GFX900-NEXT: s_mov_b32 s4, 0x7060302
; GFX900-NEXT: v_mov_b32_e32 v3, 0
; GFX900-NEXT: v_perm_b32 v2, v0, v0, s4
; GFX900-NEXT: global_store_dwordx2 v3, v[1:2], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i16_v3i16__2_1_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: s_mov_b32 s4, 0xffff
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:1]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_bfi_b32 v2, s4, v1, v0
; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: v_perm_b32 v3, v0, v0, s4
; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v4i16_v3i16__2_1_1_1:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: s_mov_b32 s2, 0xffff
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def v[0:1]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: v_mov_b32_e32 v4, 0
; GFX940-NEXT: v_bfi_b32 v2, s2, v1, v0
; GFX940-NEXT: s_mov_b32 s2, 0x7060302
; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2
; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
; GFX940-NEXT: s_waitcnt vmcnt(0)
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> <i32 2, i32 1, i32 1, i32 1>
  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
  ret void
}

; Mask <3, 1, 1, 1> with a poison second operand: index 3 selects element 0 of
; that poison operand, so lane 0 is poison; lanes 1-3 are all %extract3[1].
define void @v_shuffle_v4i16_v3i16__3_1_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i16_v3i16__3_1_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s4, 0x7060302
; GFX900-NEXT: v_mov_b32_e32 v2, 0
; GFX900-NEXT: v_perm_b32 v1, v0, v0, s4
; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i16_v3i16__3_1_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:1]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
; GFX90A-NEXT: v_mov_b32_e32 v2, 0
; GFX90A-NEXT: v_perm_b32 v1, v0, v0, s4
; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v4i16_v3i16__3_1_1_1:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def v[0:1]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_mov_b32 s2, 0x7060302
; GFX940-NEXT: v_mov_b32_e32 v2, 0
; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2
; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
; GFX940-NEXT: s_waitcnt vmcnt(0)
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
%shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> <i32 3, i32 1, i32 1, i32 1> 2509 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 2510 ret void 2511} 2512 2513define void @v_shuffle_v4i16_v3i16__4_1_1_1(ptr addrspace(1) inreg %ptr) { 2514; GFX900-LABEL: v_shuffle_v4i16_v3i16__4_1_1_1: 2515; GFX900: ; %bb.0: 2516; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2517; GFX900-NEXT: ;;#ASMSTART 2518; GFX900-NEXT: ; def v[0:1] 2519; GFX900-NEXT: ;;#ASMEND 2520; GFX900-NEXT: ;;#ASMSTART 2521; GFX900-NEXT: ; def v[1:2] 2522; GFX900-NEXT: ;;#ASMEND 2523; GFX900-NEXT: s_mov_b32 s4, 0x7060302 2524; GFX900-NEXT: v_mov_b32_e32 v3, 0 2525; GFX900-NEXT: v_perm_b32 v1, v0, v1, s4 2526; GFX900-NEXT: v_perm_b32 v2, v0, v0, s4 2527; GFX900-NEXT: global_store_dwordx2 v3, v[1:2], s[16:17] 2528; GFX900-NEXT: s_waitcnt vmcnt(0) 2529; GFX900-NEXT: s_setpc_b64 s[30:31] 2530; 2531; GFX90A-LABEL: v_shuffle_v4i16_v3i16__4_1_1_1: 2532; GFX90A: ; %bb.0: 2533; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2534; GFX90A-NEXT: ;;#ASMSTART 2535; GFX90A-NEXT: ; def v[2:3] 2536; GFX90A-NEXT: ;;#ASMEND 2537; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 2538; GFX90A-NEXT: v_mov_b32_e32 v4, 0 2539; GFX90A-NEXT: ;;#ASMSTART 2540; GFX90A-NEXT: ; def v[0:1] 2541; GFX90A-NEXT: ;;#ASMEND 2542; GFX90A-NEXT: v_perm_b32 v2, v0, v2, s4 2543; GFX90A-NEXT: v_perm_b32 v3, v0, v0, s4 2544; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17] 2545; GFX90A-NEXT: s_waitcnt vmcnt(0) 2546; GFX90A-NEXT: s_setpc_b64 s[30:31] 2547; 2548; GFX940-LABEL: v_shuffle_v4i16_v3i16__4_1_1_1: 2549; GFX940: ; %bb.0: 2550; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2551; GFX940-NEXT: ;;#ASMSTART 2552; GFX940-NEXT: ; def v[2:3] 2553; GFX940-NEXT: ;;#ASMEND 2554; GFX940-NEXT: s_mov_b32 s2, 0x7060302 2555; GFX940-NEXT: v_mov_b32_e32 v4, 0 2556; GFX940-NEXT: ;;#ASMSTART 2557; GFX940-NEXT: ; def v[0:1] 2558; GFX940-NEXT: ;;#ASMEND 2559; GFX940-NEXT: s_nop 0 2560; GFX940-NEXT: v_perm_b32 v2, v0, 
v2, s2 2561; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 2562; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 2563; GFX940-NEXT: s_waitcnt vmcnt(0) 2564; GFX940-NEXT: s_setpc_b64 s[30:31] 2565 %vec0 = call <4 x i16> asm "; def $0", "=v"() 2566 %vec1 = call <4 x i16> asm "; def $0", "=v"() 2567 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 2568 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 2569 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 4, i32 1, i32 1, i32 1> 2570 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 2571 ret void 2572} 2573 2574define void @v_shuffle_v4i16_v3i16__5_1_1_1(ptr addrspace(1) inreg %ptr) { 2575; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_1_1_1: 2576; GFX900: ; %bb.0: 2577; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2578; GFX900-NEXT: ;;#ASMSTART 2579; GFX900-NEXT: ; def v[0:1] 2580; GFX900-NEXT: ;;#ASMEND 2581; GFX900-NEXT: ;;#ASMSTART 2582; GFX900-NEXT: ; def v[1:2] 2583; GFX900-NEXT: ;;#ASMEND 2584; GFX900-NEXT: s_mov_b32 s4, 0xffff 2585; GFX900-NEXT: v_bfi_b32 v1, s4, v2, v0 2586; GFX900-NEXT: s_mov_b32 s4, 0x7060302 2587; GFX900-NEXT: v_mov_b32_e32 v3, 0 2588; GFX900-NEXT: v_perm_b32 v2, v0, v0, s4 2589; GFX900-NEXT: global_store_dwordx2 v3, v[1:2], s[16:17] 2590; GFX900-NEXT: s_waitcnt vmcnt(0) 2591; GFX900-NEXT: s_setpc_b64 s[30:31] 2592; 2593; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_1_1_1: 2594; GFX90A: ; %bb.0: 2595; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2596; GFX90A-NEXT: ;;#ASMSTART 2597; GFX90A-NEXT: ; def v[2:3] 2598; GFX90A-NEXT: ;;#ASMEND 2599; GFX90A-NEXT: s_mov_b32 s4, 0xffff 2600; GFX90A-NEXT: ;;#ASMSTART 2601; GFX90A-NEXT: ; def v[0:1] 2602; GFX90A-NEXT: ;;#ASMEND 2603; GFX90A-NEXT: v_bfi_b32 v2, s4, v3, v0 2604; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 2605; GFX90A-NEXT: v_mov_b32_e32 v4, 0 2606; GFX90A-NEXT: v_perm_b32 v3, v0, v0, s4 2607; GFX90A-NEXT: 
global_store_dwordx2 v4, v[2:3], s[16:17] 2608; GFX90A-NEXT: s_waitcnt vmcnt(0) 2609; GFX90A-NEXT: s_setpc_b64 s[30:31] 2610; 2611; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_1_1_1: 2612; GFX940: ; %bb.0: 2613; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2614; GFX940-NEXT: ;;#ASMSTART 2615; GFX940-NEXT: ; def v[2:3] 2616; GFX940-NEXT: ;;#ASMEND 2617; GFX940-NEXT: s_mov_b32 s2, 0xffff 2618; GFX940-NEXT: ;;#ASMSTART 2619; GFX940-NEXT: ; def v[0:1] 2620; GFX940-NEXT: ;;#ASMEND 2621; GFX940-NEXT: v_mov_b32_e32 v4, 0 2622; GFX940-NEXT: v_bfi_b32 v2, s2, v3, v0 2623; GFX940-NEXT: s_mov_b32 s2, 0x7060302 2624; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 2625; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 2626; GFX940-NEXT: s_waitcnt vmcnt(0) 2627; GFX940-NEXT: s_setpc_b64 s[30:31] 2628 %vec0 = call <4 x i16> asm "; def $0", "=v"() 2629 %vec1 = call <4 x i16> asm "; def $0", "=v"() 2630 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 2631 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 2632 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 1, i32 1, i32 1> 2633 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 2634 ret void 2635} 2636 2637define void @v_shuffle_v4i16_v3i16__5_u_1_1(ptr addrspace(1) inreg %ptr) { 2638; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_u_1_1: 2639; GFX900: ; %bb.0: 2640; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2641; GFX900-NEXT: ;;#ASMSTART 2642; GFX900-NEXT: ; def v[0:1] 2643; GFX900-NEXT: ;;#ASMEND 2644; GFX900-NEXT: ;;#ASMSTART 2645; GFX900-NEXT: ; def v[1:2] 2646; GFX900-NEXT: ;;#ASMEND 2647; GFX900-NEXT: s_mov_b32 s4, 0x7060302 2648; GFX900-NEXT: v_mov_b32_e32 v3, 0 2649; GFX900-NEXT: v_perm_b32 v1, v0, v0, s4 2650; GFX900-NEXT: v_mov_b32_e32 v0, v2 2651; GFX900-NEXT: global_store_dwordx2 v3, v[0:1], s[16:17] 2652; GFX900-NEXT: s_waitcnt vmcnt(0) 2653; GFX900-NEXT: s_setpc_b64 s[30:31] 
2654; 2655; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_u_1_1: 2656; GFX90A: ; %bb.0: 2657; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2658; GFX90A-NEXT: ;;#ASMSTART 2659; GFX90A-NEXT: ; def v[0:1] 2660; GFX90A-NEXT: ;;#ASMEND 2661; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 2662; GFX90A-NEXT: v_mov_b32_e32 v4, 0 2663; GFX90A-NEXT: ;;#ASMSTART 2664; GFX90A-NEXT: ; def v[2:3] 2665; GFX90A-NEXT: ;;#ASMEND 2666; GFX90A-NEXT: v_perm_b32 v1, v0, v0, s4 2667; GFX90A-NEXT: v_mov_b32_e32 v0, v3 2668; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 2669; GFX90A-NEXT: s_waitcnt vmcnt(0) 2670; GFX90A-NEXT: s_setpc_b64 s[30:31] 2671; 2672; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_u_1_1: 2673; GFX940: ; %bb.0: 2674; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2675; GFX940-NEXT: ;;#ASMSTART 2676; GFX940-NEXT: ; def v[0:1] 2677; GFX940-NEXT: ;;#ASMEND 2678; GFX940-NEXT: s_mov_b32 s2, 0x7060302 2679; GFX940-NEXT: v_mov_b32_e32 v4, 0 2680; GFX940-NEXT: ;;#ASMSTART 2681; GFX940-NEXT: ; def v[2:3] 2682; GFX940-NEXT: ;;#ASMEND 2683; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 2684; GFX940-NEXT: v_mov_b32_e32 v0, v3 2685; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 2686; GFX940-NEXT: s_waitcnt vmcnt(0) 2687; GFX940-NEXT: s_setpc_b64 s[30:31] 2688 %vec0 = call <4 x i16> asm "; def $0", "=v"() 2689 %vec1 = call <4 x i16> asm "; def $0", "=v"() 2690 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 2691 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 2692 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 poison, i32 1, i32 1> 2693 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 2694 ret void 2695} 2696 2697define void @v_shuffle_v4i16_v3i16__5_0_1_1(ptr addrspace(1) inreg %ptr) { 2698; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_0_1_1: 2699; GFX900: ; %bb.0: 2700; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2701; GFX900-NEXT: 
;;#ASMSTART 2702; GFX900-NEXT: ; def v[0:1] 2703; GFX900-NEXT: ;;#ASMEND 2704; GFX900-NEXT: ;;#ASMSTART 2705; GFX900-NEXT: ; def v[1:2] 2706; GFX900-NEXT: ;;#ASMEND 2707; GFX900-NEXT: s_mov_b32 s4, 0x5040100 2708; GFX900-NEXT: v_perm_b32 v1, v0, v2, s4 2709; GFX900-NEXT: s_mov_b32 s4, 0x7060302 2710; GFX900-NEXT: v_mov_b32_e32 v3, 0 2711; GFX900-NEXT: v_perm_b32 v2, v0, v0, s4 2712; GFX900-NEXT: global_store_dwordx2 v3, v[1:2], s[16:17] 2713; GFX900-NEXT: s_waitcnt vmcnt(0) 2714; GFX900-NEXT: s_setpc_b64 s[30:31] 2715; 2716; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_0_1_1: 2717; GFX90A: ; %bb.0: 2718; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2719; GFX90A-NEXT: ;;#ASMSTART 2720; GFX90A-NEXT: ; def v[2:3] 2721; GFX90A-NEXT: ;;#ASMEND 2722; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 2723; GFX90A-NEXT: ;;#ASMSTART 2724; GFX90A-NEXT: ; def v[0:1] 2725; GFX90A-NEXT: ;;#ASMEND 2726; GFX90A-NEXT: v_perm_b32 v2, v0, v3, s4 2727; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 2728; GFX90A-NEXT: v_mov_b32_e32 v4, 0 2729; GFX90A-NEXT: v_perm_b32 v3, v0, v0, s4 2730; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17] 2731; GFX90A-NEXT: s_waitcnt vmcnt(0) 2732; GFX90A-NEXT: s_setpc_b64 s[30:31] 2733; 2734; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_0_1_1: 2735; GFX940: ; %bb.0: 2736; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2737; GFX940-NEXT: ;;#ASMSTART 2738; GFX940-NEXT: ; def v[2:3] 2739; GFX940-NEXT: ;;#ASMEND 2740; GFX940-NEXT: s_mov_b32 s2, 0x5040100 2741; GFX940-NEXT: ;;#ASMSTART 2742; GFX940-NEXT: ; def v[0:1] 2743; GFX940-NEXT: ;;#ASMEND 2744; GFX940-NEXT: v_mov_b32_e32 v4, 0 2745; GFX940-NEXT: v_perm_b32 v2, v0, v3, s2 2746; GFX940-NEXT: s_mov_b32 s2, 0x7060302 2747; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 2748; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 2749; GFX940-NEXT: s_waitcnt vmcnt(0) 2750; GFX940-NEXT: s_setpc_b64 s[30:31] 2751 %vec0 = call <4 x i16> asm "; def $0", "=v"() 2752 %vec1 = call <4 x i16> asm "; def $0", "=v"() 2753 %extract3 = 
shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 2754 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 2755 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 0, i32 1, i32 1> 2756 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 2757 ret void 2758} 2759 2760define void @v_shuffle_v4i16_v3i16__5_2_1_1(ptr addrspace(1) inreg %ptr) { 2761; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_2_1_1: 2762; GFX900: ; %bb.0: 2763; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2764; GFX900-NEXT: ;;#ASMSTART 2765; GFX900-NEXT: ; def v[0:1] 2766; GFX900-NEXT: ;;#ASMEND 2767; GFX900-NEXT: s_mov_b32 s4, 0x5040100 2768; GFX900-NEXT: ;;#ASMSTART 2769; GFX900-NEXT: ; def v[2:3] 2770; GFX900-NEXT: ;;#ASMEND 2771; GFX900-NEXT: v_perm_b32 v1, v1, v3, s4 2772; GFX900-NEXT: s_mov_b32 s4, 0x7060302 2773; GFX900-NEXT: v_mov_b32_e32 v4, 0 2774; GFX900-NEXT: v_perm_b32 v2, v0, v0, s4 2775; GFX900-NEXT: global_store_dwordx2 v4, v[1:2], s[16:17] 2776; GFX900-NEXT: s_waitcnt vmcnt(0) 2777; GFX900-NEXT: s_setpc_b64 s[30:31] 2778; 2779; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_2_1_1: 2780; GFX90A: ; %bb.0: 2781; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2782; GFX90A-NEXT: ;;#ASMSTART 2783; GFX90A-NEXT: ; def v[2:3] 2784; GFX90A-NEXT: ;;#ASMEND 2785; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 2786; GFX90A-NEXT: ;;#ASMSTART 2787; GFX90A-NEXT: ; def v[0:1] 2788; GFX90A-NEXT: ;;#ASMEND 2789; GFX90A-NEXT: v_perm_b32 v2, v1, v3, s4 2790; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 2791; GFX90A-NEXT: v_mov_b32_e32 v4, 0 2792; GFX90A-NEXT: v_perm_b32 v3, v0, v0, s4 2793; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17] 2794; GFX90A-NEXT: s_waitcnt vmcnt(0) 2795; GFX90A-NEXT: s_setpc_b64 s[30:31] 2796; 2797; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_2_1_1: 2798; GFX940: ; %bb.0: 2799; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2800; GFX940-NEXT: ;;#ASMSTART 2801; GFX940-NEXT: ; def 
v[2:3] 2802; GFX940-NEXT: ;;#ASMEND 2803; GFX940-NEXT: s_mov_b32 s2, 0x5040100 2804; GFX940-NEXT: ;;#ASMSTART 2805; GFX940-NEXT: ; def v[0:1] 2806; GFX940-NEXT: ;;#ASMEND 2807; GFX940-NEXT: v_mov_b32_e32 v4, 0 2808; GFX940-NEXT: v_perm_b32 v2, v1, v3, s2 2809; GFX940-NEXT: s_mov_b32 s2, 0x7060302 2810; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 2811; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 2812; GFX940-NEXT: s_waitcnt vmcnt(0) 2813; GFX940-NEXT: s_setpc_b64 s[30:31] 2814 %vec0 = call <4 x i16> asm "; def $0", "=v"() 2815 %vec1 = call <4 x i16> asm "; def $0", "=v"() 2816 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 2817 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 2818 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 2, i32 1, i32 1> 2819 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 2820 ret void 2821} 2822 2823define void @v_shuffle_v4i16_v3i16__5_3_1_1(ptr addrspace(1) inreg %ptr) { 2824; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_3_1_1: 2825; GFX900: ; %bb.0: 2826; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2827; GFX900-NEXT: ;;#ASMSTART 2828; GFX900-NEXT: ; def v[0:1] 2829; GFX900-NEXT: ;;#ASMEND 2830; GFX900-NEXT: ;;#ASMSTART 2831; GFX900-NEXT: ; def v[1:2] 2832; GFX900-NEXT: ;;#ASMEND 2833; GFX900-NEXT: s_mov_b32 s4, 0x5040100 2834; GFX900-NEXT: v_perm_b32 v1, v1, v2, s4 2835; GFX900-NEXT: s_mov_b32 s4, 0x7060302 2836; GFX900-NEXT: v_mov_b32_e32 v3, 0 2837; GFX900-NEXT: v_perm_b32 v2, v0, v0, s4 2838; GFX900-NEXT: global_store_dwordx2 v3, v[1:2], s[16:17] 2839; GFX900-NEXT: s_waitcnt vmcnt(0) 2840; GFX900-NEXT: s_setpc_b64 s[30:31] 2841; 2842; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_3_1_1: 2843; GFX90A: ; %bb.0: 2844; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2845; GFX90A-NEXT: ;;#ASMSTART 2846; GFX90A-NEXT: ; def v[2:3] 2847; GFX90A-NEXT: ;;#ASMEND 2848; GFX90A-NEXT: s_mov_b32 s4, 
0x5040100 2849; GFX90A-NEXT: v_perm_b32 v2, v2, v3, s4 2850; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 2851; GFX90A-NEXT: v_mov_b32_e32 v4, 0 2852; GFX90A-NEXT: ;;#ASMSTART 2853; GFX90A-NEXT: ; def v[0:1] 2854; GFX90A-NEXT: ;;#ASMEND 2855; GFX90A-NEXT: v_perm_b32 v3, v0, v0, s4 2856; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17] 2857; GFX90A-NEXT: s_waitcnt vmcnt(0) 2858; GFX90A-NEXT: s_setpc_b64 s[30:31] 2859; 2860; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_3_1_1: 2861; GFX940: ; %bb.0: 2862; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2863; GFX940-NEXT: ;;#ASMSTART 2864; GFX940-NEXT: ; def v[2:3] 2865; GFX940-NEXT: ;;#ASMEND 2866; GFX940-NEXT: s_mov_b32 s2, 0x5040100 2867; GFX940-NEXT: v_perm_b32 v2, v2, v3, s2 2868; GFX940-NEXT: s_mov_b32 s2, 0x7060302 2869; GFX940-NEXT: v_mov_b32_e32 v4, 0 2870; GFX940-NEXT: ;;#ASMSTART 2871; GFX940-NEXT: ; def v[0:1] 2872; GFX940-NEXT: ;;#ASMEND 2873; GFX940-NEXT: s_nop 0 2874; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 2875; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 2876; GFX940-NEXT: s_waitcnt vmcnt(0) 2877; GFX940-NEXT: s_setpc_b64 s[30:31] 2878 %vec0 = call <4 x i16> asm "; def $0", "=v"() 2879 %vec1 = call <4 x i16> asm "; def $0", "=v"() 2880 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 2881 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 2882 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 3, i32 1, i32 1> 2883 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 2884 ret void 2885} 2886 2887define void @v_shuffle_v4i16_v3i16__5_4_1_1(ptr addrspace(1) inreg %ptr) { 2888; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_4_1_1: 2889; GFX900: ; %bb.0: 2890; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2891; GFX900-NEXT: ;;#ASMSTART 2892; GFX900-NEXT: ; def v[0:1] 2893; GFX900-NEXT: ;;#ASMEND 2894; GFX900-NEXT: ;;#ASMSTART 2895; GFX900-NEXT: ; def v[1:2] 2896; 
GFX900-NEXT: ;;#ASMEND 2897; GFX900-NEXT: s_mov_b32 s4, 0xffff 2898; GFX900-NEXT: v_bfi_b32 v1, s4, v2, v1 2899; GFX900-NEXT: s_mov_b32 s4, 0x7060302 2900; GFX900-NEXT: v_mov_b32_e32 v3, 0 2901; GFX900-NEXT: v_perm_b32 v2, v0, v0, s4 2902; GFX900-NEXT: global_store_dwordx2 v3, v[1:2], s[16:17] 2903; GFX900-NEXT: s_waitcnt vmcnt(0) 2904; GFX900-NEXT: s_setpc_b64 s[30:31] 2905; 2906; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_4_1_1: 2907; GFX90A: ; %bb.0: 2908; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2909; GFX90A-NEXT: ;;#ASMSTART 2910; GFX90A-NEXT: ; def v[2:3] 2911; GFX90A-NEXT: ;;#ASMEND 2912; GFX90A-NEXT: s_mov_b32 s4, 0xffff 2913; GFX90A-NEXT: v_bfi_b32 v2, s4, v3, v2 2914; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 2915; GFX90A-NEXT: v_mov_b32_e32 v4, 0 2916; GFX90A-NEXT: ;;#ASMSTART 2917; GFX90A-NEXT: ; def v[0:1] 2918; GFX90A-NEXT: ;;#ASMEND 2919; GFX90A-NEXT: v_perm_b32 v3, v0, v0, s4 2920; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17] 2921; GFX90A-NEXT: s_waitcnt vmcnt(0) 2922; GFX90A-NEXT: s_setpc_b64 s[30:31] 2923; 2924; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_4_1_1: 2925; GFX940: ; %bb.0: 2926; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2927; GFX940-NEXT: ;;#ASMSTART 2928; GFX940-NEXT: ; def v[2:3] 2929; GFX940-NEXT: ;;#ASMEND 2930; GFX940-NEXT: s_mov_b32 s2, 0xffff 2931; GFX940-NEXT: v_bfi_b32 v2, s2, v3, v2 2932; GFX940-NEXT: s_mov_b32 s2, 0x7060302 2933; GFX940-NEXT: v_mov_b32_e32 v4, 0 2934; GFX940-NEXT: ;;#ASMSTART 2935; GFX940-NEXT: ; def v[0:1] 2936; GFX940-NEXT: ;;#ASMEND 2937; GFX940-NEXT: s_nop 0 2938; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 2939; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 2940; GFX940-NEXT: s_waitcnt vmcnt(0) 2941; GFX940-NEXT: s_setpc_b64 s[30:31] 2942 %vec0 = call <4 x i16> asm "; def $0", "=v"() 2943 %vec1 = call <4 x i16> asm "; def $0", "=v"() 2944 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 2945 %extract31 = shufflevector <4 x i16> 
%vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 2946 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 4, i32 1, i32 1> 2947 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 2948 ret void 2949} 2950 2951define void @v_shuffle_v4i16_v3i16__5_5_1_1(ptr addrspace(1) inreg %ptr) { 2952; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_5_1_1: 2953; GFX900: ; %bb.0: 2954; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2955; GFX900-NEXT: ;;#ASMSTART 2956; GFX900-NEXT: ; def v[0:1] 2957; GFX900-NEXT: ;;#ASMEND 2958; GFX900-NEXT: ;;#ASMSTART 2959; GFX900-NEXT: ; def v[1:2] 2960; GFX900-NEXT: ;;#ASMEND 2961; GFX900-NEXT: s_mov_b32 s4, 0x7060302 2962; GFX900-NEXT: v_perm_b32 v1, v0, v0, s4 2963; GFX900-NEXT: s_mov_b32 s4, 0x5040100 2964; GFX900-NEXT: v_mov_b32_e32 v3, 0 2965; GFX900-NEXT: v_perm_b32 v0, v2, v2, s4 2966; GFX900-NEXT: global_store_dwordx2 v3, v[0:1], s[16:17] 2967; GFX900-NEXT: s_waitcnt vmcnt(0) 2968; GFX900-NEXT: s_setpc_b64 s[30:31] 2969; 2970; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_5_1_1: 2971; GFX90A: ; %bb.0: 2972; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2973; GFX90A-NEXT: ;;#ASMSTART 2974; GFX90A-NEXT: ; def v[0:1] 2975; GFX90A-NEXT: ;;#ASMEND 2976; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 2977; GFX90A-NEXT: v_perm_b32 v1, v0, v0, s4 2978; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 2979; GFX90A-NEXT: v_mov_b32_e32 v4, 0 2980; GFX90A-NEXT: ;;#ASMSTART 2981; GFX90A-NEXT: ; def v[2:3] 2982; GFX90A-NEXT: ;;#ASMEND 2983; GFX90A-NEXT: v_perm_b32 v0, v3, v3, s4 2984; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 2985; GFX90A-NEXT: s_waitcnt vmcnt(0) 2986; GFX90A-NEXT: s_setpc_b64 s[30:31] 2987; 2988; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_1_1: 2989; GFX940: ; %bb.0: 2990; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2991; GFX940-NEXT: ;;#ASMSTART 2992; GFX940-NEXT: ; def v[0:1] 2993; GFX940-NEXT: ;;#ASMEND 2994; GFX940-NEXT: s_mov_b32 s2, 0x7060302 2995; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 
2996; GFX940-NEXT: s_mov_b32 s2, 0x5040100 2997; GFX940-NEXT: v_mov_b32_e32 v4, 0 2998; GFX940-NEXT: ;;#ASMSTART 2999; GFX940-NEXT: ; def v[2:3] 3000; GFX940-NEXT: ;;#ASMEND 3001; GFX940-NEXT: s_nop 0 3002; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 3003; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 3004; GFX940-NEXT: s_waitcnt vmcnt(0) 3005; GFX940-NEXT: s_setpc_b64 s[30:31] 3006 %vec0 = call <4 x i16> asm "; def $0", "=v"() 3007 %vec1 = call <4 x i16> asm "; def $0", "=v"() 3008 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 3009 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 3010 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 1, i32 1> 3011 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 3012 ret void 3013} 3014 3015define void @v_shuffle_v4i16_v3i16__5_5_u_1(ptr addrspace(1) inreg %ptr) { 3016; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_5_u_1: 3017; GFX900: ; %bb.0: 3018; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3019; GFX900-NEXT: ;;#ASMSTART 3020; GFX900-NEXT: ; def v[0:1] 3021; GFX900-NEXT: ;;#ASMEND 3022; GFX900-NEXT: ;;#ASMSTART 3023; GFX900-NEXT: ; def v[1:2] 3024; GFX900-NEXT: ;;#ASMEND 3025; GFX900-NEXT: s_mov_b32 s4, 0x5040100 3026; GFX900-NEXT: v_mov_b32_e32 v3, 0 3027; GFX900-NEXT: v_perm_b32 v1, v2, v2, s4 3028; GFX900-NEXT: v_mov_b32_e32 v2, v0 3029; GFX900-NEXT: global_store_dwordx2 v3, v[1:2], s[16:17] 3030; GFX900-NEXT: s_waitcnt vmcnt(0) 3031; GFX900-NEXT: s_setpc_b64 s[30:31] 3032; 3033; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_5_u_1: 3034; GFX90A: ; %bb.0: 3035; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3036; GFX90A-NEXT: ;;#ASMSTART 3037; GFX90A-NEXT: ; def v[2:3] 3038; GFX90A-NEXT: ;;#ASMEND 3039; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 3040; GFX90A-NEXT: v_mov_b32_e32 v4, 0 3041; GFX90A-NEXT: ;;#ASMSTART 3042; GFX90A-NEXT: ; def v[0:1] 3043; GFX90A-NEXT: ;;#ASMEND 3044; 
GFX90A-NEXT: v_perm_b32 v2, v3, v3, s4 3045; GFX90A-NEXT: v_mov_b32_e32 v3, v0 3046; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17] 3047; GFX90A-NEXT: s_waitcnt vmcnt(0) 3048; GFX90A-NEXT: s_setpc_b64 s[30:31] 3049; 3050; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_u_1: 3051; GFX940: ; %bb.0: 3052; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3053; GFX940-NEXT: ;;#ASMSTART 3054; GFX940-NEXT: ; def v[2:3] 3055; GFX940-NEXT: ;;#ASMEND 3056; GFX940-NEXT: s_mov_b32 s2, 0x5040100 3057; GFX940-NEXT: v_mov_b32_e32 v4, 0 3058; GFX940-NEXT: ;;#ASMSTART 3059; GFX940-NEXT: ; def v[0:1] 3060; GFX940-NEXT: ;;#ASMEND 3061; GFX940-NEXT: v_perm_b32 v2, v3, v3, s2 3062; GFX940-NEXT: v_mov_b32_e32 v3, v0 3063; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 3064; GFX940-NEXT: s_waitcnt vmcnt(0) 3065; GFX940-NEXT: s_setpc_b64 s[30:31] 3066 %vec0 = call <4 x i16> asm "; def $0", "=v"() 3067 %vec1 = call <4 x i16> asm "; def $0", "=v"() 3068 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 3069 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 3070 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 poison, i32 1> 3071 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 3072 ret void 3073} 3074 3075define void @v_shuffle_v4i16_v3i16__5_5_0_1(ptr addrspace(1) inreg %ptr) { 3076; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_5_0_1: 3077; GFX900: ; %bb.0: 3078; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3079; GFX900-NEXT: ;;#ASMSTART 3080; GFX900-NEXT: ; def v[0:1] 3081; GFX900-NEXT: ;;#ASMEND 3082; GFX900-NEXT: ;;#ASMSTART 3083; GFX900-NEXT: ; def v[1:2] 3084; GFX900-NEXT: ;;#ASMEND 3085; GFX900-NEXT: s_mov_b32 s4, 0x5040100 3086; GFX900-NEXT: v_mov_b32_e32 v3, 0 3087; GFX900-NEXT: v_perm_b32 v1, v2, v2, s4 3088; GFX900-NEXT: v_mov_b32_e32 v2, v0 3089; GFX900-NEXT: global_store_dwordx2 v3, v[1:2], s[16:17] 3090; GFX900-NEXT: 
s_waitcnt vmcnt(0) 3091; GFX900-NEXT: s_setpc_b64 s[30:31] 3092; 3093; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_5_0_1: 3094; GFX90A: ; %bb.0: 3095; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3096; GFX90A-NEXT: ;;#ASMSTART 3097; GFX90A-NEXT: ; def v[2:3] 3098; GFX90A-NEXT: ;;#ASMEND 3099; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 3100; GFX90A-NEXT: v_mov_b32_e32 v4, 0 3101; GFX90A-NEXT: ;;#ASMSTART 3102; GFX90A-NEXT: ; def v[0:1] 3103; GFX90A-NEXT: ;;#ASMEND 3104; GFX90A-NEXT: v_perm_b32 v2, v3, v3, s4 3105; GFX90A-NEXT: v_mov_b32_e32 v3, v0 3106; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17] 3107; GFX90A-NEXT: s_waitcnt vmcnt(0) 3108; GFX90A-NEXT: s_setpc_b64 s[30:31] 3109; 3110; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_0_1: 3111; GFX940: ; %bb.0: 3112; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3113; GFX940-NEXT: ;;#ASMSTART 3114; GFX940-NEXT: ; def v[2:3] 3115; GFX940-NEXT: ;;#ASMEND 3116; GFX940-NEXT: s_mov_b32 s2, 0x5040100 3117; GFX940-NEXT: v_mov_b32_e32 v4, 0 3118; GFX940-NEXT: ;;#ASMSTART 3119; GFX940-NEXT: ; def v[0:1] 3120; GFX940-NEXT: ;;#ASMEND 3121; GFX940-NEXT: v_perm_b32 v2, v3, v3, s2 3122; GFX940-NEXT: v_mov_b32_e32 v3, v0 3123; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 3124; GFX940-NEXT: s_waitcnt vmcnt(0) 3125; GFX940-NEXT: s_setpc_b64 s[30:31] 3126 %vec0 = call <4 x i16> asm "; def $0", "=v"() 3127 %vec1 = call <4 x i16> asm "; def $0", "=v"() 3128 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 3129 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 3130 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 0, i32 1> 3131 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 3132 ret void 3133} 3134 3135define void @v_shuffle_v4i16_v3i16__5_5_2_1(ptr addrspace(1) inreg %ptr) { 3136; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_5_2_1: 3137; GFX900: ; %bb.0: 3138; GFX900-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) 3139; GFX900-NEXT: ;;#ASMSTART 3140; GFX900-NEXT: ; def v[0:1] 3141; GFX900-NEXT: ;;#ASMEND 3142; GFX900-NEXT: s_mov_b32 s4, 0xffff 3143; GFX900-NEXT: v_bfi_b32 v1, s4, v1, v0 3144; GFX900-NEXT: s_mov_b32 s4, 0x5040100 3145; GFX900-NEXT: v_mov_b32_e32 v4, 0 3146; GFX900-NEXT: ;;#ASMSTART 3147; GFX900-NEXT: ; def v[2:3] 3148; GFX900-NEXT: ;;#ASMEND 3149; GFX900-NEXT: v_perm_b32 v0, v3, v3, s4 3150; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 3151; GFX900-NEXT: s_waitcnt vmcnt(0) 3152; GFX900-NEXT: s_setpc_b64 s[30:31] 3153; 3154; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_5_2_1: 3155; GFX90A: ; %bb.0: 3156; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3157; GFX90A-NEXT: ;;#ASMSTART 3158; GFX90A-NEXT: ; def v[0:1] 3159; GFX90A-NEXT: ;;#ASMEND 3160; GFX90A-NEXT: s_mov_b32 s4, 0xffff 3161; GFX90A-NEXT: v_bfi_b32 v1, s4, v1, v0 3162; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 3163; GFX90A-NEXT: v_mov_b32_e32 v4, 0 3164; GFX90A-NEXT: ;;#ASMSTART 3165; GFX90A-NEXT: ; def v[2:3] 3166; GFX90A-NEXT: ;;#ASMEND 3167; GFX90A-NEXT: v_perm_b32 v0, v3, v3, s4 3168; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 3169; GFX90A-NEXT: s_waitcnt vmcnt(0) 3170; GFX90A-NEXT: s_setpc_b64 s[30:31] 3171; 3172; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_2_1: 3173; GFX940: ; %bb.0: 3174; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3175; GFX940-NEXT: ;;#ASMSTART 3176; GFX940-NEXT: ; def v[0:1] 3177; GFX940-NEXT: ;;#ASMEND 3178; GFX940-NEXT: s_mov_b32 s2, 0xffff 3179; GFX940-NEXT: v_bfi_b32 v1, s2, v1, v0 3180; GFX940-NEXT: s_mov_b32 s2, 0x5040100 3181; GFX940-NEXT: v_mov_b32_e32 v4, 0 3182; GFX940-NEXT: ;;#ASMSTART 3183; GFX940-NEXT: ; def v[2:3] 3184; GFX940-NEXT: ;;#ASMEND 3185; GFX940-NEXT: s_nop 0 3186; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 3187; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 3188; GFX940-NEXT: s_waitcnt vmcnt(0) 3189; GFX940-NEXT: s_setpc_b64 s[30:31] 3190 %vec0 = call <4 x i16> asm "; def $0", "=v"() 
3191 %vec1 = call <4 x i16> asm "; def $0", "=v"() 3192 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 3193 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 3194 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 2, i32 1> 3195 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 3196 ret void 3197} 3198 3199define void @v_shuffle_v4i16_v3i16__5_5_3_1(ptr addrspace(1) inreg %ptr) { 3200; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_5_3_1: 3201; GFX900: ; %bb.0: 3202; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3203; GFX900-NEXT: ;;#ASMSTART 3204; GFX900-NEXT: ; def v[0:1] 3205; GFX900-NEXT: ;;#ASMEND 3206; GFX900-NEXT: ;;#ASMSTART 3207; GFX900-NEXT: ; def v[1:2] 3208; GFX900-NEXT: ;;#ASMEND 3209; GFX900-NEXT: s_mov_b32 s4, 0xffff 3210; GFX900-NEXT: v_bfi_b32 v1, s4, v1, v0 3211; GFX900-NEXT: s_mov_b32 s4, 0x5040100 3212; GFX900-NEXT: v_mov_b32_e32 v3, 0 3213; GFX900-NEXT: v_perm_b32 v0, v2, v2, s4 3214; GFX900-NEXT: global_store_dwordx2 v3, v[0:1], s[16:17] 3215; GFX900-NEXT: s_waitcnt vmcnt(0) 3216; GFX900-NEXT: s_setpc_b64 s[30:31] 3217; 3218; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_5_3_1: 3219; GFX90A: ; %bb.0: 3220; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3221; GFX90A-NEXT: ;;#ASMSTART 3222; GFX90A-NEXT: ; def v[0:1] 3223; GFX90A-NEXT: ;;#ASMEND 3224; GFX90A-NEXT: s_mov_b32 s4, 0xffff 3225; GFX90A-NEXT: ;;#ASMSTART 3226; GFX90A-NEXT: ; def v[2:3] 3227; GFX90A-NEXT: ;;#ASMEND 3228; GFX90A-NEXT: v_bfi_b32 v1, s4, v2, v0 3229; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 3230; GFX90A-NEXT: v_mov_b32_e32 v4, 0 3231; GFX90A-NEXT: v_perm_b32 v0, v3, v3, s4 3232; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 3233; GFX90A-NEXT: s_waitcnt vmcnt(0) 3234; GFX90A-NEXT: s_setpc_b64 s[30:31] 3235; 3236; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_3_1: 3237; GFX940: ; %bb.0: 3238; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) 3239; GFX940-NEXT: ;;#ASMSTART 3240; GFX940-NEXT: ; def v[0:1] 3241; GFX940-NEXT: ;;#ASMEND 3242; GFX940-NEXT: s_mov_b32 s2, 0xffff 3243; GFX940-NEXT: ;;#ASMSTART 3244; GFX940-NEXT: ; def v[2:3] 3245; GFX940-NEXT: ;;#ASMEND 3246; GFX940-NEXT: v_mov_b32_e32 v4, 0 3247; GFX940-NEXT: v_bfi_b32 v1, s2, v2, v0 3248; GFX940-NEXT: s_mov_b32 s2, 0x5040100 3249; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 3250; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 3251; GFX940-NEXT: s_waitcnt vmcnt(0) 3252; GFX940-NEXT: s_setpc_b64 s[30:31] 3253 %vec0 = call <4 x i16> asm "; def $0", "=v"() 3254 %vec1 = call <4 x i16> asm "; def $0", "=v"() 3255 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 3256 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 3257 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 3, i32 1> 3258 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 3259 ret void 3260} 3261 3262define void @v_shuffle_v4i16_v3i16__5_5_4_1(ptr addrspace(1) inreg %ptr) { 3263; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_5_4_1: 3264; GFX900: ; %bb.0: 3265; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3266; GFX900-NEXT: ;;#ASMSTART 3267; GFX900-NEXT: ; def v[0:1] 3268; GFX900-NEXT: ;;#ASMEND 3269; GFX900-NEXT: ;;#ASMSTART 3270; GFX900-NEXT: ; def v[1:2] 3271; GFX900-NEXT: ;;#ASMEND 3272; GFX900-NEXT: s_mov_b32 s4, 0x7060302 3273; GFX900-NEXT: v_perm_b32 v1, v0, v1, s4 3274; GFX900-NEXT: s_mov_b32 s4, 0x5040100 3275; GFX900-NEXT: v_mov_b32_e32 v3, 0 3276; GFX900-NEXT: v_perm_b32 v0, v2, v2, s4 3277; GFX900-NEXT: global_store_dwordx2 v3, v[0:1], s[16:17] 3278; GFX900-NEXT: s_waitcnt vmcnt(0) 3279; GFX900-NEXT: s_setpc_b64 s[30:31] 3280; 3281; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_5_4_1: 3282; GFX90A: ; %bb.0: 3283; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3284; GFX90A-NEXT: ;;#ASMSTART 3285; GFX90A-NEXT: ; def v[0:1] 
3286; GFX90A-NEXT: ;;#ASMEND 3287; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 3288; GFX90A-NEXT: ;;#ASMSTART 3289; GFX90A-NEXT: ; def v[2:3] 3290; GFX90A-NEXT: ;;#ASMEND 3291; GFX90A-NEXT: v_perm_b32 v1, v0, v2, s4 3292; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 3293; GFX90A-NEXT: v_mov_b32_e32 v4, 0 3294; GFX90A-NEXT: v_perm_b32 v0, v3, v3, s4 3295; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 3296; GFX90A-NEXT: s_waitcnt vmcnt(0) 3297; GFX90A-NEXT: s_setpc_b64 s[30:31] 3298; 3299; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_4_1: 3300; GFX940: ; %bb.0: 3301; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3302; GFX940-NEXT: ;;#ASMSTART 3303; GFX940-NEXT: ; def v[0:1] 3304; GFX940-NEXT: ;;#ASMEND 3305; GFX940-NEXT: s_mov_b32 s2, 0x7060302 3306; GFX940-NEXT: ;;#ASMSTART 3307; GFX940-NEXT: ; def v[2:3] 3308; GFX940-NEXT: ;;#ASMEND 3309; GFX940-NEXT: v_mov_b32_e32 v4, 0 3310; GFX940-NEXT: v_perm_b32 v1, v0, v2, s2 3311; GFX940-NEXT: s_mov_b32 s2, 0x5040100 3312; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 3313; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 3314; GFX940-NEXT: s_waitcnt vmcnt(0) 3315; GFX940-NEXT: s_setpc_b64 s[30:31] 3316 %vec0 = call <4 x i16> asm "; def $0", "=v"() 3317 %vec1 = call <4 x i16> asm "; def $0", "=v"() 3318 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 3319 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 3320 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 4, i32 1> 3321 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 3322 ret void 3323} 3324 3325define void @v_shuffle_v4i16_v3i16__u_2_2_2(ptr addrspace(1) inreg %ptr) { 3326; GFX900-LABEL: v_shuffle_v4i16_v3i16__u_2_2_2: 3327; GFX900: ; %bb.0: 3328; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3329; GFX900-NEXT: ;;#ASMSTART 3330; GFX900-NEXT: ; def v[0:1] 3331; GFX900-NEXT: ;;#ASMEND 3332; GFX900-NEXT: s_mov_b32 s4, 0x5040100 
3333; GFX900-NEXT: v_mov_b32_e32 v3, 0 3334; GFX900-NEXT: v_perm_b32 v2, v1, v1, s4 3335; GFX900-NEXT: v_lshlrev_b32_e32 v1, 16, v1 3336; GFX900-NEXT: global_store_dwordx2 v3, v[1:2], s[16:17] 3337; GFX900-NEXT: s_waitcnt vmcnt(0) 3338; GFX900-NEXT: s_setpc_b64 s[30:31] 3339; 3340; GFX90A-LABEL: v_shuffle_v4i16_v3i16__u_2_2_2: 3341; GFX90A: ; %bb.0: 3342; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3343; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 3344; GFX90A-NEXT: v_mov_b32_e32 v4, 0 3345; GFX90A-NEXT: ;;#ASMSTART 3346; GFX90A-NEXT: ; def v[0:1] 3347; GFX90A-NEXT: ;;#ASMEND 3348; GFX90A-NEXT: v_perm_b32 v3, v1, v1, s4 3349; GFX90A-NEXT: v_lshlrev_b32_e32 v2, 16, v1 3350; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17] 3351; GFX90A-NEXT: s_waitcnt vmcnt(0) 3352; GFX90A-NEXT: s_setpc_b64 s[30:31] 3353; 3354; GFX940-LABEL: v_shuffle_v4i16_v3i16__u_2_2_2: 3355; GFX940: ; %bb.0: 3356; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3357; GFX940-NEXT: s_mov_b32 s2, 0x5040100 3358; GFX940-NEXT: v_mov_b32_e32 v4, 0 3359; GFX940-NEXT: ;;#ASMSTART 3360; GFX940-NEXT: ; def v[0:1] 3361; GFX940-NEXT: ;;#ASMEND 3362; GFX940-NEXT: s_nop 0 3363; GFX940-NEXT: v_perm_b32 v3, v1, v1, s2 3364; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v1 3365; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 3366; GFX940-NEXT: s_waitcnt vmcnt(0) 3367; GFX940-NEXT: s_setpc_b64 s[30:31] 3368 %vec0 = call <4 x i16> asm "; def $0", "=v"() 3369 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 3370 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> <i32 poison, i32 2, i32 2, i32 2> 3371 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 3372 ret void 3373} 3374 3375define void @v_shuffle_v4i16_v3i16__0_2_2_2(ptr addrspace(1) inreg %ptr) { 3376; GFX900-LABEL: v_shuffle_v4i16_v3i16__0_2_2_2: 3377; GFX900: ; %bb.0: 3378; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3379; GFX900-NEXT: ;;#ASMSTART 3380; 
GFX900-NEXT: ; def v[0:1] 3381; GFX900-NEXT: ;;#ASMEND 3382; GFX900-NEXT: s_mov_b32 s4, 0x5040100 3383; GFX900-NEXT: v_mov_b32_e32 v2, 0 3384; GFX900-NEXT: v_perm_b32 v0, v1, v0, s4 3385; GFX900-NEXT: v_perm_b32 v1, v1, v1, s4 3386; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17] 3387; GFX900-NEXT: s_waitcnt vmcnt(0) 3388; GFX900-NEXT: s_setpc_b64 s[30:31] 3389; 3390; GFX90A-LABEL: v_shuffle_v4i16_v3i16__0_2_2_2: 3391; GFX90A: ; %bb.0: 3392; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3393; GFX90A-NEXT: ;;#ASMSTART 3394; GFX90A-NEXT: ; def v[0:1] 3395; GFX90A-NEXT: ;;#ASMEND 3396; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 3397; GFX90A-NEXT: v_mov_b32_e32 v2, 0 3398; GFX90A-NEXT: v_perm_b32 v0, v1, v0, s4 3399; GFX90A-NEXT: v_perm_b32 v1, v1, v1, s4 3400; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17] 3401; GFX90A-NEXT: s_waitcnt vmcnt(0) 3402; GFX90A-NEXT: s_setpc_b64 s[30:31] 3403; 3404; GFX940-LABEL: v_shuffle_v4i16_v3i16__0_2_2_2: 3405; GFX940: ; %bb.0: 3406; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3407; GFX940-NEXT: ;;#ASMSTART 3408; GFX940-NEXT: ; def v[0:1] 3409; GFX940-NEXT: ;;#ASMEND 3410; GFX940-NEXT: s_mov_b32 s2, 0x5040100 3411; GFX940-NEXT: v_mov_b32_e32 v2, 0 3412; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2 3413; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 3414; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 3415; GFX940-NEXT: s_waitcnt vmcnt(0) 3416; GFX940-NEXT: s_setpc_b64 s[30:31] 3417 %vec0 = call <4 x i16> asm "; def $0", "=v"() 3418 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 3419 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> <i32 0, i32 2, i32 2, i32 2> 3420 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 3421 ret void 3422} 3423 3424define void @v_shuffle_v4i16_v3i16__1_2_2_2(ptr addrspace(1) inreg %ptr) { 3425; GFX900-LABEL: v_shuffle_v4i16_v3i16__1_2_2_2: 3426; GFX900: ; %bb.0: 3427; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) 3428; GFX900-NEXT: ;;#ASMSTART 3429; GFX900-NEXT: ; def v[0:1] 3430; GFX900-NEXT: ;;#ASMEND 3431; GFX900-NEXT: s_mov_b32 s4, 0x5040100 3432; GFX900-NEXT: v_mov_b32_e32 v3, 0 3433; GFX900-NEXT: v_perm_b32 v2, v1, v1, s4 3434; GFX900-NEXT: v_alignbit_b32 v1, v1, v0, 16 3435; GFX900-NEXT: global_store_dwordx2 v3, v[1:2], s[16:17] 3436; GFX900-NEXT: s_waitcnt vmcnt(0) 3437; GFX900-NEXT: s_setpc_b64 s[30:31] 3438; 3439; GFX90A-LABEL: v_shuffle_v4i16_v3i16__1_2_2_2: 3440; GFX90A: ; %bb.0: 3441; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3442; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 3443; GFX90A-NEXT: v_mov_b32_e32 v4, 0 3444; GFX90A-NEXT: ;;#ASMSTART 3445; GFX90A-NEXT: ; def v[0:1] 3446; GFX90A-NEXT: ;;#ASMEND 3447; GFX90A-NEXT: v_perm_b32 v3, v1, v1, s4 3448; GFX90A-NEXT: v_alignbit_b32 v2, v1, v0, 16 3449; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17] 3450; GFX90A-NEXT: s_waitcnt vmcnt(0) 3451; GFX90A-NEXT: s_setpc_b64 s[30:31] 3452; 3453; GFX940-LABEL: v_shuffle_v4i16_v3i16__1_2_2_2: 3454; GFX940: ; %bb.0: 3455; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3456; GFX940-NEXT: s_mov_b32 s2, 0x5040100 3457; GFX940-NEXT: v_mov_b32_e32 v4, 0 3458; GFX940-NEXT: ;;#ASMSTART 3459; GFX940-NEXT: ; def v[0:1] 3460; GFX940-NEXT: ;;#ASMEND 3461; GFX940-NEXT: s_nop 0 3462; GFX940-NEXT: v_perm_b32 v3, v1, v1, s2 3463; GFX940-NEXT: v_alignbit_b32 v2, v1, v0, 16 3464; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 3465; GFX940-NEXT: s_waitcnt vmcnt(0) 3466; GFX940-NEXT: s_setpc_b64 s[30:31] 3467 %vec0 = call <4 x i16> asm "; def $0", "=v"() 3468 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 3469 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> <i32 1, i32 2, i32 2, i32 2> 3470 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 3471 ret void 3472} 3473 3474define void @v_shuffle_v4i16_v3i16__2_2_2_2(ptr addrspace(1) inreg %ptr) { 3475; GFX900-LABEL: 
v_shuffle_v4i16_v3i16__2_2_2_2: 3476; GFX900: ; %bb.0: 3477; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3478; GFX900-NEXT: ;;#ASMSTART 3479; GFX900-NEXT: ; def v[0:1] 3480; GFX900-NEXT: ;;#ASMEND 3481; GFX900-NEXT: s_mov_b32 s4, 0x5040100 3482; GFX900-NEXT: v_perm_b32 v0, v1, v1, s4 3483; GFX900-NEXT: v_mov_b32_e32 v2, 0 3484; GFX900-NEXT: v_mov_b32_e32 v1, v0 3485; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17] 3486; GFX900-NEXT: s_waitcnt vmcnt(0) 3487; GFX900-NEXT: s_setpc_b64 s[30:31] 3488; 3489; GFX90A-LABEL: v_shuffle_v4i16_v3i16__2_2_2_2: 3490; GFX90A: ; %bb.0: 3491; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3492; GFX90A-NEXT: ;;#ASMSTART 3493; GFX90A-NEXT: ; def v[0:1] 3494; GFX90A-NEXT: ;;#ASMEND 3495; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 3496; GFX90A-NEXT: v_perm_b32 v0, v1, v1, s4 3497; GFX90A-NEXT: v_mov_b32_e32 v2, 0 3498; GFX90A-NEXT: v_mov_b32_e32 v1, v0 3499; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17] 3500; GFX90A-NEXT: s_waitcnt vmcnt(0) 3501; GFX90A-NEXT: s_setpc_b64 s[30:31] 3502; 3503; GFX940-LABEL: v_shuffle_v4i16_v3i16__2_2_2_2: 3504; GFX940: ; %bb.0: 3505; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3506; GFX940-NEXT: ;;#ASMSTART 3507; GFX940-NEXT: ; def v[0:1] 3508; GFX940-NEXT: ;;#ASMEND 3509; GFX940-NEXT: s_mov_b32 s2, 0x5040100 3510; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 3511; GFX940-NEXT: v_mov_b32_e32 v2, 0 3512; GFX940-NEXT: v_mov_b32_e32 v1, v0 3513; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 3514; GFX940-NEXT: s_waitcnt vmcnt(0) 3515; GFX940-NEXT: s_setpc_b64 s[30:31] 3516 %vec0 = call <4 x i16> asm "; def $0", "=v"() 3517 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 3518 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2> 3519 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 3520 ret void 3521} 3522 3523define void @v_shuffle_v4i16_v3i16__3_2_2_2(ptr addrspace(1) 
inreg %ptr) { 3524; GFX900-LABEL: v_shuffle_v4i16_v3i16__3_2_2_2: 3525; GFX900: ; %bb.0: 3526; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3527; GFX900-NEXT: ;;#ASMSTART 3528; GFX900-NEXT: ; def v[0:1] 3529; GFX900-NEXT: ;;#ASMEND 3530; GFX900-NEXT: s_mov_b32 s4, 0x5040100 3531; GFX900-NEXT: v_mov_b32_e32 v3, 0 3532; GFX900-NEXT: v_perm_b32 v2, v1, v1, s4 3533; GFX900-NEXT: v_lshlrev_b32_e32 v1, 16, v1 3534; GFX900-NEXT: global_store_dwordx2 v3, v[1:2], s[16:17] 3535; GFX900-NEXT: s_waitcnt vmcnt(0) 3536; GFX900-NEXT: s_setpc_b64 s[30:31] 3537; 3538; GFX90A-LABEL: v_shuffle_v4i16_v3i16__3_2_2_2: 3539; GFX90A: ; %bb.0: 3540; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3541; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 3542; GFX90A-NEXT: v_mov_b32_e32 v4, 0 3543; GFX90A-NEXT: ;;#ASMSTART 3544; GFX90A-NEXT: ; def v[0:1] 3545; GFX90A-NEXT: ;;#ASMEND 3546; GFX90A-NEXT: v_perm_b32 v3, v1, v1, s4 3547; GFX90A-NEXT: v_lshlrev_b32_e32 v2, 16, v1 3548; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17] 3549; GFX90A-NEXT: s_waitcnt vmcnt(0) 3550; GFX90A-NEXT: s_setpc_b64 s[30:31] 3551; 3552; GFX940-LABEL: v_shuffle_v4i16_v3i16__3_2_2_2: 3553; GFX940: ; %bb.0: 3554; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3555; GFX940-NEXT: s_mov_b32 s2, 0x5040100 3556; GFX940-NEXT: v_mov_b32_e32 v4, 0 3557; GFX940-NEXT: ;;#ASMSTART 3558; GFX940-NEXT: ; def v[0:1] 3559; GFX940-NEXT: ;;#ASMEND 3560; GFX940-NEXT: s_nop 0 3561; GFX940-NEXT: v_perm_b32 v3, v1, v1, s2 3562; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v1 3563; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 3564; GFX940-NEXT: s_waitcnt vmcnt(0) 3565; GFX940-NEXT: s_setpc_b64 s[30:31] 3566 %vec0 = call <4 x i16> asm "; def $0", "=v"() 3567 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 3568 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> <i32 3, i32 2, i32 2, i32 2> 3569 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 3570 
ret void 3571} 3572 3573define void @v_shuffle_v4i16_v3i16__4_2_2_2(ptr addrspace(1) inreg %ptr) { 3574; GFX900-LABEL: v_shuffle_v4i16_v3i16__4_2_2_2: 3575; GFX900: ; %bb.0: 3576; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3577; GFX900-NEXT: ;;#ASMSTART 3578; GFX900-NEXT: ; def v[2:3] 3579; GFX900-NEXT: ;;#ASMEND 3580; GFX900-NEXT: s_mov_b32 s4, 0x5040100 3581; GFX900-NEXT: v_mov_b32_e32 v4, 0 3582; GFX900-NEXT: ;;#ASMSTART 3583; GFX900-NEXT: ; def v[0:1] 3584; GFX900-NEXT: ;;#ASMEND 3585; GFX900-NEXT: v_perm_b32 v3, v1, v1, s4 3586; GFX900-NEXT: v_alignbit_b32 v2, v1, v2, 16 3587; GFX900-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17] 3588; GFX900-NEXT: s_waitcnt vmcnt(0) 3589; GFX900-NEXT: s_setpc_b64 s[30:31] 3590; 3591; GFX90A-LABEL: v_shuffle_v4i16_v3i16__4_2_2_2: 3592; GFX90A: ; %bb.0: 3593; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3594; GFX90A-NEXT: ;;#ASMSTART 3595; GFX90A-NEXT: ; def v[2:3] 3596; GFX90A-NEXT: ;;#ASMEND 3597; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 3598; GFX90A-NEXT: v_mov_b32_e32 v4, 0 3599; GFX90A-NEXT: ;;#ASMSTART 3600; GFX90A-NEXT: ; def v[0:1] 3601; GFX90A-NEXT: ;;#ASMEND 3602; GFX90A-NEXT: v_perm_b32 v3, v1, v1, s4 3603; GFX90A-NEXT: v_alignbit_b32 v2, v1, v2, 16 3604; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17] 3605; GFX90A-NEXT: s_waitcnt vmcnt(0) 3606; GFX90A-NEXT: s_setpc_b64 s[30:31] 3607; 3608; GFX940-LABEL: v_shuffle_v4i16_v3i16__4_2_2_2: 3609; GFX940: ; %bb.0: 3610; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3611; GFX940-NEXT: ;;#ASMSTART 3612; GFX940-NEXT: ; def v[2:3] 3613; GFX940-NEXT: ;;#ASMEND 3614; GFX940-NEXT: s_mov_b32 s2, 0x5040100 3615; GFX940-NEXT: v_mov_b32_e32 v4, 0 3616; GFX940-NEXT: ;;#ASMSTART 3617; GFX940-NEXT: ; def v[0:1] 3618; GFX940-NEXT: ;;#ASMEND 3619; GFX940-NEXT: s_nop 0 3620; GFX940-NEXT: v_perm_b32 v3, v1, v1, s2 3621; GFX940-NEXT: v_alignbit_b32 v2, v1, v2, 16 3622; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 3623; GFX940-NEXT: s_waitcnt 
vmcnt(0) 3624; GFX940-NEXT: s_setpc_b64 s[30:31] 3625 %vec0 = call <4 x i16> asm "; def $0", "=v"() 3626 %vec1 = call <4 x i16> asm "; def $0", "=v"() 3627 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 3628 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 3629 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 4, i32 2, i32 2, i32 2> 3630 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 3631 ret void 3632} 3633 3634define void @v_shuffle_v4i16_v3i16__5_2_2_2(ptr addrspace(1) inreg %ptr) { 3635; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_2_2_2: 3636; GFX900: ; %bb.0: 3637; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3638; GFX900-NEXT: ;;#ASMSTART 3639; GFX900-NEXT: ; def v[0:1] 3640; GFX900-NEXT: ;;#ASMEND 3641; GFX900-NEXT: s_mov_b32 s4, 0x5040100 3642; GFX900-NEXT: v_mov_b32_e32 v4, 0 3643; GFX900-NEXT: ;;#ASMSTART 3644; GFX900-NEXT: ; def v[2:3] 3645; GFX900-NEXT: ;;#ASMEND 3646; GFX900-NEXT: v_perm_b32 v0, v1, v3, s4 3647; GFX900-NEXT: v_perm_b32 v1, v1, v1, s4 3648; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 3649; GFX900-NEXT: s_waitcnt vmcnt(0) 3650; GFX900-NEXT: s_setpc_b64 s[30:31] 3651; 3652; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_2_2_2: 3653; GFX90A: ; %bb.0: 3654; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3655; GFX90A-NEXT: ;;#ASMSTART 3656; GFX90A-NEXT: ; def v[0:1] 3657; GFX90A-NEXT: ;;#ASMEND 3658; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 3659; GFX90A-NEXT: v_mov_b32_e32 v4, 0 3660; GFX90A-NEXT: ;;#ASMSTART 3661; GFX90A-NEXT: ; def v[2:3] 3662; GFX90A-NEXT: ;;#ASMEND 3663; GFX90A-NEXT: v_perm_b32 v0, v1, v3, s4 3664; GFX90A-NEXT: v_perm_b32 v1, v1, v1, s4 3665; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 3666; GFX90A-NEXT: s_waitcnt vmcnt(0) 3667; GFX90A-NEXT: s_setpc_b64 s[30:31] 3668; 3669; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_2_2_2: 3670; GFX940: ; %bb.0: 3671; GFX940-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) 3672; GFX940-NEXT: ;;#ASMSTART 3673; GFX940-NEXT: ; def v[0:1] 3674; GFX940-NEXT: ;;#ASMEND 3675; GFX940-NEXT: s_mov_b32 s2, 0x5040100 3676; GFX940-NEXT: v_mov_b32_e32 v4, 0 3677; GFX940-NEXT: ;;#ASMSTART 3678; GFX940-NEXT: ; def v[2:3] 3679; GFX940-NEXT: ;;#ASMEND 3680; GFX940-NEXT: s_nop 0 3681; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 3682; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 3683; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 3684; GFX940-NEXT: s_waitcnt vmcnt(0) 3685; GFX940-NEXT: s_setpc_b64 s[30:31] 3686 %vec0 = call <4 x i16> asm "; def $0", "=v"() 3687 %vec1 = call <4 x i16> asm "; def $0", "=v"() 3688 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 3689 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 3690 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 2, i32 2, i32 2> 3691 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 3692 ret void 3693} 3694 3695define void @v_shuffle_v4i16_v3i16__5_u_2_2(ptr addrspace(1) inreg %ptr) { 3696; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_u_2_2: 3697; GFX900: ; %bb.0: 3698; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3699; GFX900-NEXT: ;;#ASMSTART 3700; GFX900-NEXT: ; def v[0:1] 3701; GFX900-NEXT: ;;#ASMEND 3702; GFX900-NEXT: s_mov_b32 s4, 0x5040100 3703; GFX900-NEXT: v_mov_b32_e32 v4, 0 3704; GFX900-NEXT: ;;#ASMSTART 3705; GFX900-NEXT: ; def v[2:3] 3706; GFX900-NEXT: ;;#ASMEND 3707; GFX900-NEXT: v_perm_b32 v1, v1, v1, s4 3708; GFX900-NEXT: v_mov_b32_e32 v0, v3 3709; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 3710; GFX900-NEXT: s_waitcnt vmcnt(0) 3711; GFX900-NEXT: s_setpc_b64 s[30:31] 3712; 3713; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_u_2_2: 3714; GFX90A: ; %bb.0: 3715; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3716; GFX90A-NEXT: ;;#ASMSTART 3717; GFX90A-NEXT: ; def v[0:1] 3718; GFX90A-NEXT: ;;#ASMEND 3719; 
GFX90A-NEXT: s_mov_b32 s4, 0x5040100 3720; GFX90A-NEXT: v_mov_b32_e32 v4, 0 3721; GFX90A-NEXT: ;;#ASMSTART 3722; GFX90A-NEXT: ; def v[2:3] 3723; GFX90A-NEXT: ;;#ASMEND 3724; GFX90A-NEXT: v_perm_b32 v1, v1, v1, s4 3725; GFX90A-NEXT: v_mov_b32_e32 v0, v3 3726; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 3727; GFX90A-NEXT: s_waitcnt vmcnt(0) 3728; GFX90A-NEXT: s_setpc_b64 s[30:31] 3729; 3730; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_u_2_2: 3731; GFX940: ; %bb.0: 3732; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3733; GFX940-NEXT: ;;#ASMSTART 3734; GFX940-NEXT: ; def v[0:1] 3735; GFX940-NEXT: ;;#ASMEND 3736; GFX940-NEXT: s_mov_b32 s2, 0x5040100 3737; GFX940-NEXT: v_mov_b32_e32 v4, 0 3738; GFX940-NEXT: ;;#ASMSTART 3739; GFX940-NEXT: ; def v[2:3] 3740; GFX940-NEXT: ;;#ASMEND 3741; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 3742; GFX940-NEXT: v_mov_b32_e32 v0, v3 3743; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 3744; GFX940-NEXT: s_waitcnt vmcnt(0) 3745; GFX940-NEXT: s_setpc_b64 s[30:31] 3746 %vec0 = call <4 x i16> asm "; def $0", "=v"() 3747 %vec1 = call <4 x i16> asm "; def $0", "=v"() 3748 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 3749 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 3750 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 poison, i32 2, i32 2> 3751 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 3752 ret void 3753} 3754 3755define void @v_shuffle_v4i16_v3i16__5_0_2_2(ptr addrspace(1) inreg %ptr) { 3756; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_0_2_2: 3757; GFX900: ; %bb.0: 3758; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3759; GFX900-NEXT: ;;#ASMSTART 3760; GFX900-NEXT: ; def v[0:1] 3761; GFX900-NEXT: ;;#ASMEND 3762; GFX900-NEXT: s_mov_b32 s4, 0x5040100 3763; GFX900-NEXT: v_mov_b32_e32 v4, 0 3764; GFX900-NEXT: ;;#ASMSTART 3765; GFX900-NEXT: ; def v[2:3] 3766; GFX900-NEXT: 
;;#ASMEND 3767; GFX900-NEXT: v_perm_b32 v0, v0, v3, s4 3768; GFX900-NEXT: v_perm_b32 v1, v1, v1, s4 3769; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 3770; GFX900-NEXT: s_waitcnt vmcnt(0) 3771; GFX900-NEXT: s_setpc_b64 s[30:31] 3772; 3773; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_0_2_2: 3774; GFX90A: ; %bb.0: 3775; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3776; GFX90A-NEXT: ;;#ASMSTART 3777; GFX90A-NEXT: ; def v[0:1] 3778; GFX90A-NEXT: ;;#ASMEND 3779; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 3780; GFX90A-NEXT: v_mov_b32_e32 v4, 0 3781; GFX90A-NEXT: ;;#ASMSTART 3782; GFX90A-NEXT: ; def v[2:3] 3783; GFX90A-NEXT: ;;#ASMEND 3784; GFX90A-NEXT: v_perm_b32 v0, v0, v3, s4 3785; GFX90A-NEXT: v_perm_b32 v1, v1, v1, s4 3786; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 3787; GFX90A-NEXT: s_waitcnt vmcnt(0) 3788; GFX90A-NEXT: s_setpc_b64 s[30:31] 3789; 3790; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_0_2_2: 3791; GFX940: ; %bb.0: 3792; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3793; GFX940-NEXT: ;;#ASMSTART 3794; GFX940-NEXT: ; def v[0:1] 3795; GFX940-NEXT: ;;#ASMEND 3796; GFX940-NEXT: s_mov_b32 s2, 0x5040100 3797; GFX940-NEXT: v_mov_b32_e32 v4, 0 3798; GFX940-NEXT: ;;#ASMSTART 3799; GFX940-NEXT: ; def v[2:3] 3800; GFX940-NEXT: ;;#ASMEND 3801; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 3802; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 3803; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 3804; GFX940-NEXT: s_waitcnt vmcnt(0) 3805; GFX940-NEXT: s_setpc_b64 s[30:31] 3806 %vec0 = call <4 x i16> asm "; def $0", "=v"() 3807 %vec1 = call <4 x i16> asm "; def $0", "=v"() 3808 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 3809 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 3810 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 0, i32 2, i32 2> 3811 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 3812 ret void 
3813} 3814 3815define void @v_shuffle_v4i16_v3i16__5_1_2_2(ptr addrspace(1) inreg %ptr) { 3816; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_1_2_2: 3817; GFX900: ; %bb.0: 3818; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3819; GFX900-NEXT: ;;#ASMSTART 3820; GFX900-NEXT: ; def v[0:1] 3821; GFX900-NEXT: ;;#ASMEND 3822; GFX900-NEXT: s_mov_b32 s4, 0xffff 3823; GFX900-NEXT: ;;#ASMSTART 3824; GFX900-NEXT: ; def v[2:3] 3825; GFX900-NEXT: ;;#ASMEND 3826; GFX900-NEXT: v_bfi_b32 v0, s4, v3, v0 3827; GFX900-NEXT: s_mov_b32 s4, 0x5040100 3828; GFX900-NEXT: v_mov_b32_e32 v4, 0 3829; GFX900-NEXT: v_perm_b32 v1, v1, v1, s4 3830; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 3831; GFX900-NEXT: s_waitcnt vmcnt(0) 3832; GFX900-NEXT: s_setpc_b64 s[30:31] 3833; 3834; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_1_2_2: 3835; GFX90A: ; %bb.0: 3836; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3837; GFX90A-NEXT: ;;#ASMSTART 3838; GFX90A-NEXT: ; def v[0:1] 3839; GFX90A-NEXT: ;;#ASMEND 3840; GFX90A-NEXT: s_mov_b32 s4, 0xffff 3841; GFX90A-NEXT: ;;#ASMSTART 3842; GFX90A-NEXT: ; def v[2:3] 3843; GFX90A-NEXT: ;;#ASMEND 3844; GFX90A-NEXT: v_bfi_b32 v0, s4, v3, v0 3845; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 3846; GFX90A-NEXT: v_mov_b32_e32 v4, 0 3847; GFX90A-NEXT: v_perm_b32 v1, v1, v1, s4 3848; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 3849; GFX90A-NEXT: s_waitcnt vmcnt(0) 3850; GFX90A-NEXT: s_setpc_b64 s[30:31] 3851; 3852; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_1_2_2: 3853; GFX940: ; %bb.0: 3854; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3855; GFX940-NEXT: ;;#ASMSTART 3856; GFX940-NEXT: ; def v[0:1] 3857; GFX940-NEXT: ;;#ASMEND 3858; GFX940-NEXT: s_mov_b32 s2, 0xffff 3859; GFX940-NEXT: ;;#ASMSTART 3860; GFX940-NEXT: ; def v[2:3] 3861; GFX940-NEXT: ;;#ASMEND 3862; GFX940-NEXT: v_mov_b32_e32 v4, 0 3863; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v0 3864; GFX940-NEXT: s_mov_b32 s2, 0x5040100 3865; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 3866; GFX940-NEXT: 
global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 3867; GFX940-NEXT: s_waitcnt vmcnt(0) 3868; GFX940-NEXT: s_setpc_b64 s[30:31] 3869 %vec0 = call <4 x i16> asm "; def $0", "=v"() 3870 %vec1 = call <4 x i16> asm "; def $0", "=v"() 3871 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 3872 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 3873 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 1, i32 2, i32 2> 3874 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 3875 ret void 3876} 3877 3878define void @v_shuffle_v4i16_v3i16__5_3_2_2(ptr addrspace(1) inreg %ptr) { 3879; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_3_2_2: 3880; GFX900: ; %bb.0: 3881; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3882; GFX900-NEXT: ;;#ASMSTART 3883; GFX900-NEXT: ; def v[0:1] 3884; GFX900-NEXT: ;;#ASMEND 3885; GFX900-NEXT: s_mov_b32 s4, 0x5040100 3886; GFX900-NEXT: v_mov_b32_e32 v4, 0 3887; GFX900-NEXT: ;;#ASMSTART 3888; GFX900-NEXT: ; def v[2:3] 3889; GFX900-NEXT: ;;#ASMEND 3890; GFX900-NEXT: v_perm_b32 v0, v2, v3, s4 3891; GFX900-NEXT: v_perm_b32 v1, v1, v1, s4 3892; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 3893; GFX900-NEXT: s_waitcnt vmcnt(0) 3894; GFX900-NEXT: s_setpc_b64 s[30:31] 3895; 3896; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_3_2_2: 3897; GFX90A: ; %bb.0: 3898; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3899; GFX90A-NEXT: ;;#ASMSTART 3900; GFX90A-NEXT: ; def v[0:1] 3901; GFX90A-NEXT: ;;#ASMEND 3902; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 3903; GFX90A-NEXT: v_mov_b32_e32 v4, 0 3904; GFX90A-NEXT: ;;#ASMSTART 3905; GFX90A-NEXT: ; def v[2:3] 3906; GFX90A-NEXT: ;;#ASMEND 3907; GFX90A-NEXT: v_perm_b32 v0, v2, v3, s4 3908; GFX90A-NEXT: v_perm_b32 v1, v1, v1, s4 3909; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 3910; GFX90A-NEXT: s_waitcnt vmcnt(0) 3911; GFX90A-NEXT: s_setpc_b64 s[30:31] 3912; 3913; GFX940-LABEL: 
v_shuffle_v4i16_v3i16__5_3_2_2: 3914; GFX940: ; %bb.0: 3915; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3916; GFX940-NEXT: ;;#ASMSTART 3917; GFX940-NEXT: ; def v[0:1] 3918; GFX940-NEXT: ;;#ASMEND 3919; GFX940-NEXT: s_mov_b32 s2, 0x5040100 3920; GFX940-NEXT: v_mov_b32_e32 v4, 0 3921; GFX940-NEXT: ;;#ASMSTART 3922; GFX940-NEXT: ; def v[2:3] 3923; GFX940-NEXT: ;;#ASMEND 3924; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 3925; GFX940-NEXT: v_perm_b32 v0, v2, v3, s2 3926; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 3927; GFX940-NEXT: s_waitcnt vmcnt(0) 3928; GFX940-NEXT: s_setpc_b64 s[30:31] 3929 %vec0 = call <4 x i16> asm "; def $0", "=v"() 3930 %vec1 = call <4 x i16> asm "; def $0", "=v"() 3931 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 3932 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 3933 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 3, i32 2, i32 2> 3934 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 3935 ret void 3936} 3937 3938define void @v_shuffle_v4i16_v3i16__5_4_2_2(ptr addrspace(1) inreg %ptr) { 3939; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_4_2_2: 3940; GFX900: ; %bb.0: 3941; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3942; GFX900-NEXT: ;;#ASMSTART 3943; GFX900-NEXT: ; def v[0:1] 3944; GFX900-NEXT: ;;#ASMEND 3945; GFX900-NEXT: s_mov_b32 s4, 0xffff 3946; GFX900-NEXT: ;;#ASMSTART 3947; GFX900-NEXT: ; def v[2:3] 3948; GFX900-NEXT: ;;#ASMEND 3949; GFX900-NEXT: v_bfi_b32 v0, s4, v3, v2 3950; GFX900-NEXT: s_mov_b32 s4, 0x5040100 3951; GFX900-NEXT: v_mov_b32_e32 v4, 0 3952; GFX900-NEXT: v_perm_b32 v1, v1, v1, s4 3953; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 3954; GFX900-NEXT: s_waitcnt vmcnt(0) 3955; GFX900-NEXT: s_setpc_b64 s[30:31] 3956; 3957; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_4_2_2: 3958; GFX90A: ; %bb.0: 3959; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3960; 
GFX90A-NEXT: ;;#ASMSTART 3961; GFX90A-NEXT: ; def v[0:1] 3962; GFX90A-NEXT: ;;#ASMEND 3963; GFX90A-NEXT: s_mov_b32 s4, 0xffff 3964; GFX90A-NEXT: ;;#ASMSTART 3965; GFX90A-NEXT: ; def v[2:3] 3966; GFX90A-NEXT: ;;#ASMEND 3967; GFX90A-NEXT: v_bfi_b32 v0, s4, v3, v2 3968; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 3969; GFX90A-NEXT: v_mov_b32_e32 v4, 0 3970; GFX90A-NEXT: v_perm_b32 v1, v1, v1, s4 3971; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 3972; GFX90A-NEXT: s_waitcnt vmcnt(0) 3973; GFX90A-NEXT: s_setpc_b64 s[30:31] 3974; 3975; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_4_2_2: 3976; GFX940: ; %bb.0: 3977; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3978; GFX940-NEXT: ;;#ASMSTART 3979; GFX940-NEXT: ; def v[0:1] 3980; GFX940-NEXT: ;;#ASMEND 3981; GFX940-NEXT: s_mov_b32 s2, 0xffff 3982; GFX940-NEXT: ;;#ASMSTART 3983; GFX940-NEXT: ; def v[2:3] 3984; GFX940-NEXT: ;;#ASMEND 3985; GFX940-NEXT: v_mov_b32_e32 v4, 0 3986; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v2 3987; GFX940-NEXT: s_mov_b32 s2, 0x5040100 3988; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 3989; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 3990; GFX940-NEXT: s_waitcnt vmcnt(0) 3991; GFX940-NEXT: s_setpc_b64 s[30:31] 3992 %vec0 = call <4 x i16> asm "; def $0", "=v"() 3993 %vec1 = call <4 x i16> asm "; def $0", "=v"() 3994 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 3995 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 3996 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 4, i32 2, i32 2> 3997 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 3998 ret void 3999} 4000 4001define void @v_shuffle_v4i16_v3i16__5_5_2_2(ptr addrspace(1) inreg %ptr) { 4002; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_5_2_2: 4003; GFX900: ; %bb.0: 4004; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4005; GFX900-NEXT: ;;#ASMSTART 4006; GFX900-NEXT: ; def v[0:1] 4007; GFX900-NEXT: 
;;#ASMEND 4008; GFX900-NEXT: s_mov_b32 s4, 0x5040100 4009; GFX900-NEXT: v_mov_b32_e32 v4, 0 4010; GFX900-NEXT: ;;#ASMSTART 4011; GFX900-NEXT: ; def v[2:3] 4012; GFX900-NEXT: ;;#ASMEND 4013; GFX900-NEXT: v_perm_b32 v1, v1, v1, s4 4014; GFX900-NEXT: v_perm_b32 v0, v3, v3, s4 4015; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 4016; GFX900-NEXT: s_waitcnt vmcnt(0) 4017; GFX900-NEXT: s_setpc_b64 s[30:31] 4018; 4019; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_5_2_2: 4020; GFX90A: ; %bb.0: 4021; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4022; GFX90A-NEXT: ;;#ASMSTART 4023; GFX90A-NEXT: ; def v[0:1] 4024; GFX90A-NEXT: ;;#ASMEND 4025; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 4026; GFX90A-NEXT: v_mov_b32_e32 v4, 0 4027; GFX90A-NEXT: ;;#ASMSTART 4028; GFX90A-NEXT: ; def v[2:3] 4029; GFX90A-NEXT: ;;#ASMEND 4030; GFX90A-NEXT: v_perm_b32 v1, v1, v1, s4 4031; GFX90A-NEXT: v_perm_b32 v0, v3, v3, s4 4032; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 4033; GFX90A-NEXT: s_waitcnt vmcnt(0) 4034; GFX90A-NEXT: s_setpc_b64 s[30:31] 4035; 4036; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_2_2: 4037; GFX940: ; %bb.0: 4038; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4039; GFX940-NEXT: ;;#ASMSTART 4040; GFX940-NEXT: ; def v[0:1] 4041; GFX940-NEXT: ;;#ASMEND 4042; GFX940-NEXT: s_mov_b32 s2, 0x5040100 4043; GFX940-NEXT: v_mov_b32_e32 v4, 0 4044; GFX940-NEXT: ;;#ASMSTART 4045; GFX940-NEXT: ; def v[2:3] 4046; GFX940-NEXT: ;;#ASMEND 4047; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 4048; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 4049; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 4050; GFX940-NEXT: s_waitcnt vmcnt(0) 4051; GFX940-NEXT: s_setpc_b64 s[30:31] 4052 %vec0 = call <4 x i16> asm "; def $0", "=v"() 4053 %vec1 = call <4 x i16> asm "; def $0", "=v"() 4054 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 4055 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 4056 %shuf 
= shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 2, i32 2> 4057 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 4058 ret void 4059} 4060 4061define void @v_shuffle_v4i16_v3i16__5_5_u_2(ptr addrspace(1) inreg %ptr) { 4062; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_5_u_2: 4063; GFX900: ; %bb.0: 4064; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4065; GFX900-NEXT: ;;#ASMSTART 4066; GFX900-NEXT: ; def v[0:1] 4067; GFX900-NEXT: ;;#ASMEND 4068; GFX900-NEXT: s_mov_b32 s4, 0x5040100 4069; GFX900-NEXT: v_mov_b32_e32 v4, 0 4070; GFX900-NEXT: ;;#ASMSTART 4071; GFX900-NEXT: ; def v[2:3] 4072; GFX900-NEXT: ;;#ASMEND 4073; GFX900-NEXT: v_perm_b32 v0, v3, v3, s4 4074; GFX900-NEXT: v_lshlrev_b32_e32 v1, 16, v1 4075; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 4076; GFX900-NEXT: s_waitcnt vmcnt(0) 4077; GFX900-NEXT: s_setpc_b64 s[30:31] 4078; 4079; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_5_u_2: 4080; GFX90A: ; %bb.0: 4081; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4082; GFX90A-NEXT: ;;#ASMSTART 4083; GFX90A-NEXT: ; def v[0:1] 4084; GFX90A-NEXT: ;;#ASMEND 4085; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 4086; GFX90A-NEXT: v_mov_b32_e32 v4, 0 4087; GFX90A-NEXT: ;;#ASMSTART 4088; GFX90A-NEXT: ; def v[2:3] 4089; GFX90A-NEXT: ;;#ASMEND 4090; GFX90A-NEXT: v_perm_b32 v0, v3, v3, s4 4091; GFX90A-NEXT: v_lshlrev_b32_e32 v1, 16, v1 4092; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 4093; GFX90A-NEXT: s_waitcnt vmcnt(0) 4094; GFX90A-NEXT: s_setpc_b64 s[30:31] 4095; 4096; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_u_2: 4097; GFX940: ; %bb.0: 4098; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4099; GFX940-NEXT: ;;#ASMSTART 4100; GFX940-NEXT: ; def v[0:1] 4101; GFX940-NEXT: ;;#ASMEND 4102; GFX940-NEXT: s_mov_b32 s2, 0x5040100 4103; GFX940-NEXT: v_mov_b32_e32 v4, 0 4104; GFX940-NEXT: ;;#ASMSTART 4105; GFX940-NEXT: ; def v[2:3] 4106; GFX940-NEXT: ;;#ASMEND 4107; GFX940-NEXT: v_lshlrev_b32_e32 v1, 16, v1 4108; 
GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 4109; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 4110; GFX940-NEXT: s_waitcnt vmcnt(0) 4111; GFX940-NEXT: s_setpc_b64 s[30:31] 4112 %vec0 = call <4 x i16> asm "; def $0", "=v"() 4113 %vec1 = call <4 x i16> asm "; def $0", "=v"() 4114 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 4115 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 4116 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 poison, i32 2> 4117 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 4118 ret void 4119} 4120 4121define void @v_shuffle_v4i16_v3i16__5_5_0_2(ptr addrspace(1) inreg %ptr) { 4122; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_5_0_2: 4123; GFX900: ; %bb.0: 4124; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4125; GFX900-NEXT: ;;#ASMSTART 4126; GFX900-NEXT: ; def v[0:1] 4127; GFX900-NEXT: ;;#ASMEND 4128; GFX900-NEXT: s_mov_b32 s4, 0x5040100 4129; GFX900-NEXT: v_mov_b32_e32 v4, 0 4130; GFX900-NEXT: ;;#ASMSTART 4131; GFX900-NEXT: ; def v[2:3] 4132; GFX900-NEXT: ;;#ASMEND 4133; GFX900-NEXT: v_perm_b32 v1, v1, v0, s4 4134; GFX900-NEXT: v_perm_b32 v0, v3, v3, s4 4135; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 4136; GFX900-NEXT: s_waitcnt vmcnt(0) 4137; GFX900-NEXT: s_setpc_b64 s[30:31] 4138; 4139; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_5_0_2: 4140; GFX90A: ; %bb.0: 4141; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4142; GFX90A-NEXT: ;;#ASMSTART 4143; GFX90A-NEXT: ; def v[0:1] 4144; GFX90A-NEXT: ;;#ASMEND 4145; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 4146; GFX90A-NEXT: v_mov_b32_e32 v4, 0 4147; GFX90A-NEXT: ;;#ASMSTART 4148; GFX90A-NEXT: ; def v[2:3] 4149; GFX90A-NEXT: ;;#ASMEND 4150; GFX90A-NEXT: v_perm_b32 v1, v1, v0, s4 4151; GFX90A-NEXT: v_perm_b32 v0, v3, v3, s4 4152; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 4153; GFX90A-NEXT: s_waitcnt vmcnt(0) 4154; 
GFX90A-NEXT: s_setpc_b64 s[30:31] 4155; 4156; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_0_2: 4157; GFX940: ; %bb.0: 4158; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4159; GFX940-NEXT: ;;#ASMSTART 4160; GFX940-NEXT: ; def v[0:1] 4161; GFX940-NEXT: ;;#ASMEND 4162; GFX940-NEXT: s_mov_b32 s2, 0x5040100 4163; GFX940-NEXT: v_mov_b32_e32 v4, 0 4164; GFX940-NEXT: ;;#ASMSTART 4165; GFX940-NEXT: ; def v[2:3] 4166; GFX940-NEXT: ;;#ASMEND 4167; GFX940-NEXT: v_perm_b32 v1, v1, v0, s2 4168; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 4169; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 4170; GFX940-NEXT: s_waitcnt vmcnt(0) 4171; GFX940-NEXT: s_setpc_b64 s[30:31] 4172 %vec0 = call <4 x i16> asm "; def $0", "=v"() 4173 %vec1 = call <4 x i16> asm "; def $0", "=v"() 4174 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 4175 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 4176 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 0, i32 2> 4177 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 4178 ret void 4179} 4180 4181define void @v_shuffle_v4i16_v3i16__5_5_1_2(ptr addrspace(1) inreg %ptr) { 4182; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_5_1_2: 4183; GFX900: ; %bb.0: 4184; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4185; GFX900-NEXT: ;;#ASMSTART 4186; GFX900-NEXT: ; def v[0:1] 4187; GFX900-NEXT: ;;#ASMEND 4188; GFX900-NEXT: s_mov_b32 s4, 0x5040100 4189; GFX900-NEXT: v_mov_b32_e32 v4, 0 4190; GFX900-NEXT: ;;#ASMSTART 4191; GFX900-NEXT: ; def v[2:3] 4192; GFX900-NEXT: ;;#ASMEND 4193; GFX900-NEXT: v_alignbit_b32 v1, v1, v0, 16 4194; GFX900-NEXT: v_perm_b32 v0, v3, v3, s4 4195; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 4196; GFX900-NEXT: s_waitcnt vmcnt(0) 4197; GFX900-NEXT: s_setpc_b64 s[30:31] 4198; 4199; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_5_1_2: 4200; GFX90A: ; %bb.0: 4201; GFX90A-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) 4202; GFX90A-NEXT: ;;#ASMSTART 4203; GFX90A-NEXT: ; def v[0:1] 4204; GFX90A-NEXT: ;;#ASMEND 4205; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 4206; GFX90A-NEXT: v_mov_b32_e32 v4, 0 4207; GFX90A-NEXT: ;;#ASMSTART 4208; GFX90A-NEXT: ; def v[2:3] 4209; GFX90A-NEXT: ;;#ASMEND 4210; GFX90A-NEXT: v_alignbit_b32 v1, v1, v0, 16 4211; GFX90A-NEXT: v_perm_b32 v0, v3, v3, s4 4212; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 4213; GFX90A-NEXT: s_waitcnt vmcnt(0) 4214; GFX90A-NEXT: s_setpc_b64 s[30:31] 4215; 4216; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_1_2: 4217; GFX940: ; %bb.0: 4218; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4219; GFX940-NEXT: ;;#ASMSTART 4220; GFX940-NEXT: ; def v[0:1] 4221; GFX940-NEXT: ;;#ASMEND 4222; GFX940-NEXT: s_mov_b32 s2, 0x5040100 4223; GFX940-NEXT: v_mov_b32_e32 v4, 0 4224; GFX940-NEXT: ;;#ASMSTART 4225; GFX940-NEXT: ; def v[2:3] 4226; GFX940-NEXT: ;;#ASMEND 4227; GFX940-NEXT: v_alignbit_b32 v1, v1, v0, 16 4228; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 4229; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 4230; GFX940-NEXT: s_waitcnt vmcnt(0) 4231; GFX940-NEXT: s_setpc_b64 s[30:31] 4232 %vec0 = call <4 x i16> asm "; def $0", "=v"() 4233 %vec1 = call <4 x i16> asm "; def $0", "=v"() 4234 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 4235 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 4236 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 1, i32 2> 4237 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 4238 ret void 4239} 4240 4241define void @v_shuffle_v4i16_v3i16__5_5_3_2(ptr addrspace(1) inreg %ptr) { 4242; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_5_3_2: 4243; GFX900: ; %bb.0: 4244; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4245; GFX900-NEXT: ;;#ASMSTART 4246; GFX900-NEXT: ; def v[0:1] 4247; GFX900-NEXT: ;;#ASMEND 4248; GFX900-NEXT: s_mov_b32 s4, 
0x5040100 4249; GFX900-NEXT: v_mov_b32_e32 v4, 0 4250; GFX900-NEXT: ;;#ASMSTART 4251; GFX900-NEXT: ; def v[2:3] 4252; GFX900-NEXT: ;;#ASMEND 4253; GFX900-NEXT: v_perm_b32 v1, v1, v2, s4 4254; GFX900-NEXT: v_perm_b32 v0, v3, v3, s4 4255; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 4256; GFX900-NEXT: s_waitcnt vmcnt(0) 4257; GFX900-NEXT: s_setpc_b64 s[30:31] 4258; 4259; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_5_3_2: 4260; GFX90A: ; %bb.0: 4261; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4262; GFX90A-NEXT: ;;#ASMSTART 4263; GFX90A-NEXT: ; def v[0:1] 4264; GFX90A-NEXT: ;;#ASMEND 4265; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 4266; GFX90A-NEXT: v_mov_b32_e32 v4, 0 4267; GFX90A-NEXT: ;;#ASMSTART 4268; GFX90A-NEXT: ; def v[2:3] 4269; GFX90A-NEXT: ;;#ASMEND 4270; GFX90A-NEXT: v_perm_b32 v1, v1, v2, s4 4271; GFX90A-NEXT: v_perm_b32 v0, v3, v3, s4 4272; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 4273; GFX90A-NEXT: s_waitcnt vmcnt(0) 4274; GFX90A-NEXT: s_setpc_b64 s[30:31] 4275; 4276; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_3_2: 4277; GFX940: ; %bb.0: 4278; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4279; GFX940-NEXT: ;;#ASMSTART 4280; GFX940-NEXT: ; def v[0:1] 4281; GFX940-NEXT: ;;#ASMEND 4282; GFX940-NEXT: s_mov_b32 s2, 0x5040100 4283; GFX940-NEXT: v_mov_b32_e32 v4, 0 4284; GFX940-NEXT: ;;#ASMSTART 4285; GFX940-NEXT: ; def v[2:3] 4286; GFX940-NEXT: ;;#ASMEND 4287; GFX940-NEXT: s_nop 0 4288; GFX940-NEXT: v_perm_b32 v1, v1, v2, s2 4289; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 4290; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 4291; GFX940-NEXT: s_waitcnt vmcnt(0) 4292; GFX940-NEXT: s_setpc_b64 s[30:31] 4293 %vec0 = call <4 x i16> asm "; def $0", "=v"() 4294 %vec1 = call <4 x i16> asm "; def $0", "=v"() 4295 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 4296 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 4297 %shuf = shufflevector 
<3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 3, i32 2> 4298 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 4299 ret void 4300} 4301 4302define void @v_shuffle_v4i16_v3i16__5_5_4_2(ptr addrspace(1) inreg %ptr) { 4303; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_5_4_2: 4304; GFX900: ; %bb.0: 4305; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4306; GFX900-NEXT: ;;#ASMSTART 4307; GFX900-NEXT: ; def v[0:1] 4308; GFX900-NEXT: ;;#ASMEND 4309; GFX900-NEXT: s_mov_b32 s4, 0x5040100 4310; GFX900-NEXT: v_mov_b32_e32 v4, 0 4311; GFX900-NEXT: ;;#ASMSTART 4312; GFX900-NEXT: ; def v[2:3] 4313; GFX900-NEXT: ;;#ASMEND 4314; GFX900-NEXT: v_alignbit_b32 v1, v1, v2, 16 4315; GFX900-NEXT: v_perm_b32 v0, v3, v3, s4 4316; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 4317; GFX900-NEXT: s_waitcnt vmcnt(0) 4318; GFX900-NEXT: s_setpc_b64 s[30:31] 4319; 4320; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_5_4_2: 4321; GFX90A: ; %bb.0: 4322; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4323; GFX90A-NEXT: ;;#ASMSTART 4324; GFX90A-NEXT: ; def v[0:1] 4325; GFX90A-NEXT: ;;#ASMEND 4326; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 4327; GFX90A-NEXT: v_mov_b32_e32 v4, 0 4328; GFX90A-NEXT: ;;#ASMSTART 4329; GFX90A-NEXT: ; def v[2:3] 4330; GFX90A-NEXT: ;;#ASMEND 4331; GFX90A-NEXT: v_alignbit_b32 v1, v1, v2, 16 4332; GFX90A-NEXT: v_perm_b32 v0, v3, v3, s4 4333; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 4334; GFX90A-NEXT: s_waitcnt vmcnt(0) 4335; GFX90A-NEXT: s_setpc_b64 s[30:31] 4336; 4337; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_4_2: 4338; GFX940: ; %bb.0: 4339; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4340; GFX940-NEXT: ;;#ASMSTART 4341; GFX940-NEXT: ; def v[0:1] 4342; GFX940-NEXT: ;;#ASMEND 4343; GFX940-NEXT: s_mov_b32 s2, 0x5040100 4344; GFX940-NEXT: v_mov_b32_e32 v4, 0 4345; GFX940-NEXT: ;;#ASMSTART 4346; GFX940-NEXT: ; def v[2:3] 4347; GFX940-NEXT: ;;#ASMEND 4348; GFX940-NEXT: s_nop 0 4349; GFX940-NEXT: v_alignbit_b32 v1, v1, v2, 16 
4350; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 4351; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 4352; GFX940-NEXT: s_waitcnt vmcnt(0) 4353; GFX940-NEXT: s_setpc_b64 s[30:31] 4354 %vec0 = call <4 x i16> asm "; def $0", "=v"() 4355 %vec1 = call <4 x i16> asm "; def $0", "=v"() 4356 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 4357 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 4358 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 4, i32 2> 4359 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 4360 ret void 4361} 4362 4363define void @v_shuffle_v4i16_v3i16__u_3_3_3(ptr addrspace(1) inreg %ptr) { 4364; GFX9-LABEL: v_shuffle_v4i16_v3i16__u_3_3_3: 4365; GFX9: ; %bb.0: 4366; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4367; GFX9-NEXT: s_setpc_b64 s[30:31] 4368 %vec0 = call <4 x i16> asm "; def $0", "=v"() 4369 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 4370 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> <i32 poison, i32 3, i32 3, i32 3> 4371 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 4372 ret void 4373} 4374 4375define void @v_shuffle_v4i16_v3i16__0_3_3_3(ptr addrspace(1) inreg %ptr) { 4376; GFX900-LABEL: v_shuffle_v4i16_v3i16__0_3_3_3: 4377; GFX900: ; %bb.0: 4378; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4379; GFX900-NEXT: v_mov_b32_e32 v2, 0 4380; GFX900-NEXT: ;;#ASMSTART 4381; GFX900-NEXT: ; def v[0:1] 4382; GFX900-NEXT: ;;#ASMEND 4383; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17] 4384; GFX900-NEXT: s_waitcnt vmcnt(0) 4385; GFX900-NEXT: s_setpc_b64 s[30:31] 4386; 4387; GFX90A-LABEL: v_shuffle_v4i16_v3i16__0_3_3_3: 4388; GFX90A: ; %bb.0: 4389; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4390; GFX90A-NEXT: v_mov_b32_e32 v2, 0 4391; GFX90A-NEXT: ;;#ASMSTART 4392; GFX90A-NEXT: ; def v[0:1] 
4393; GFX90A-NEXT: ;;#ASMEND 4394; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17] 4395; GFX90A-NEXT: s_waitcnt vmcnt(0) 4396; GFX90A-NEXT: s_setpc_b64 s[30:31] 4397; 4398; GFX940-LABEL: v_shuffle_v4i16_v3i16__0_3_3_3: 4399; GFX940: ; %bb.0: 4400; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4401; GFX940-NEXT: v_mov_b32_e32 v2, 0 4402; GFX940-NEXT: ;;#ASMSTART 4403; GFX940-NEXT: ; def v[0:1] 4404; GFX940-NEXT: ;;#ASMEND 4405; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 4406; GFX940-NEXT: s_waitcnt vmcnt(0) 4407; GFX940-NEXT: s_setpc_b64 s[30:31] 4408 %vec0 = call <4 x i16> asm "; def $0", "=v"() 4409 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 4410 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> <i32 0, i32 3, i32 3, i32 3> 4411 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 4412 ret void 4413} 4414 4415define void @v_shuffle_v4i16_v3i16__1_3_3_3(ptr addrspace(1) inreg %ptr) { 4416; GFX900-LABEL: v_shuffle_v4i16_v3i16__1_3_3_3: 4417; GFX900: ; %bb.0: 4418; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4419; GFX900-NEXT: ;;#ASMSTART 4420; GFX900-NEXT: ; def v[0:1] 4421; GFX900-NEXT: ;;#ASMEND 4422; GFX900-NEXT: v_mov_b32_e32 v2, 0 4423; GFX900-NEXT: v_alignbit_b32 v0, s4, v0, 16 4424; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17] 4425; GFX900-NEXT: s_waitcnt vmcnt(0) 4426; GFX900-NEXT: s_setpc_b64 s[30:31] 4427; 4428; GFX90A-LABEL: v_shuffle_v4i16_v3i16__1_3_3_3: 4429; GFX90A: ; %bb.0: 4430; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4431; GFX90A-NEXT: ;;#ASMSTART 4432; GFX90A-NEXT: ; def v[0:1] 4433; GFX90A-NEXT: ;;#ASMEND 4434; GFX90A-NEXT: v_mov_b32_e32 v2, 0 4435; GFX90A-NEXT: v_alignbit_b32 v0, s4, v0, 16 4436; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17] 4437; GFX90A-NEXT: s_waitcnt vmcnt(0) 4438; GFX90A-NEXT: s_setpc_b64 s[30:31] 4439; 4440; GFX940-LABEL: v_shuffle_v4i16_v3i16__1_3_3_3: 4441; GFX940: ; %bb.0: 4442; 
GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4443; GFX940-NEXT: ;;#ASMSTART 4444; GFX940-NEXT: ; def v[0:1] 4445; GFX940-NEXT: ;;#ASMEND 4446; GFX940-NEXT: v_mov_b32_e32 v2, 0 4447; GFX940-NEXT: v_alignbit_b32 v0, s0, v0, 16 4448; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 4449; GFX940-NEXT: s_waitcnt vmcnt(0) 4450; GFX940-NEXT: s_setpc_b64 s[30:31] 4451 %vec0 = call <4 x i16> asm "; def $0", "=v"() 4452 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 4453 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> <i32 1, i32 3, i32 3, i32 3> 4454 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 4455 ret void 4456} 4457 4458define void @v_shuffle_v4i16_v3i16__2_3_3_3(ptr addrspace(1) inreg %ptr) { 4459; GFX900-LABEL: v_shuffle_v4i16_v3i16__2_3_3_3: 4460; GFX900: ; %bb.0: 4461; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4462; GFX900-NEXT: ;;#ASMSTART 4463; GFX900-NEXT: ; def v[0:1] 4464; GFX900-NEXT: ;;#ASMEND 4465; GFX900-NEXT: v_mov_b32_e32 v2, 0 4466; GFX900-NEXT: v_mov_b32_e32 v0, v1 4467; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17] 4468; GFX900-NEXT: s_waitcnt vmcnt(0) 4469; GFX900-NEXT: s_setpc_b64 s[30:31] 4470; 4471; GFX90A-LABEL: v_shuffle_v4i16_v3i16__2_3_3_3: 4472; GFX90A: ; %bb.0: 4473; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4474; GFX90A-NEXT: ;;#ASMSTART 4475; GFX90A-NEXT: ; def v[0:1] 4476; GFX90A-NEXT: ;;#ASMEND 4477; GFX90A-NEXT: v_mov_b32_e32 v2, 0 4478; GFX90A-NEXT: v_mov_b32_e32 v0, v1 4479; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17] 4480; GFX90A-NEXT: s_waitcnt vmcnt(0) 4481; GFX90A-NEXT: s_setpc_b64 s[30:31] 4482; 4483; GFX940-LABEL: v_shuffle_v4i16_v3i16__2_3_3_3: 4484; GFX940: ; %bb.0: 4485; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4486; GFX940-NEXT: ;;#ASMSTART 4487; GFX940-NEXT: ; def v[0:1] 4488; GFX940-NEXT: ;;#ASMEND 4489; GFX940-NEXT: v_mov_b32_e32 v2, 0 4490; GFX940-NEXT: v_mov_b32_e32 v0, v1 
4491; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 4492; GFX940-NEXT: s_waitcnt vmcnt(0) 4493; GFX940-NEXT: s_setpc_b64 s[30:31] 4494 %vec0 = call <4 x i16> asm "; def $0", "=v"() 4495 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 4496 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> <i32 2, i32 3, i32 3, i32 3> 4497 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 4498 ret void 4499} 4500 4501define void @v_shuffle_v4i16_v3i16__3_3_3_3(ptr addrspace(1) inreg %ptr) { 4502; GFX9-LABEL: v_shuffle_v4i16_v3i16__3_3_3_3: 4503; GFX9: ; %bb.0: 4504; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4505; GFX9-NEXT: s_setpc_b64 s[30:31] 4506 %vec0 = call <4 x i16> asm "; def $0", "=v"() 4507 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 4508 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 4509 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 4510 ret void 4511} 4512 4513define void @v_shuffle_v4i16_v3i16__4_3_3_3(ptr addrspace(1) inreg %ptr) { 4514; GFX900-LABEL: v_shuffle_v4i16_v3i16__4_3_3_3: 4515; GFX900: ; %bb.0: 4516; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4517; GFX900-NEXT: ;;#ASMSTART 4518; GFX900-NEXT: ; def v[0:1] 4519; GFX900-NEXT: ;;#ASMEND 4520; GFX900-NEXT: s_mov_b32 s4, 0x5040100 4521; GFX900-NEXT: v_mov_b32_e32 v2, 0 4522; GFX900-NEXT: v_perm_b32 v1, v0, v0, s4 4523; GFX900-NEXT: v_alignbit_b32 v0, v0, v0, 16 4524; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17] 4525; GFX900-NEXT: s_waitcnt vmcnt(0) 4526; GFX900-NEXT: s_setpc_b64 s[30:31] 4527; 4528; GFX90A-LABEL: v_shuffle_v4i16_v3i16__4_3_3_3: 4529; GFX90A: ; %bb.0: 4530; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4531; GFX90A-NEXT: ;;#ASMSTART 4532; GFX90A-NEXT: ; def v[0:1] 4533; GFX90A-NEXT: ;;#ASMEND 4534; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 4535; GFX90A-NEXT: v_mov_b32_e32 v2, 0 
4536; GFX90A-NEXT: v_perm_b32 v1, v0, v0, s4 4537; GFX90A-NEXT: v_alignbit_b32 v0, v0, v0, 16 4538; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17] 4539; GFX90A-NEXT: s_waitcnt vmcnt(0) 4540; GFX90A-NEXT: s_setpc_b64 s[30:31] 4541; 4542; GFX940-LABEL: v_shuffle_v4i16_v3i16__4_3_3_3: 4543; GFX940: ; %bb.0: 4544; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4545; GFX940-NEXT: ;;#ASMSTART 4546; GFX940-NEXT: ; def v[0:1] 4547; GFX940-NEXT: ;;#ASMEND 4548; GFX940-NEXT: s_mov_b32 s2, 0x5040100 4549; GFX940-NEXT: v_mov_b32_e32 v2, 0 4550; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 4551; GFX940-NEXT: v_alignbit_b32 v0, v0, v0, 16 4552; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 4553; GFX940-NEXT: s_waitcnt vmcnt(0) 4554; GFX940-NEXT: s_setpc_b64 s[30:31] 4555 %vec0 = call <4 x i16> asm "; def $0", "=v"() 4556 %vec1 = call <4 x i16> asm "; def $0", "=v"() 4557 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 4558 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 4559 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 4, i32 3, i32 3, i32 3> 4560 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 4561 ret void 4562} 4563 4564define void @v_shuffle_v4i16_v3i16__5_3_3_3(ptr addrspace(1) inreg %ptr) { 4565; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_3_3_3: 4566; GFX900: ; %bb.0: 4567; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4568; GFX900-NEXT: ;;#ASMSTART 4569; GFX900-NEXT: ; def v[0:1] 4570; GFX900-NEXT: ;;#ASMEND 4571; GFX900-NEXT: s_mov_b32 s4, 0x5040100 4572; GFX900-NEXT: v_mov_b32_e32 v3, 0 4573; GFX900-NEXT: v_perm_b32 v1, v0, v1, s4 4574; GFX900-NEXT: v_perm_b32 v2, v0, v0, s4 4575; GFX900-NEXT: global_store_dwordx2 v3, v[1:2], s[16:17] 4576; GFX900-NEXT: s_waitcnt vmcnt(0) 4577; GFX900-NEXT: s_setpc_b64 s[30:31] 4578; 4579; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_3_3_3: 4580; GFX90A: ; %bb.0: 4581; GFX90A-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4582; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 4583; GFX90A-NEXT: v_mov_b32_e32 v4, 0 4584; GFX90A-NEXT: ;;#ASMSTART 4585; GFX90A-NEXT: ; def v[0:1] 4586; GFX90A-NEXT: ;;#ASMEND 4587; GFX90A-NEXT: v_perm_b32 v2, v0, v1, s4 4588; GFX90A-NEXT: v_perm_b32 v3, v0, v0, s4 4589; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17] 4590; GFX90A-NEXT: s_waitcnt vmcnt(0) 4591; GFX90A-NEXT: s_setpc_b64 s[30:31] 4592; 4593; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_3_3_3: 4594; GFX940: ; %bb.0: 4595; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4596; GFX940-NEXT: s_mov_b32 s2, 0x5040100 4597; GFX940-NEXT: v_mov_b32_e32 v4, 0 4598; GFX940-NEXT: ;;#ASMSTART 4599; GFX940-NEXT: ; def v[0:1] 4600; GFX940-NEXT: ;;#ASMEND 4601; GFX940-NEXT: s_nop 0 4602; GFX940-NEXT: v_perm_b32 v2, v0, v1, s2 4603; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 4604; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 4605; GFX940-NEXT: s_waitcnt vmcnt(0) 4606; GFX940-NEXT: s_setpc_b64 s[30:31] 4607 %vec0 = call <4 x i16> asm "; def $0", "=v"() 4608 %vec1 = call <4 x i16> asm "; def $0", "=v"() 4609 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 4610 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 4611 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 3, i32 3, i32 3> 4612 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 4613 ret void 4614} 4615 4616define void @v_shuffle_v4i16_v3i16__5_u_3_3(ptr addrspace(1) inreg %ptr) { 4617; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_u_3_3: 4618; GFX900: ; %bb.0: 4619; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4620; GFX900-NEXT: s_mov_b32 s4, 0x5040100 4621; GFX900-NEXT: v_mov_b32_e32 v3, 0 4622; GFX900-NEXT: ;;#ASMSTART 4623; GFX900-NEXT: ; def v[0:1] 4624; GFX900-NEXT: ;;#ASMEND 4625; GFX900-NEXT: v_perm_b32 v2, v0, v0, s4 4626; GFX900-NEXT: global_store_dwordx2 v3, v[1:2], 
s[16:17] 4627; GFX900-NEXT: s_waitcnt vmcnt(0) 4628; GFX900-NEXT: s_setpc_b64 s[30:31] 4629; 4630; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_u_3_3: 4631; GFX90A: ; %bb.0: 4632; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4633; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 4634; GFX90A-NEXT: v_mov_b32_e32 v4, 0 4635; GFX90A-NEXT: ;;#ASMSTART 4636; GFX90A-NEXT: ; def v[0:1] 4637; GFX90A-NEXT: ;;#ASMEND 4638; GFX90A-NEXT: v_perm_b32 v3, v0, v0, s4 4639; GFX90A-NEXT: v_mov_b32_e32 v2, v1 4640; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17] 4641; GFX90A-NEXT: s_waitcnt vmcnt(0) 4642; GFX90A-NEXT: s_setpc_b64 s[30:31] 4643; 4644; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_u_3_3: 4645; GFX940: ; %bb.0: 4646; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4647; GFX940-NEXT: s_mov_b32 s2, 0x5040100 4648; GFX940-NEXT: v_mov_b32_e32 v4, 0 4649; GFX940-NEXT: ;;#ASMSTART 4650; GFX940-NEXT: ; def v[0:1] 4651; GFX940-NEXT: ;;#ASMEND 4652; GFX940-NEXT: s_nop 0 4653; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 4654; GFX940-NEXT: v_mov_b32_e32 v2, v1 4655; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 4656; GFX940-NEXT: s_waitcnt vmcnt(0) 4657; GFX940-NEXT: s_setpc_b64 s[30:31] 4658 %vec0 = call <4 x i16> asm "; def $0", "=v"() 4659 %vec1 = call <4 x i16> asm "; def $0", "=v"() 4660 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 4661 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 4662 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 poison, i32 3, i32 3> 4663 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 4664 ret void 4665} 4666 4667define void @v_shuffle_v4i16_v3i16__5_0_3_3(ptr addrspace(1) inreg %ptr) { 4668; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_0_3_3: 4669; GFX900: ; %bb.0: 4670; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4671; GFX900-NEXT: ;;#ASMSTART 4672; GFX900-NEXT: ; def v[0:1] 4673; GFX900-NEXT: ;;#ASMEND 
4674; GFX900-NEXT: ;;#ASMSTART 4675; GFX900-NEXT: ; def v[1:2] 4676; GFX900-NEXT: ;;#ASMEND 4677; GFX900-NEXT: s_mov_b32 s4, 0x5040100 4678; GFX900-NEXT: v_mov_b32_e32 v3, 0 4679; GFX900-NEXT: v_perm_b32 v0, v0, v2, s4 4680; GFX900-NEXT: v_perm_b32 v1, v1, v1, s4 4681; GFX900-NEXT: global_store_dwordx2 v3, v[0:1], s[16:17] 4682; GFX900-NEXT: s_waitcnt vmcnt(0) 4683; GFX900-NEXT: s_setpc_b64 s[30:31] 4684; 4685; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_0_3_3: 4686; GFX90A: ; %bb.0: 4687; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4688; GFX90A-NEXT: ;;#ASMSTART 4689; GFX90A-NEXT: ; def v[0:1] 4690; GFX90A-NEXT: ;;#ASMEND 4691; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 4692; GFX90A-NEXT: v_mov_b32_e32 v4, 0 4693; GFX90A-NEXT: ;;#ASMSTART 4694; GFX90A-NEXT: ; def v[2:3] 4695; GFX90A-NEXT: ;;#ASMEND 4696; GFX90A-NEXT: v_perm_b32 v0, v0, v3, s4 4697; GFX90A-NEXT: v_perm_b32 v1, v2, v2, s4 4698; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 4699; GFX90A-NEXT: s_waitcnt vmcnt(0) 4700; GFX90A-NEXT: s_setpc_b64 s[30:31] 4701; 4702; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_0_3_3: 4703; GFX940: ; %bb.0: 4704; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4705; GFX940-NEXT: ;;#ASMSTART 4706; GFX940-NEXT: ; def v[0:1] 4707; GFX940-NEXT: ;;#ASMEND 4708; GFX940-NEXT: s_mov_b32 s2, 0x5040100 4709; GFX940-NEXT: v_mov_b32_e32 v4, 0 4710; GFX940-NEXT: ;;#ASMSTART 4711; GFX940-NEXT: ; def v[2:3] 4712; GFX940-NEXT: ;;#ASMEND 4713; GFX940-NEXT: s_nop 0 4714; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 4715; GFX940-NEXT: v_perm_b32 v1, v2, v2, s2 4716; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 4717; GFX940-NEXT: s_waitcnt vmcnt(0) 4718; GFX940-NEXT: s_setpc_b64 s[30:31] 4719 %vec0 = call <4 x i16> asm "; def $0", "=v"() 4720 %vec1 = call <4 x i16> asm "; def $0", "=v"() 4721 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 4722 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, 
i32 2> 4723 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 0, i32 3, i32 3> 4724 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 4725 ret void 4726} 4727 4728define void @v_shuffle_v4i16_v3i16__5_1_3_3(ptr addrspace(1) inreg %ptr) { 4729; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_1_3_3: 4730; GFX900: ; %bb.0: 4731; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4732; GFX900-NEXT: ;;#ASMSTART 4733; GFX900-NEXT: ; def v[0:1] 4734; GFX900-NEXT: ;;#ASMEND 4735; GFX900-NEXT: s_mov_b32 s4, 0xffff 4736; GFX900-NEXT: ;;#ASMSTART 4737; GFX900-NEXT: ; def v[1:2] 4738; GFX900-NEXT: ;;#ASMEND 4739; GFX900-NEXT: v_bfi_b32 v0, s4, v2, v0 4740; GFX900-NEXT: s_mov_b32 s4, 0x5040100 4741; GFX900-NEXT: v_mov_b32_e32 v3, 0 4742; GFX900-NEXT: v_perm_b32 v1, v1, v1, s4 4743; GFX900-NEXT: global_store_dwordx2 v3, v[0:1], s[16:17] 4744; GFX900-NEXT: s_waitcnt vmcnt(0) 4745; GFX900-NEXT: s_setpc_b64 s[30:31] 4746; 4747; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_1_3_3: 4748; GFX90A: ; %bb.0: 4749; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4750; GFX90A-NEXT: ;;#ASMSTART 4751; GFX90A-NEXT: ; def v[0:1] 4752; GFX90A-NEXT: ;;#ASMEND 4753; GFX90A-NEXT: s_mov_b32 s4, 0xffff 4754; GFX90A-NEXT: ;;#ASMSTART 4755; GFX90A-NEXT: ; def v[2:3] 4756; GFX90A-NEXT: ;;#ASMEND 4757; GFX90A-NEXT: v_bfi_b32 v0, s4, v3, v0 4758; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 4759; GFX90A-NEXT: v_mov_b32_e32 v4, 0 4760; GFX90A-NEXT: v_perm_b32 v1, v2, v2, s4 4761; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 4762; GFX90A-NEXT: s_waitcnt vmcnt(0) 4763; GFX90A-NEXT: s_setpc_b64 s[30:31] 4764; 4765; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_1_3_3: 4766; GFX940: ; %bb.0: 4767; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4768; GFX940-NEXT: ;;#ASMSTART 4769; GFX940-NEXT: ; def v[0:1] 4770; GFX940-NEXT: ;;#ASMEND 4771; GFX940-NEXT: s_mov_b32 s2, 0xffff 4772; GFX940-NEXT: ;;#ASMSTART 4773; GFX940-NEXT: ; def v[2:3] 4774; GFX940-NEXT: ;;#ASMEND 4775; 
GFX940-NEXT: v_mov_b32_e32 v4, 0 4776; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v0 4777; GFX940-NEXT: s_mov_b32 s2, 0x5040100 4778; GFX940-NEXT: v_perm_b32 v1, v2, v2, s2 4779; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 4780; GFX940-NEXT: s_waitcnt vmcnt(0) 4781; GFX940-NEXT: s_setpc_b64 s[30:31] 4782 %vec0 = call <4 x i16> asm "; def $0", "=v"() 4783 %vec1 = call <4 x i16> asm "; def $0", "=v"() 4784 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 4785 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 4786 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 1, i32 3, i32 3> 4787 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 4788 ret void 4789} 4790 4791define void @v_shuffle_v4i16_v3i16__5_2_3_3(ptr addrspace(1) inreg %ptr) { 4792; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_2_3_3: 4793; GFX900: ; %bb.0: 4794; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4795; GFX900-NEXT: ;;#ASMSTART 4796; GFX900-NEXT: ; def v[0:1] 4797; GFX900-NEXT: ;;#ASMEND 4798; GFX900-NEXT: s_mov_b32 s4, 0x5040100 4799; GFX900-NEXT: v_mov_b32_e32 v4, 0 4800; GFX900-NEXT: ;;#ASMSTART 4801; GFX900-NEXT: ; def v[2:3] 4802; GFX900-NEXT: ;;#ASMEND 4803; GFX900-NEXT: v_perm_b32 v0, v1, v3, s4 4804; GFX900-NEXT: v_perm_b32 v1, v2, v2, s4 4805; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 4806; GFX900-NEXT: s_waitcnt vmcnt(0) 4807; GFX900-NEXT: s_setpc_b64 s[30:31] 4808; 4809; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_2_3_3: 4810; GFX90A: ; %bb.0: 4811; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4812; GFX90A-NEXT: ;;#ASMSTART 4813; GFX90A-NEXT: ; def v[0:1] 4814; GFX90A-NEXT: ;;#ASMEND 4815; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 4816; GFX90A-NEXT: v_mov_b32_e32 v4, 0 4817; GFX90A-NEXT: ;;#ASMSTART 4818; GFX90A-NEXT: ; def v[2:3] 4819; GFX90A-NEXT: ;;#ASMEND 4820; GFX90A-NEXT: v_perm_b32 v0, v1, v3, s4 4821; GFX90A-NEXT: v_perm_b32 v1, v2, 
v2, s4 4822; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 4823; GFX90A-NEXT: s_waitcnt vmcnt(0) 4824; GFX90A-NEXT: s_setpc_b64 s[30:31] 4825; 4826; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_2_3_3: 4827; GFX940: ; %bb.0: 4828; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4829; GFX940-NEXT: ;;#ASMSTART 4830; GFX940-NEXT: ; def v[0:1] 4831; GFX940-NEXT: ;;#ASMEND 4832; GFX940-NEXT: s_mov_b32 s2, 0x5040100 4833; GFX940-NEXT: v_mov_b32_e32 v4, 0 4834; GFX940-NEXT: ;;#ASMSTART 4835; GFX940-NEXT: ; def v[2:3] 4836; GFX940-NEXT: ;;#ASMEND 4837; GFX940-NEXT: s_nop 0 4838; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 4839; GFX940-NEXT: v_perm_b32 v1, v2, v2, s2 4840; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 4841; GFX940-NEXT: s_waitcnt vmcnt(0) 4842; GFX940-NEXT: s_setpc_b64 s[30:31] 4843 %vec0 = call <4 x i16> asm "; def $0", "=v"() 4844 %vec1 = call <4 x i16> asm "; def $0", "=v"() 4845 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 4846 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 4847 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 2, i32 3, i32 3> 4848 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 4849 ret void 4850} 4851 4852define void @v_shuffle_v4i16_v3i16__5_4_3_3(ptr addrspace(1) inreg %ptr) { 4853; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_4_3_3: 4854; GFX900: ; %bb.0: 4855; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4856; GFX900-NEXT: ;;#ASMSTART 4857; GFX900-NEXT: ; def v[0:1] 4858; GFX900-NEXT: ;;#ASMEND 4859; GFX900-NEXT: s_mov_b32 s4, 0xffff 4860; GFX900-NEXT: v_bfi_b32 v1, s4, v1, v0 4861; GFX900-NEXT: s_mov_b32 s4, 0x5040100 4862; GFX900-NEXT: v_mov_b32_e32 v3, 0 4863; GFX900-NEXT: v_perm_b32 v2, v0, v0, s4 4864; GFX900-NEXT: global_store_dwordx2 v3, v[1:2], s[16:17] 4865; GFX900-NEXT: s_waitcnt vmcnt(0) 4866; GFX900-NEXT: s_setpc_b64 s[30:31] 4867; 4868; GFX90A-LABEL: 
v_shuffle_v4i16_v3i16__5_4_3_3: 4869; GFX90A: ; %bb.0: 4870; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4871; GFX90A-NEXT: s_mov_b32 s4, 0xffff 4872; GFX90A-NEXT: ;;#ASMSTART 4873; GFX90A-NEXT: ; def v[0:1] 4874; GFX90A-NEXT: ;;#ASMEND 4875; GFX90A-NEXT: v_bfi_b32 v2, s4, v1, v0 4876; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 4877; GFX90A-NEXT: v_mov_b32_e32 v4, 0 4878; GFX90A-NEXT: v_perm_b32 v3, v0, v0, s4 4879; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17] 4880; GFX90A-NEXT: s_waitcnt vmcnt(0) 4881; GFX90A-NEXT: s_setpc_b64 s[30:31] 4882; 4883; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_4_3_3: 4884; GFX940: ; %bb.0: 4885; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4886; GFX940-NEXT: s_mov_b32 s2, 0xffff 4887; GFX940-NEXT: ;;#ASMSTART 4888; GFX940-NEXT: ; def v[0:1] 4889; GFX940-NEXT: ;;#ASMEND 4890; GFX940-NEXT: v_mov_b32_e32 v4, 0 4891; GFX940-NEXT: v_bfi_b32 v2, s2, v1, v0 4892; GFX940-NEXT: s_mov_b32 s2, 0x5040100 4893; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 4894; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 4895; GFX940-NEXT: s_waitcnt vmcnt(0) 4896; GFX940-NEXT: s_setpc_b64 s[30:31] 4897 %vec0 = call <4 x i16> asm "; def $0", "=v"() 4898 %vec1 = call <4 x i16> asm "; def $0", "=v"() 4899 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 4900 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 4901 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 4, i32 3, i32 3> 4902 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 4903 ret void 4904} 4905 4906define void @v_shuffle_v4i16_v3i16__5_5_3_3(ptr addrspace(1) inreg %ptr) { 4907; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_5_3_3: 4908; GFX900: ; %bb.0: 4909; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4910; GFX900-NEXT: ;;#ASMSTART 4911; GFX900-NEXT: ; def v[0:1] 4912; GFX900-NEXT: ;;#ASMEND 4913; GFX900-NEXT: s_mov_b32 s4, 0x5040100 4914; 
GFX900-NEXT: v_mov_b32_e32 v3, 0 4915; GFX900-NEXT: v_perm_b32 v2, v0, v0, s4 4916; GFX900-NEXT: v_perm_b32 v1, v1, v1, s4 4917; GFX900-NEXT: global_store_dwordx2 v3, v[1:2], s[16:17] 4918; GFX900-NEXT: s_waitcnt vmcnt(0) 4919; GFX900-NEXT: s_setpc_b64 s[30:31] 4920; 4921; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_5_3_3: 4922; GFX90A: ; %bb.0: 4923; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4924; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 4925; GFX90A-NEXT: v_mov_b32_e32 v4, 0 4926; GFX90A-NEXT: ;;#ASMSTART 4927; GFX90A-NEXT: ; def v[0:1] 4928; GFX90A-NEXT: ;;#ASMEND 4929; GFX90A-NEXT: v_perm_b32 v3, v0, v0, s4 4930; GFX90A-NEXT: v_perm_b32 v2, v1, v1, s4 4931; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17] 4932; GFX90A-NEXT: s_waitcnt vmcnt(0) 4933; GFX90A-NEXT: s_setpc_b64 s[30:31] 4934; 4935; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_3_3: 4936; GFX940: ; %bb.0: 4937; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4938; GFX940-NEXT: s_mov_b32 s2, 0x5040100 4939; GFX940-NEXT: v_mov_b32_e32 v4, 0 4940; GFX940-NEXT: ;;#ASMSTART 4941; GFX940-NEXT: ; def v[0:1] 4942; GFX940-NEXT: ;;#ASMEND 4943; GFX940-NEXT: s_nop 0 4944; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 4945; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 4946; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 4947; GFX940-NEXT: s_waitcnt vmcnt(0) 4948; GFX940-NEXT: s_setpc_b64 s[30:31] 4949 %vec0 = call <4 x i16> asm "; def $0", "=v"() 4950 %vec1 = call <4 x i16> asm "; def $0", "=v"() 4951 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 4952 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 4953 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 3, i32 3> 4954 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 4955 ret void 4956} 4957 4958define void @v_shuffle_v4i16_v3i16__5_5_u_3(ptr addrspace(1) inreg %ptr) { 4959; GFX900-LABEL: 
v_shuffle_v4i16_v3i16__5_5_u_3: 4960; GFX900: ; %bb.0: 4961; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4962; GFX900-NEXT: ;;#ASMSTART 4963; GFX900-NEXT: ; def v[0:1] 4964; GFX900-NEXT: ;;#ASMEND 4965; GFX900-NEXT: s_mov_b32 s4, 0x5040100 4966; GFX900-NEXT: v_mov_b32_e32 v3, 0 4967; GFX900-NEXT: v_perm_b32 v1, v1, v1, s4 4968; GFX900-NEXT: v_lshlrev_b32_e32 v2, 16, v0 4969; GFX900-NEXT: global_store_dwordx2 v3, v[1:2], s[16:17] 4970; GFX900-NEXT: s_waitcnt vmcnt(0) 4971; GFX900-NEXT: s_setpc_b64 s[30:31] 4972; 4973; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_5_u_3: 4974; GFX90A: ; %bb.0: 4975; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4976; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 4977; GFX90A-NEXT: v_mov_b32_e32 v4, 0 4978; GFX90A-NEXT: ;;#ASMSTART 4979; GFX90A-NEXT: ; def v[0:1] 4980; GFX90A-NEXT: ;;#ASMEND 4981; GFX90A-NEXT: v_perm_b32 v2, v1, v1, s4 4982; GFX90A-NEXT: v_lshlrev_b32_e32 v3, 16, v0 4983; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17] 4984; GFX90A-NEXT: s_waitcnt vmcnt(0) 4985; GFX90A-NEXT: s_setpc_b64 s[30:31] 4986; 4987; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_u_3: 4988; GFX940: ; %bb.0: 4989; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4990; GFX940-NEXT: s_mov_b32 s2, 0x5040100 4991; GFX940-NEXT: v_mov_b32_e32 v4, 0 4992; GFX940-NEXT: ;;#ASMSTART 4993; GFX940-NEXT: ; def v[0:1] 4994; GFX940-NEXT: ;;#ASMEND 4995; GFX940-NEXT: s_nop 0 4996; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 4997; GFX940-NEXT: v_lshlrev_b32_e32 v3, 16, v0 4998; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 4999; GFX940-NEXT: s_waitcnt vmcnt(0) 5000; GFX940-NEXT: s_setpc_b64 s[30:31] 5001 %vec0 = call <4 x i16> asm "; def $0", "=v"() 5002 %vec1 = call <4 x i16> asm "; def $0", "=v"() 5003 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 5004 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 5005 %shuf = shufflevector <3 x i16> %extract3, <3 x 
i16> %extract31, <4 x i32> <i32 5, i32 5, i32 poison, i32 3> 5006 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 5007 ret void 5008} 5009 5010define void @v_shuffle_v4i16_v3i16__5_5_0_3(ptr addrspace(1) inreg %ptr) { 5011; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_5_0_3: 5012; GFX900: ; %bb.0: 5013; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5014; GFX900-NEXT: ;;#ASMSTART 5015; GFX900-NEXT: ; def v[0:1] 5016; GFX900-NEXT: ;;#ASMEND 5017; GFX900-NEXT: ;;#ASMSTART 5018; GFX900-NEXT: ; def v[1:2] 5019; GFX900-NEXT: ;;#ASMEND 5020; GFX900-NEXT: s_mov_b32 s4, 0x5040100 5021; GFX900-NEXT: v_mov_b32_e32 v3, 0 5022; GFX900-NEXT: v_perm_b32 v1, v1, v0, s4 5023; GFX900-NEXT: v_perm_b32 v0, v2, v2, s4 5024; GFX900-NEXT: global_store_dwordx2 v3, v[0:1], s[16:17] 5025; GFX900-NEXT: s_waitcnt vmcnt(0) 5026; GFX900-NEXT: s_setpc_b64 s[30:31] 5027; 5028; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_5_0_3: 5029; GFX90A: ; %bb.0: 5030; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5031; GFX90A-NEXT: ;;#ASMSTART 5032; GFX90A-NEXT: ; def v[0:1] 5033; GFX90A-NEXT: ;;#ASMEND 5034; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 5035; GFX90A-NEXT: v_mov_b32_e32 v4, 0 5036; GFX90A-NEXT: ;;#ASMSTART 5037; GFX90A-NEXT: ; def v[2:3] 5038; GFX90A-NEXT: ;;#ASMEND 5039; GFX90A-NEXT: v_perm_b32 v1, v2, v0, s4 5040; GFX90A-NEXT: v_perm_b32 v0, v3, v3, s4 5041; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 5042; GFX90A-NEXT: s_waitcnt vmcnt(0) 5043; GFX90A-NEXT: s_setpc_b64 s[30:31] 5044; 5045; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_0_3: 5046; GFX940: ; %bb.0: 5047; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5048; GFX940-NEXT: ;;#ASMSTART 5049; GFX940-NEXT: ; def v[0:1] 5050; GFX940-NEXT: ;;#ASMEND 5051; GFX940-NEXT: s_mov_b32 s2, 0x5040100 5052; GFX940-NEXT: v_mov_b32_e32 v4, 0 5053; GFX940-NEXT: ;;#ASMSTART 5054; GFX940-NEXT: ; def v[2:3] 5055; GFX940-NEXT: ;;#ASMEND 5056; GFX940-NEXT: s_nop 0 5057; GFX940-NEXT: v_perm_b32 v1, v2, v0, s2 5058; GFX940-NEXT: v_perm_b32 v0, 
v3, v3, s2 5059; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 5060; GFX940-NEXT: s_waitcnt vmcnt(0) 5061; GFX940-NEXT: s_setpc_b64 s[30:31] 5062 %vec0 = call <4 x i16> asm "; def $0", "=v"() 5063 %vec1 = call <4 x i16> asm "; def $0", "=v"() 5064 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 5065 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 5066 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 0, i32 3> 5067 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 5068 ret void 5069} 5070 5071define void @v_shuffle_v4i16_v3i16__5_5_1_3(ptr addrspace(1) inreg %ptr) { 5072; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_5_1_3: 5073; GFX900: ; %bb.0: 5074; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5075; GFX900-NEXT: ;;#ASMSTART 5076; GFX900-NEXT: ; def v[0:1] 5077; GFX900-NEXT: ;;#ASMEND 5078; GFX900-NEXT: ;;#ASMSTART 5079; GFX900-NEXT: ; def v[1:2] 5080; GFX900-NEXT: ;;#ASMEND 5081; GFX900-NEXT: s_mov_b32 s4, 0x5040100 5082; GFX900-NEXT: v_mov_b32_e32 v3, 0 5083; GFX900-NEXT: v_alignbit_b32 v1, v1, v0, 16 5084; GFX900-NEXT: v_perm_b32 v0, v2, v2, s4 5085; GFX900-NEXT: global_store_dwordx2 v3, v[0:1], s[16:17] 5086; GFX900-NEXT: s_waitcnt vmcnt(0) 5087; GFX900-NEXT: s_setpc_b64 s[30:31] 5088; 5089; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_5_1_3: 5090; GFX90A: ; %bb.0: 5091; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5092; GFX90A-NEXT: ;;#ASMSTART 5093; GFX90A-NEXT: ; def v[0:1] 5094; GFX90A-NEXT: ;;#ASMEND 5095; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 5096; GFX90A-NEXT: v_mov_b32_e32 v4, 0 5097; GFX90A-NEXT: ;;#ASMSTART 5098; GFX90A-NEXT: ; def v[2:3] 5099; GFX90A-NEXT: ;;#ASMEND 5100; GFX90A-NEXT: v_alignbit_b32 v1, v2, v0, 16 5101; GFX90A-NEXT: v_perm_b32 v0, v3, v3, s4 5102; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 5103; GFX90A-NEXT: s_waitcnt vmcnt(0) 5104; GFX90A-NEXT: s_setpc_b64 s[30:31] 
5105; 5106; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_1_3: 5107; GFX940: ; %bb.0: 5108; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5109; GFX940-NEXT: ;;#ASMSTART 5110; GFX940-NEXT: ; def v[0:1] 5111; GFX940-NEXT: ;;#ASMEND 5112; GFX940-NEXT: s_mov_b32 s2, 0x5040100 5113; GFX940-NEXT: v_mov_b32_e32 v4, 0 5114; GFX940-NEXT: ;;#ASMSTART 5115; GFX940-NEXT: ; def v[2:3] 5116; GFX940-NEXT: ;;#ASMEND 5117; GFX940-NEXT: s_nop 0 5118; GFX940-NEXT: v_alignbit_b32 v1, v2, v0, 16 5119; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 5120; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 5121; GFX940-NEXT: s_waitcnt vmcnt(0) 5122; GFX940-NEXT: s_setpc_b64 s[30:31] 5123 %vec0 = call <4 x i16> asm "; def $0", "=v"() 5124 %vec1 = call <4 x i16> asm "; def $0", "=v"() 5125 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 5126 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 5127 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 1, i32 3> 5128 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 5129 ret void 5130} 5131 5132define void @v_shuffle_v4i16_v3i16__5_5_2_3(ptr addrspace(1) inreg %ptr) { 5133; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_5_2_3: 5134; GFX900: ; %bb.0: 5135; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5136; GFX900-NEXT: ;;#ASMSTART 5137; GFX900-NEXT: ; def v[0:1] 5138; GFX900-NEXT: ;;#ASMEND 5139; GFX900-NEXT: s_mov_b32 s4, 0x5040100 5140; GFX900-NEXT: v_mov_b32_e32 v4, 0 5141; GFX900-NEXT: ;;#ASMSTART 5142; GFX900-NEXT: ; def v[2:3] 5143; GFX900-NEXT: ;;#ASMEND 5144; GFX900-NEXT: v_perm_b32 v1, v2, v1, s4 5145; GFX900-NEXT: v_perm_b32 v0, v3, v3, s4 5146; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 5147; GFX900-NEXT: s_waitcnt vmcnt(0) 5148; GFX900-NEXT: s_setpc_b64 s[30:31] 5149; 5150; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_5_2_3: 5151; GFX90A: ; %bb.0: 5152; GFX90A-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) 5153; GFX90A-NEXT: ;;#ASMSTART 5154; GFX90A-NEXT: ; def v[0:1] 5155; GFX90A-NEXT: ;;#ASMEND 5156; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 5157; GFX90A-NEXT: v_mov_b32_e32 v4, 0 5158; GFX90A-NEXT: ;;#ASMSTART 5159; GFX90A-NEXT: ; def v[2:3] 5160; GFX90A-NEXT: ;;#ASMEND 5161; GFX90A-NEXT: v_perm_b32 v1, v2, v1, s4 5162; GFX90A-NEXT: v_perm_b32 v0, v3, v3, s4 5163; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 5164; GFX90A-NEXT: s_waitcnt vmcnt(0) 5165; GFX90A-NEXT: s_setpc_b64 s[30:31] 5166; 5167; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_2_3: 5168; GFX940: ; %bb.0: 5169; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5170; GFX940-NEXT: ;;#ASMSTART 5171; GFX940-NEXT: ; def v[0:1] 5172; GFX940-NEXT: ;;#ASMEND 5173; GFX940-NEXT: s_mov_b32 s2, 0x5040100 5174; GFX940-NEXT: v_mov_b32_e32 v4, 0 5175; GFX940-NEXT: ;;#ASMSTART 5176; GFX940-NEXT: ; def v[2:3] 5177; GFX940-NEXT: ;;#ASMEND 5178; GFX940-NEXT: s_nop 0 5179; GFX940-NEXT: v_perm_b32 v1, v2, v1, s2 5180; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 5181; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 5182; GFX940-NEXT: s_waitcnt vmcnt(0) 5183; GFX940-NEXT: s_setpc_b64 s[30:31] 5184 %vec0 = call <4 x i16> asm "; def $0", "=v"() 5185 %vec1 = call <4 x i16> asm "; def $0", "=v"() 5186 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 5187 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 5188 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 2, i32 3> 5189 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 5190 ret void 5191} 5192 5193define void @v_shuffle_v4i16_v3i16__5_5_4_3(ptr addrspace(1) inreg %ptr) { 5194; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_5_4_3: 5195; GFX900: ; %bb.0: 5196; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5197; GFX900-NEXT: ;;#ASMSTART 5198; GFX900-NEXT: ; def v[0:1] 5199; GFX900-NEXT: ;;#ASMEND 5200; GFX900-NEXT: 
s_mov_b32 s4, 0x5040100 5201; GFX900-NEXT: v_mov_b32_e32 v3, 0 5202; GFX900-NEXT: v_alignbit_b32 v2, v0, v0, 16 5203; GFX900-NEXT: v_perm_b32 v1, v1, v1, s4 5204; GFX900-NEXT: global_store_dwordx2 v3, v[1:2], s[16:17] 5205; GFX900-NEXT: s_waitcnt vmcnt(0) 5206; GFX900-NEXT: s_setpc_b64 s[30:31] 5207; 5208; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_5_4_3: 5209; GFX90A: ; %bb.0: 5210; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5211; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 5212; GFX90A-NEXT: v_mov_b32_e32 v4, 0 5213; GFX90A-NEXT: ;;#ASMSTART 5214; GFX90A-NEXT: ; def v[0:1] 5215; GFX90A-NEXT: ;;#ASMEND 5216; GFX90A-NEXT: v_alignbit_b32 v3, v0, v0, 16 5217; GFX90A-NEXT: v_perm_b32 v2, v1, v1, s4 5218; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17] 5219; GFX90A-NEXT: s_waitcnt vmcnt(0) 5220; GFX90A-NEXT: s_setpc_b64 s[30:31] 5221; 5222; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_4_3: 5223; GFX940: ; %bb.0: 5224; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5225; GFX940-NEXT: s_mov_b32 s2, 0x5040100 5226; GFX940-NEXT: v_mov_b32_e32 v4, 0 5227; GFX940-NEXT: ;;#ASMSTART 5228; GFX940-NEXT: ; def v[0:1] 5229; GFX940-NEXT: ;;#ASMEND 5230; GFX940-NEXT: s_nop 0 5231; GFX940-NEXT: v_alignbit_b32 v3, v0, v0, 16 5232; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 5233; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 5234; GFX940-NEXT: s_waitcnt vmcnt(0) 5235; GFX940-NEXT: s_setpc_b64 s[30:31] 5236 %vec0 = call <4 x i16> asm "; def $0", "=v"() 5237 %vec1 = call <4 x i16> asm "; def $0", "=v"() 5238 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 5239 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 5240 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 4, i32 3> 5241 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 5242 ret void 5243} 5244 5245define void @v_shuffle_v4i16_v3i16__u_4_4_4(ptr addrspace(1) inreg %ptr) { 5246; 
GFX900-LABEL: v_shuffle_v4i16_v3i16__u_4_4_4: 5247; GFX900: ; %bb.0: 5248; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5249; GFX900-NEXT: ;;#ASMSTART 5250; GFX900-NEXT: ; def v[0:1] 5251; GFX900-NEXT: ;;#ASMEND 5252; GFX900-NEXT: s_mov_b32 s4, 0x7060302 5253; GFX900-NEXT: v_mov_b32_e32 v2, 0 5254; GFX900-NEXT: v_perm_b32 v1, v0, v0, s4 5255; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17] 5256; GFX900-NEXT: s_waitcnt vmcnt(0) 5257; GFX900-NEXT: s_setpc_b64 s[30:31] 5258; 5259; GFX90A-LABEL: v_shuffle_v4i16_v3i16__u_4_4_4: 5260; GFX90A: ; %bb.0: 5261; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5262; GFX90A-NEXT: ;;#ASMSTART 5263; GFX90A-NEXT: ; def v[0:1] 5264; GFX90A-NEXT: ;;#ASMEND 5265; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 5266; GFX90A-NEXT: v_mov_b32_e32 v2, 0 5267; GFX90A-NEXT: v_perm_b32 v1, v0, v0, s4 5268; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17] 5269; GFX90A-NEXT: s_waitcnt vmcnt(0) 5270; GFX90A-NEXT: s_setpc_b64 s[30:31] 5271; 5272; GFX940-LABEL: v_shuffle_v4i16_v3i16__u_4_4_4: 5273; GFX940: ; %bb.0: 5274; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5275; GFX940-NEXT: ;;#ASMSTART 5276; GFX940-NEXT: ; def v[0:1] 5277; GFX940-NEXT: ;;#ASMEND 5278; GFX940-NEXT: s_mov_b32 s2, 0x7060302 5279; GFX940-NEXT: v_mov_b32_e32 v2, 0 5280; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 5281; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 5282; GFX940-NEXT: s_waitcnt vmcnt(0) 5283; GFX940-NEXT: s_setpc_b64 s[30:31] 5284 %vec0 = call <4 x i16> asm "; def $0", "=v"() 5285 %vec1 = call <4 x i16> asm "; def $0", "=v"() 5286 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 5287 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 5288 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 poison, i32 4, i32 4, i32 4> 5289 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 5290 ret void 5291} 5292 5293define 
void @v_shuffle_v4i16_v3i16__0_4_4_4(ptr addrspace(1) inreg %ptr) { 5294; GFX900-LABEL: v_shuffle_v4i16_v3i16__0_4_4_4: 5295; GFX900: ; %bb.0: 5296; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5297; GFX900-NEXT: ;;#ASMSTART 5298; GFX900-NEXT: ; def v[0:1] 5299; GFX900-NEXT: ;;#ASMEND 5300; GFX900-NEXT: s_mov_b32 s4, 0xffff 5301; GFX900-NEXT: ;;#ASMSTART 5302; GFX900-NEXT: ; def v[1:2] 5303; GFX900-NEXT: ;;#ASMEND 5304; GFX900-NEXT: v_bfi_b32 v0, s4, v0, v1 5305; GFX900-NEXT: s_mov_b32 s4, 0x7060302 5306; GFX900-NEXT: v_mov_b32_e32 v3, 0 5307; GFX900-NEXT: v_perm_b32 v1, v1, v1, s4 5308; GFX900-NEXT: global_store_dwordx2 v3, v[0:1], s[16:17] 5309; GFX900-NEXT: s_waitcnt vmcnt(0) 5310; GFX900-NEXT: s_setpc_b64 s[30:31] 5311; 5312; GFX90A-LABEL: v_shuffle_v4i16_v3i16__0_4_4_4: 5313; GFX90A: ; %bb.0: 5314; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5315; GFX90A-NEXT: ;;#ASMSTART 5316; GFX90A-NEXT: ; def v[0:1] 5317; GFX90A-NEXT: ;;#ASMEND 5318; GFX90A-NEXT: s_mov_b32 s4, 0xffff 5319; GFX90A-NEXT: ;;#ASMSTART 5320; GFX90A-NEXT: ; def v[2:3] 5321; GFX90A-NEXT: ;;#ASMEND 5322; GFX90A-NEXT: v_bfi_b32 v0, s4, v0, v2 5323; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 5324; GFX90A-NEXT: v_mov_b32_e32 v4, 0 5325; GFX90A-NEXT: v_perm_b32 v1, v2, v2, s4 5326; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 5327; GFX90A-NEXT: s_waitcnt vmcnt(0) 5328; GFX90A-NEXT: s_setpc_b64 s[30:31] 5329; 5330; GFX940-LABEL: v_shuffle_v4i16_v3i16__0_4_4_4: 5331; GFX940: ; %bb.0: 5332; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5333; GFX940-NEXT: ;;#ASMSTART 5334; GFX940-NEXT: ; def v[0:1] 5335; GFX940-NEXT: ;;#ASMEND 5336; GFX940-NEXT: s_mov_b32 s2, 0xffff 5337; GFX940-NEXT: ;;#ASMSTART 5338; GFX940-NEXT: ; def v[2:3] 5339; GFX940-NEXT: ;;#ASMEND 5340; GFX940-NEXT: v_mov_b32_e32 v4, 0 5341; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v2 5342; GFX940-NEXT: s_mov_b32 s2, 0x7060302 5343; GFX940-NEXT: v_perm_b32 v1, v2, v2, s2 5344; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], 
s[0:1] sc0 sc1 5345; GFX940-NEXT: s_waitcnt vmcnt(0) 5346; GFX940-NEXT: s_setpc_b64 s[30:31] 5347 %vec0 = call <4 x i16> asm "; def $0", "=v"() 5348 %vec1 = call <4 x i16> asm "; def $0", "=v"() 5349 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 5350 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 5351 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 0, i32 4, i32 4, i32 4> 5352 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 5353 ret void 5354} 5355 5356define void @v_shuffle_v4i16_v3i16__1_4_4_4(ptr addrspace(1) inreg %ptr) { 5357; GFX900-LABEL: v_shuffle_v4i16_v3i16__1_4_4_4: 5358; GFX900: ; %bb.0: 5359; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5360; GFX900-NEXT: ;;#ASMSTART 5361; GFX900-NEXT: ; def v[0:1] 5362; GFX900-NEXT: ;;#ASMEND 5363; GFX900-NEXT: ;;#ASMSTART 5364; GFX900-NEXT: ; def v[1:2] 5365; GFX900-NEXT: ;;#ASMEND 5366; GFX900-NEXT: s_mov_b32 s4, 0x7060302 5367; GFX900-NEXT: v_mov_b32_e32 v3, 0 5368; GFX900-NEXT: v_perm_b32 v0, v1, v0, s4 5369; GFX900-NEXT: v_perm_b32 v1, v1, v1, s4 5370; GFX900-NEXT: global_store_dwordx2 v3, v[0:1], s[16:17] 5371; GFX900-NEXT: s_waitcnt vmcnt(0) 5372; GFX900-NEXT: s_setpc_b64 s[30:31] 5373; 5374; GFX90A-LABEL: v_shuffle_v4i16_v3i16__1_4_4_4: 5375; GFX90A: ; %bb.0: 5376; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5377; GFX90A-NEXT: ;;#ASMSTART 5378; GFX90A-NEXT: ; def v[0:1] 5379; GFX90A-NEXT: ;;#ASMEND 5380; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 5381; GFX90A-NEXT: v_mov_b32_e32 v4, 0 5382; GFX90A-NEXT: ;;#ASMSTART 5383; GFX90A-NEXT: ; def v[2:3] 5384; GFX90A-NEXT: ;;#ASMEND 5385; GFX90A-NEXT: v_perm_b32 v0, v2, v0, s4 5386; GFX90A-NEXT: v_perm_b32 v1, v2, v2, s4 5387; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 5388; GFX90A-NEXT: s_waitcnt vmcnt(0) 5389; GFX90A-NEXT: s_setpc_b64 s[30:31] 5390; 5391; GFX940-LABEL: v_shuffle_v4i16_v3i16__1_4_4_4: 5392; GFX940: 
; %bb.0: 5393; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5394; GFX940-NEXT: ;;#ASMSTART 5395; GFX940-NEXT: ; def v[0:1] 5396; GFX940-NEXT: ;;#ASMEND 5397; GFX940-NEXT: s_mov_b32 s2, 0x7060302 5398; GFX940-NEXT: v_mov_b32_e32 v4, 0 5399; GFX940-NEXT: ;;#ASMSTART 5400; GFX940-NEXT: ; def v[2:3] 5401; GFX940-NEXT: ;;#ASMEND 5402; GFX940-NEXT: s_nop 0 5403; GFX940-NEXT: v_perm_b32 v0, v2, v0, s2 5404; GFX940-NEXT: v_perm_b32 v1, v2, v2, s2 5405; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 5406; GFX940-NEXT: s_waitcnt vmcnt(0) 5407; GFX940-NEXT: s_setpc_b64 s[30:31] 5408 %vec0 = call <4 x i16> asm "; def $0", "=v"() 5409 %vec1 = call <4 x i16> asm "; def $0", "=v"() 5410 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 5411 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 5412 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 1, i32 4, i32 4, i32 4> 5413 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 5414 ret void 5415} 5416 5417define void @v_shuffle_v4i16_v3i16__2_4_4_4(ptr addrspace(1) inreg %ptr) { 5418; GFX900-LABEL: v_shuffle_v4i16_v3i16__2_4_4_4: 5419; GFX900: ; %bb.0: 5420; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5421; GFX900-NEXT: ;;#ASMSTART 5422; GFX900-NEXT: ; def v[0:1] 5423; GFX900-NEXT: ;;#ASMEND 5424; GFX900-NEXT: s_mov_b32 s4, 0xffff 5425; GFX900-NEXT: ;;#ASMSTART 5426; GFX900-NEXT: ; def v[2:3] 5427; GFX900-NEXT: ;;#ASMEND 5428; GFX900-NEXT: v_bfi_b32 v0, s4, v1, v2 5429; GFX900-NEXT: s_mov_b32 s4, 0x7060302 5430; GFX900-NEXT: v_mov_b32_e32 v4, 0 5431; GFX900-NEXT: v_perm_b32 v1, v2, v2, s4 5432; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 5433; GFX900-NEXT: s_waitcnt vmcnt(0) 5434; GFX900-NEXT: s_setpc_b64 s[30:31] 5435; 5436; GFX90A-LABEL: v_shuffle_v4i16_v3i16__2_4_4_4: 5437; GFX90A: ; %bb.0: 5438; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5439; GFX90A-NEXT: 
;;#ASMSTART 5440; GFX90A-NEXT: ; def v[0:1] 5441; GFX90A-NEXT: ;;#ASMEND 5442; GFX90A-NEXT: s_mov_b32 s4, 0xffff 5443; GFX90A-NEXT: ;;#ASMSTART 5444; GFX90A-NEXT: ; def v[2:3] 5445; GFX90A-NEXT: ;;#ASMEND 5446; GFX90A-NEXT: v_bfi_b32 v0, s4, v1, v2 5447; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 5448; GFX90A-NEXT: v_mov_b32_e32 v4, 0 5449; GFX90A-NEXT: v_perm_b32 v1, v2, v2, s4 5450; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 5451; GFX90A-NEXT: s_waitcnt vmcnt(0) 5452; GFX90A-NEXT: s_setpc_b64 s[30:31] 5453; 5454; GFX940-LABEL: v_shuffle_v4i16_v3i16__2_4_4_4: 5455; GFX940: ; %bb.0: 5456; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5457; GFX940-NEXT: ;;#ASMSTART 5458; GFX940-NEXT: ; def v[0:1] 5459; GFX940-NEXT: ;;#ASMEND 5460; GFX940-NEXT: s_mov_b32 s2, 0xffff 5461; GFX940-NEXT: ;;#ASMSTART 5462; GFX940-NEXT: ; def v[2:3] 5463; GFX940-NEXT: ;;#ASMEND 5464; GFX940-NEXT: v_mov_b32_e32 v4, 0 5465; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v2 5466; GFX940-NEXT: s_mov_b32 s2, 0x7060302 5467; GFX940-NEXT: v_perm_b32 v1, v2, v2, s2 5468; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 5469; GFX940-NEXT: s_waitcnt vmcnt(0) 5470; GFX940-NEXT: s_setpc_b64 s[30:31] 5471 %vec0 = call <4 x i16> asm "; def $0", "=v"() 5472 %vec1 = call <4 x i16> asm "; def $0", "=v"() 5473 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 5474 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 5475 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 2, i32 4, i32 4, i32 4> 5476 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 5477 ret void 5478} 5479 5480define void @v_shuffle_v4i16_v3i16__3_4_4_4(ptr addrspace(1) inreg %ptr) { 5481; GFX900-LABEL: v_shuffle_v4i16_v3i16__3_4_4_4: 5482; GFX900: ; %bb.0: 5483; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5484; GFX900-NEXT: ;;#ASMSTART 5485; GFX900-NEXT: ; def v[0:1] 5486; GFX900-NEXT: ;;#ASMEND 5487; 
GFX900-NEXT: s_mov_b32 s4, 0x7060302 5488; GFX900-NEXT: v_mov_b32_e32 v2, 0 5489; GFX900-NEXT: v_perm_b32 v1, v0, v0, s4 5490; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17] 5491; GFX900-NEXT: s_waitcnt vmcnt(0) 5492; GFX900-NEXT: s_setpc_b64 s[30:31] 5493; 5494; GFX90A-LABEL: v_shuffle_v4i16_v3i16__3_4_4_4: 5495; GFX90A: ; %bb.0: 5496; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5497; GFX90A-NEXT: ;;#ASMSTART 5498; GFX90A-NEXT: ; def v[0:1] 5499; GFX90A-NEXT: ;;#ASMEND 5500; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 5501; GFX90A-NEXT: v_mov_b32_e32 v2, 0 5502; GFX90A-NEXT: v_perm_b32 v1, v0, v0, s4 5503; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17] 5504; GFX90A-NEXT: s_waitcnt vmcnt(0) 5505; GFX90A-NEXT: s_setpc_b64 s[30:31] 5506; 5507; GFX940-LABEL: v_shuffle_v4i16_v3i16__3_4_4_4: 5508; GFX940: ; %bb.0: 5509; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5510; GFX940-NEXT: ;;#ASMSTART 5511; GFX940-NEXT: ; def v[0:1] 5512; GFX940-NEXT: ;;#ASMEND 5513; GFX940-NEXT: s_mov_b32 s2, 0x7060302 5514; GFX940-NEXT: v_mov_b32_e32 v2, 0 5515; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 5516; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 5517; GFX940-NEXT: s_waitcnt vmcnt(0) 5518; GFX940-NEXT: s_setpc_b64 s[30:31] 5519 %vec0 = call <4 x i16> asm "; def $0", "=v"() 5520 %vec1 = call <4 x i16> asm "; def $0", "=v"() 5521 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 5522 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 5523 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 3, i32 4, i32 4, i32 4> 5524 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 5525 ret void 5526} 5527 5528define void @v_shuffle_v4i16_v3i16__4_4_4_4(ptr addrspace(1) inreg %ptr) { 5529; GFX900-LABEL: v_shuffle_v4i16_v3i16__4_4_4_4: 5530; GFX900: ; %bb.0: 5531; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5532; GFX900-NEXT: ;;#ASMSTART 
5533; GFX900-NEXT: ; def v[0:1] 5534; GFX900-NEXT: ;;#ASMEND 5535; GFX900-NEXT: s_mov_b32 s4, 0x7060302 5536; GFX900-NEXT: v_perm_b32 v0, v0, v0, s4 5537; GFX900-NEXT: v_mov_b32_e32 v2, 0 5538; GFX900-NEXT: v_mov_b32_e32 v1, v0 5539; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17] 5540; GFX900-NEXT: s_waitcnt vmcnt(0) 5541; GFX900-NEXT: s_setpc_b64 s[30:31] 5542; 5543; GFX90A-LABEL: v_shuffle_v4i16_v3i16__4_4_4_4: 5544; GFX90A: ; %bb.0: 5545; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5546; GFX90A-NEXT: ;;#ASMSTART 5547; GFX90A-NEXT: ; def v[0:1] 5548; GFX90A-NEXT: ;;#ASMEND 5549; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 5550; GFX90A-NEXT: v_perm_b32 v0, v0, v0, s4 5551; GFX90A-NEXT: v_mov_b32_e32 v2, 0 5552; GFX90A-NEXT: v_mov_b32_e32 v1, v0 5553; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17] 5554; GFX90A-NEXT: s_waitcnt vmcnt(0) 5555; GFX90A-NEXT: s_setpc_b64 s[30:31] 5556; 5557; GFX940-LABEL: v_shuffle_v4i16_v3i16__4_4_4_4: 5558; GFX940: ; %bb.0: 5559; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5560; GFX940-NEXT: ;;#ASMSTART 5561; GFX940-NEXT: ; def v[0:1] 5562; GFX940-NEXT: ;;#ASMEND 5563; GFX940-NEXT: s_mov_b32 s2, 0x7060302 5564; GFX940-NEXT: v_perm_b32 v0, v0, v0, s2 5565; GFX940-NEXT: v_mov_b32_e32 v2, 0 5566; GFX940-NEXT: v_mov_b32_e32 v1, v0 5567; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 5568; GFX940-NEXT: s_waitcnt vmcnt(0) 5569; GFX940-NEXT: s_setpc_b64 s[30:31] 5570 %vec0 = call <4 x i16> asm "; def $0", "=v"() 5571 %vec1 = call <4 x i16> asm "; def $0", "=v"() 5572 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 5573 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 5574 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 4, i32 4, i32 4, i32 4> 5575 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 5576 ret void 5577} 5578 5579define void @v_shuffle_v4i16_v3i16__5_4_4_4(ptr 
addrspace(1) inreg %ptr) { 5580; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_4_4_4: 5581; GFX900: ; %bb.0: 5582; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5583; GFX900-NEXT: ;;#ASMSTART 5584; GFX900-NEXT: ; def v[0:1] 5585; GFX900-NEXT: ;;#ASMEND 5586; GFX900-NEXT: s_mov_b32 s4, 0xffff 5587; GFX900-NEXT: v_bfi_b32 v1, s4, v1, v0 5588; GFX900-NEXT: s_mov_b32 s4, 0x7060302 5589; GFX900-NEXT: v_mov_b32_e32 v3, 0 5590; GFX900-NEXT: v_perm_b32 v2, v0, v0, s4 5591; GFX900-NEXT: global_store_dwordx2 v3, v[1:2], s[16:17] 5592; GFX900-NEXT: s_waitcnt vmcnt(0) 5593; GFX900-NEXT: s_setpc_b64 s[30:31] 5594; 5595; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_4_4_4: 5596; GFX90A: ; %bb.0: 5597; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5598; GFX90A-NEXT: s_mov_b32 s4, 0xffff 5599; GFX90A-NEXT: ;;#ASMSTART 5600; GFX90A-NEXT: ; def v[0:1] 5601; GFX90A-NEXT: ;;#ASMEND 5602; GFX90A-NEXT: v_bfi_b32 v2, s4, v1, v0 5603; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 5604; GFX90A-NEXT: v_mov_b32_e32 v4, 0 5605; GFX90A-NEXT: v_perm_b32 v3, v0, v0, s4 5606; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17] 5607; GFX90A-NEXT: s_waitcnt vmcnt(0) 5608; GFX90A-NEXT: s_setpc_b64 s[30:31] 5609; 5610; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_4_4_4: 5611; GFX940: ; %bb.0: 5612; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5613; GFX940-NEXT: s_mov_b32 s2, 0xffff 5614; GFX940-NEXT: ;;#ASMSTART 5615; GFX940-NEXT: ; def v[0:1] 5616; GFX940-NEXT: ;;#ASMEND 5617; GFX940-NEXT: v_mov_b32_e32 v4, 0 5618; GFX940-NEXT: v_bfi_b32 v2, s2, v1, v0 5619; GFX940-NEXT: s_mov_b32 s2, 0x7060302 5620; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 5621; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 5622; GFX940-NEXT: s_waitcnt vmcnt(0) 5623; GFX940-NEXT: s_setpc_b64 s[30:31] 5624 %vec0 = call <4 x i16> asm "; def $0", "=v"() 5625 %vec1 = call <4 x i16> asm "; def $0", "=v"() 5626 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 5627 %extract31 = 
shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 5628 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 4, i32 4, i32 4> 5629 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 5630 ret void 5631} 5632 5633define void @v_shuffle_v4i16_v3i16__5_u_4_4(ptr addrspace(1) inreg %ptr) { 5634; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_u_4_4: 5635; GFX900: ; %bb.0: 5636; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5637; GFX900-NEXT: s_mov_b32 s4, 0x7060302 5638; GFX900-NEXT: v_mov_b32_e32 v3, 0 5639; GFX900-NEXT: ;;#ASMSTART 5640; GFX900-NEXT: ; def v[0:1] 5641; GFX900-NEXT: ;;#ASMEND 5642; GFX900-NEXT: v_perm_b32 v2, v0, v0, s4 5643; GFX900-NEXT: global_store_dwordx2 v3, v[1:2], s[16:17] 5644; GFX900-NEXT: s_waitcnt vmcnt(0) 5645; GFX900-NEXT: s_setpc_b64 s[30:31] 5646; 5647; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_u_4_4: 5648; GFX90A: ; %bb.0: 5649; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5650; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 5651; GFX90A-NEXT: v_mov_b32_e32 v4, 0 5652; GFX90A-NEXT: ;;#ASMSTART 5653; GFX90A-NEXT: ; def v[0:1] 5654; GFX90A-NEXT: ;;#ASMEND 5655; GFX90A-NEXT: v_perm_b32 v3, v0, v0, s4 5656; GFX90A-NEXT: v_mov_b32_e32 v2, v1 5657; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17] 5658; GFX90A-NEXT: s_waitcnt vmcnt(0) 5659; GFX90A-NEXT: s_setpc_b64 s[30:31] 5660; 5661; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_u_4_4: 5662; GFX940: ; %bb.0: 5663; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5664; GFX940-NEXT: s_mov_b32 s2, 0x7060302 5665; GFX940-NEXT: v_mov_b32_e32 v4, 0 5666; GFX940-NEXT: ;;#ASMSTART 5667; GFX940-NEXT: ; def v[0:1] 5668; GFX940-NEXT: ;;#ASMEND 5669; GFX940-NEXT: s_nop 0 5670; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 5671; GFX940-NEXT: v_mov_b32_e32 v2, v1 5672; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 5673; GFX940-NEXT: s_waitcnt vmcnt(0) 5674; GFX940-NEXT: s_setpc_b64 s[30:31] 5675 %vec0 = call <4 x i16> asm "; def $0", 
"=v"() 5676 %vec1 = call <4 x i16> asm "; def $0", "=v"() 5677 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 5678 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 5679 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 poison, i32 4, i32 4> 5680 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 5681 ret void 5682} 5683 5684define void @v_shuffle_v4i16_v3i16__5_0_4_4(ptr addrspace(1) inreg %ptr) { 5685; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_0_4_4: 5686; GFX900: ; %bb.0: 5687; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5688; GFX900-NEXT: ;;#ASMSTART 5689; GFX900-NEXT: ; def v[0:1] 5690; GFX900-NEXT: ;;#ASMEND 5691; GFX900-NEXT: s_mov_b32 s4, 0x5040100 5692; GFX900-NEXT: ;;#ASMSTART 5693; GFX900-NEXT: ; def v[1:2] 5694; GFX900-NEXT: ;;#ASMEND 5695; GFX900-NEXT: v_perm_b32 v0, v0, v2, s4 5696; GFX900-NEXT: s_mov_b32 s4, 0x7060302 5697; GFX900-NEXT: v_mov_b32_e32 v3, 0 5698; GFX900-NEXT: v_perm_b32 v1, v1, v1, s4 5699; GFX900-NEXT: global_store_dwordx2 v3, v[0:1], s[16:17] 5700; GFX900-NEXT: s_waitcnt vmcnt(0) 5701; GFX900-NEXT: s_setpc_b64 s[30:31] 5702; 5703; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_0_4_4: 5704; GFX90A: ; %bb.0: 5705; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5706; GFX90A-NEXT: ;;#ASMSTART 5707; GFX90A-NEXT: ; def v[0:1] 5708; GFX90A-NEXT: ;;#ASMEND 5709; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 5710; GFX90A-NEXT: ;;#ASMSTART 5711; GFX90A-NEXT: ; def v[2:3] 5712; GFX90A-NEXT: ;;#ASMEND 5713; GFX90A-NEXT: v_perm_b32 v0, v0, v3, s4 5714; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 5715; GFX90A-NEXT: v_mov_b32_e32 v4, 0 5716; GFX90A-NEXT: v_perm_b32 v1, v2, v2, s4 5717; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 5718; GFX90A-NEXT: s_waitcnt vmcnt(0) 5719; GFX90A-NEXT: s_setpc_b64 s[30:31] 5720; 5721; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_0_4_4: 5722; GFX940: ; %bb.0: 5723; GFX940-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) 5724; GFX940-NEXT: ;;#ASMSTART 5725; GFX940-NEXT: ; def v[0:1] 5726; GFX940-NEXT: ;;#ASMEND 5727; GFX940-NEXT: s_mov_b32 s2, 0x5040100 5728; GFX940-NEXT: ;;#ASMSTART 5729; GFX940-NEXT: ; def v[2:3] 5730; GFX940-NEXT: ;;#ASMEND 5731; GFX940-NEXT: v_mov_b32_e32 v4, 0 5732; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 5733; GFX940-NEXT: s_mov_b32 s2, 0x7060302 5734; GFX940-NEXT: v_perm_b32 v1, v2, v2, s2 5735; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 5736; GFX940-NEXT: s_waitcnt vmcnt(0) 5737; GFX940-NEXT: s_setpc_b64 s[30:31] 5738 %vec0 = call <4 x i16> asm "; def $0", "=v"() 5739 %vec1 = call <4 x i16> asm "; def $0", "=v"() 5740 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 5741 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 5742 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 0, i32 4, i32 4> 5743 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 5744 ret void 5745} 5746 5747define void @v_shuffle_v4i16_v3i16__5_1_4_4(ptr addrspace(1) inreg %ptr) { 5748; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_1_4_4: 5749; GFX900: ; %bb.0: 5750; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5751; GFX900-NEXT: ;;#ASMSTART 5752; GFX900-NEXT: ; def v[0:1] 5753; GFX900-NEXT: ;;#ASMEND 5754; GFX900-NEXT: s_mov_b32 s4, 0xffff 5755; GFX900-NEXT: ;;#ASMSTART 5756; GFX900-NEXT: ; def v[1:2] 5757; GFX900-NEXT: ;;#ASMEND 5758; GFX900-NEXT: v_bfi_b32 v0, s4, v2, v0 5759; GFX900-NEXT: s_mov_b32 s4, 0x7060302 5760; GFX900-NEXT: v_mov_b32_e32 v3, 0 5761; GFX900-NEXT: v_perm_b32 v1, v1, v1, s4 5762; GFX900-NEXT: global_store_dwordx2 v3, v[0:1], s[16:17] 5763; GFX900-NEXT: s_waitcnt vmcnt(0) 5764; GFX900-NEXT: s_setpc_b64 s[30:31] 5765; 5766; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_1_4_4: 5767; GFX90A: ; %bb.0: 5768; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5769; GFX90A-NEXT: ;;#ASMSTART 5770; 
GFX90A-NEXT: ; def v[0:1] 5771; GFX90A-NEXT: ;;#ASMEND 5772; GFX90A-NEXT: s_mov_b32 s4, 0xffff 5773; GFX90A-NEXT: ;;#ASMSTART 5774; GFX90A-NEXT: ; def v[2:3] 5775; GFX90A-NEXT: ;;#ASMEND 5776; GFX90A-NEXT: v_bfi_b32 v0, s4, v3, v0 5777; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 5778; GFX90A-NEXT: v_mov_b32_e32 v4, 0 5779; GFX90A-NEXT: v_perm_b32 v1, v2, v2, s4 5780; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 5781; GFX90A-NEXT: s_waitcnt vmcnt(0) 5782; GFX90A-NEXT: s_setpc_b64 s[30:31] 5783; 5784; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_1_4_4: 5785; GFX940: ; %bb.0: 5786; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5787; GFX940-NEXT: ;;#ASMSTART 5788; GFX940-NEXT: ; def v[0:1] 5789; GFX940-NEXT: ;;#ASMEND 5790; GFX940-NEXT: s_mov_b32 s2, 0xffff 5791; GFX940-NEXT: ;;#ASMSTART 5792; GFX940-NEXT: ; def v[2:3] 5793; GFX940-NEXT: ;;#ASMEND 5794; GFX940-NEXT: v_mov_b32_e32 v4, 0 5795; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v0 5796; GFX940-NEXT: s_mov_b32 s2, 0x7060302 5797; GFX940-NEXT: v_perm_b32 v1, v2, v2, s2 5798; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 5799; GFX940-NEXT: s_waitcnt vmcnt(0) 5800; GFX940-NEXT: s_setpc_b64 s[30:31] 5801 %vec0 = call <4 x i16> asm "; def $0", "=v"() 5802 %vec1 = call <4 x i16> asm "; def $0", "=v"() 5803 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 5804 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 5805 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 1, i32 4, i32 4> 5806 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 5807 ret void 5808} 5809 5810define void @v_shuffle_v4i16_v3i16__5_2_4_4(ptr addrspace(1) inreg %ptr) { 5811; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_2_4_4: 5812; GFX900: ; %bb.0: 5813; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5814; GFX900-NEXT: ;;#ASMSTART 5815; GFX900-NEXT: ; def v[0:1] 5816; GFX900-NEXT: ;;#ASMEND 5817; GFX900-NEXT: 
s_mov_b32 s4, 0x5040100 5818; GFX900-NEXT: ;;#ASMSTART 5819; GFX900-NEXT: ; def v[2:3] 5820; GFX900-NEXT: ;;#ASMEND 5821; GFX900-NEXT: v_perm_b32 v0, v1, v3, s4 5822; GFX900-NEXT: s_mov_b32 s4, 0x7060302 5823; GFX900-NEXT: v_mov_b32_e32 v4, 0 5824; GFX900-NEXT: v_perm_b32 v1, v2, v2, s4 5825; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 5826; GFX900-NEXT: s_waitcnt vmcnt(0) 5827; GFX900-NEXT: s_setpc_b64 s[30:31] 5828; 5829; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_2_4_4: 5830; GFX90A: ; %bb.0: 5831; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5832; GFX90A-NEXT: ;;#ASMSTART 5833; GFX90A-NEXT: ; def v[0:1] 5834; GFX90A-NEXT: ;;#ASMEND 5835; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 5836; GFX90A-NEXT: ;;#ASMSTART 5837; GFX90A-NEXT: ; def v[2:3] 5838; GFX90A-NEXT: ;;#ASMEND 5839; GFX90A-NEXT: v_perm_b32 v0, v1, v3, s4 5840; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 5841; GFX90A-NEXT: v_mov_b32_e32 v4, 0 5842; GFX90A-NEXT: v_perm_b32 v1, v2, v2, s4 5843; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 5844; GFX90A-NEXT: s_waitcnt vmcnt(0) 5845; GFX90A-NEXT: s_setpc_b64 s[30:31] 5846; 5847; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_2_4_4: 5848; GFX940: ; %bb.0: 5849; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5850; GFX940-NEXT: ;;#ASMSTART 5851; GFX940-NEXT: ; def v[0:1] 5852; GFX940-NEXT: ;;#ASMEND 5853; GFX940-NEXT: s_mov_b32 s2, 0x5040100 5854; GFX940-NEXT: ;;#ASMSTART 5855; GFX940-NEXT: ; def v[2:3] 5856; GFX940-NEXT: ;;#ASMEND 5857; GFX940-NEXT: v_mov_b32_e32 v4, 0 5858; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 5859; GFX940-NEXT: s_mov_b32 s2, 0x7060302 5860; GFX940-NEXT: v_perm_b32 v1, v2, v2, s2 5861; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 5862; GFX940-NEXT: s_waitcnt vmcnt(0) 5863; GFX940-NEXT: s_setpc_b64 s[30:31] 5864 %vec0 = call <4 x i16> asm "; def $0", "=v"() 5865 %vec1 = call <4 x i16> asm "; def $0", "=v"() 5866 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 5867 
%extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 5868 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 2, i32 4, i32 4> 5869 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 5870 ret void 5871} 5872 5873define void @v_shuffle_v4i16_v3i16__5_3_4_4(ptr addrspace(1) inreg %ptr) { 5874; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_3_4_4: 5875; GFX900: ; %bb.0: 5876; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5877; GFX900-NEXT: ;;#ASMSTART 5878; GFX900-NEXT: ; def v[0:1] 5879; GFX900-NEXT: ;;#ASMEND 5880; GFX900-NEXT: s_mov_b32 s4, 0x5040100 5881; GFX900-NEXT: v_perm_b32 v1, v0, v1, s4 5882; GFX900-NEXT: s_mov_b32 s4, 0x7060302 5883; GFX900-NEXT: v_mov_b32_e32 v3, 0 5884; GFX900-NEXT: v_perm_b32 v2, v0, v0, s4 5885; GFX900-NEXT: global_store_dwordx2 v3, v[1:2], s[16:17] 5886; GFX900-NEXT: s_waitcnt vmcnt(0) 5887; GFX900-NEXT: s_setpc_b64 s[30:31] 5888; 5889; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_3_4_4: 5890; GFX90A: ; %bb.0: 5891; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5892; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 5893; GFX90A-NEXT: ;;#ASMSTART 5894; GFX90A-NEXT: ; def v[0:1] 5895; GFX90A-NEXT: ;;#ASMEND 5896; GFX90A-NEXT: v_perm_b32 v2, v0, v1, s4 5897; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 5898; GFX90A-NEXT: v_mov_b32_e32 v4, 0 5899; GFX90A-NEXT: v_perm_b32 v3, v0, v0, s4 5900; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17] 5901; GFX90A-NEXT: s_waitcnt vmcnt(0) 5902; GFX90A-NEXT: s_setpc_b64 s[30:31] 5903; 5904; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_3_4_4: 5905; GFX940: ; %bb.0: 5906; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5907; GFX940-NEXT: s_mov_b32 s2, 0x5040100 5908; GFX940-NEXT: ;;#ASMSTART 5909; GFX940-NEXT: ; def v[0:1] 5910; GFX940-NEXT: ;;#ASMEND 5911; GFX940-NEXT: v_mov_b32_e32 v4, 0 5912; GFX940-NEXT: v_perm_b32 v2, v0, v1, s2 5913; GFX940-NEXT: s_mov_b32 s2, 0x7060302 5914; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 5915; GFX940-NEXT: 
global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 5916; GFX940-NEXT: s_waitcnt vmcnt(0) 5917; GFX940-NEXT: s_setpc_b64 s[30:31] 5918 %vec0 = call <4 x i16> asm "; def $0", "=v"() 5919 %vec1 = call <4 x i16> asm "; def $0", "=v"() 5920 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 5921 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 5922 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 3, i32 4, i32 4> 5923 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 5924 ret void 5925} 5926 5927define void @v_shuffle_v4i16_v3i16__5_5_4_4(ptr addrspace(1) inreg %ptr) { 5928; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_5_4_4: 5929; GFX900: ; %bb.0: 5930; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5931; GFX900-NEXT: s_mov_b32 s4, 0x7060302 5932; GFX900-NEXT: ;;#ASMSTART 5933; GFX900-NEXT: ; def v[0:1] 5934; GFX900-NEXT: ;;#ASMEND 5935; GFX900-NEXT: v_perm_b32 v2, v0, v0, s4 5936; GFX900-NEXT: s_mov_b32 s4, 0x5040100 5937; GFX900-NEXT: v_mov_b32_e32 v3, 0 5938; GFX900-NEXT: v_perm_b32 v1, v1, v1, s4 5939; GFX900-NEXT: global_store_dwordx2 v3, v[1:2], s[16:17] 5940; GFX900-NEXT: s_waitcnt vmcnt(0) 5941; GFX900-NEXT: s_setpc_b64 s[30:31] 5942; 5943; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_5_4_4: 5944; GFX90A: ; %bb.0: 5945; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5946; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 5947; GFX90A-NEXT: ;;#ASMSTART 5948; GFX90A-NEXT: ; def v[0:1] 5949; GFX90A-NEXT: ;;#ASMEND 5950; GFX90A-NEXT: v_perm_b32 v3, v0, v0, s4 5951; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 5952; GFX90A-NEXT: v_mov_b32_e32 v4, 0 5953; GFX90A-NEXT: v_perm_b32 v2, v1, v1, s4 5954; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17] 5955; GFX90A-NEXT: s_waitcnt vmcnt(0) 5956; GFX90A-NEXT: s_setpc_b64 s[30:31] 5957; 5958; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_4_4: 5959; GFX940: ; %bb.0: 5960; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) 5961; GFX940-NEXT: s_mov_b32 s2, 0x7060302 5962; GFX940-NEXT: ;;#ASMSTART 5963; GFX940-NEXT: ; def v[0:1] 5964; GFX940-NEXT: ;;#ASMEND 5965; GFX940-NEXT: v_mov_b32_e32 v4, 0 5966; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 5967; GFX940-NEXT: s_mov_b32 s2, 0x5040100 5968; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 5969; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 5970; GFX940-NEXT: s_waitcnt vmcnt(0) 5971; GFX940-NEXT: s_setpc_b64 s[30:31] 5972 %vec0 = call <4 x i16> asm "; def $0", "=v"() 5973 %vec1 = call <4 x i16> asm "; def $0", "=v"() 5974 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 5975 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 5976 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 4, i32 4> 5977 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 5978 ret void 5979} 5980 5981define void @v_shuffle_v4i16_v3i16__5_5_u_4(ptr addrspace(1) inreg %ptr) { 5982; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_5_u_4: 5983; GFX900: ; %bb.0: 5984; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5985; GFX900-NEXT: ;;#ASMSTART 5986; GFX900-NEXT: ; def v[0:1] 5987; GFX900-NEXT: ;;#ASMEND 5988; GFX900-NEXT: s_mov_b32 s4, 0x5040100 5989; GFX900-NEXT: v_mov_b32_e32 v3, 0 5990; GFX900-NEXT: v_perm_b32 v1, v1, v1, s4 5991; GFX900-NEXT: v_mov_b32_e32 v2, v0 5992; GFX900-NEXT: global_store_dwordx2 v3, v[1:2], s[16:17] 5993; GFX900-NEXT: s_waitcnt vmcnt(0) 5994; GFX900-NEXT: s_setpc_b64 s[30:31] 5995; 5996; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_5_u_4: 5997; GFX90A: ; %bb.0: 5998; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5999; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 6000; GFX90A-NEXT: v_mov_b32_e32 v4, 0 6001; GFX90A-NEXT: ;;#ASMSTART 6002; GFX90A-NEXT: ; def v[0:1] 6003; GFX90A-NEXT: ;;#ASMEND 6004; GFX90A-NEXT: v_perm_b32 v2, v1, v1, s4 6005; GFX90A-NEXT: v_mov_b32_e32 v3, v0 6006; GFX90A-NEXT: 
global_store_dwordx2 v4, v[2:3], s[16:17] 6007; GFX90A-NEXT: s_waitcnt vmcnt(0) 6008; GFX90A-NEXT: s_setpc_b64 s[30:31] 6009; 6010; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_u_4: 6011; GFX940: ; %bb.0: 6012; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6013; GFX940-NEXT: s_mov_b32 s2, 0x5040100 6014; GFX940-NEXT: v_mov_b32_e32 v4, 0 6015; GFX940-NEXT: ;;#ASMSTART 6016; GFX940-NEXT: ; def v[0:1] 6017; GFX940-NEXT: ;;#ASMEND 6018; GFX940-NEXT: s_nop 0 6019; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 6020; GFX940-NEXT: v_mov_b32_e32 v3, v0 6021; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 6022; GFX940-NEXT: s_waitcnt vmcnt(0) 6023; GFX940-NEXT: s_setpc_b64 s[30:31] 6024 %vec0 = call <4 x i16> asm "; def $0", "=v"() 6025 %vec1 = call <4 x i16> asm "; def $0", "=v"() 6026 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 6027 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 6028 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 poison, i32 4> 6029 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 6030 ret void 6031} 6032 6033define void @v_shuffle_v4i16_v3i16__5_5_0_4(ptr addrspace(1) inreg %ptr) { 6034; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_5_0_4: 6035; GFX900: ; %bb.0: 6036; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6037; GFX900-NEXT: ;;#ASMSTART 6038; GFX900-NEXT: ; def v[0:1] 6039; GFX900-NEXT: ;;#ASMEND 6040; GFX900-NEXT: ;;#ASMSTART 6041; GFX900-NEXT: ; def v[1:2] 6042; GFX900-NEXT: ;;#ASMEND 6043; GFX900-NEXT: s_mov_b32 s4, 0xffff 6044; GFX900-NEXT: v_bfi_b32 v1, s4, v0, v1 6045; GFX900-NEXT: s_mov_b32 s4, 0x5040100 6046; GFX900-NEXT: v_mov_b32_e32 v3, 0 6047; GFX900-NEXT: v_perm_b32 v0, v2, v2, s4 6048; GFX900-NEXT: global_store_dwordx2 v3, v[0:1], s[16:17] 6049; GFX900-NEXT: s_waitcnt vmcnt(0) 6050; GFX900-NEXT: s_setpc_b64 s[30:31] 6051; 6052; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_5_0_4: 6053; 
GFX90A: ; %bb.0: 6054; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6055; GFX90A-NEXT: ;;#ASMSTART 6056; GFX90A-NEXT: ; def v[0:1] 6057; GFX90A-NEXT: ;;#ASMEND 6058; GFX90A-NEXT: s_mov_b32 s4, 0xffff 6059; GFX90A-NEXT: ;;#ASMSTART 6060; GFX90A-NEXT: ; def v[2:3] 6061; GFX90A-NEXT: ;;#ASMEND 6062; GFX90A-NEXT: v_bfi_b32 v1, s4, v0, v2 6063; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 6064; GFX90A-NEXT: v_mov_b32_e32 v4, 0 6065; GFX90A-NEXT: v_perm_b32 v0, v3, v3, s4 6066; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 6067; GFX90A-NEXT: s_waitcnt vmcnt(0) 6068; GFX90A-NEXT: s_setpc_b64 s[30:31] 6069; 6070; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_0_4: 6071; GFX940: ; %bb.0: 6072; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6073; GFX940-NEXT: ;;#ASMSTART 6074; GFX940-NEXT: ; def v[0:1] 6075; GFX940-NEXT: ;;#ASMEND 6076; GFX940-NEXT: s_mov_b32 s2, 0xffff 6077; GFX940-NEXT: ;;#ASMSTART 6078; GFX940-NEXT: ; def v[2:3] 6079; GFX940-NEXT: ;;#ASMEND 6080; GFX940-NEXT: v_mov_b32_e32 v4, 0 6081; GFX940-NEXT: v_bfi_b32 v1, s2, v0, v2 6082; GFX940-NEXT: s_mov_b32 s2, 0x5040100 6083; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 6084; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 6085; GFX940-NEXT: s_waitcnt vmcnt(0) 6086; GFX940-NEXT: s_setpc_b64 s[30:31] 6087 %vec0 = call <4 x i16> asm "; def $0", "=v"() 6088 %vec1 = call <4 x i16> asm "; def $0", "=v"() 6089 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 6090 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 6091 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 0, i32 4> 6092 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 6093 ret void 6094} 6095 6096define void @v_shuffle_v4i16_v3i16__5_5_1_4(ptr addrspace(1) inreg %ptr) { 6097; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_5_1_4: 6098; GFX900: ; %bb.0: 6099; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 
6100; GFX900-NEXT: ;;#ASMSTART 6101; GFX900-NEXT: ; def v[0:1] 6102; GFX900-NEXT: ;;#ASMEND 6103; GFX900-NEXT: ;;#ASMSTART 6104; GFX900-NEXT: ; def v[1:2] 6105; GFX900-NEXT: ;;#ASMEND 6106; GFX900-NEXT: s_mov_b32 s4, 0x7060302 6107; GFX900-NEXT: v_perm_b32 v1, v1, v0, s4 6108; GFX900-NEXT: s_mov_b32 s4, 0x5040100 6109; GFX900-NEXT: v_mov_b32_e32 v3, 0 6110; GFX900-NEXT: v_perm_b32 v0, v2, v2, s4 6111; GFX900-NEXT: global_store_dwordx2 v3, v[0:1], s[16:17] 6112; GFX900-NEXT: s_waitcnt vmcnt(0) 6113; GFX900-NEXT: s_setpc_b64 s[30:31] 6114; 6115; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_5_1_4: 6116; GFX90A: ; %bb.0: 6117; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6118; GFX90A-NEXT: ;;#ASMSTART 6119; GFX90A-NEXT: ; def v[0:1] 6120; GFX90A-NEXT: ;;#ASMEND 6121; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 6122; GFX90A-NEXT: ;;#ASMSTART 6123; GFX90A-NEXT: ; def v[2:3] 6124; GFX90A-NEXT: ;;#ASMEND 6125; GFX90A-NEXT: v_perm_b32 v1, v2, v0, s4 6126; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 6127; GFX90A-NEXT: v_mov_b32_e32 v4, 0 6128; GFX90A-NEXT: v_perm_b32 v0, v3, v3, s4 6129; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 6130; GFX90A-NEXT: s_waitcnt vmcnt(0) 6131; GFX90A-NEXT: s_setpc_b64 s[30:31] 6132; 6133; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_1_4: 6134; GFX940: ; %bb.0: 6135; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6136; GFX940-NEXT: ;;#ASMSTART 6137; GFX940-NEXT: ; def v[0:1] 6138; GFX940-NEXT: ;;#ASMEND 6139; GFX940-NEXT: s_mov_b32 s2, 0x7060302 6140; GFX940-NEXT: ;;#ASMSTART 6141; GFX940-NEXT: ; def v[2:3] 6142; GFX940-NEXT: ;;#ASMEND 6143; GFX940-NEXT: v_mov_b32_e32 v4, 0 6144; GFX940-NEXT: v_perm_b32 v1, v2, v0, s2 6145; GFX940-NEXT: s_mov_b32 s2, 0x5040100 6146; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 6147; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 6148; GFX940-NEXT: s_waitcnt vmcnt(0) 6149; GFX940-NEXT: s_setpc_b64 s[30:31] 6150 %vec0 = call <4 x i16> asm "; def $0", "=v"() 6151 %vec1 = call <4 x i16> asm "; def $0", 
"=v"() 6152 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 6153 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 6154 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 1, i32 4> 6155 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 6156 ret void 6157} 6158 6159define void @v_shuffle_v4i16_v3i16__5_5_2_4(ptr addrspace(1) inreg %ptr) { 6160; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_5_2_4: 6161; GFX900: ; %bb.0: 6162; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6163; GFX900-NEXT: ;;#ASMSTART 6164; GFX900-NEXT: ; def v[0:1] 6165; GFX900-NEXT: ;;#ASMEND 6166; GFX900-NEXT: s_mov_b32 s4, 0xffff 6167; GFX900-NEXT: ;;#ASMSTART 6168; GFX900-NEXT: ; def v[2:3] 6169; GFX900-NEXT: ;;#ASMEND 6170; GFX900-NEXT: v_bfi_b32 v1, s4, v1, v2 6171; GFX900-NEXT: s_mov_b32 s4, 0x5040100 6172; GFX900-NEXT: v_mov_b32_e32 v4, 0 6173; GFX900-NEXT: v_perm_b32 v0, v3, v3, s4 6174; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 6175; GFX900-NEXT: s_waitcnt vmcnt(0) 6176; GFX900-NEXT: s_setpc_b64 s[30:31] 6177; 6178; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_5_2_4: 6179; GFX90A: ; %bb.0: 6180; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6181; GFX90A-NEXT: ;;#ASMSTART 6182; GFX90A-NEXT: ; def v[0:1] 6183; GFX90A-NEXT: ;;#ASMEND 6184; GFX90A-NEXT: s_mov_b32 s4, 0xffff 6185; GFX90A-NEXT: ;;#ASMSTART 6186; GFX90A-NEXT: ; def v[2:3] 6187; GFX90A-NEXT: ;;#ASMEND 6188; GFX90A-NEXT: v_bfi_b32 v1, s4, v1, v2 6189; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 6190; GFX90A-NEXT: v_mov_b32_e32 v4, 0 6191; GFX90A-NEXT: v_perm_b32 v0, v3, v3, s4 6192; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 6193; GFX90A-NEXT: s_waitcnt vmcnt(0) 6194; GFX90A-NEXT: s_setpc_b64 s[30:31] 6195; 6196; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_2_4: 6197; GFX940: ; %bb.0: 6198; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6199; GFX940-NEXT: ;;#ASMSTART 6200; 
GFX940-NEXT: ; def v[0:1] 6201; GFX940-NEXT: ;;#ASMEND 6202; GFX940-NEXT: s_mov_b32 s2, 0xffff 6203; GFX940-NEXT: ;;#ASMSTART 6204; GFX940-NEXT: ; def v[2:3] 6205; GFX940-NEXT: ;;#ASMEND 6206; GFX940-NEXT: v_mov_b32_e32 v4, 0 6207; GFX940-NEXT: v_bfi_b32 v1, s2, v1, v2 6208; GFX940-NEXT: s_mov_b32 s2, 0x5040100 6209; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 6210; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 6211; GFX940-NEXT: s_waitcnt vmcnt(0) 6212; GFX940-NEXT: s_setpc_b64 s[30:31] 6213 %vec0 = call <4 x i16> asm "; def $0", "=v"() 6214 %vec1 = call <4 x i16> asm "; def $0", "=v"() 6215 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 6216 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 6217 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 2, i32 4> 6218 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 6219 ret void 6220} 6221 6222define void @v_shuffle_v4i16_v3i16__5_5_3_4(ptr addrspace(1) inreg %ptr) { 6223; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_5_3_4: 6224; GFX900: ; %bb.0: 6225; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6226; GFX900-NEXT: ;;#ASMSTART 6227; GFX900-NEXT: ; def v[0:1] 6228; GFX900-NEXT: ;;#ASMEND 6229; GFX900-NEXT: s_mov_b32 s4, 0x5040100 6230; GFX900-NEXT: v_mov_b32_e32 v3, 0 6231; GFX900-NEXT: v_perm_b32 v1, v1, v1, s4 6232; GFX900-NEXT: v_mov_b32_e32 v2, v0 6233; GFX900-NEXT: global_store_dwordx2 v3, v[1:2], s[16:17] 6234; GFX900-NEXT: s_waitcnt vmcnt(0) 6235; GFX900-NEXT: s_setpc_b64 s[30:31] 6236; 6237; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_5_3_4: 6238; GFX90A: ; %bb.0: 6239; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6240; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 6241; GFX90A-NEXT: v_mov_b32_e32 v4, 0 6242; GFX90A-NEXT: ;;#ASMSTART 6243; GFX90A-NEXT: ; def v[0:1] 6244; GFX90A-NEXT: ;;#ASMEND 6245; GFX90A-NEXT: v_perm_b32 v2, v1, v1, s4 6246; GFX90A-NEXT: v_mov_b32_e32 
v3, v0 6247; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17] 6248; GFX90A-NEXT: s_waitcnt vmcnt(0) 6249; GFX90A-NEXT: s_setpc_b64 s[30:31] 6250; 6251; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_3_4: 6252; GFX940: ; %bb.0: 6253; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6254; GFX940-NEXT: s_mov_b32 s2, 0x5040100 6255; GFX940-NEXT: v_mov_b32_e32 v4, 0 6256; GFX940-NEXT: ;;#ASMSTART 6257; GFX940-NEXT: ; def v[0:1] 6258; GFX940-NEXT: ;;#ASMEND 6259; GFX940-NEXT: s_nop 0 6260; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 6261; GFX940-NEXT: v_mov_b32_e32 v3, v0 6262; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 6263; GFX940-NEXT: s_waitcnt vmcnt(0) 6264; GFX940-NEXT: s_setpc_b64 s[30:31] 6265 %vec0 = call <4 x i16> asm "; def $0", "=v"() 6266 %vec1 = call <4 x i16> asm "; def $0", "=v"() 6267 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 6268 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 6269 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 3, i32 4> 6270 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 6271 ret void 6272} 6273 6274define void @v_shuffle_v4i16_v3i16__u_5_5_5(ptr addrspace(1) inreg %ptr) { 6275; GFX900-LABEL: v_shuffle_v4i16_v3i16__u_5_5_5: 6276; GFX900: ; %bb.0: 6277; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6278; GFX900-NEXT: ;;#ASMSTART 6279; GFX900-NEXT: ; def v[0:1] 6280; GFX900-NEXT: ;;#ASMEND 6281; GFX900-NEXT: s_mov_b32 s4, 0x5040100 6282; GFX900-NEXT: v_mov_b32_e32 v3, 0 6283; GFX900-NEXT: v_perm_b32 v2, v1, v1, s4 6284; GFX900-NEXT: v_lshlrev_b32_e32 v1, 16, v1 6285; GFX900-NEXT: global_store_dwordx2 v3, v[1:2], s[16:17] 6286; GFX900-NEXT: s_waitcnt vmcnt(0) 6287; GFX900-NEXT: s_setpc_b64 s[30:31] 6288; 6289; GFX90A-LABEL: v_shuffle_v4i16_v3i16__u_5_5_5: 6290; GFX90A: ; %bb.0: 6291; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6292; GFX90A-NEXT: s_mov_b32 
s4, 0x5040100 6293; GFX90A-NEXT: v_mov_b32_e32 v4, 0 6294; GFX90A-NEXT: ;;#ASMSTART 6295; GFX90A-NEXT: ; def v[0:1] 6296; GFX90A-NEXT: ;;#ASMEND 6297; GFX90A-NEXT: v_perm_b32 v3, v1, v1, s4 6298; GFX90A-NEXT: v_lshlrev_b32_e32 v2, 16, v1 6299; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17] 6300; GFX90A-NEXT: s_waitcnt vmcnt(0) 6301; GFX90A-NEXT: s_setpc_b64 s[30:31] 6302; 6303; GFX940-LABEL: v_shuffle_v4i16_v3i16__u_5_5_5: 6304; GFX940: ; %bb.0: 6305; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6306; GFX940-NEXT: s_mov_b32 s2, 0x5040100 6307; GFX940-NEXT: v_mov_b32_e32 v4, 0 6308; GFX940-NEXT: ;;#ASMSTART 6309; GFX940-NEXT: ; def v[0:1] 6310; GFX940-NEXT: ;;#ASMEND 6311; GFX940-NEXT: s_nop 0 6312; GFX940-NEXT: v_perm_b32 v3, v1, v1, s2 6313; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v1 6314; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 6315; GFX940-NEXT: s_waitcnt vmcnt(0) 6316; GFX940-NEXT: s_setpc_b64 s[30:31] 6317 %vec0 = call <4 x i16> asm "; def $0", "=v"() 6318 %vec1 = call <4 x i16> asm "; def $0", "=v"() 6319 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 6320 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 6321 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 poison, i32 5, i32 5, i32 5> 6322 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 6323 ret void 6324} 6325 6326define void @v_shuffle_v4i16_v3i16__0_5_5_5(ptr addrspace(1) inreg %ptr) { 6327; GFX900-LABEL: v_shuffle_v4i16_v3i16__0_5_5_5: 6328; GFX900: ; %bb.0: 6329; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6330; GFX900-NEXT: ;;#ASMSTART 6331; GFX900-NEXT: ; def v[0:1] 6332; GFX900-NEXT: ;;#ASMEND 6333; GFX900-NEXT: ;;#ASMSTART 6334; GFX900-NEXT: ; def v[1:2] 6335; GFX900-NEXT: ;;#ASMEND 6336; GFX900-NEXT: s_mov_b32 s4, 0x5040100 6337; GFX900-NEXT: v_mov_b32_e32 v3, 0 6338; GFX900-NEXT: v_perm_b32 v0, v2, v0, s4 6339; GFX900-NEXT: 
v_perm_b32 v1, v2, v2, s4 6340; GFX900-NEXT: global_store_dwordx2 v3, v[0:1], s[16:17] 6341; GFX900-NEXT: s_waitcnt vmcnt(0) 6342; GFX900-NEXT: s_setpc_b64 s[30:31] 6343; 6344; GFX90A-LABEL: v_shuffle_v4i16_v3i16__0_5_5_5: 6345; GFX90A: ; %bb.0: 6346; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6347; GFX90A-NEXT: ;;#ASMSTART 6348; GFX90A-NEXT: ; def v[0:1] 6349; GFX90A-NEXT: ;;#ASMEND 6350; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 6351; GFX90A-NEXT: v_mov_b32_e32 v4, 0 6352; GFX90A-NEXT: ;;#ASMSTART 6353; GFX90A-NEXT: ; def v[2:3] 6354; GFX90A-NEXT: ;;#ASMEND 6355; GFX90A-NEXT: v_perm_b32 v0, v3, v0, s4 6356; GFX90A-NEXT: v_perm_b32 v1, v3, v3, s4 6357; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 6358; GFX90A-NEXT: s_waitcnt vmcnt(0) 6359; GFX90A-NEXT: s_setpc_b64 s[30:31] 6360; 6361; GFX940-LABEL: v_shuffle_v4i16_v3i16__0_5_5_5: 6362; GFX940: ; %bb.0: 6363; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6364; GFX940-NEXT: ;;#ASMSTART 6365; GFX940-NEXT: ; def v[0:1] 6366; GFX940-NEXT: ;;#ASMEND 6367; GFX940-NEXT: s_mov_b32 s2, 0x5040100 6368; GFX940-NEXT: v_mov_b32_e32 v4, 0 6369; GFX940-NEXT: ;;#ASMSTART 6370; GFX940-NEXT: ; def v[2:3] 6371; GFX940-NEXT: ;;#ASMEND 6372; GFX940-NEXT: s_nop 0 6373; GFX940-NEXT: v_perm_b32 v0, v3, v0, s2 6374; GFX940-NEXT: v_perm_b32 v1, v3, v3, s2 6375; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 6376; GFX940-NEXT: s_waitcnt vmcnt(0) 6377; GFX940-NEXT: s_setpc_b64 s[30:31] 6378 %vec0 = call <4 x i16> asm "; def $0", "=v"() 6379 %vec1 = call <4 x i16> asm "; def $0", "=v"() 6380 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 6381 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 6382 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 0, i32 5, i32 5, i32 5> 6383 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 6384 ret void 6385} 6386 6387define void 
@v_shuffle_v4i16_v3i16__1_5_5_5(ptr addrspace(1) inreg %ptr) { 6388; GFX900-LABEL: v_shuffle_v4i16_v3i16__1_5_5_5: 6389; GFX900: ; %bb.0: 6390; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6391; GFX900-NEXT: ;;#ASMSTART 6392; GFX900-NEXT: ; def v[0:1] 6393; GFX900-NEXT: ;;#ASMEND 6394; GFX900-NEXT: ;;#ASMSTART 6395; GFX900-NEXT: ; def v[1:2] 6396; GFX900-NEXT: ;;#ASMEND 6397; GFX900-NEXT: s_mov_b32 s4, 0x5040100 6398; GFX900-NEXT: v_mov_b32_e32 v3, 0 6399; GFX900-NEXT: v_perm_b32 v1, v2, v2, s4 6400; GFX900-NEXT: v_alignbit_b32 v0, v2, v0, 16 6401; GFX900-NEXT: global_store_dwordx2 v3, v[0:1], s[16:17] 6402; GFX900-NEXT: s_waitcnt vmcnt(0) 6403; GFX900-NEXT: s_setpc_b64 s[30:31] 6404; 6405; GFX90A-LABEL: v_shuffle_v4i16_v3i16__1_5_5_5: 6406; GFX90A: ; %bb.0: 6407; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6408; GFX90A-NEXT: ;;#ASMSTART 6409; GFX90A-NEXT: ; def v[0:1] 6410; GFX90A-NEXT: ;;#ASMEND 6411; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 6412; GFX90A-NEXT: v_mov_b32_e32 v4, 0 6413; GFX90A-NEXT: ;;#ASMSTART 6414; GFX90A-NEXT: ; def v[2:3] 6415; GFX90A-NEXT: ;;#ASMEND 6416; GFX90A-NEXT: v_perm_b32 v1, v3, v3, s4 6417; GFX90A-NEXT: v_alignbit_b32 v0, v3, v0, 16 6418; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 6419; GFX90A-NEXT: s_waitcnt vmcnt(0) 6420; GFX90A-NEXT: s_setpc_b64 s[30:31] 6421; 6422; GFX940-LABEL: v_shuffle_v4i16_v3i16__1_5_5_5: 6423; GFX940: ; %bb.0: 6424; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6425; GFX940-NEXT: ;;#ASMSTART 6426; GFX940-NEXT: ; def v[0:1] 6427; GFX940-NEXT: ;;#ASMEND 6428; GFX940-NEXT: s_mov_b32 s2, 0x5040100 6429; GFX940-NEXT: v_mov_b32_e32 v4, 0 6430; GFX940-NEXT: ;;#ASMSTART 6431; GFX940-NEXT: ; def v[2:3] 6432; GFX940-NEXT: ;;#ASMEND 6433; GFX940-NEXT: s_nop 0 6434; GFX940-NEXT: v_perm_b32 v1, v3, v3, s2 6435; GFX940-NEXT: v_alignbit_b32 v0, v3, v0, 16 6436; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 6437; GFX940-NEXT: s_waitcnt vmcnt(0) 6438; GFX940-NEXT: s_setpc_b64 
s[30:31] 6439 %vec0 = call <4 x i16> asm "; def $0", "=v"() 6440 %vec1 = call <4 x i16> asm "; def $0", "=v"() 6441 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 6442 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 6443 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 1, i32 5, i32 5, i32 5> 6444 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 6445 ret void 6446} 6447 6448define void @v_shuffle_v4i16_v3i16__2_5_5_5(ptr addrspace(1) inreg %ptr) { 6449; GFX900-LABEL: v_shuffle_v4i16_v3i16__2_5_5_5: 6450; GFX900: ; %bb.0: 6451; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6452; GFX900-NEXT: ;;#ASMSTART 6453; GFX900-NEXT: ; def v[0:1] 6454; GFX900-NEXT: ;;#ASMEND 6455; GFX900-NEXT: s_mov_b32 s4, 0x5040100 6456; GFX900-NEXT: v_mov_b32_e32 v4, 0 6457; GFX900-NEXT: ;;#ASMSTART 6458; GFX900-NEXT: ; def v[2:3] 6459; GFX900-NEXT: ;;#ASMEND 6460; GFX900-NEXT: v_perm_b32 v0, v3, v1, s4 6461; GFX900-NEXT: v_perm_b32 v1, v3, v3, s4 6462; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 6463; GFX900-NEXT: s_waitcnt vmcnt(0) 6464; GFX900-NEXT: s_setpc_b64 s[30:31] 6465; 6466; GFX90A-LABEL: v_shuffle_v4i16_v3i16__2_5_5_5: 6467; GFX90A: ; %bb.0: 6468; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6469; GFX90A-NEXT: ;;#ASMSTART 6470; GFX90A-NEXT: ; def v[0:1] 6471; GFX90A-NEXT: ;;#ASMEND 6472; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 6473; GFX90A-NEXT: v_mov_b32_e32 v4, 0 6474; GFX90A-NEXT: ;;#ASMSTART 6475; GFX90A-NEXT: ; def v[2:3] 6476; GFX90A-NEXT: ;;#ASMEND 6477; GFX90A-NEXT: v_perm_b32 v0, v3, v1, s4 6478; GFX90A-NEXT: v_perm_b32 v1, v3, v3, s4 6479; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 6480; GFX90A-NEXT: s_waitcnt vmcnt(0) 6481; GFX90A-NEXT: s_setpc_b64 s[30:31] 6482; 6483; GFX940-LABEL: v_shuffle_v4i16_v3i16__2_5_5_5: 6484; GFX940: ; %bb.0: 6485; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6486; 
GFX940-NEXT: ;;#ASMSTART 6487; GFX940-NEXT: ; def v[0:1] 6488; GFX940-NEXT: ;;#ASMEND 6489; GFX940-NEXT: s_mov_b32 s2, 0x5040100 6490; GFX940-NEXT: v_mov_b32_e32 v4, 0 6491; GFX940-NEXT: ;;#ASMSTART 6492; GFX940-NEXT: ; def v[2:3] 6493; GFX940-NEXT: ;;#ASMEND 6494; GFX940-NEXT: s_nop 0 6495; GFX940-NEXT: v_perm_b32 v0, v3, v1, s2 6496; GFX940-NEXT: v_perm_b32 v1, v3, v3, s2 6497; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 6498; GFX940-NEXT: s_waitcnt vmcnt(0) 6499; GFX940-NEXT: s_setpc_b64 s[30:31] 6500 %vec0 = call <4 x i16> asm "; def $0", "=v"() 6501 %vec1 = call <4 x i16> asm "; def $0", "=v"() 6502 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 6503 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 6504 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 2, i32 5, i32 5, i32 5> 6505 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 6506 ret void 6507} 6508 6509define void @v_shuffle_v4i16_v3i16__3_5_5_5(ptr addrspace(1) inreg %ptr) { 6510; GFX900-LABEL: v_shuffle_v4i16_v3i16__3_5_5_5: 6511; GFX900: ; %bb.0: 6512; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6513; GFX900-NEXT: ;;#ASMSTART 6514; GFX900-NEXT: ; def v[0:1] 6515; GFX900-NEXT: ;;#ASMEND 6516; GFX900-NEXT: s_mov_b32 s4, 0x5040100 6517; GFX900-NEXT: v_mov_b32_e32 v2, 0 6518; GFX900-NEXT: v_perm_b32 v0, v1, v0, s4 6519; GFX900-NEXT: v_perm_b32 v1, v1, v1, s4 6520; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17] 6521; GFX900-NEXT: s_waitcnt vmcnt(0) 6522; GFX900-NEXT: s_setpc_b64 s[30:31] 6523; 6524; GFX90A-LABEL: v_shuffle_v4i16_v3i16__3_5_5_5: 6525; GFX90A: ; %bb.0: 6526; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6527; GFX90A-NEXT: ;;#ASMSTART 6528; GFX90A-NEXT: ; def v[0:1] 6529; GFX90A-NEXT: ;;#ASMEND 6530; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 6531; GFX90A-NEXT: v_mov_b32_e32 v2, 0 6532; GFX90A-NEXT: v_perm_b32 v0, v1, v0, s4 6533; 
GFX90A-NEXT: v_perm_b32 v1, v1, v1, s4 6534; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17] 6535; GFX90A-NEXT: s_waitcnt vmcnt(0) 6536; GFX90A-NEXT: s_setpc_b64 s[30:31] 6537; 6538; GFX940-LABEL: v_shuffle_v4i16_v3i16__3_5_5_5: 6539; GFX940: ; %bb.0: 6540; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6541; GFX940-NEXT: ;;#ASMSTART 6542; GFX940-NEXT: ; def v[0:1] 6543; GFX940-NEXT: ;;#ASMEND 6544; GFX940-NEXT: s_mov_b32 s2, 0x5040100 6545; GFX940-NEXT: v_mov_b32_e32 v2, 0 6546; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2 6547; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 6548; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 6549; GFX940-NEXT: s_waitcnt vmcnt(0) 6550; GFX940-NEXT: s_setpc_b64 s[30:31] 6551 %vec0 = call <4 x i16> asm "; def $0", "=v"() 6552 %vec1 = call <4 x i16> asm "; def $0", "=v"() 6553 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 6554 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 6555 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 3, i32 5, i32 5, i32 5> 6556 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 6557 ret void 6558} 6559 6560define void @v_shuffle_v4i16_v3i16__4_5_5_5(ptr addrspace(1) inreg %ptr) { 6561; GFX900-LABEL: v_shuffle_v4i16_v3i16__4_5_5_5: 6562; GFX900: ; %bb.0: 6563; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6564; GFX900-NEXT: ;;#ASMSTART 6565; GFX900-NEXT: ; def v[0:1] 6566; GFX900-NEXT: ;;#ASMEND 6567; GFX900-NEXT: s_mov_b32 s4, 0x5040100 6568; GFX900-NEXT: v_mov_b32_e32 v3, 0 6569; GFX900-NEXT: v_perm_b32 v2, v1, v1, s4 6570; GFX900-NEXT: v_alignbit_b32 v1, v1, v0, 16 6571; GFX900-NEXT: global_store_dwordx2 v3, v[1:2], s[16:17] 6572; GFX900-NEXT: s_waitcnt vmcnt(0) 6573; GFX900-NEXT: s_setpc_b64 s[30:31] 6574; 6575; GFX90A-LABEL: v_shuffle_v4i16_v3i16__4_5_5_5: 6576; GFX90A: ; %bb.0: 6577; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6578; GFX90A-NEXT: 
s_mov_b32 s4, 0x5040100 6579; GFX90A-NEXT: v_mov_b32_e32 v4, 0 6580; GFX90A-NEXT: ;;#ASMSTART 6581; GFX90A-NEXT: ; def v[0:1] 6582; GFX90A-NEXT: ;;#ASMEND 6583; GFX90A-NEXT: v_perm_b32 v3, v1, v1, s4 6584; GFX90A-NEXT: v_alignbit_b32 v2, v1, v0, 16 6585; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17] 6586; GFX90A-NEXT: s_waitcnt vmcnt(0) 6587; GFX90A-NEXT: s_setpc_b64 s[30:31] 6588; 6589; GFX940-LABEL: v_shuffle_v4i16_v3i16__4_5_5_5: 6590; GFX940: ; %bb.0: 6591; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6592; GFX940-NEXT: s_mov_b32 s2, 0x5040100 6593; GFX940-NEXT: v_mov_b32_e32 v4, 0 6594; GFX940-NEXT: ;;#ASMSTART 6595; GFX940-NEXT: ; def v[0:1] 6596; GFX940-NEXT: ;;#ASMEND 6597; GFX940-NEXT: s_nop 0 6598; GFX940-NEXT: v_perm_b32 v3, v1, v1, s2 6599; GFX940-NEXT: v_alignbit_b32 v2, v1, v0, 16 6600; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 6601; GFX940-NEXT: s_waitcnt vmcnt(0) 6602; GFX940-NEXT: s_setpc_b64 s[30:31] 6603 %vec0 = call <4 x i16> asm "; def $0", "=v"() 6604 %vec1 = call <4 x i16> asm "; def $0", "=v"() 6605 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 6606 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 6607 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 4, i32 5, i32 5, i32 5> 6608 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 6609 ret void 6610} 6611 6612define void @v_shuffle_v4i16_v3i16__5_u_5_5(ptr addrspace(1) inreg %ptr) { 6613; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_u_5_5: 6614; GFX900: ; %bb.0: 6615; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6616; GFX900-NEXT: s_mov_b32 s4, 0x5040100 6617; GFX900-NEXT: v_mov_b32_e32 v3, 0 6618; GFX900-NEXT: ;;#ASMSTART 6619; GFX900-NEXT: ; def v[0:1] 6620; GFX900-NEXT: ;;#ASMEND 6621; GFX900-NEXT: v_perm_b32 v2, v1, v1, s4 6622; GFX900-NEXT: global_store_dwordx2 v3, v[1:2], s[16:17] 6623; GFX900-NEXT: s_waitcnt vmcnt(0) 6624; 
GFX900-NEXT: s_setpc_b64 s[30:31] 6625; 6626; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_u_5_5: 6627; GFX90A: ; %bb.0: 6628; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6629; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 6630; GFX90A-NEXT: v_mov_b32_e32 v4, 0 6631; GFX90A-NEXT: ;;#ASMSTART 6632; GFX90A-NEXT: ; def v[0:1] 6633; GFX90A-NEXT: ;;#ASMEND 6634; GFX90A-NEXT: v_perm_b32 v3, v1, v1, s4 6635; GFX90A-NEXT: v_mov_b32_e32 v2, v1 6636; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17] 6637; GFX90A-NEXT: s_waitcnt vmcnt(0) 6638; GFX90A-NEXT: s_setpc_b64 s[30:31] 6639; 6640; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_u_5_5: 6641; GFX940: ; %bb.0: 6642; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6643; GFX940-NEXT: s_mov_b32 s2, 0x5040100 6644; GFX940-NEXT: v_mov_b32_e32 v4, 0 6645; GFX940-NEXT: ;;#ASMSTART 6646; GFX940-NEXT: ; def v[0:1] 6647; GFX940-NEXT: ;;#ASMEND 6648; GFX940-NEXT: s_nop 0 6649; GFX940-NEXT: v_perm_b32 v3, v1, v1, s2 6650; GFX940-NEXT: v_mov_b32_e32 v2, v1 6651; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 6652; GFX940-NEXT: s_waitcnt vmcnt(0) 6653; GFX940-NEXT: s_setpc_b64 s[30:31] 6654 %vec0 = call <4 x i16> asm "; def $0", "=v"() 6655 %vec1 = call <4 x i16> asm "; def $0", "=v"() 6656 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 6657 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 6658 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 poison, i32 5, i32 5> 6659 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 6660 ret void 6661} 6662 6663define void @v_shuffle_v4i16_v3i16__5_0_5_5(ptr addrspace(1) inreg %ptr) { 6664; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_0_5_5: 6665; GFX900: ; %bb.0: 6666; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6667; GFX900-NEXT: ;;#ASMSTART 6668; GFX900-NEXT: ; def v[0:1] 6669; GFX900-NEXT: ;;#ASMEND 6670; GFX900-NEXT: ;;#ASMSTART 6671; GFX900-NEXT: ; 
def v[1:2] 6672; GFX900-NEXT: ;;#ASMEND 6673; GFX900-NEXT: s_mov_b32 s4, 0x5040100 6674; GFX900-NEXT: v_mov_b32_e32 v3, 0 6675; GFX900-NEXT: v_perm_b32 v0, v0, v2, s4 6676; GFX900-NEXT: v_perm_b32 v1, v2, v2, s4 6677; GFX900-NEXT: global_store_dwordx2 v3, v[0:1], s[16:17] 6678; GFX900-NEXT: s_waitcnt vmcnt(0) 6679; GFX900-NEXT: s_setpc_b64 s[30:31] 6680; 6681; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_0_5_5: 6682; GFX90A: ; %bb.0: 6683; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6684; GFX90A-NEXT: ;;#ASMSTART 6685; GFX90A-NEXT: ; def v[0:1] 6686; GFX90A-NEXT: ;;#ASMEND 6687; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 6688; GFX90A-NEXT: v_mov_b32_e32 v4, 0 6689; GFX90A-NEXT: ;;#ASMSTART 6690; GFX90A-NEXT: ; def v[2:3] 6691; GFX90A-NEXT: ;;#ASMEND 6692; GFX90A-NEXT: v_perm_b32 v0, v0, v3, s4 6693; GFX90A-NEXT: v_perm_b32 v1, v3, v3, s4 6694; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 6695; GFX90A-NEXT: s_waitcnt vmcnt(0) 6696; GFX90A-NEXT: s_setpc_b64 s[30:31] 6697; 6698; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_0_5_5: 6699; GFX940: ; %bb.0: 6700; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6701; GFX940-NEXT: ;;#ASMSTART 6702; GFX940-NEXT: ; def v[0:1] 6703; GFX940-NEXT: ;;#ASMEND 6704; GFX940-NEXT: s_mov_b32 s2, 0x5040100 6705; GFX940-NEXT: v_mov_b32_e32 v4, 0 6706; GFX940-NEXT: ;;#ASMSTART 6707; GFX940-NEXT: ; def v[2:3] 6708; GFX940-NEXT: ;;#ASMEND 6709; GFX940-NEXT: s_nop 0 6710; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 6711; GFX940-NEXT: v_perm_b32 v1, v3, v3, s2 6712; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 6713; GFX940-NEXT: s_waitcnt vmcnt(0) 6714; GFX940-NEXT: s_setpc_b64 s[30:31] 6715 %vec0 = call <4 x i16> asm "; def $0", "=v"() 6716 %vec1 = call <4 x i16> asm "; def $0", "=v"() 6717 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 6718 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 6719 %shuf = shufflevector <3 x i16> 
%extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 0, i32 5, i32 5> 6720 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 6721 ret void 6722} 6723 6724define void @v_shuffle_v4i16_v3i16__5_1_5_5(ptr addrspace(1) inreg %ptr) { 6725; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_1_5_5: 6726; GFX900: ; %bb.0: 6727; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6728; GFX900-NEXT: ;;#ASMSTART 6729; GFX900-NEXT: ; def v[0:1] 6730; GFX900-NEXT: ;;#ASMEND 6731; GFX900-NEXT: s_mov_b32 s4, 0xffff 6732; GFX900-NEXT: ;;#ASMSTART 6733; GFX900-NEXT: ; def v[1:2] 6734; GFX900-NEXT: ;;#ASMEND 6735; GFX900-NEXT: v_bfi_b32 v0, s4, v2, v0 6736; GFX900-NEXT: s_mov_b32 s4, 0x5040100 6737; GFX900-NEXT: v_mov_b32_e32 v3, 0 6738; GFX900-NEXT: v_perm_b32 v1, v2, v2, s4 6739; GFX900-NEXT: global_store_dwordx2 v3, v[0:1], s[16:17] 6740; GFX900-NEXT: s_waitcnt vmcnt(0) 6741; GFX900-NEXT: s_setpc_b64 s[30:31] 6742; 6743; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_1_5_5: 6744; GFX90A: ; %bb.0: 6745; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6746; GFX90A-NEXT: ;;#ASMSTART 6747; GFX90A-NEXT: ; def v[0:1] 6748; GFX90A-NEXT: ;;#ASMEND 6749; GFX90A-NEXT: s_mov_b32 s4, 0xffff 6750; GFX90A-NEXT: ;;#ASMSTART 6751; GFX90A-NEXT: ; def v[2:3] 6752; GFX90A-NEXT: ;;#ASMEND 6753; GFX90A-NEXT: v_bfi_b32 v0, s4, v3, v0 6754; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 6755; GFX90A-NEXT: v_mov_b32_e32 v4, 0 6756; GFX90A-NEXT: v_perm_b32 v1, v3, v3, s4 6757; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 6758; GFX90A-NEXT: s_waitcnt vmcnt(0) 6759; GFX90A-NEXT: s_setpc_b64 s[30:31] 6760; 6761; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_1_5_5: 6762; GFX940: ; %bb.0: 6763; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6764; GFX940-NEXT: ;;#ASMSTART 6765; GFX940-NEXT: ; def v[0:1] 6766; GFX940-NEXT: ;;#ASMEND 6767; GFX940-NEXT: s_mov_b32 s2, 0xffff 6768; GFX940-NEXT: ;;#ASMSTART 6769; GFX940-NEXT: ; def v[2:3] 6770; GFX940-NEXT: ;;#ASMEND 6771; GFX940-NEXT: v_mov_b32_e32 v4, 0 6772; GFX940-NEXT: 
v_bfi_b32 v0, s2, v3, v0 6773; GFX940-NEXT: s_mov_b32 s2, 0x5040100 6774; GFX940-NEXT: v_perm_b32 v1, v3, v3, s2 6775; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 6776; GFX940-NEXT: s_waitcnt vmcnt(0) 6777; GFX940-NEXT: s_setpc_b64 s[30:31] 6778 %vec0 = call <4 x i16> asm "; def $0", "=v"() 6779 %vec1 = call <4 x i16> asm "; def $0", "=v"() 6780 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 6781 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 6782 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 1, i32 5, i32 5> 6783 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 6784 ret void 6785} 6786 6787define void @v_shuffle_v4i16_v3i16__5_2_5_5(ptr addrspace(1) inreg %ptr) { 6788; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_2_5_5: 6789; GFX900: ; %bb.0: 6790; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6791; GFX900-NEXT: ;;#ASMSTART 6792; GFX900-NEXT: ; def v[0:1] 6793; GFX900-NEXT: ;;#ASMEND 6794; GFX900-NEXT: s_mov_b32 s4, 0x5040100 6795; GFX900-NEXT: v_mov_b32_e32 v4, 0 6796; GFX900-NEXT: ;;#ASMSTART 6797; GFX900-NEXT: ; def v[2:3] 6798; GFX900-NEXT: ;;#ASMEND 6799; GFX900-NEXT: v_perm_b32 v0, v1, v3, s4 6800; GFX900-NEXT: v_perm_b32 v1, v3, v3, s4 6801; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 6802; GFX900-NEXT: s_waitcnt vmcnt(0) 6803; GFX900-NEXT: s_setpc_b64 s[30:31] 6804; 6805; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_2_5_5: 6806; GFX90A: ; %bb.0: 6807; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6808; GFX90A-NEXT: ;;#ASMSTART 6809; GFX90A-NEXT: ; def v[0:1] 6810; GFX90A-NEXT: ;;#ASMEND 6811; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 6812; GFX90A-NEXT: v_mov_b32_e32 v4, 0 6813; GFX90A-NEXT: ;;#ASMSTART 6814; GFX90A-NEXT: ; def v[2:3] 6815; GFX90A-NEXT: ;;#ASMEND 6816; GFX90A-NEXT: v_perm_b32 v0, v1, v3, s4 6817; GFX90A-NEXT: v_perm_b32 v1, v3, v3, s4 6818; GFX90A-NEXT: global_store_dwordx2 v4, 
v[0:1], s[16:17] 6819; GFX90A-NEXT: s_waitcnt vmcnt(0) 6820; GFX90A-NEXT: s_setpc_b64 s[30:31] 6821; 6822; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_2_5_5: 6823; GFX940: ; %bb.0: 6824; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6825; GFX940-NEXT: ;;#ASMSTART 6826; GFX940-NEXT: ; def v[0:1] 6827; GFX940-NEXT: ;;#ASMEND 6828; GFX940-NEXT: s_mov_b32 s2, 0x5040100 6829; GFX940-NEXT: v_mov_b32_e32 v4, 0 6830; GFX940-NEXT: ;;#ASMSTART 6831; GFX940-NEXT: ; def v[2:3] 6832; GFX940-NEXT: ;;#ASMEND 6833; GFX940-NEXT: s_nop 0 6834; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 6835; GFX940-NEXT: v_perm_b32 v1, v3, v3, s2 6836; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 6837; GFX940-NEXT: s_waitcnt vmcnt(0) 6838; GFX940-NEXT: s_setpc_b64 s[30:31] 6839 %vec0 = call <4 x i16> asm "; def $0", "=v"() 6840 %vec1 = call <4 x i16> asm "; def $0", "=v"() 6841 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 6842 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 6843 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 2, i32 5, i32 5> 6844 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 6845 ret void 6846} 6847 6848define void @v_shuffle_v4i16_v3i16__5_3_5_5(ptr addrspace(1) inreg %ptr) { 6849; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_3_5_5: 6850; GFX900: ; %bb.0: 6851; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6852; GFX900-NEXT: ;;#ASMSTART 6853; GFX900-NEXT: ; def v[0:1] 6854; GFX900-NEXT: ;;#ASMEND 6855; GFX900-NEXT: s_mov_b32 s4, 0x5040100 6856; GFX900-NEXT: v_mov_b32_e32 v2, 0 6857; GFX900-NEXT: v_perm_b32 v0, v0, v1, s4 6858; GFX900-NEXT: v_perm_b32 v1, v1, v1, s4 6859; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17] 6860; GFX900-NEXT: s_waitcnt vmcnt(0) 6861; GFX900-NEXT: s_setpc_b64 s[30:31] 6862; 6863; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_3_5_5: 6864; GFX90A: ; %bb.0: 6865; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) 6866; GFX90A-NEXT: ;;#ASMSTART 6867; GFX90A-NEXT: ; def v[0:1] 6868; GFX90A-NEXT: ;;#ASMEND 6869; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 6870; GFX90A-NEXT: v_mov_b32_e32 v2, 0 6871; GFX90A-NEXT: v_perm_b32 v0, v0, v1, s4 6872; GFX90A-NEXT: v_perm_b32 v1, v1, v1, s4 6873; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17] 6874; GFX90A-NEXT: s_waitcnt vmcnt(0) 6875; GFX90A-NEXT: s_setpc_b64 s[30:31] 6876; 6877; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_3_5_5: 6878; GFX940: ; %bb.0: 6879; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6880; GFX940-NEXT: ;;#ASMSTART 6881; GFX940-NEXT: ; def v[0:1] 6882; GFX940-NEXT: ;;#ASMEND 6883; GFX940-NEXT: s_mov_b32 s2, 0x5040100 6884; GFX940-NEXT: v_mov_b32_e32 v2, 0 6885; GFX940-NEXT: v_perm_b32 v0, v0, v1, s2 6886; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 6887; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 6888; GFX940-NEXT: s_waitcnt vmcnt(0) 6889; GFX940-NEXT: s_setpc_b64 s[30:31] 6890 %vec0 = call <4 x i16> asm "; def $0", "=v"() 6891 %vec1 = call <4 x i16> asm "; def $0", "=v"() 6892 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 6893 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 6894 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 3, i32 5, i32 5> 6895 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 6896 ret void 6897} 6898 6899define void @v_shuffle_v4i16_v3i16__5_4_5_5(ptr addrspace(1) inreg %ptr) { 6900; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_4_5_5: 6901; GFX900: ; %bb.0: 6902; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6903; GFX900-NEXT: ;;#ASMSTART 6904; GFX900-NEXT: ; def v[0:1] 6905; GFX900-NEXT: ;;#ASMEND 6906; GFX900-NEXT: s_mov_b32 s4, 0xffff 6907; GFX900-NEXT: v_bfi_b32 v0, s4, v1, v0 6908; GFX900-NEXT: s_mov_b32 s4, 0x5040100 6909; GFX900-NEXT: v_mov_b32_e32 v2, 0 6910; GFX900-NEXT: v_perm_b32 v1, v1, v1, s4 6911; GFX900-NEXT: 
global_store_dwordx2 v2, v[0:1], s[16:17] 6912; GFX900-NEXT: s_waitcnt vmcnt(0) 6913; GFX900-NEXT: s_setpc_b64 s[30:31] 6914; 6915; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_4_5_5: 6916; GFX90A: ; %bb.0: 6917; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6918; GFX90A-NEXT: ;;#ASMSTART 6919; GFX90A-NEXT: ; def v[0:1] 6920; GFX90A-NEXT: ;;#ASMEND 6921; GFX90A-NEXT: s_mov_b32 s4, 0xffff 6922; GFX90A-NEXT: v_bfi_b32 v0, s4, v1, v0 6923; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 6924; GFX90A-NEXT: v_mov_b32_e32 v2, 0 6925; GFX90A-NEXT: v_perm_b32 v1, v1, v1, s4 6926; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17] 6927; GFX90A-NEXT: s_waitcnt vmcnt(0) 6928; GFX90A-NEXT: s_setpc_b64 s[30:31] 6929; 6930; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_4_5_5: 6931; GFX940: ; %bb.0: 6932; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6933; GFX940-NEXT: ;;#ASMSTART 6934; GFX940-NEXT: ; def v[0:1] 6935; GFX940-NEXT: ;;#ASMEND 6936; GFX940-NEXT: s_mov_b32 s2, 0xffff 6937; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v0 6938; GFX940-NEXT: s_mov_b32 s2, 0x5040100 6939; GFX940-NEXT: v_mov_b32_e32 v2, 0 6940; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 6941; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 6942; GFX940-NEXT: s_waitcnt vmcnt(0) 6943; GFX940-NEXT: s_setpc_b64 s[30:31] 6944 %vec0 = call <4 x i16> asm "; def $0", "=v"() 6945 %vec1 = call <4 x i16> asm "; def $0", "=v"() 6946 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 6947 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 6948 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 4, i32 5, i32 5> 6949 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 6950 ret void 6951} 6952 6953define void @v_shuffle_v4i16_v3i16__5_5_u_5(ptr addrspace(1) inreg %ptr) { 6954; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_5_u_5: 6955; GFX900: ; %bb.0: 6956; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 
6957; GFX900-NEXT: ;;#ASMSTART 6958; GFX900-NEXT: ; def v[0:1] 6959; GFX900-NEXT: ;;#ASMEND 6960; GFX900-NEXT: s_mov_b32 s4, 0x5040100 6961; GFX900-NEXT: v_mov_b32_e32 v2, 0 6962; GFX900-NEXT: v_perm_b32 v0, v1, v1, s4 6963; GFX900-NEXT: v_lshlrev_b32_e32 v1, 16, v1 6964; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17] 6965; GFX900-NEXT: s_waitcnt vmcnt(0) 6966; GFX900-NEXT: s_setpc_b64 s[30:31] 6967; 6968; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_5_u_5: 6969; GFX90A: ; %bb.0: 6970; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6971; GFX90A-NEXT: ;;#ASMSTART 6972; GFX90A-NEXT: ; def v[0:1] 6973; GFX90A-NEXT: ;;#ASMEND 6974; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 6975; GFX90A-NEXT: v_mov_b32_e32 v2, 0 6976; GFX90A-NEXT: v_perm_b32 v0, v1, v1, s4 6977; GFX90A-NEXT: v_lshlrev_b32_e32 v1, 16, v1 6978; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17] 6979; GFX90A-NEXT: s_waitcnt vmcnt(0) 6980; GFX90A-NEXT: s_setpc_b64 s[30:31] 6981; 6982; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_u_5: 6983; GFX940: ; %bb.0: 6984; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6985; GFX940-NEXT: ;;#ASMSTART 6986; GFX940-NEXT: ; def v[0:1] 6987; GFX940-NEXT: ;;#ASMEND 6988; GFX940-NEXT: s_mov_b32 s2, 0x5040100 6989; GFX940-NEXT: v_mov_b32_e32 v2, 0 6990; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 6991; GFX940-NEXT: v_lshlrev_b32_e32 v1, 16, v1 6992; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 6993; GFX940-NEXT: s_waitcnt vmcnt(0) 6994; GFX940-NEXT: s_setpc_b64 s[30:31] 6995 %vec0 = call <4 x i16> asm "; def $0", "=v"() 6996 %vec1 = call <4 x i16> asm "; def $0", "=v"() 6997 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 6998 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 6999 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 poison, i32 5> 7000 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 7001 ret void 7002} 
; Test: two-source <3 x i16> -> <4 x i16> shuffle with mask <5, 5, 0, 5>, values defined via "=v" inline asm.
; Each <4 x i16> asm result is first narrowed to <3 x i16> (elements 0..2). Mask indices 0..2 then pick from
; the first source (%extract3) and 3..5 from the second (%extract31), so result lanes 0, 1 and 3 take
; element 2 of %vec1 and lane 2 takes element 0 of %vec0. The <4 x i16> result is stored through %ptr.
; The per-target check lines below are autogenerated (see the NOTE on the first line of this file);
; regenerate them with utils/update_llc_test_checks.py rather than editing them by hand.
7003 7004define void @v_shuffle_v4i16_v3i16__5_5_0_5(ptr addrspace(1) inreg %ptr) { 7005; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_5_0_5: 7006; GFX900: ; %bb.0: 7007; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7008; GFX900-NEXT: ;;#ASMSTART 7009; GFX900-NEXT: ; def v[0:1] 7010; GFX900-NEXT: ;;#ASMEND 7011; GFX900-NEXT: ;;#ASMSTART 7012; GFX900-NEXT: ; def v[1:2] 7013; GFX900-NEXT: ;;#ASMEND 7014; GFX900-NEXT: s_mov_b32 s4, 0x5040100 7015; GFX900-NEXT: v_mov_b32_e32 v3, 0 7016; GFX900-NEXT: v_perm_b32 v1, v2, v0, s4 7017; GFX900-NEXT: v_perm_b32 v0, v2, v2, s4 7018; GFX900-NEXT: global_store_dwordx2 v3, v[0:1], s[16:17] 7019; GFX900-NEXT: s_waitcnt vmcnt(0) 7020; GFX900-NEXT: s_setpc_b64 s[30:31] 7021; 7022; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_5_0_5: 7023; GFX90A: ; %bb.0: 7024; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7025; GFX90A-NEXT: ;;#ASMSTART 7026; GFX90A-NEXT: ; def v[0:1] 7027; GFX90A-NEXT: ;;#ASMEND 7028; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 7029; GFX90A-NEXT: v_mov_b32_e32 v4, 0 7030; GFX90A-NEXT: ;;#ASMSTART 7031; GFX90A-NEXT: ; def v[2:3] 7032; GFX90A-NEXT: ;;#ASMEND 7033; GFX90A-NEXT: v_perm_b32 v1, v3, v0, s4 7034; GFX90A-NEXT: v_perm_b32 v0, v3, v3, s4 7035; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 7036; GFX90A-NEXT: s_waitcnt vmcnt(0) 7037; GFX90A-NEXT: s_setpc_b64 s[30:31] 7038; 7039; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_0_5: 7040; GFX940: ; %bb.0: 7041; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7042; GFX940-NEXT: ;;#ASMSTART 7043; GFX940-NEXT: ; def v[0:1] 7044; GFX940-NEXT: ;;#ASMEND 7045; GFX940-NEXT: s_mov_b32 s2, 0x5040100 7046; GFX940-NEXT: v_mov_b32_e32 v4, 0 7047; GFX940-NEXT: ;;#ASMSTART 7048; GFX940-NEXT: ; def v[2:3] 7049; GFX940-NEXT: ;;#ASMEND 7050; GFX940-NEXT: s_nop 0 7051; GFX940-NEXT: v_perm_b32 v1, v3, v0, s2 7052; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 7053; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 7054; GFX940-NEXT: s_waitcnt vmcnt(0) 7055; GFX940-NEXT: 
s_setpc_b64 s[30:31] 7056 %vec0 = call <4 x i16> asm "; def $0", "=v"() 7057 %vec1 = call <4 x i16> asm "; def $0", "=v"() 7058 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 7059 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 7060 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 0, i32 5> 7061 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 7062 ret void 7063} 7064 7065define void @v_shuffle_v4i16_v3i16__5_5_1_5(ptr addrspace(1) inreg %ptr) { 7066; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_5_1_5: 7067; GFX900: ; %bb.0: 7068; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7069; GFX900-NEXT: ;;#ASMSTART 7070; GFX900-NEXT: ; def v[0:1] 7071; GFX900-NEXT: ;;#ASMEND 7072; GFX900-NEXT: ;;#ASMSTART 7073; GFX900-NEXT: ; def v[1:2] 7074; GFX900-NEXT: ;;#ASMEND 7075; GFX900-NEXT: s_mov_b32 s4, 0x5040100 7076; GFX900-NEXT: v_mov_b32_e32 v3, 0 7077; GFX900-NEXT: v_alignbit_b32 v1, v2, v0, 16 7078; GFX900-NEXT: v_perm_b32 v0, v2, v2, s4 7079; GFX900-NEXT: global_store_dwordx2 v3, v[0:1], s[16:17] 7080; GFX900-NEXT: s_waitcnt vmcnt(0) 7081; GFX900-NEXT: s_setpc_b64 s[30:31] 7082; 7083; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_5_1_5: 7084; GFX90A: ; %bb.0: 7085; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7086; GFX90A-NEXT: ;;#ASMSTART 7087; GFX90A-NEXT: ; def v[0:1] 7088; GFX90A-NEXT: ;;#ASMEND 7089; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 7090; GFX90A-NEXT: v_mov_b32_e32 v4, 0 7091; GFX90A-NEXT: ;;#ASMSTART 7092; GFX90A-NEXT: ; def v[2:3] 7093; GFX90A-NEXT: ;;#ASMEND 7094; GFX90A-NEXT: v_alignbit_b32 v1, v3, v0, 16 7095; GFX90A-NEXT: v_perm_b32 v0, v3, v3, s4 7096; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 7097; GFX90A-NEXT: s_waitcnt vmcnt(0) 7098; GFX90A-NEXT: s_setpc_b64 s[30:31] 7099; 7100; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_1_5: 7101; GFX940: ; %bb.0: 7102; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) 7103; GFX940-NEXT: ;;#ASMSTART 7104; GFX940-NEXT: ; def v[0:1] 7105; GFX940-NEXT: ;;#ASMEND 7106; GFX940-NEXT: s_mov_b32 s2, 0x5040100 7107; GFX940-NEXT: v_mov_b32_e32 v4, 0 7108; GFX940-NEXT: ;;#ASMSTART 7109; GFX940-NEXT: ; def v[2:3] 7110; GFX940-NEXT: ;;#ASMEND 7111; GFX940-NEXT: s_nop 0 7112; GFX940-NEXT: v_alignbit_b32 v1, v3, v0, 16 7113; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 7114; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 7115; GFX940-NEXT: s_waitcnt vmcnt(0) 7116; GFX940-NEXT: s_setpc_b64 s[30:31] 7117 %vec0 = call <4 x i16> asm "; def $0", "=v"() 7118 %vec1 = call <4 x i16> asm "; def $0", "=v"() 7119 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 7120 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 7121 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 1, i32 5> 7122 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 7123 ret void 7124} 7125 7126define void @v_shuffle_v4i16_v3i16__5_5_2_5(ptr addrspace(1) inreg %ptr) { 7127; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_5_2_5: 7128; GFX900: ; %bb.0: 7129; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7130; GFX900-NEXT: ;;#ASMSTART 7131; GFX900-NEXT: ; def v[0:1] 7132; GFX900-NEXT: ;;#ASMEND 7133; GFX900-NEXT: s_mov_b32 s4, 0x5040100 7134; GFX900-NEXT: v_mov_b32_e32 v4, 0 7135; GFX900-NEXT: ;;#ASMSTART 7136; GFX900-NEXT: ; def v[2:3] 7137; GFX900-NEXT: ;;#ASMEND 7138; GFX900-NEXT: v_perm_b32 v1, v3, v1, s4 7139; GFX900-NEXT: v_perm_b32 v0, v3, v3, s4 7140; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 7141; GFX900-NEXT: s_waitcnt vmcnt(0) 7142; GFX900-NEXT: s_setpc_b64 s[30:31] 7143; 7144; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_5_2_5: 7145; GFX90A: ; %bb.0: 7146; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7147; GFX90A-NEXT: ;;#ASMSTART 7148; GFX90A-NEXT: ; def v[0:1] 7149; GFX90A-NEXT: ;;#ASMEND 7150; GFX90A-NEXT: 
s_mov_b32 s4, 0x5040100 7151; GFX90A-NEXT: v_mov_b32_e32 v4, 0 7152; GFX90A-NEXT: ;;#ASMSTART 7153; GFX90A-NEXT: ; def v[2:3] 7154; GFX90A-NEXT: ;;#ASMEND 7155; GFX90A-NEXT: v_perm_b32 v1, v3, v1, s4 7156; GFX90A-NEXT: v_perm_b32 v0, v3, v3, s4 7157; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 7158; GFX90A-NEXT: s_waitcnt vmcnt(0) 7159; GFX90A-NEXT: s_setpc_b64 s[30:31] 7160; 7161; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_2_5: 7162; GFX940: ; %bb.0: 7163; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7164; GFX940-NEXT: ;;#ASMSTART 7165; GFX940-NEXT: ; def v[0:1] 7166; GFX940-NEXT: ;;#ASMEND 7167; GFX940-NEXT: s_mov_b32 s2, 0x5040100 7168; GFX940-NEXT: v_mov_b32_e32 v4, 0 7169; GFX940-NEXT: ;;#ASMSTART 7170; GFX940-NEXT: ; def v[2:3] 7171; GFX940-NEXT: ;;#ASMEND 7172; GFX940-NEXT: s_nop 0 7173; GFX940-NEXT: v_perm_b32 v1, v3, v1, s2 7174; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 7175; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 7176; GFX940-NEXT: s_waitcnt vmcnt(0) 7177; GFX940-NEXT: s_setpc_b64 s[30:31] 7178 %vec0 = call <4 x i16> asm "; def $0", "=v"() 7179 %vec1 = call <4 x i16> asm "; def $0", "=v"() 7180 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 7181 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 7182 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 2, i32 5> 7183 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 7184 ret void 7185} 7186 7187define void @v_shuffle_v4i16_v3i16__5_5_3_5(ptr addrspace(1) inreg %ptr) { 7188; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_5_3_5: 7189; GFX900: ; %bb.0: 7190; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7191; GFX900-NEXT: ;;#ASMSTART 7192; GFX900-NEXT: ; def v[0:1] 7193; GFX900-NEXT: ;;#ASMEND 7194; GFX900-NEXT: s_mov_b32 s4, 0x5040100 7195; GFX900-NEXT: v_mov_b32_e32 v3, 0 7196; GFX900-NEXT: v_perm_b32 v2, v1, v0, s4 7197; GFX900-NEXT: 
v_perm_b32 v1, v1, v1, s4 7198; GFX900-NEXT: global_store_dwordx2 v3, v[1:2], s[16:17] 7199; GFX900-NEXT: s_waitcnt vmcnt(0) 7200; GFX900-NEXT: s_setpc_b64 s[30:31] 7201; 7202; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_5_3_5: 7203; GFX90A: ; %bb.0: 7204; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7205; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 7206; GFX90A-NEXT: v_mov_b32_e32 v4, 0 7207; GFX90A-NEXT: ;;#ASMSTART 7208; GFX90A-NEXT: ; def v[0:1] 7209; GFX90A-NEXT: ;;#ASMEND 7210; GFX90A-NEXT: v_perm_b32 v3, v1, v0, s4 7211; GFX90A-NEXT: v_perm_b32 v2, v1, v1, s4 7212; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17] 7213; GFX90A-NEXT: s_waitcnt vmcnt(0) 7214; GFX90A-NEXT: s_setpc_b64 s[30:31] 7215; 7216; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_3_5: 7217; GFX940: ; %bb.0: 7218; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7219; GFX940-NEXT: s_mov_b32 s2, 0x5040100 7220; GFX940-NEXT: v_mov_b32_e32 v4, 0 7221; GFX940-NEXT: ;;#ASMSTART 7222; GFX940-NEXT: ; def v[0:1] 7223; GFX940-NEXT: ;;#ASMEND 7224; GFX940-NEXT: s_nop 0 7225; GFX940-NEXT: v_perm_b32 v3, v1, v0, s2 7226; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 7227; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 7228; GFX940-NEXT: s_waitcnt vmcnt(0) 7229; GFX940-NEXT: s_setpc_b64 s[30:31] 7230 %vec0 = call <4 x i16> asm "; def $0", "=v"() 7231 %vec1 = call <4 x i16> asm "; def $0", "=v"() 7232 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 7233 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 7234 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 3, i32 5> 7235 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 7236 ret void 7237} 7238 7239define void @v_shuffle_v4i16_v3i16__5_5_4_5(ptr addrspace(1) inreg %ptr) { 7240; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_5_4_5: 7241; GFX900: ; %bb.0: 7242; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7243; 
GFX900-NEXT: ;;#ASMSTART 7244; GFX900-NEXT: ; def v[0:1] 7245; GFX900-NEXT: ;;#ASMEND 7246; GFX900-NEXT: s_mov_b32 s4, 0x5040100 7247; GFX900-NEXT: v_mov_b32_e32 v3, 0 7248; GFX900-NEXT: v_alignbit_b32 v2, v1, v0, 16 7249; GFX900-NEXT: v_perm_b32 v1, v1, v1, s4 7250; GFX900-NEXT: global_store_dwordx2 v3, v[1:2], s[16:17] 7251; GFX900-NEXT: s_waitcnt vmcnt(0) 7252; GFX900-NEXT: s_setpc_b64 s[30:31] 7253; 7254; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_5_4_5: 7255; GFX90A: ; %bb.0: 7256; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7257; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 7258; GFX90A-NEXT: v_mov_b32_e32 v4, 0 7259; GFX90A-NEXT: ;;#ASMSTART 7260; GFX90A-NEXT: ; def v[0:1] 7261; GFX90A-NEXT: ;;#ASMEND 7262; GFX90A-NEXT: v_alignbit_b32 v3, v1, v0, 16 7263; GFX90A-NEXT: v_perm_b32 v2, v1, v1, s4 7264; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17] 7265; GFX90A-NEXT: s_waitcnt vmcnt(0) 7266; GFX90A-NEXT: s_setpc_b64 s[30:31] 7267; 7268; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_4_5: 7269; GFX940: ; %bb.0: 7270; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7271; GFX940-NEXT: s_mov_b32 s2, 0x5040100 7272; GFX940-NEXT: v_mov_b32_e32 v4, 0 7273; GFX940-NEXT: ;;#ASMSTART 7274; GFX940-NEXT: ; def v[0:1] 7275; GFX940-NEXT: ;;#ASMEND 7276; GFX940-NEXT: s_nop 0 7277; GFX940-NEXT: v_alignbit_b32 v3, v1, v0, 16 7278; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 7279; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 7280; GFX940-NEXT: s_waitcnt vmcnt(0) 7281; GFX940-NEXT: s_setpc_b64 s[30:31] 7282 %vec0 = call <4 x i16> asm "; def $0", "=v"() 7283 %vec1 = call <4 x i16> asm "; def $0", "=v"() 7284 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 7285 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 7286 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 4, i32 5> 7287 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 7288 
ret void 7289} 7290 7291define void @s_shuffle_v4i16_v3i16__u_u_u_u() { 7292; GFX9-LABEL: s_shuffle_v4i16_v3i16__u_u_u_u: 7293; GFX9: ; %bb.0: 7294; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7295; GFX9-NEXT: ;;#ASMSTART 7296; GFX9-NEXT: ; use s[8:9] 7297; GFX9-NEXT: ;;#ASMEND 7298; GFX9-NEXT: s_setpc_b64 s[30:31] 7299 %vec0 = call <4 x i16> asm "; def $0", "=s"() 7300 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 7301 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> poison 7302 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 7303 ret void 7304} 7305 7306define void @s_shuffle_v4i16_v3i16__0_u_u_u() { 7307; GFX900-LABEL: s_shuffle_v4i16_v3i16__0_u_u_u: 7308; GFX900: ; %bb.0: 7309; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7310; GFX900-NEXT: ;;#ASMSTART 7311; GFX900-NEXT: ; def s[8:9] 7312; GFX900-NEXT: ;;#ASMEND 7313; GFX900-NEXT: ;;#ASMSTART 7314; GFX900-NEXT: ; use s[8:9] 7315; GFX900-NEXT: ;;#ASMEND 7316; GFX900-NEXT: s_setpc_b64 s[30:31] 7317; 7318; GFX90A-LABEL: s_shuffle_v4i16_v3i16__0_u_u_u: 7319; GFX90A: ; %bb.0: 7320; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7321; GFX90A-NEXT: ;;#ASMSTART 7322; GFX90A-NEXT: ; def s[8:9] 7323; GFX90A-NEXT: ;;#ASMEND 7324; GFX90A-NEXT: ;;#ASMSTART 7325; GFX90A-NEXT: ; use s[8:9] 7326; GFX90A-NEXT: ;;#ASMEND 7327; GFX90A-NEXT: s_setpc_b64 s[30:31] 7328; 7329; GFX940-LABEL: s_shuffle_v4i16_v3i16__0_u_u_u: 7330; GFX940: ; %bb.0: 7331; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7332; GFX940-NEXT: ;;#ASMSTART 7333; GFX940-NEXT: ; def s[8:9] 7334; GFX940-NEXT: ;;#ASMEND 7335; GFX940-NEXT: s_nop 0 7336; GFX940-NEXT: ;;#ASMSTART 7337; GFX940-NEXT: ; use s[8:9] 7338; GFX940-NEXT: ;;#ASMEND 7339; GFX940-NEXT: s_setpc_b64 s[30:31] 7340 %vec0 = call <4 x i16> asm "; def $0", "=s"() 7341 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 7342 %shuf = shufflevector <3 x 
i16> %extract3, <3 x i16> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison> 7343 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 7344 ret void 7345} 7346 7347define void @s_shuffle_v4i16_v3i16__1_u_u_u() { 7348; GFX900-LABEL: s_shuffle_v4i16_v3i16__1_u_u_u: 7349; GFX900: ; %bb.0: 7350; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7351; GFX900-NEXT: ;;#ASMSTART 7352; GFX900-NEXT: ; def s[4:5] 7353; GFX900-NEXT: ;;#ASMEND 7354; GFX900-NEXT: s_lshr_b32 s8, s4, 16 7355; GFX900-NEXT: ;;#ASMSTART 7356; GFX900-NEXT: ; use s[8:9] 7357; GFX900-NEXT: ;;#ASMEND 7358; GFX900-NEXT: s_setpc_b64 s[30:31] 7359; 7360; GFX90A-LABEL: s_shuffle_v4i16_v3i16__1_u_u_u: 7361; GFX90A: ; %bb.0: 7362; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7363; GFX90A-NEXT: ;;#ASMSTART 7364; GFX90A-NEXT: ; def s[4:5] 7365; GFX90A-NEXT: ;;#ASMEND 7366; GFX90A-NEXT: s_lshr_b32 s8, s4, 16 7367; GFX90A-NEXT: ;;#ASMSTART 7368; GFX90A-NEXT: ; use s[8:9] 7369; GFX90A-NEXT: ;;#ASMEND 7370; GFX90A-NEXT: s_setpc_b64 s[30:31] 7371; 7372; GFX940-LABEL: s_shuffle_v4i16_v3i16__1_u_u_u: 7373; GFX940: ; %bb.0: 7374; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7375; GFX940-NEXT: ;;#ASMSTART 7376; GFX940-NEXT: ; def s[0:1] 7377; GFX940-NEXT: ;;#ASMEND 7378; GFX940-NEXT: s_lshr_b32 s8, s0, 16 7379; GFX940-NEXT: ;;#ASMSTART 7380; GFX940-NEXT: ; use s[8:9] 7381; GFX940-NEXT: ;;#ASMEND 7382; GFX940-NEXT: s_setpc_b64 s[30:31] 7383 %vec0 = call <4 x i16> asm "; def $0", "=s"() 7384 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 7385 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison> 7386 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 7387 ret void 7388} 7389 7390define void @s_shuffle_v4i16_v3i16__2_u_u_u() { 7391; GFX900-LABEL: s_shuffle_v4i16_v3i16__2_u_u_u: 7392; GFX900: ; %bb.0: 7393; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7394; 
; Documentation for s_shuffle_v4i16_v3i16__3_u_u_u, defined in the text below.
; The mask is <3,u,u,u> and the second shuffle operand is poison, so lane 0 reads
; element 0 of the poison operand and nothing from %vec0 is live. All three targets
; therefore share the common GFX9 check prefix, and the expected output has no asm
; def and no ALU instruction, only the inline-asm use of s[8:9].
GFX900-NEXT: ;;#ASMSTART 7395; GFX900-NEXT: ; def s[4:5] 7396; GFX900-NEXT: ;;#ASMEND 7397; GFX900-NEXT: s_mov_b32 s8, s5 7398; GFX900-NEXT: ;;#ASMSTART 7399; GFX900-NEXT: ; use s[8:9] 7400; GFX900-NEXT: ;;#ASMEND 7401; GFX900-NEXT: s_setpc_b64 s[30:31] 7402; 7403; GFX90A-LABEL: s_shuffle_v4i16_v3i16__2_u_u_u: 7404; GFX90A: ; %bb.0: 7405; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7406; GFX90A-NEXT: ;;#ASMSTART 7407; GFX90A-NEXT: ; def s[4:5] 7408; GFX90A-NEXT: ;;#ASMEND 7409; GFX90A-NEXT: s_mov_b32 s8, s5 7410; GFX90A-NEXT: ;;#ASMSTART 7411; GFX90A-NEXT: ; use s[8:9] 7412; GFX90A-NEXT: ;;#ASMEND 7413; GFX90A-NEXT: s_setpc_b64 s[30:31] 7414; 7415; GFX940-LABEL: s_shuffle_v4i16_v3i16__2_u_u_u: 7416; GFX940: ; %bb.0: 7417; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7418; GFX940-NEXT: ;;#ASMSTART 7419; GFX940-NEXT: ; def s[0:1] 7420; GFX940-NEXT: ;;#ASMEND 7421; GFX940-NEXT: s_mov_b32 s8, s1 7422; GFX940-NEXT: ;;#ASMSTART 7423; GFX940-NEXT: ; use s[8:9] 7424; GFX940-NEXT: ;;#ASMEND 7425; GFX940-NEXT: s_setpc_b64 s[30:31] 7426 %vec0 = call <4 x i16> asm "; def $0", "=s"() 7427 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 7428 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> <i32 2, i32 poison, i32 poison, i32 poison> 7429 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 7430 ret void 7431} 7432 7433define void @s_shuffle_v4i16_v3i16__3_u_u_u() { 7434; GFX9-LABEL: s_shuffle_v4i16_v3i16__3_u_u_u: 7435; GFX9: ; %bb.0: 7436; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7437; GFX9-NEXT: ;;#ASMSTART 7438; GFX9-NEXT: ; use s[8:9] 7439; GFX9-NEXT: ;;#ASMEND 7440; GFX9-NEXT: s_setpc_b64 s[30:31] 7441 %vec0 = call <4 x i16> asm "; def $0", "=s"() 7442 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 7443 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> <i32 3, i32 poison, i32 poison, i32 poison> 7444 
; Documentation for s_shuffle_v4i16_v3i16__4_u_u_u and s_shuffle_v4i16_v3i16__5_u_u_u,
; whose definitions start in the text below. From here on the tests create two
; <4 x i16> asm values (%vec0, %vec1), narrow both to <3 x i16>, and shuffle the pair;
; mask indices 3-5 address the second operand. With mask <4,u,u,u> / <5,u,u,u> only
; the second operand is read, and the checks expect a single asm def followed by
; s_lshr_b32 by 16 of the low dword (index 4) or s_mov_b32 of the high dword
; (index 5) into s8.
call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 7445 ret void 7446} 7447 7448define void @s_shuffle_v4i16_v3i16__4_u_u_u() { 7449; GFX900-LABEL: s_shuffle_v4i16_v3i16__4_u_u_u: 7450; GFX900: ; %bb.0: 7451; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7452; GFX900-NEXT: ;;#ASMSTART 7453; GFX900-NEXT: ; def s[4:5] 7454; GFX900-NEXT: ;;#ASMEND 7455; GFX900-NEXT: s_lshr_b32 s8, s4, 16 7456; GFX900-NEXT: ;;#ASMSTART 7457; GFX900-NEXT: ; use s[8:9] 7458; GFX900-NEXT: ;;#ASMEND 7459; GFX900-NEXT: s_setpc_b64 s[30:31] 7460; 7461; GFX90A-LABEL: s_shuffle_v4i16_v3i16__4_u_u_u: 7462; GFX90A: ; %bb.0: 7463; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7464; GFX90A-NEXT: ;;#ASMSTART 7465; GFX90A-NEXT: ; def s[4:5] 7466; GFX90A-NEXT: ;;#ASMEND 7467; GFX90A-NEXT: s_lshr_b32 s8, s4, 16 7468; GFX90A-NEXT: ;;#ASMSTART 7469; GFX90A-NEXT: ; use s[8:9] 7470; GFX90A-NEXT: ;;#ASMEND 7471; GFX90A-NEXT: s_setpc_b64 s[30:31] 7472; 7473; GFX940-LABEL: s_shuffle_v4i16_v3i16__4_u_u_u: 7474; GFX940: ; %bb.0: 7475; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7476; GFX940-NEXT: ;;#ASMSTART 7477; GFX940-NEXT: ; def s[0:1] 7478; GFX940-NEXT: ;;#ASMEND 7479; GFX940-NEXT: s_lshr_b32 s8, s0, 16 7480; GFX940-NEXT: ;;#ASMSTART 7481; GFX940-NEXT: ; use s[8:9] 7482; GFX940-NEXT: ;;#ASMEND 7483; GFX940-NEXT: s_setpc_b64 s[30:31] 7484 %vec0 = call <4 x i16> asm "; def $0", "=s"() 7485 %vec1 = call <4 x i16> asm "; def $0", "=s"() 7486 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 7487 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 7488 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 4, i32 poison, i32 poison, i32 poison> 7489 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 7490 ret void 7491} 7492 7493define void @s_shuffle_v4i16_v3i16__5_u_u_u() { 7494; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_u_u_u: 7495; GFX900: ; %bb.0: 
; Documentation for s_shuffle_v4i16_v3i16__5_0_u_u, defined in the text below.
; Mask <5,0,u,u>: lane 0 takes element 2 of the second source, lane 1 takes
; element 0 of the first source. The checks expect both asm defs to survive and a
; single s_pack_ll_b32_b16 into s8 that combines the high dword of the second def
; with the low dword of the first def.
7496; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7497; GFX900-NEXT: ;;#ASMSTART 7498; GFX900-NEXT: ; def s[4:5] 7499; GFX900-NEXT: ;;#ASMEND 7500; GFX900-NEXT: s_mov_b32 s8, s5 7501; GFX900-NEXT: ;;#ASMSTART 7502; GFX900-NEXT: ; use s[8:9] 7503; GFX900-NEXT: ;;#ASMEND 7504; GFX900-NEXT: s_setpc_b64 s[30:31] 7505; 7506; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_u_u_u: 7507; GFX90A: ; %bb.0: 7508; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7509; GFX90A-NEXT: ;;#ASMSTART 7510; GFX90A-NEXT: ; def s[4:5] 7511; GFX90A-NEXT: ;;#ASMEND 7512; GFX90A-NEXT: s_mov_b32 s8, s5 7513; GFX90A-NEXT: ;;#ASMSTART 7514; GFX90A-NEXT: ; use s[8:9] 7515; GFX90A-NEXT: ;;#ASMEND 7516; GFX90A-NEXT: s_setpc_b64 s[30:31] 7517; 7518; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_u_u_u: 7519; GFX940: ; %bb.0: 7520; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7521; GFX940-NEXT: ;;#ASMSTART 7522; GFX940-NEXT: ; def s[0:1] 7523; GFX940-NEXT: ;;#ASMEND 7524; GFX940-NEXT: s_mov_b32 s8, s1 7525; GFX940-NEXT: ;;#ASMSTART 7526; GFX940-NEXT: ; use s[8:9] 7527; GFX940-NEXT: ;;#ASMEND 7528; GFX940-NEXT: s_setpc_b64 s[30:31] 7529 %vec0 = call <4 x i16> asm "; def $0", "=s"() 7530 %vec1 = call <4 x i16> asm "; def $0", "=s"() 7531 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 7532 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 7533 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 poison, i32 poison, i32 poison> 7534 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 7535 ret void 7536} 7537 7538define void @s_shuffle_v4i16_v3i16__5_0_u_u() { 7539; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_0_u_u: 7540; GFX900: ; %bb.0: 7541; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7542; GFX900-NEXT: ;;#ASMSTART 7543; GFX900-NEXT: ; def s[4:5] 7544; GFX900-NEXT: ;;#ASMEND 7545; GFX900-NEXT: ;;#ASMSTART 7546; GFX900-NEXT: ; def s[6:7] 7547; GFX900-NEXT: 
; Documentation for s_shuffle_v4i16_v3i16__5_1_u_u, defined in the text below.
; Mask <5,1,u,u>: lane 0 takes element 2 of the second source, lane 1 takes
; element 1 of the first source. The checks expect two asm defs and a single
; s_pack_lh_b32_b16 into s8 whose operands are the high dword of the second def
; and the low dword of the first def.
;;#ASMEND 7548; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s4 7549; GFX900-NEXT: ;;#ASMSTART 7550; GFX900-NEXT: ; use s[8:9] 7551; GFX900-NEXT: ;;#ASMEND 7552; GFX900-NEXT: s_setpc_b64 s[30:31] 7553; 7554; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_0_u_u: 7555; GFX90A: ; %bb.0: 7556; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7557; GFX90A-NEXT: ;;#ASMSTART 7558; GFX90A-NEXT: ; def s[4:5] 7559; GFX90A-NEXT: ;;#ASMEND 7560; GFX90A-NEXT: ;;#ASMSTART 7561; GFX90A-NEXT: ; def s[6:7] 7562; GFX90A-NEXT: ;;#ASMEND 7563; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s4 7564; GFX90A-NEXT: ;;#ASMSTART 7565; GFX90A-NEXT: ; use s[8:9] 7566; GFX90A-NEXT: ;;#ASMEND 7567; GFX90A-NEXT: s_setpc_b64 s[30:31] 7568; 7569; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_0_u_u: 7570; GFX940: ; %bb.0: 7571; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7572; GFX940-NEXT: ;;#ASMSTART 7573; GFX940-NEXT: ; def s[0:1] 7574; GFX940-NEXT: ;;#ASMEND 7575; GFX940-NEXT: ;;#ASMSTART 7576; GFX940-NEXT: ; def s[2:3] 7577; GFX940-NEXT: ;;#ASMEND 7578; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 7579; GFX940-NEXT: ;;#ASMSTART 7580; GFX940-NEXT: ; use s[8:9] 7581; GFX940-NEXT: ;;#ASMEND 7582; GFX940-NEXT: s_setpc_b64 s[30:31] 7583 %vec0 = call <4 x i16> asm "; def $0", "=s"() 7584 %vec1 = call <4 x i16> asm "; def $0", "=s"() 7585 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 7586 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 7587 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 0, i32 poison, i32 poison> 7588 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 7589 ret void 7590} 7591 7592define void @s_shuffle_v4i16_v3i16__5_1_u_u() { 7593; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_1_u_u: 7594; GFX900: ; %bb.0: 7595; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7596; GFX900-NEXT: ;;#ASMSTART 7597; GFX900-NEXT: ; def s[4:5] 7598; GFX900-NEXT: ;;#ASMEND 7599; 
; Documentation for s_shuffle_v4i16_v3i16__5_2_u_u, defined in the text below.
; Mask <5,2,u,u>: lane 0 takes element 2 of the second source, lane 1 takes
; element 2 of the first source. The checks expect two asm defs and a single
; s_pack_ll_b32_b16 into s8 that reads the high dword of each def (second def
; first).
GFX900-NEXT: ;;#ASMSTART 7600; GFX900-NEXT: ; def s[6:7] 7601; GFX900-NEXT: ;;#ASMEND 7602; GFX900-NEXT: s_pack_lh_b32_b16 s8, s7, s4 7603; GFX900-NEXT: ;;#ASMSTART 7604; GFX900-NEXT: ; use s[8:9] 7605; GFX900-NEXT: ;;#ASMEND 7606; GFX900-NEXT: s_setpc_b64 s[30:31] 7607; 7608; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_1_u_u: 7609; GFX90A: ; %bb.0: 7610; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7611; GFX90A-NEXT: ;;#ASMSTART 7612; GFX90A-NEXT: ; def s[4:5] 7613; GFX90A-NEXT: ;;#ASMEND 7614; GFX90A-NEXT: ;;#ASMSTART 7615; GFX90A-NEXT: ; def s[6:7] 7616; GFX90A-NEXT: ;;#ASMEND 7617; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s7, s4 7618; GFX90A-NEXT: ;;#ASMSTART 7619; GFX90A-NEXT: ; use s[8:9] 7620; GFX90A-NEXT: ;;#ASMEND 7621; GFX90A-NEXT: s_setpc_b64 s[30:31] 7622; 7623; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_1_u_u: 7624; GFX940: ; %bb.0: 7625; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7626; GFX940-NEXT: ;;#ASMSTART 7627; GFX940-NEXT: ; def s[0:1] 7628; GFX940-NEXT: ;;#ASMEND 7629; GFX940-NEXT: ;;#ASMSTART 7630; GFX940-NEXT: ; def s[2:3] 7631; GFX940-NEXT: ;;#ASMEND 7632; GFX940-NEXT: s_pack_lh_b32_b16 s8, s3, s0 7633; GFX940-NEXT: ;;#ASMSTART 7634; GFX940-NEXT: ; use s[8:9] 7635; GFX940-NEXT: ;;#ASMEND 7636; GFX940-NEXT: s_setpc_b64 s[30:31] 7637 %vec0 = call <4 x i16> asm "; def $0", "=s"() 7638 %vec1 = call <4 x i16> asm "; def $0", "=s"() 7639 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 7640 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 7641 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 1, i32 poison, i32 poison> 7642 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 7643 ret void 7644} 7645 7646define void @s_shuffle_v4i16_v3i16__5_2_u_u() { 7647; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_2_u_u: 7648; GFX900: ; %bb.0: 7649; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7650; GFX900-NEXT: 
; Documentation for s_shuffle_v4i16_v3i16__5_3_u_u, defined in the text below.
; Mask <5,3,u,u>: both defined lanes read the second source (elements 2 and 0),
; so the first asm value is unused. The checks expect a single asm def and one
; s_pack_ll_b32_b16 into s8 taking the high dword, then the low dword, of that def.
;;#ASMSTART 7651; GFX900-NEXT: ; def s[4:5] 7652; GFX900-NEXT: ;;#ASMEND 7653; GFX900-NEXT: ;;#ASMSTART 7654; GFX900-NEXT: ; def s[6:7] 7655; GFX900-NEXT: ;;#ASMEND 7656; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s5 7657; GFX900-NEXT: ;;#ASMSTART 7658; GFX900-NEXT: ; use s[8:9] 7659; GFX900-NEXT: ;;#ASMEND 7660; GFX900-NEXT: s_setpc_b64 s[30:31] 7661; 7662; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_2_u_u: 7663; GFX90A: ; %bb.0: 7664; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7665; GFX90A-NEXT: ;;#ASMSTART 7666; GFX90A-NEXT: ; def s[4:5] 7667; GFX90A-NEXT: ;;#ASMEND 7668; GFX90A-NEXT: ;;#ASMSTART 7669; GFX90A-NEXT: ; def s[6:7] 7670; GFX90A-NEXT: ;;#ASMEND 7671; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s5 7672; GFX90A-NEXT: ;;#ASMSTART 7673; GFX90A-NEXT: ; use s[8:9] 7674; GFX90A-NEXT: ;;#ASMEND 7675; GFX90A-NEXT: s_setpc_b64 s[30:31] 7676; 7677; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_2_u_u: 7678; GFX940: ; %bb.0: 7679; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7680; GFX940-NEXT: ;;#ASMSTART 7681; GFX940-NEXT: ; def s[0:1] 7682; GFX940-NEXT: ;;#ASMEND 7683; GFX940-NEXT: ;;#ASMSTART 7684; GFX940-NEXT: ; def s[2:3] 7685; GFX940-NEXT: ;;#ASMEND 7686; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s1 7687; GFX940-NEXT: ;;#ASMSTART 7688; GFX940-NEXT: ; use s[8:9] 7689; GFX940-NEXT: ;;#ASMEND 7690; GFX940-NEXT: s_setpc_b64 s[30:31] 7691 %vec0 = call <4 x i16> asm "; def $0", "=s"() 7692 %vec1 = call <4 x i16> asm "; def $0", "=s"() 7693 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 7694 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 7695 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 2, i32 poison, i32 poison> 7696 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 7697 ret void 7698} 7699 7700define void @s_shuffle_v4i16_v3i16__5_3_u_u() { 7701; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_3_u_u: 7702; GFX900: ; %bb.0: 7703; 
; Documentation for s_shuffle_v4i16_v3i16__5_4_u_u, defined in the text below.
; Mask <5,4,u,u>: both defined lanes read the second source (elements 2 and 1).
; The checks expect a single asm def and one s_pack_lh_b32_b16 into s8 whose
; operands are the high dword and the low dword of that def.
GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7704; GFX900-NEXT: ;;#ASMSTART 7705; GFX900-NEXT: ; def s[4:5] 7706; GFX900-NEXT: ;;#ASMEND 7707; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4 7708; GFX900-NEXT: ;;#ASMSTART 7709; GFX900-NEXT: ; use s[8:9] 7710; GFX900-NEXT: ;;#ASMEND 7711; GFX900-NEXT: s_setpc_b64 s[30:31] 7712; 7713; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_3_u_u: 7714; GFX90A: ; %bb.0: 7715; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7716; GFX90A-NEXT: ;;#ASMSTART 7717; GFX90A-NEXT: ; def s[4:5] 7718; GFX90A-NEXT: ;;#ASMEND 7719; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4 7720; GFX90A-NEXT: ;;#ASMSTART 7721; GFX90A-NEXT: ; use s[8:9] 7722; GFX90A-NEXT: ;;#ASMEND 7723; GFX90A-NEXT: s_setpc_b64 s[30:31] 7724; 7725; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_3_u_u: 7726; GFX940: ; %bb.0: 7727; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7728; GFX940-NEXT: ;;#ASMSTART 7729; GFX940-NEXT: ; def s[0:1] 7730; GFX940-NEXT: ;;#ASMEND 7731; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 7732; GFX940-NEXT: ;;#ASMSTART 7733; GFX940-NEXT: ; use s[8:9] 7734; GFX940-NEXT: ;;#ASMEND 7735; GFX940-NEXT: s_setpc_b64 s[30:31] 7736 %vec0 = call <4 x i16> asm "; def $0", "=s"() 7737 %vec1 = call <4 x i16> asm "; def $0", "=s"() 7738 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 7739 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 7740 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 3, i32 poison, i32 poison> 7741 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 7742 ret void 7743} 7744 7745define void @s_shuffle_v4i16_v3i16__5_4_u_u() { 7746; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_4_u_u: 7747; GFX900: ; %bb.0: 7748; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7749; GFX900-NEXT: ;;#ASMSTART 7750; GFX900-NEXT: ; def s[4:5] 7751; GFX900-NEXT: ;;#ASMEND 7752; GFX900-NEXT: s_pack_lh_b32_b16 s8, s5, s4 7753; 
; Documentation for s_shuffle_v4i16_v3i16__5_5_u_u, defined in the text below.
; Mask <5,5,u,u>: element 2 of the second source is broadcast into lanes 0 and 1.
; The checks expect a single asm def and one s_pack_ll_b32_b16 into s8 that uses
; the high dword of that def for both operands.
GFX900-NEXT: ;;#ASMSTART 7754; GFX900-NEXT: ; use s[8:9] 7755; GFX900-NEXT: ;;#ASMEND 7756; GFX900-NEXT: s_setpc_b64 s[30:31] 7757; 7758; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_4_u_u: 7759; GFX90A: ; %bb.0: 7760; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7761; GFX90A-NEXT: ;;#ASMSTART 7762; GFX90A-NEXT: ; def s[4:5] 7763; GFX90A-NEXT: ;;#ASMEND 7764; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s5, s4 7765; GFX90A-NEXT: ;;#ASMSTART 7766; GFX90A-NEXT: ; use s[8:9] 7767; GFX90A-NEXT: ;;#ASMEND 7768; GFX90A-NEXT: s_setpc_b64 s[30:31] 7769; 7770; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_4_u_u: 7771; GFX940: ; %bb.0: 7772; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7773; GFX940-NEXT: ;;#ASMSTART 7774; GFX940-NEXT: ; def s[0:1] 7775; GFX940-NEXT: ;;#ASMEND 7776; GFX940-NEXT: s_pack_lh_b32_b16 s8, s1, s0 7777; GFX940-NEXT: ;;#ASMSTART 7778; GFX940-NEXT: ; use s[8:9] 7779; GFX940-NEXT: ;;#ASMEND 7780; GFX940-NEXT: s_setpc_b64 s[30:31] 7781 %vec0 = call <4 x i16> asm "; def $0", "=s"() 7782 %vec1 = call <4 x i16> asm "; def $0", "=s"() 7783 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 7784 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 7785 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 4, i32 poison, i32 poison> 7786 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 7787 ret void 7788} 7789 7790define void @s_shuffle_v4i16_v3i16__5_5_u_u() { 7791; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_5_u_u: 7792; GFX900: ; %bb.0: 7793; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7794; GFX900-NEXT: ;;#ASMSTART 7795; GFX900-NEXT: ; def s[4:5] 7796; GFX900-NEXT: ;;#ASMEND 7797; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s5 7798; GFX900-NEXT: ;;#ASMSTART 7799; GFX900-NEXT: ; use s[8:9] 7800; GFX900-NEXT: ;;#ASMEND 7801; GFX900-NEXT: s_setpc_b64 s[30:31] 7802; 7803; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_5_u_u: 7804; GFX90A: ; 
; Documentation for s_shuffle_v4i16_v3i16__5_5_0_u, defined in the text below.
; Mask <5,5,0,u>: lanes 0-1 broadcast element 2 of the second source, lane 2 takes
; element 0 of the first source. The checks expect two asm defs, then
; s_pack_ll_b32_b16 into s8 from the high dword of the second def, and a plain
; s_mov_b32 of the first def's low dword into s9 for the upper half of the result.
%bb.0: 7805; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7806; GFX90A-NEXT: ;;#ASMSTART 7807; GFX90A-NEXT: ; def s[4:5] 7808; GFX90A-NEXT: ;;#ASMEND 7809; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s5 7810; GFX90A-NEXT: ;;#ASMSTART 7811; GFX90A-NEXT: ; use s[8:9] 7812; GFX90A-NEXT: ;;#ASMEND 7813; GFX90A-NEXT: s_setpc_b64 s[30:31] 7814; 7815; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_u_u: 7816; GFX940: ; %bb.0: 7817; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7818; GFX940-NEXT: ;;#ASMSTART 7819; GFX940-NEXT: ; def s[0:1] 7820; GFX940-NEXT: ;;#ASMEND 7821; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 7822; GFX940-NEXT: ;;#ASMSTART 7823; GFX940-NEXT: ; use s[8:9] 7824; GFX940-NEXT: ;;#ASMEND 7825; GFX940-NEXT: s_setpc_b64 s[30:31] 7826 %vec0 = call <4 x i16> asm "; def $0", "=s"() 7827 %vec1 = call <4 x i16> asm "; def $0", "=s"() 7828 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 7829 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 7830 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 poison, i32 poison> 7831 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 7832 ret void 7833} 7834 7835define void @s_shuffle_v4i16_v3i16__5_5_0_u() { 7836; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_5_0_u: 7837; GFX900: ; %bb.0: 7838; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7839; GFX900-NEXT: ;;#ASMSTART 7840; GFX900-NEXT: ; def s[4:5] 7841; GFX900-NEXT: ;;#ASMEND 7842; GFX900-NEXT: ;;#ASMSTART 7843; GFX900-NEXT: ; def s[6:7] 7844; GFX900-NEXT: ;;#ASMEND 7845; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s7 7846; GFX900-NEXT: s_mov_b32 s9, s4 7847; GFX900-NEXT: ;;#ASMSTART 7848; GFX900-NEXT: ; use s[8:9] 7849; GFX900-NEXT: ;;#ASMEND 7850; GFX900-NEXT: s_setpc_b64 s[30:31] 7851; 7852; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_5_0_u: 7853; GFX90A: ; %bb.0: 7854; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7855; 
; Documentation for s_shuffle_v4i16_v3i16__5_5_1_u, defined in the text below.
; Mask <5,5,1,u>: lanes 0-1 broadcast element 2 of the second source, lane 2 takes
; element 1 of the first source. The checks expect two asm defs, an s_lshr_b32 by
; 16 of the first def's low dword into s9, and s_pack_ll_b32_b16 into s8 from the
; high dword of the second def.
GFX90A-NEXT: ;;#ASMSTART 7856; GFX90A-NEXT: ; def s[4:5] 7857; GFX90A-NEXT: ;;#ASMEND 7858; GFX90A-NEXT: ;;#ASMSTART 7859; GFX90A-NEXT: ; def s[6:7] 7860; GFX90A-NEXT: ;;#ASMEND 7861; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s7 7862; GFX90A-NEXT: s_mov_b32 s9, s4 7863; GFX90A-NEXT: ;;#ASMSTART 7864; GFX90A-NEXT: ; use s[8:9] 7865; GFX90A-NEXT: ;;#ASMEND 7866; GFX90A-NEXT: s_setpc_b64 s[30:31] 7867; 7868; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_0_u: 7869; GFX940: ; %bb.0: 7870; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7871; GFX940-NEXT: ;;#ASMSTART 7872; GFX940-NEXT: ; def s[0:1] 7873; GFX940-NEXT: ;;#ASMEND 7874; GFX940-NEXT: ;;#ASMSTART 7875; GFX940-NEXT: ; def s[2:3] 7876; GFX940-NEXT: ;;#ASMEND 7877; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 7878; GFX940-NEXT: s_mov_b32 s9, s0 7879; GFX940-NEXT: ;;#ASMSTART 7880; GFX940-NEXT: ; use s[8:9] 7881; GFX940-NEXT: ;;#ASMEND 7882; GFX940-NEXT: s_setpc_b64 s[30:31] 7883 %vec0 = call <4 x i16> asm "; def $0", "=s"() 7884 %vec1 = call <4 x i16> asm "; def $0", "=s"() 7885 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 7886 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 7887 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 0, i32 poison> 7888 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 7889 ret void 7890} 7891 7892define void @s_shuffle_v4i16_v3i16__5_5_1_u() { 7893; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_5_1_u: 7894; GFX900: ; %bb.0: 7895; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7896; GFX900-NEXT: ;;#ASMSTART 7897; GFX900-NEXT: ; def s[4:5] 7898; GFX900-NEXT: ;;#ASMEND 7899; GFX900-NEXT: ;;#ASMSTART 7900; GFX900-NEXT: ; def s[6:7] 7901; GFX900-NEXT: ;;#ASMEND 7902; GFX900-NEXT: s_lshr_b32 s9, s4, 16 7903; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s7 7904; GFX900-NEXT: ;;#ASMSTART 7905; GFX900-NEXT: ; use s[8:9] 7906; GFX900-NEXT: ;;#ASMEND 7907; 
; Documentation for s_shuffle_v4i16_v3i16__5_5_2_u, defined in the text below.
; Mask <5,5,2,u>: lanes 0-1 broadcast element 2 of the second source, lane 2 takes
; element 2 of the first source. In the expected output the first asm def is
; placed directly in s[8:9], so s9 needs no copy, and only s8 is rewritten by an
; s_pack_ll_b32_b16 from the high dword of the second def.
GFX900-NEXT: s_setpc_b64 s[30:31] 7908; 7909; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_5_1_u: 7910; GFX90A: ; %bb.0: 7911; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7912; GFX90A-NEXT: ;;#ASMSTART 7913; GFX90A-NEXT: ; def s[4:5] 7914; GFX90A-NEXT: ;;#ASMEND 7915; GFX90A-NEXT: ;;#ASMSTART 7916; GFX90A-NEXT: ; def s[6:7] 7917; GFX90A-NEXT: ;;#ASMEND 7918; GFX90A-NEXT: s_lshr_b32 s9, s4, 16 7919; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s7 7920; GFX90A-NEXT: ;;#ASMSTART 7921; GFX90A-NEXT: ; use s[8:9] 7922; GFX90A-NEXT: ;;#ASMEND 7923; GFX90A-NEXT: s_setpc_b64 s[30:31] 7924; 7925; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_1_u: 7926; GFX940: ; %bb.0: 7927; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7928; GFX940-NEXT: ;;#ASMSTART 7929; GFX940-NEXT: ; def s[0:1] 7930; GFX940-NEXT: ;;#ASMEND 7931; GFX940-NEXT: ;;#ASMSTART 7932; GFX940-NEXT: ; def s[2:3] 7933; GFX940-NEXT: ;;#ASMEND 7934; GFX940-NEXT: s_lshr_b32 s9, s0, 16 7935; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 7936; GFX940-NEXT: ;;#ASMSTART 7937; GFX940-NEXT: ; use s[8:9] 7938; GFX940-NEXT: ;;#ASMEND 7939; GFX940-NEXT: s_setpc_b64 s[30:31] 7940 %vec0 = call <4 x i16> asm "; def $0", "=s"() 7941 %vec1 = call <4 x i16> asm "; def $0", "=s"() 7942 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 7943 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 7944 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 1, i32 poison> 7945 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 7946 ret void 7947} 7948 7949define void @s_shuffle_v4i16_v3i16__5_5_2_u() { 7950; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_5_2_u: 7951; GFX900: ; %bb.0: 7952; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7953; GFX900-NEXT: ;;#ASMSTART 7954; GFX900-NEXT: ; def s[8:9] 7955; GFX900-NEXT: ;;#ASMEND 7956; GFX900-NEXT: ;;#ASMSTART 7957; GFX900-NEXT: ; def s[4:5] 7958; GFX900-NEXT: 
; Documentation for s_shuffle_v4i16_v3i16__5_5_3_u, defined in the text below.
; Mask <5,5,3,u>: every defined lane reads the second source (elements 2, 2, 0).
; The checks expect a single asm def, s_pack_ll_b32_b16 into s8 from its high
; dword, and s_mov_b32 of its low dword into s9.
;;#ASMEND 7959; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s5 7960; GFX900-NEXT: ;;#ASMSTART 7961; GFX900-NEXT: ; use s[8:9] 7962; GFX900-NEXT: ;;#ASMEND 7963; GFX900-NEXT: s_setpc_b64 s[30:31] 7964; 7965; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_5_2_u: 7966; GFX90A: ; %bb.0: 7967; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7968; GFX90A-NEXT: ;;#ASMSTART 7969; GFX90A-NEXT: ; def s[8:9] 7970; GFX90A-NEXT: ;;#ASMEND 7971; GFX90A-NEXT: ;;#ASMSTART 7972; GFX90A-NEXT: ; def s[4:5] 7973; GFX90A-NEXT: ;;#ASMEND 7974; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s5 7975; GFX90A-NEXT: ;;#ASMSTART 7976; GFX90A-NEXT: ; use s[8:9] 7977; GFX90A-NEXT: ;;#ASMEND 7978; GFX90A-NEXT: s_setpc_b64 s[30:31] 7979; 7980; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_2_u: 7981; GFX940: ; %bb.0: 7982; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7983; GFX940-NEXT: ;;#ASMSTART 7984; GFX940-NEXT: ; def s[8:9] 7985; GFX940-NEXT: ;;#ASMEND 7986; GFX940-NEXT: ;;#ASMSTART 7987; GFX940-NEXT: ; def s[0:1] 7988; GFX940-NEXT: ;;#ASMEND 7989; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 7990; GFX940-NEXT: ;;#ASMSTART 7991; GFX940-NEXT: ; use s[8:9] 7992; GFX940-NEXT: ;;#ASMEND 7993; GFX940-NEXT: s_setpc_b64 s[30:31] 7994 %vec0 = call <4 x i16> asm "; def $0", "=s"() 7995 %vec1 = call <4 x i16> asm "; def $0", "=s"() 7996 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 7997 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 7998 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 2, i32 poison> 7999 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 8000 ret void 8001} 8002 8003define void @s_shuffle_v4i16_v3i16__5_5_3_u() { 8004; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_5_3_u: 8005; GFX900: ; %bb.0: 8006; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8007; GFX900-NEXT: ;;#ASMSTART 8008; GFX900-NEXT: ; def s[4:5] 8009; GFX900-NEXT: ;;#ASMEND 8010; 
; Documentation for s_shuffle_v4i16_v3i16__5_5_4_u, defined in the text below.
; Mask <5,5,4,u>: every defined lane reads the second source (elements 2, 2, 1).
; The checks expect a single asm def, an s_lshr_b32 by 16 of its low dword into
; s9, and s_pack_ll_b32_b16 into s8 from its high dword.
GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s5 8011; GFX900-NEXT: s_mov_b32 s9, s4 8012; GFX900-NEXT: ;;#ASMSTART 8013; GFX900-NEXT: ; use s[8:9] 8014; GFX900-NEXT: ;;#ASMEND 8015; GFX900-NEXT: s_setpc_b64 s[30:31] 8016; 8017; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_5_3_u: 8018; GFX90A: ; %bb.0: 8019; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8020; GFX90A-NEXT: ;;#ASMSTART 8021; GFX90A-NEXT: ; def s[4:5] 8022; GFX90A-NEXT: ;;#ASMEND 8023; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s5 8024; GFX90A-NEXT: s_mov_b32 s9, s4 8025; GFX90A-NEXT: ;;#ASMSTART 8026; GFX90A-NEXT: ; use s[8:9] 8027; GFX90A-NEXT: ;;#ASMEND 8028; GFX90A-NEXT: s_setpc_b64 s[30:31] 8029; 8030; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_3_u: 8031; GFX940: ; %bb.0: 8032; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8033; GFX940-NEXT: ;;#ASMSTART 8034; GFX940-NEXT: ; def s[0:1] 8035; GFX940-NEXT: ;;#ASMEND 8036; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 8037; GFX940-NEXT: s_mov_b32 s9, s0 8038; GFX940-NEXT: ;;#ASMSTART 8039; GFX940-NEXT: ; use s[8:9] 8040; GFX940-NEXT: ;;#ASMEND 8041; GFX940-NEXT: s_setpc_b64 s[30:31] 8042 %vec0 = call <4 x i16> asm "; def $0", "=s"() 8043 %vec1 = call <4 x i16> asm "; def $0", "=s"() 8044 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 8045 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 8046 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 3, i32 poison> 8047 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 8048 ret void 8049} 8050 8051define void @s_shuffle_v4i16_v3i16__5_5_4_u() { 8052; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_5_4_u: 8053; GFX900: ; %bb.0: 8054; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8055; GFX900-NEXT: ;;#ASMSTART 8056; GFX900-NEXT: ; def s[4:5] 8057; GFX900-NEXT: ;;#ASMEND 8058; GFX900-NEXT: s_lshr_b32 s9, s4, 16 8059; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s5 8060; GFX900-NEXT: 
; Documentation for s_shuffle_v4i16_v3i16__5_5_5_u, defined in the text below.
; Mask <5,5,5,u>: element 2 of the second source is broadcast into lanes 0-2.
; The output is identical on all three targets, so the checks use the shared GFX9
; prefix: the asm def lands directly in s[8:9] and a single s_pack_ll_b32_b16
; rewrites s8 from s9, leaving s9 as defined.
;;#ASMSTART 8061; GFX900-NEXT: ; use s[8:9] 8062; GFX900-NEXT: ;;#ASMEND 8063; GFX900-NEXT: s_setpc_b64 s[30:31] 8064; 8065; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_5_4_u: 8066; GFX90A: ; %bb.0: 8067; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8068; GFX90A-NEXT: ;;#ASMSTART 8069; GFX90A-NEXT: ; def s[4:5] 8070; GFX90A-NEXT: ;;#ASMEND 8071; GFX90A-NEXT: s_lshr_b32 s9, s4, 16 8072; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s5 8073; GFX90A-NEXT: ;;#ASMSTART 8074; GFX90A-NEXT: ; use s[8:9] 8075; GFX90A-NEXT: ;;#ASMEND 8076; GFX90A-NEXT: s_setpc_b64 s[30:31] 8077; 8078; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_4_u: 8079; GFX940: ; %bb.0: 8080; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8081; GFX940-NEXT: ;;#ASMSTART 8082; GFX940-NEXT: ; def s[0:1] 8083; GFX940-NEXT: ;;#ASMEND 8084; GFX940-NEXT: s_lshr_b32 s9, s0, 16 8085; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 8086; GFX940-NEXT: ;;#ASMSTART 8087; GFX940-NEXT: ; use s[8:9] 8088; GFX940-NEXT: ;;#ASMEND 8089; GFX940-NEXT: s_setpc_b64 s[30:31] 8090 %vec0 = call <4 x i16> asm "; def $0", "=s"() 8091 %vec1 = call <4 x i16> asm "; def $0", "=s"() 8092 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 8093 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 8094 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 4, i32 poison> 8095 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 8096 ret void 8097} 8098 8099define void @s_shuffle_v4i16_v3i16__5_5_5_u() { 8100; GFX9-LABEL: s_shuffle_v4i16_v3i16__5_5_5_u: 8101; GFX9: ; %bb.0: 8102; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8103; GFX9-NEXT: ;;#ASMSTART 8104; GFX9-NEXT: ; def s[8:9] 8105; GFX9-NEXT: ;;#ASMEND 8106; GFX9-NEXT: s_pack_ll_b32_b16 s8, s9, s9 8107; GFX9-NEXT: ;;#ASMSTART 8108; GFX9-NEXT: ; use s[8:9] 8109; GFX9-NEXT: ;;#ASMEND 8110; GFX9-NEXT: s_setpc_b64 s[30:31] 8111 %vec0 = call <4 x i16> asm "; 
def $0", "=s"() 8112 %vec1 = call <4 x i16> asm "; def $0", "=s"() 8113 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 8114 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 8115 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 5, i32 poison> 8116 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 8117 ret void 8118} 8119 8120define void @s_shuffle_v4i16_v3i16__5_5_5_0() { 8121; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_5_5_0: 8122; GFX900: ; %bb.0: 8123; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8124; GFX900-NEXT: ;;#ASMSTART 8125; GFX900-NEXT: ; def s[4:5] 8126; GFX900-NEXT: ;;#ASMEND 8127; GFX900-NEXT: ;;#ASMSTART 8128; GFX900-NEXT: ; def s[6:7] 8129; GFX900-NEXT: ;;#ASMEND 8130; GFX900-NEXT: s_pack_ll_b32_b16 s9, s7, s4 8131; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s7 8132; GFX900-NEXT: ;;#ASMSTART 8133; GFX900-NEXT: ; use s[8:9] 8134; GFX900-NEXT: ;;#ASMEND 8135; GFX900-NEXT: s_setpc_b64 s[30:31] 8136; 8137; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_5_5_0: 8138; GFX90A: ; %bb.0: 8139; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8140; GFX90A-NEXT: ;;#ASMSTART 8141; GFX90A-NEXT: ; def s[4:5] 8142; GFX90A-NEXT: ;;#ASMEND 8143; GFX90A-NEXT: ;;#ASMSTART 8144; GFX90A-NEXT: ; def s[6:7] 8145; GFX90A-NEXT: ;;#ASMEND 8146; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s7, s4 8147; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s7 8148; GFX90A-NEXT: ;;#ASMSTART 8149; GFX90A-NEXT: ; use s[8:9] 8150; GFX90A-NEXT: ;;#ASMEND 8151; GFX90A-NEXT: s_setpc_b64 s[30:31] 8152; 8153; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_5_0: 8154; GFX940: ; %bb.0: 8155; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8156; GFX940-NEXT: ;;#ASMSTART 8157; GFX940-NEXT: ; def s[0:1] 8158; GFX940-NEXT: ;;#ASMEND 8159; GFX940-NEXT: ;;#ASMSTART 8160; GFX940-NEXT: ; def s[2:3] 8161; GFX940-NEXT: ;;#ASMEND 8162; GFX940-NEXT: s_pack_ll_b32_b16 s9, s3, s0 
8163; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 8164; GFX940-NEXT: ;;#ASMSTART 8165; GFX940-NEXT: ; use s[8:9] 8166; GFX940-NEXT: ;;#ASMEND 8167; GFX940-NEXT: s_setpc_b64 s[30:31] 8168 %vec0 = call <4 x i16> asm "; def $0", "=s"() 8169 %vec1 = call <4 x i16> asm "; def $0", "=s"() 8170 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 8171 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 8172 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 5, i32 0> 8173 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 8174 ret void 8175} 8176 8177define void @s_shuffle_v4i16_v3i16__5_5_5_1() { 8178; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_5_5_1: 8179; GFX900: ; %bb.0: 8180; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8181; GFX900-NEXT: ;;#ASMSTART 8182; GFX900-NEXT: ; def s[4:5] 8183; GFX900-NEXT: ;;#ASMEND 8184; GFX900-NEXT: ;;#ASMSTART 8185; GFX900-NEXT: ; def s[6:7] 8186; GFX900-NEXT: ;;#ASMEND 8187; GFX900-NEXT: s_pack_lh_b32_b16 s9, s7, s4 8188; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s7 8189; GFX900-NEXT: ;;#ASMSTART 8190; GFX900-NEXT: ; use s[8:9] 8191; GFX900-NEXT: ;;#ASMEND 8192; GFX900-NEXT: s_setpc_b64 s[30:31] 8193; 8194; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_5_5_1: 8195; GFX90A: ; %bb.0: 8196; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8197; GFX90A-NEXT: ;;#ASMSTART 8198; GFX90A-NEXT: ; def s[4:5] 8199; GFX90A-NEXT: ;;#ASMEND 8200; GFX90A-NEXT: ;;#ASMSTART 8201; GFX90A-NEXT: ; def s[6:7] 8202; GFX90A-NEXT: ;;#ASMEND 8203; GFX90A-NEXT: s_pack_lh_b32_b16 s9, s7, s4 8204; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s7 8205; GFX90A-NEXT: ;;#ASMSTART 8206; GFX90A-NEXT: ; use s[8:9] 8207; GFX90A-NEXT: ;;#ASMEND 8208; GFX90A-NEXT: s_setpc_b64 s[30:31] 8209; 8210; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_5_1: 8211; GFX940: ; %bb.0: 8212; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8213; GFX940-NEXT: 
;;#ASMSTART 8214; GFX940-NEXT: ; def s[0:1] 8215; GFX940-NEXT: ;;#ASMEND 8216; GFX940-NEXT: ;;#ASMSTART 8217; GFX940-NEXT: ; def s[2:3] 8218; GFX940-NEXT: ;;#ASMEND 8219; GFX940-NEXT: s_pack_lh_b32_b16 s9, s3, s0 8220; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 8221; GFX940-NEXT: ;;#ASMSTART 8222; GFX940-NEXT: ; use s[8:9] 8223; GFX940-NEXT: ;;#ASMEND 8224; GFX940-NEXT: s_setpc_b64 s[30:31] 8225 %vec0 = call <4 x i16> asm "; def $0", "=s"() 8226 %vec1 = call <4 x i16> asm "; def $0", "=s"() 8227 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 8228 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 8229 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 5, i32 1> 8230 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 8231 ret void 8232} 8233 8234define void @s_shuffle_v4i16_v3i16__5_5_5_2() { 8235; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_5_5_2: 8236; GFX900: ; %bb.0: 8237; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8238; GFX900-NEXT: ;;#ASMSTART 8239; GFX900-NEXT: ; def s[4:5] 8240; GFX900-NEXT: ;;#ASMEND 8241; GFX900-NEXT: ;;#ASMSTART 8242; GFX900-NEXT: ; def s[6:7] 8243; GFX900-NEXT: ;;#ASMEND 8244; GFX900-NEXT: s_pack_ll_b32_b16 s9, s7, s5 8245; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s7 8246; GFX900-NEXT: ;;#ASMSTART 8247; GFX900-NEXT: ; use s[8:9] 8248; GFX900-NEXT: ;;#ASMEND 8249; GFX900-NEXT: s_setpc_b64 s[30:31] 8250; 8251; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_5_5_2: 8252; GFX90A: ; %bb.0: 8253; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8254; GFX90A-NEXT: ;;#ASMSTART 8255; GFX90A-NEXT: ; def s[4:5] 8256; GFX90A-NEXT: ;;#ASMEND 8257; GFX90A-NEXT: ;;#ASMSTART 8258; GFX90A-NEXT: ; def s[6:7] 8259; GFX90A-NEXT: ;;#ASMEND 8260; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s7, s5 8261; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s7 8262; GFX90A-NEXT: ;;#ASMSTART 8263; GFX90A-NEXT: ; use s[8:9] 8264; GFX90A-NEXT: 
;;#ASMEND 8265; GFX90A-NEXT: s_setpc_b64 s[30:31] 8266; 8267; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_5_2: 8268; GFX940: ; %bb.0: 8269; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8270; GFX940-NEXT: ;;#ASMSTART 8271; GFX940-NEXT: ; def s[0:1] 8272; GFX940-NEXT: ;;#ASMEND 8273; GFX940-NEXT: ;;#ASMSTART 8274; GFX940-NEXT: ; def s[2:3] 8275; GFX940-NEXT: ;;#ASMEND 8276; GFX940-NEXT: s_pack_ll_b32_b16 s9, s3, s1 8277; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 8278; GFX940-NEXT: ;;#ASMSTART 8279; GFX940-NEXT: ; use s[8:9] 8280; GFX940-NEXT: ;;#ASMEND 8281; GFX940-NEXT: s_setpc_b64 s[30:31] 8282 %vec0 = call <4 x i16> asm "; def $0", "=s"() 8283 %vec1 = call <4 x i16> asm "; def $0", "=s"() 8284 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 8285 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 8286 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 5, i32 2> 8287 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 8288 ret void 8289} 8290 8291define void @s_shuffle_v4i16_v3i16__5_5_5_3() { 8292; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_5_5_3: 8293; GFX900: ; %bb.0: 8294; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8295; GFX900-NEXT: ;;#ASMSTART 8296; GFX900-NEXT: ; def s[4:5] 8297; GFX900-NEXT: ;;#ASMEND 8298; GFX900-NEXT: s_pack_ll_b32_b16 s9, s5, s4 8299; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s5 8300; GFX900-NEXT: ;;#ASMSTART 8301; GFX900-NEXT: ; use s[8:9] 8302; GFX900-NEXT: ;;#ASMEND 8303; GFX900-NEXT: s_setpc_b64 s[30:31] 8304; 8305; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_5_5_3: 8306; GFX90A: ; %bb.0: 8307; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8308; GFX90A-NEXT: ;;#ASMSTART 8309; GFX90A-NEXT: ; def s[4:5] 8310; GFX90A-NEXT: ;;#ASMEND 8311; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s5, s4 8312; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s5 8313; GFX90A-NEXT: ;;#ASMSTART 8314; GFX90A-NEXT: ; use 
s[8:9] 8315; GFX90A-NEXT: ;;#ASMEND 8316; GFX90A-NEXT: s_setpc_b64 s[30:31] 8317; 8318; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_5_3: 8319; GFX940: ; %bb.0: 8320; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8321; GFX940-NEXT: ;;#ASMSTART 8322; GFX940-NEXT: ; def s[0:1] 8323; GFX940-NEXT: ;;#ASMEND 8324; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s0 8325; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 8326; GFX940-NEXT: ;;#ASMSTART 8327; GFX940-NEXT: ; use s[8:9] 8328; GFX940-NEXT: ;;#ASMEND 8329; GFX940-NEXT: s_setpc_b64 s[30:31] 8330 %vec0 = call <4 x i16> asm "; def $0", "=s"() 8331 %vec1 = call <4 x i16> asm "; def $0", "=s"() 8332 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 8333 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 8334 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 5, i32 3> 8335 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 8336 ret void 8337} 8338 8339define void @s_shuffle_v4i16_v3i16__5_5_5_4() { 8340; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_5_5_4: 8341; GFX900: ; %bb.0: 8342; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8343; GFX900-NEXT: ;;#ASMSTART 8344; GFX900-NEXT: ; def s[4:5] 8345; GFX900-NEXT: ;;#ASMEND 8346; GFX900-NEXT: s_pack_lh_b32_b16 s9, s5, s4 8347; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s5 8348; GFX900-NEXT: ;;#ASMSTART 8349; GFX900-NEXT: ; use s[8:9] 8350; GFX900-NEXT: ;;#ASMEND 8351; GFX900-NEXT: s_setpc_b64 s[30:31] 8352; 8353; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_5_5_4: 8354; GFX90A: ; %bb.0: 8355; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8356; GFX90A-NEXT: ;;#ASMSTART 8357; GFX90A-NEXT: ; def s[4:5] 8358; GFX90A-NEXT: ;;#ASMEND 8359; GFX90A-NEXT: s_pack_lh_b32_b16 s9, s5, s4 8360; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s5 8361; GFX90A-NEXT: ;;#ASMSTART 8362; GFX90A-NEXT: ; use s[8:9] 8363; GFX90A-NEXT: ;;#ASMEND 8364; GFX90A-NEXT: s_setpc_b64 
s[30:31] 8365; 8366; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_5_4: 8367; GFX940: ; %bb.0: 8368; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8369; GFX940-NEXT: ;;#ASMSTART 8370; GFX940-NEXT: ; def s[0:1] 8371; GFX940-NEXT: ;;#ASMEND 8372; GFX940-NEXT: s_pack_lh_b32_b16 s9, s1, s0 8373; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 8374; GFX940-NEXT: ;;#ASMSTART 8375; GFX940-NEXT: ; use s[8:9] 8376; GFX940-NEXT: ;;#ASMEND 8377; GFX940-NEXT: s_setpc_b64 s[30:31] 8378 %vec0 = call <4 x i16> asm "; def $0", "=s"() 8379 %vec1 = call <4 x i16> asm "; def $0", "=s"() 8380 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 8381 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 8382 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 5, i32 4> 8383 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 8384 ret void 8385} 8386 8387define void @s_shuffle_v4i16_v3i16__5_5_5_5() { 8388; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_5_5_5: 8389; GFX900: ; %bb.0: 8390; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8391; GFX900-NEXT: ;;#ASMSTART 8392; GFX900-NEXT: ; def s[4:5] 8393; GFX900-NEXT: ;;#ASMEND 8394; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s5 8395; GFX900-NEXT: s_mov_b32 s9, s8 8396; GFX900-NEXT: ;;#ASMSTART 8397; GFX900-NEXT: ; use s[8:9] 8398; GFX900-NEXT: ;;#ASMEND 8399; GFX900-NEXT: s_setpc_b64 s[30:31] 8400; 8401; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_5_5_5: 8402; GFX90A: ; %bb.0: 8403; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8404; GFX90A-NEXT: ;;#ASMSTART 8405; GFX90A-NEXT: ; def s[4:5] 8406; GFX90A-NEXT: ;;#ASMEND 8407; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s5 8408; GFX90A-NEXT: s_mov_b32 s9, s8 8409; GFX90A-NEXT: ;;#ASMSTART 8410; GFX90A-NEXT: ; use s[8:9] 8411; GFX90A-NEXT: ;;#ASMEND 8412; GFX90A-NEXT: s_setpc_b64 s[30:31] 8413; 8414; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_5_5: 8415; GFX940: ; %bb.0: 
8416; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8417; GFX940-NEXT: ;;#ASMSTART 8418; GFX940-NEXT: ; def s[0:1] 8419; GFX940-NEXT: ;;#ASMEND 8420; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 8421; GFX940-NEXT: s_mov_b32 s9, s8 8422; GFX940-NEXT: ;;#ASMSTART 8423; GFX940-NEXT: ; use s[8:9] 8424; GFX940-NEXT: ;;#ASMEND 8425; GFX940-NEXT: s_setpc_b64 s[30:31] 8426 %vec0 = call <4 x i16> asm "; def $0", "=s"() 8427 %vec1 = call <4 x i16> asm "; def $0", "=s"() 8428 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 8429 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 8430 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 5, i32 5> 8431 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 8432 ret void 8433} 8434 8435define void @s_shuffle_v4i16_v3i16__u_0_0_0() { 8436; GFX900-LABEL: s_shuffle_v4i16_v3i16__u_0_0_0: 8437; GFX900: ; %bb.0: 8438; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8439; GFX900-NEXT: ;;#ASMSTART 8440; GFX900-NEXT: ; def s[4:5] 8441; GFX900-NEXT: ;;#ASMEND 8442; GFX900-NEXT: s_pack_ll_b32_b16 s9, s4, s4 8443; GFX900-NEXT: s_lshl_b32 s8, s4, 16 8444; GFX900-NEXT: ;;#ASMSTART 8445; GFX900-NEXT: ; use s[8:9] 8446; GFX900-NEXT: ;;#ASMEND 8447; GFX900-NEXT: s_setpc_b64 s[30:31] 8448; 8449; GFX90A-LABEL: s_shuffle_v4i16_v3i16__u_0_0_0: 8450; GFX90A: ; %bb.0: 8451; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8452; GFX90A-NEXT: ;;#ASMSTART 8453; GFX90A-NEXT: ; def s[4:5] 8454; GFX90A-NEXT: ;;#ASMEND 8455; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s4, s4 8456; GFX90A-NEXT: s_lshl_b32 s8, s4, 16 8457; GFX90A-NEXT: ;;#ASMSTART 8458; GFX90A-NEXT: ; use s[8:9] 8459; GFX90A-NEXT: ;;#ASMEND 8460; GFX90A-NEXT: s_setpc_b64 s[30:31] 8461; 8462; GFX940-LABEL: s_shuffle_v4i16_v3i16__u_0_0_0: 8463; GFX940: ; %bb.0: 8464; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8465; GFX940-NEXT: ;;#ASMSTART 8466; 
GFX940-NEXT: ; def s[0:1] 8467; GFX940-NEXT: ;;#ASMEND 8468; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 8469; GFX940-NEXT: s_lshl_b32 s8, s0, 16 8470; GFX940-NEXT: ;;#ASMSTART 8471; GFX940-NEXT: ; use s[8:9] 8472; GFX940-NEXT: ;;#ASMEND 8473; GFX940-NEXT: s_setpc_b64 s[30:31] 8474 %vec0 = call <4 x i16> asm "; def $0", "=s"() 8475 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 8476 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> <i32 poison, i32 0, i32 0, i32 0> 8477 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 8478 ret void 8479} 8480 8481define void @s_shuffle_v4i16_v3i16__0_0_0_0() { 8482; GFX900-LABEL: s_shuffle_v4i16_v3i16__0_0_0_0: 8483; GFX900: ; %bb.0: 8484; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8485; GFX900-NEXT: ;;#ASMSTART 8486; GFX900-NEXT: ; def s[4:5] 8487; GFX900-NEXT: ;;#ASMEND 8488; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s4 8489; GFX900-NEXT: s_mov_b32 s9, s8 8490; GFX900-NEXT: ;;#ASMSTART 8491; GFX900-NEXT: ; use s[8:9] 8492; GFX900-NEXT: ;;#ASMEND 8493; GFX900-NEXT: s_setpc_b64 s[30:31] 8494; 8495; GFX90A-LABEL: s_shuffle_v4i16_v3i16__0_0_0_0: 8496; GFX90A: ; %bb.0: 8497; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8498; GFX90A-NEXT: ;;#ASMSTART 8499; GFX90A-NEXT: ; def s[4:5] 8500; GFX90A-NEXT: ;;#ASMEND 8501; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s4 8502; GFX90A-NEXT: s_mov_b32 s9, s8 8503; GFX90A-NEXT: ;;#ASMSTART 8504; GFX90A-NEXT: ; use s[8:9] 8505; GFX90A-NEXT: ;;#ASMEND 8506; GFX90A-NEXT: s_setpc_b64 s[30:31] 8507; 8508; GFX940-LABEL: s_shuffle_v4i16_v3i16__0_0_0_0: 8509; GFX940: ; %bb.0: 8510; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8511; GFX940-NEXT: ;;#ASMSTART 8512; GFX940-NEXT: ; def s[0:1] 8513; GFX940-NEXT: ;;#ASMEND 8514; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 8515; GFX940-NEXT: s_mov_b32 s9, s8 8516; GFX940-NEXT: ;;#ASMSTART 8517; GFX940-NEXT: ; use s[8:9] 8518; GFX940-NEXT: ;;#ASMEND 8519; GFX940-NEXT: 
s_setpc_b64 s[30:31] 8520 %vec0 = call <4 x i16> asm "; def $0", "=s"() 8521 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 8522 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> zeroinitializer 8523 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 8524 ret void 8525} 8526 8527define void @s_shuffle_v4i16_v3i16__1_0_0_0() { 8528; GFX900-LABEL: s_shuffle_v4i16_v3i16__1_0_0_0: 8529; GFX900: ; %bb.0: 8530; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8531; GFX900-NEXT: ;;#ASMSTART 8532; GFX900-NEXT: ; def s[4:5] 8533; GFX900-NEXT: ;;#ASMEND 8534; GFX900-NEXT: s_lshr_b32 s5, s4, 16 8535; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4 8536; GFX900-NEXT: s_pack_ll_b32_b16 s9, s4, s4 8537; GFX900-NEXT: ;;#ASMSTART 8538; GFX900-NEXT: ; use s[8:9] 8539; GFX900-NEXT: ;;#ASMEND 8540; GFX900-NEXT: s_setpc_b64 s[30:31] 8541; 8542; GFX90A-LABEL: s_shuffle_v4i16_v3i16__1_0_0_0: 8543; GFX90A: ; %bb.0: 8544; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8545; GFX90A-NEXT: ;;#ASMSTART 8546; GFX90A-NEXT: ; def s[4:5] 8547; GFX90A-NEXT: ;;#ASMEND 8548; GFX90A-NEXT: s_lshr_b32 s5, s4, 16 8549; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4 8550; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s4, s4 8551; GFX90A-NEXT: ;;#ASMSTART 8552; GFX90A-NEXT: ; use s[8:9] 8553; GFX90A-NEXT: ;;#ASMEND 8554; GFX90A-NEXT: s_setpc_b64 s[30:31] 8555; 8556; GFX940-LABEL: s_shuffle_v4i16_v3i16__1_0_0_0: 8557; GFX940: ; %bb.0: 8558; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8559; GFX940-NEXT: ;;#ASMSTART 8560; GFX940-NEXT: ; def s[0:1] 8561; GFX940-NEXT: ;;#ASMEND 8562; GFX940-NEXT: s_lshr_b32 s1, s0, 16 8563; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 8564; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 8565; GFX940-NEXT: ;;#ASMSTART 8566; GFX940-NEXT: ; use s[8:9] 8567; GFX940-NEXT: ;;#ASMEND 8568; GFX940-NEXT: s_setpc_b64 s[30:31] 8569 %vec0 = call <4 x i16> asm "; def $0", "=s"() 8570 %extract3 = shufflevector <4 x i16> 
%vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 8571 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> <i32 1, i32 0, i32 0, i32 0> 8572 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 8573 ret void 8574} 8575 8576define void @s_shuffle_v4i16_v3i16__2_0_0_0() { 8577; GFX900-LABEL: s_shuffle_v4i16_v3i16__2_0_0_0: 8578; GFX900: ; %bb.0: 8579; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8580; GFX900-NEXT: ;;#ASMSTART 8581; GFX900-NEXT: ; def s[4:5] 8582; GFX900-NEXT: ;;#ASMEND 8583; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4 8584; GFX900-NEXT: s_pack_ll_b32_b16 s9, s4, s4 8585; GFX900-NEXT: ;;#ASMSTART 8586; GFX900-NEXT: ; use s[8:9] 8587; GFX900-NEXT: ;;#ASMEND 8588; GFX900-NEXT: s_setpc_b64 s[30:31] 8589; 8590; GFX90A-LABEL: s_shuffle_v4i16_v3i16__2_0_0_0: 8591; GFX90A: ; %bb.0: 8592; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8593; GFX90A-NEXT: ;;#ASMSTART 8594; GFX90A-NEXT: ; def s[4:5] 8595; GFX90A-NEXT: ;;#ASMEND 8596; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4 8597; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s4, s4 8598; GFX90A-NEXT: ;;#ASMSTART 8599; GFX90A-NEXT: ; use s[8:9] 8600; GFX90A-NEXT: ;;#ASMEND 8601; GFX90A-NEXT: s_setpc_b64 s[30:31] 8602; 8603; GFX940-LABEL: s_shuffle_v4i16_v3i16__2_0_0_0: 8604; GFX940: ; %bb.0: 8605; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8606; GFX940-NEXT: ;;#ASMSTART 8607; GFX940-NEXT: ; def s[0:1] 8608; GFX940-NEXT: ;;#ASMEND 8609; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 8610; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 8611; GFX940-NEXT: ;;#ASMSTART 8612; GFX940-NEXT: ; use s[8:9] 8613; GFX940-NEXT: ;;#ASMEND 8614; GFX940-NEXT: s_setpc_b64 s[30:31] 8615 %vec0 = call <4 x i16> asm "; def $0", "=s"() 8616 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 8617 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> <i32 2, i32 0, i32 0, i32 0> 8618 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> 
%shuf) 8619 ret void 8620} 8621 8622define void @s_shuffle_v4i16_v3i16__3_0_0_0() { 8623; GFX900-LABEL: s_shuffle_v4i16_v3i16__3_0_0_0: 8624; GFX900: ; %bb.0: 8625; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8626; GFX900-NEXT: ;;#ASMSTART 8627; GFX900-NEXT: ; def s[4:5] 8628; GFX900-NEXT: ;;#ASMEND 8629; GFX900-NEXT: s_pack_ll_b32_b16 s9, s4, s4 8630; GFX900-NEXT: s_lshl_b32 s8, s4, 16 8631; GFX900-NEXT: ;;#ASMSTART 8632; GFX900-NEXT: ; use s[8:9] 8633; GFX900-NEXT: ;;#ASMEND 8634; GFX900-NEXT: s_setpc_b64 s[30:31] 8635; 8636; GFX90A-LABEL: s_shuffle_v4i16_v3i16__3_0_0_0: 8637; GFX90A: ; %bb.0: 8638; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8639; GFX90A-NEXT: ;;#ASMSTART 8640; GFX90A-NEXT: ; def s[4:5] 8641; GFX90A-NEXT: ;;#ASMEND 8642; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s4, s4 8643; GFX90A-NEXT: s_lshl_b32 s8, s4, 16 8644; GFX90A-NEXT: ;;#ASMSTART 8645; GFX90A-NEXT: ; use s[8:9] 8646; GFX90A-NEXT: ;;#ASMEND 8647; GFX90A-NEXT: s_setpc_b64 s[30:31] 8648; 8649; GFX940-LABEL: s_shuffle_v4i16_v3i16__3_0_0_0: 8650; GFX940: ; %bb.0: 8651; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8652; GFX940-NEXT: ;;#ASMSTART 8653; GFX940-NEXT: ; def s[0:1] 8654; GFX940-NEXT: ;;#ASMEND 8655; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 8656; GFX940-NEXT: s_lshl_b32 s8, s0, 16 8657; GFX940-NEXT: ;;#ASMSTART 8658; GFX940-NEXT: ; use s[8:9] 8659; GFX940-NEXT: ;;#ASMEND 8660; GFX940-NEXT: s_setpc_b64 s[30:31] 8661 %vec0 = call <4 x i16> asm "; def $0", "=s"() 8662 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 8663 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> <i32 3, i32 0, i32 0, i32 0> 8664 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 8665 ret void 8666} 8667 8668define void @s_shuffle_v4i16_v3i16__4_0_0_0() { 8669; GFX900-LABEL: s_shuffle_v4i16_v3i16__4_0_0_0: 8670; GFX900: ; %bb.0: 8671; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8672; GFX900-NEXT: 
;;#ASMSTART 8673; GFX900-NEXT: ; def s[4:5] 8674; GFX900-NEXT: ;;#ASMEND 8675; GFX900-NEXT: ;;#ASMSTART 8676; GFX900-NEXT: ; def s[6:7] 8677; GFX900-NEXT: ;;#ASMEND 8678; GFX900-NEXT: s_lshr_b32 s5, s6, 16 8679; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4 8680; GFX900-NEXT: s_pack_ll_b32_b16 s9, s4, s4 8681; GFX900-NEXT: ;;#ASMSTART 8682; GFX900-NEXT: ; use s[8:9] 8683; GFX900-NEXT: ;;#ASMEND 8684; GFX900-NEXT: s_setpc_b64 s[30:31] 8685; 8686; GFX90A-LABEL: s_shuffle_v4i16_v3i16__4_0_0_0: 8687; GFX90A: ; %bb.0: 8688; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8689; GFX90A-NEXT: ;;#ASMSTART 8690; GFX90A-NEXT: ; def s[4:5] 8691; GFX90A-NEXT: ;;#ASMEND 8692; GFX90A-NEXT: ;;#ASMSTART 8693; GFX90A-NEXT: ; def s[6:7] 8694; GFX90A-NEXT: ;;#ASMEND 8695; GFX90A-NEXT: s_lshr_b32 s5, s6, 16 8696; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4 8697; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s4, s4 8698; GFX90A-NEXT: ;;#ASMSTART 8699; GFX90A-NEXT: ; use s[8:9] 8700; GFX90A-NEXT: ;;#ASMEND 8701; GFX90A-NEXT: s_setpc_b64 s[30:31] 8702; 8703; GFX940-LABEL: s_shuffle_v4i16_v3i16__4_0_0_0: 8704; GFX940: ; %bb.0: 8705; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8706; GFX940-NEXT: ;;#ASMSTART 8707; GFX940-NEXT: ; def s[0:1] 8708; GFX940-NEXT: ;;#ASMEND 8709; GFX940-NEXT: ;;#ASMSTART 8710; GFX940-NEXT: ; def s[2:3] 8711; GFX940-NEXT: ;;#ASMEND 8712; GFX940-NEXT: s_lshr_b32 s1, s2, 16 8713; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 8714; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 8715; GFX940-NEXT: ;;#ASMSTART 8716; GFX940-NEXT: ; use s[8:9] 8717; GFX940-NEXT: ;;#ASMEND 8718; GFX940-NEXT: s_setpc_b64 s[30:31] 8719 %vec0 = call <4 x i16> asm "; def $0", "=s"() 8720 %vec1 = call <4 x i16> asm "; def $0", "=s"() 8721 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 8722 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 8723 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> 
<i32 4, i32 0, i32 0, i32 0> 8724 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 8725 ret void 8726} 8727 8728define void @s_shuffle_v4i16_v3i16__5_0_0_0() { 8729; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_0_0_0: 8730; GFX900: ; %bb.0: 8731; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8732; GFX900-NEXT: ;;#ASMSTART 8733; GFX900-NEXT: ; def s[4:5] 8734; GFX900-NEXT: ;;#ASMEND 8735; GFX900-NEXT: ;;#ASMSTART 8736; GFX900-NEXT: ; def s[6:7] 8737; GFX900-NEXT: ;;#ASMEND 8738; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s4 8739; GFX900-NEXT: s_pack_ll_b32_b16 s9, s4, s4 8740; GFX900-NEXT: ;;#ASMSTART 8741; GFX900-NEXT: ; use s[8:9] 8742; GFX900-NEXT: ;;#ASMEND 8743; GFX900-NEXT: s_setpc_b64 s[30:31] 8744; 8745; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_0_0_0: 8746; GFX90A: ; %bb.0: 8747; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8748; GFX90A-NEXT: ;;#ASMSTART 8749; GFX90A-NEXT: ; def s[4:5] 8750; GFX90A-NEXT: ;;#ASMEND 8751; GFX90A-NEXT: ;;#ASMSTART 8752; GFX90A-NEXT: ; def s[6:7] 8753; GFX90A-NEXT: ;;#ASMEND 8754; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s4 8755; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s4, s4 8756; GFX90A-NEXT: ;;#ASMSTART 8757; GFX90A-NEXT: ; use s[8:9] 8758; GFX90A-NEXT: ;;#ASMEND 8759; GFX90A-NEXT: s_setpc_b64 s[30:31] 8760; 8761; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_0_0_0: 8762; GFX940: ; %bb.0: 8763; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8764; GFX940-NEXT: ;;#ASMSTART 8765; GFX940-NEXT: ; def s[0:1] 8766; GFX940-NEXT: ;;#ASMEND 8767; GFX940-NEXT: ;;#ASMSTART 8768; GFX940-NEXT: ; def s[2:3] 8769; GFX940-NEXT: ;;#ASMEND 8770; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 8771; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 8772; GFX940-NEXT: ;;#ASMSTART 8773; GFX940-NEXT: ; use s[8:9] 8774; GFX940-NEXT: ;;#ASMEND 8775; GFX940-NEXT: s_setpc_b64 s[30:31] 8776 %vec0 = call <4 x i16> asm "; def $0", "=s"() 8777 %vec1 = call <4 x i16> asm "; def $0", "=s"() 8778 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x 
i32> <i32 0, i32 1, i32 2> 8779 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 8780 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 0, i32 0, i32 0> 8781 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 8782 ret void 8783} 8784 8785define void @s_shuffle_v4i16_v3i16__5_u_0_0() { 8786; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_u_0_0: 8787; GFX900: ; %bb.0: 8788; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8789; GFX900-NEXT: ;;#ASMSTART 8790; GFX900-NEXT: ; def s[4:5] 8791; GFX900-NEXT: ;;#ASMEND 8792; GFX900-NEXT: ;;#ASMSTART 8793; GFX900-NEXT: ; def s[6:7] 8794; GFX900-NEXT: ;;#ASMEND 8795; GFX900-NEXT: s_pack_ll_b32_b16 s9, s4, s4 8796; GFX900-NEXT: s_mov_b32 s8, s7 8797; GFX900-NEXT: ;;#ASMSTART 8798; GFX900-NEXT: ; use s[8:9] 8799; GFX900-NEXT: ;;#ASMEND 8800; GFX900-NEXT: s_setpc_b64 s[30:31] 8801; 8802; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_u_0_0: 8803; GFX90A: ; %bb.0: 8804; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8805; GFX90A-NEXT: ;;#ASMSTART 8806; GFX90A-NEXT: ; def s[4:5] 8807; GFX90A-NEXT: ;;#ASMEND 8808; GFX90A-NEXT: ;;#ASMSTART 8809; GFX90A-NEXT: ; def s[6:7] 8810; GFX90A-NEXT: ;;#ASMEND 8811; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s4, s4 8812; GFX90A-NEXT: s_mov_b32 s8, s7 8813; GFX90A-NEXT: ;;#ASMSTART 8814; GFX90A-NEXT: ; use s[8:9] 8815; GFX90A-NEXT: ;;#ASMEND 8816; GFX90A-NEXT: s_setpc_b64 s[30:31] 8817; 8818; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_u_0_0: 8819; GFX940: ; %bb.0: 8820; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8821; GFX940-NEXT: ;;#ASMSTART 8822; GFX940-NEXT: ; def s[0:1] 8823; GFX940-NEXT: ;;#ASMEND 8824; GFX940-NEXT: ;;#ASMSTART 8825; GFX940-NEXT: ; def s[2:3] 8826; GFX940-NEXT: ;;#ASMEND 8827; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 8828; GFX940-NEXT: s_mov_b32 s8, s3 8829; GFX940-NEXT: ;;#ASMSTART 8830; GFX940-NEXT: ; use s[8:9] 8831; GFX940-NEXT: ;;#ASMEND 8832; GFX940-NEXT: s_setpc_b64 s[30:31] 
8833 %vec0 = call <4 x i16> asm "; def $0", "=s"() 8834 %vec1 = call <4 x i16> asm "; def $0", "=s"() 8835 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 8836 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 8837 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 poison, i32 0, i32 0> 8838 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 8839 ret void 8840} 8841 8842define void @s_shuffle_v4i16_v3i16__5_1_0_0() { 8843; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_1_0_0: 8844; GFX900: ; %bb.0: 8845; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8846; GFX900-NEXT: ;;#ASMSTART 8847; GFX900-NEXT: ; def s[4:5] 8848; GFX900-NEXT: ;;#ASMEND 8849; GFX900-NEXT: ;;#ASMSTART 8850; GFX900-NEXT: ; def s[6:7] 8851; GFX900-NEXT: ;;#ASMEND 8852; GFX900-NEXT: s_pack_lh_b32_b16 s8, s7, s4 8853; GFX900-NEXT: s_pack_ll_b32_b16 s9, s4, s4 8854; GFX900-NEXT: ;;#ASMSTART 8855; GFX900-NEXT: ; use s[8:9] 8856; GFX900-NEXT: ;;#ASMEND 8857; GFX900-NEXT: s_setpc_b64 s[30:31] 8858; 8859; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_1_0_0: 8860; GFX90A: ; %bb.0: 8861; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8862; GFX90A-NEXT: ;;#ASMSTART 8863; GFX90A-NEXT: ; def s[4:5] 8864; GFX90A-NEXT: ;;#ASMEND 8865; GFX90A-NEXT: ;;#ASMSTART 8866; GFX90A-NEXT: ; def s[6:7] 8867; GFX90A-NEXT: ;;#ASMEND 8868; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s7, s4 8869; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s4, s4 8870; GFX90A-NEXT: ;;#ASMSTART 8871; GFX90A-NEXT: ; use s[8:9] 8872; GFX90A-NEXT: ;;#ASMEND 8873; GFX90A-NEXT: s_setpc_b64 s[30:31] 8874; 8875; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_1_0_0: 8876; GFX940: ; %bb.0: 8877; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8878; GFX940-NEXT: ;;#ASMSTART 8879; GFX940-NEXT: ; def s[0:1] 8880; GFX940-NEXT: ;;#ASMEND 8881; GFX940-NEXT: ;;#ASMSTART 8882; GFX940-NEXT: ; def s[2:3] 8883; GFX940-NEXT: ;;#ASMEND 8884; 
GFX940-NEXT: s_pack_lh_b32_b16 s8, s3, s0 8885; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 8886; GFX940-NEXT: ;;#ASMSTART 8887; GFX940-NEXT: ; use s[8:9] 8888; GFX940-NEXT: ;;#ASMEND 8889; GFX940-NEXT: s_setpc_b64 s[30:31] 8890 %vec0 = call <4 x i16> asm "; def $0", "=s"() 8891 %vec1 = call <4 x i16> asm "; def $0", "=s"() 8892 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 8893 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 8894 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 1, i32 0, i32 0> 8895 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 8896 ret void 8897} 8898 8899define void @s_shuffle_v4i16_v3i16__5_2_0_0() { 8900; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_2_0_0: 8901; GFX900: ; %bb.0: 8902; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8903; GFX900-NEXT: ;;#ASMSTART 8904; GFX900-NEXT: ; def s[4:5] 8905; GFX900-NEXT: ;;#ASMEND 8906; GFX900-NEXT: ;;#ASMSTART 8907; GFX900-NEXT: ; def s[6:7] 8908; GFX900-NEXT: ;;#ASMEND 8909; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s5 8910; GFX900-NEXT: s_pack_ll_b32_b16 s9, s4, s4 8911; GFX900-NEXT: ;;#ASMSTART 8912; GFX900-NEXT: ; use s[8:9] 8913; GFX900-NEXT: ;;#ASMEND 8914; GFX900-NEXT: s_setpc_b64 s[30:31] 8915; 8916; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_2_0_0: 8917; GFX90A: ; %bb.0: 8918; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8919; GFX90A-NEXT: ;;#ASMSTART 8920; GFX90A-NEXT: ; def s[4:5] 8921; GFX90A-NEXT: ;;#ASMEND 8922; GFX90A-NEXT: ;;#ASMSTART 8923; GFX90A-NEXT: ; def s[6:7] 8924; GFX90A-NEXT: ;;#ASMEND 8925; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s5 8926; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s4, s4 8927; GFX90A-NEXT: ;;#ASMSTART 8928; GFX90A-NEXT: ; use s[8:9] 8929; GFX90A-NEXT: ;;#ASMEND 8930; GFX90A-NEXT: s_setpc_b64 s[30:31] 8931; 8932; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_2_0_0: 8933; GFX940: ; %bb.0: 8934; GFX940-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) 8935; GFX940-NEXT: ;;#ASMSTART 8936; GFX940-NEXT: ; def s[0:1] 8937; GFX940-NEXT: ;;#ASMEND 8938; GFX940-NEXT: ;;#ASMSTART 8939; GFX940-NEXT: ; def s[2:3] 8940; GFX940-NEXT: ;;#ASMEND 8941; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s1 8942; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 8943; GFX940-NEXT: ;;#ASMSTART 8944; GFX940-NEXT: ; use s[8:9] 8945; GFX940-NEXT: ;;#ASMEND 8946; GFX940-NEXT: s_setpc_b64 s[30:31] 8947 %vec0 = call <4 x i16> asm "; def $0", "=s"() 8948 %vec1 = call <4 x i16> asm "; def $0", "=s"() 8949 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 8950 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 8951 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 2, i32 0, i32 0> 8952 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 8953 ret void 8954} 8955 8956define void @s_shuffle_v4i16_v3i16__5_3_0_0() { 8957; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_3_0_0: 8958; GFX900: ; %bb.0: 8959; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8960; GFX900-NEXT: ;;#ASMSTART 8961; GFX900-NEXT: ; def s[4:5] 8962; GFX900-NEXT: ;;#ASMEND 8963; GFX900-NEXT: ;;#ASMSTART 8964; GFX900-NEXT: ; def s[6:7] 8965; GFX900-NEXT: ;;#ASMEND 8966; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s6 8967; GFX900-NEXT: s_pack_ll_b32_b16 s9, s4, s4 8968; GFX900-NEXT: ;;#ASMSTART 8969; GFX900-NEXT: ; use s[8:9] 8970; GFX900-NEXT: ;;#ASMEND 8971; GFX900-NEXT: s_setpc_b64 s[30:31] 8972; 8973; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_3_0_0: 8974; GFX90A: ; %bb.0: 8975; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8976; GFX90A-NEXT: ;;#ASMSTART 8977; GFX90A-NEXT: ; def s[4:5] 8978; GFX90A-NEXT: ;;#ASMEND 8979; GFX90A-NEXT: ;;#ASMSTART 8980; GFX90A-NEXT: ; def s[6:7] 8981; GFX90A-NEXT: ;;#ASMEND 8982; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s6 8983; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s4, s4 8984; GFX90A-NEXT: ;;#ASMSTART 8985; 
GFX90A-NEXT: ; use s[8:9] 8986; GFX90A-NEXT: ;;#ASMEND 8987; GFX90A-NEXT: s_setpc_b64 s[30:31] 8988; 8989; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_3_0_0: 8990; GFX940: ; %bb.0: 8991; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8992; GFX940-NEXT: ;;#ASMSTART 8993; GFX940-NEXT: ; def s[0:1] 8994; GFX940-NEXT: ;;#ASMEND 8995; GFX940-NEXT: ;;#ASMSTART 8996; GFX940-NEXT: ; def s[2:3] 8997; GFX940-NEXT: ;;#ASMEND 8998; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s2 8999; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 9000; GFX940-NEXT: ;;#ASMSTART 9001; GFX940-NEXT: ; use s[8:9] 9002; GFX940-NEXT: ;;#ASMEND 9003; GFX940-NEXT: s_setpc_b64 s[30:31] 9004 %vec0 = call <4 x i16> asm "; def $0", "=s"() 9005 %vec1 = call <4 x i16> asm "; def $0", "=s"() 9006 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 9007 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 9008 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 3, i32 0, i32 0> 9009 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 9010 ret void 9011} 9012 9013define void @s_shuffle_v4i16_v3i16__5_4_0_0() { 9014; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_4_0_0: 9015; GFX900: ; %bb.0: 9016; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9017; GFX900-NEXT: ;;#ASMSTART 9018; GFX900-NEXT: ; def s[4:5] 9019; GFX900-NEXT: ;;#ASMEND 9020; GFX900-NEXT: ;;#ASMSTART 9021; GFX900-NEXT: ; def s[6:7] 9022; GFX900-NEXT: ;;#ASMEND 9023; GFX900-NEXT: s_pack_lh_b32_b16 s8, s7, s6 9024; GFX900-NEXT: s_pack_ll_b32_b16 s9, s4, s4 9025; GFX900-NEXT: ;;#ASMSTART 9026; GFX900-NEXT: ; use s[8:9] 9027; GFX900-NEXT: ;;#ASMEND 9028; GFX900-NEXT: s_setpc_b64 s[30:31] 9029; 9030; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_4_0_0: 9031; GFX90A: ; %bb.0: 9032; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9033; GFX90A-NEXT: ;;#ASMSTART 9034; GFX90A-NEXT: ; def s[4:5] 9035; GFX90A-NEXT: ;;#ASMEND 9036; GFX90A-NEXT: 
;;#ASMSTART 9037; GFX90A-NEXT: ; def s[6:7] 9038; GFX90A-NEXT: ;;#ASMEND 9039; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s7, s6 9040; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s4, s4 9041; GFX90A-NEXT: ;;#ASMSTART 9042; GFX90A-NEXT: ; use s[8:9] 9043; GFX90A-NEXT: ;;#ASMEND 9044; GFX90A-NEXT: s_setpc_b64 s[30:31] 9045; 9046; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_4_0_0: 9047; GFX940: ; %bb.0: 9048; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9049; GFX940-NEXT: ;;#ASMSTART 9050; GFX940-NEXT: ; def s[0:1] 9051; GFX940-NEXT: ;;#ASMEND 9052; GFX940-NEXT: ;;#ASMSTART 9053; GFX940-NEXT: ; def s[2:3] 9054; GFX940-NEXT: ;;#ASMEND 9055; GFX940-NEXT: s_pack_lh_b32_b16 s8, s3, s2 9056; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 9057; GFX940-NEXT: ;;#ASMSTART 9058; GFX940-NEXT: ; use s[8:9] 9059; GFX940-NEXT: ;;#ASMEND 9060; GFX940-NEXT: s_setpc_b64 s[30:31] 9061 %vec0 = call <4 x i16> asm "; def $0", "=s"() 9062 %vec1 = call <4 x i16> asm "; def $0", "=s"() 9063 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 9064 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 9065 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 4, i32 0, i32 0> 9066 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 9067 ret void 9068} 9069 9070define void @s_shuffle_v4i16_v3i16__5_5_0_0() { 9071; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_5_0_0: 9072; GFX900: ; %bb.0: 9073; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9074; GFX900-NEXT: ;;#ASMSTART 9075; GFX900-NEXT: ; def s[4:5] 9076; GFX900-NEXT: ;;#ASMEND 9077; GFX900-NEXT: ;;#ASMSTART 9078; GFX900-NEXT: ; def s[6:7] 9079; GFX900-NEXT: ;;#ASMEND 9080; GFX900-NEXT: s_pack_ll_b32_b16 s9, s4, s4 9081; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s7 9082; GFX900-NEXT: ;;#ASMSTART 9083; GFX900-NEXT: ; use s[8:9] 9084; GFX900-NEXT: ;;#ASMEND 9085; GFX900-NEXT: s_setpc_b64 s[30:31] 9086; 9087; GFX90A-LABEL: 
s_shuffle_v4i16_v3i16__5_5_0_0: 9088; GFX90A: ; %bb.0: 9089; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9090; GFX90A-NEXT: ;;#ASMSTART 9091; GFX90A-NEXT: ; def s[4:5] 9092; GFX90A-NEXT: ;;#ASMEND 9093; GFX90A-NEXT: ;;#ASMSTART 9094; GFX90A-NEXT: ; def s[6:7] 9095; GFX90A-NEXT: ;;#ASMEND 9096; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s4, s4 9097; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s7 9098; GFX90A-NEXT: ;;#ASMSTART 9099; GFX90A-NEXT: ; use s[8:9] 9100; GFX90A-NEXT: ;;#ASMEND 9101; GFX90A-NEXT: s_setpc_b64 s[30:31] 9102; 9103; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_0_0: 9104; GFX940: ; %bb.0: 9105; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9106; GFX940-NEXT: ;;#ASMSTART 9107; GFX940-NEXT: ; def s[0:1] 9108; GFX940-NEXT: ;;#ASMEND 9109; GFX940-NEXT: ;;#ASMSTART 9110; GFX940-NEXT: ; def s[2:3] 9111; GFX940-NEXT: ;;#ASMEND 9112; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 9113; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 9114; GFX940-NEXT: ;;#ASMSTART 9115; GFX940-NEXT: ; use s[8:9] 9116; GFX940-NEXT: ;;#ASMEND 9117; GFX940-NEXT: s_setpc_b64 s[30:31] 9118 %vec0 = call <4 x i16> asm "; def $0", "=s"() 9119 %vec1 = call <4 x i16> asm "; def $0", "=s"() 9120 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 9121 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 9122 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 0, i32 0> 9123 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 9124 ret void 9125} 9126 9127define void @s_shuffle_v4i16_v3i16__5_5_u_0() { 9128; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_5_u_0: 9129; GFX900: ; %bb.0: 9130; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9131; GFX900-NEXT: ;;#ASMSTART 9132; GFX900-NEXT: ; def s[4:5] 9133; GFX900-NEXT: ;;#ASMEND 9134; GFX900-NEXT: ;;#ASMSTART 9135; GFX900-NEXT: ; def s[6:7] 9136; GFX900-NEXT: ;;#ASMEND 9137; GFX900-NEXT: s_lshl_b32 s9, s4, 16 9138; 
GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s7 9139; GFX900-NEXT: ;;#ASMSTART 9140; GFX900-NEXT: ; use s[8:9] 9141; GFX900-NEXT: ;;#ASMEND 9142; GFX900-NEXT: s_setpc_b64 s[30:31] 9143; 9144; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_5_u_0: 9145; GFX90A: ; %bb.0: 9146; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9147; GFX90A-NEXT: ;;#ASMSTART 9148; GFX90A-NEXT: ; def s[4:5] 9149; GFX90A-NEXT: ;;#ASMEND 9150; GFX90A-NEXT: ;;#ASMSTART 9151; GFX90A-NEXT: ; def s[6:7] 9152; GFX90A-NEXT: ;;#ASMEND 9153; GFX90A-NEXT: s_lshl_b32 s9, s4, 16 9154; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s7 9155; GFX90A-NEXT: ;;#ASMSTART 9156; GFX90A-NEXT: ; use s[8:9] 9157; GFX90A-NEXT: ;;#ASMEND 9158; GFX90A-NEXT: s_setpc_b64 s[30:31] 9159; 9160; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_u_0: 9161; GFX940: ; %bb.0: 9162; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9163; GFX940-NEXT: ;;#ASMSTART 9164; GFX940-NEXT: ; def s[0:1] 9165; GFX940-NEXT: ;;#ASMEND 9166; GFX940-NEXT: ;;#ASMSTART 9167; GFX940-NEXT: ; def s[2:3] 9168; GFX940-NEXT: ;;#ASMEND 9169; GFX940-NEXT: s_lshl_b32 s9, s0, 16 9170; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 9171; GFX940-NEXT: ;;#ASMSTART 9172; GFX940-NEXT: ; use s[8:9] 9173; GFX940-NEXT: ;;#ASMEND 9174; GFX940-NEXT: s_setpc_b64 s[30:31] 9175 %vec0 = call <4 x i16> asm "; def $0", "=s"() 9176 %vec1 = call <4 x i16> asm "; def $0", "=s"() 9177 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 9178 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 9179 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 poison, i32 0> 9180 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 9181 ret void 9182} 9183 9184define void @s_shuffle_v4i16_v3i16__5_5_1_0() { 9185; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_5_1_0: 9186; GFX900: ; %bb.0: 9187; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9188; GFX900-NEXT: ;;#ASMSTART 9189; 
GFX900-NEXT: ; def s[4:5] 9190; GFX900-NEXT: ;;#ASMEND 9191; GFX900-NEXT: s_lshr_b32 s5, s4, 16 9192; GFX900-NEXT: ;;#ASMSTART 9193; GFX900-NEXT: ; def s[6:7] 9194; GFX900-NEXT: ;;#ASMEND 9195; GFX900-NEXT: s_pack_ll_b32_b16 s9, s5, s4 9196; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s7 9197; GFX900-NEXT: ;;#ASMSTART 9198; GFX900-NEXT: ; use s[8:9] 9199; GFX900-NEXT: ;;#ASMEND 9200; GFX900-NEXT: s_setpc_b64 s[30:31] 9201; 9202; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_5_1_0: 9203; GFX90A: ; %bb.0: 9204; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9205; GFX90A-NEXT: ;;#ASMSTART 9206; GFX90A-NEXT: ; def s[4:5] 9207; GFX90A-NEXT: ;;#ASMEND 9208; GFX90A-NEXT: s_lshr_b32 s5, s4, 16 9209; GFX90A-NEXT: ;;#ASMSTART 9210; GFX90A-NEXT: ; def s[6:7] 9211; GFX90A-NEXT: ;;#ASMEND 9212; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s5, s4 9213; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s7 9214; GFX90A-NEXT: ;;#ASMSTART 9215; GFX90A-NEXT: ; use s[8:9] 9216; GFX90A-NEXT: ;;#ASMEND 9217; GFX90A-NEXT: s_setpc_b64 s[30:31] 9218; 9219; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_1_0: 9220; GFX940: ; %bb.0: 9221; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9222; GFX940-NEXT: ;;#ASMSTART 9223; GFX940-NEXT: ; def s[0:1] 9224; GFX940-NEXT: ;;#ASMEND 9225; GFX940-NEXT: s_lshr_b32 s1, s0, 16 9226; GFX940-NEXT: ;;#ASMSTART 9227; GFX940-NEXT: ; def s[2:3] 9228; GFX940-NEXT: ;;#ASMEND 9229; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s0 9230; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 9231; GFX940-NEXT: ;;#ASMSTART 9232; GFX940-NEXT: ; use s[8:9] 9233; GFX940-NEXT: ;;#ASMEND 9234; GFX940-NEXT: s_setpc_b64 s[30:31] 9235 %vec0 = call <4 x i16> asm "; def $0", "=s"() 9236 %vec1 = call <4 x i16> asm "; def $0", "=s"() 9237 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 9238 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 9239 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 
1, i32 0> 9240 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 9241 ret void 9242} 9243 9244define void @s_shuffle_v4i16_v3i16__5_5_2_0() { 9245; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_5_2_0: 9246; GFX900: ; %bb.0: 9247; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9248; GFX900-NEXT: ;;#ASMSTART 9249; GFX900-NEXT: ; def s[4:5] 9250; GFX900-NEXT: ;;#ASMEND 9251; GFX900-NEXT: ;;#ASMSTART 9252; GFX900-NEXT: ; def s[6:7] 9253; GFX900-NEXT: ;;#ASMEND 9254; GFX900-NEXT: s_pack_ll_b32_b16 s9, s5, s4 9255; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s7 9256; GFX900-NEXT: ;;#ASMSTART 9257; GFX900-NEXT: ; use s[8:9] 9258; GFX900-NEXT: ;;#ASMEND 9259; GFX900-NEXT: s_setpc_b64 s[30:31] 9260; 9261; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_5_2_0: 9262; GFX90A: ; %bb.0: 9263; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9264; GFX90A-NEXT: ;;#ASMSTART 9265; GFX90A-NEXT: ; def s[4:5] 9266; GFX90A-NEXT: ;;#ASMEND 9267; GFX90A-NEXT: ;;#ASMSTART 9268; GFX90A-NEXT: ; def s[6:7] 9269; GFX90A-NEXT: ;;#ASMEND 9270; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s5, s4 9271; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s7 9272; GFX90A-NEXT: ;;#ASMSTART 9273; GFX90A-NEXT: ; use s[8:9] 9274; GFX90A-NEXT: ;;#ASMEND 9275; GFX90A-NEXT: s_setpc_b64 s[30:31] 9276; 9277; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_2_0: 9278; GFX940: ; %bb.0: 9279; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9280; GFX940-NEXT: ;;#ASMSTART 9281; GFX940-NEXT: ; def s[0:1] 9282; GFX940-NEXT: ;;#ASMEND 9283; GFX940-NEXT: ;;#ASMSTART 9284; GFX940-NEXT: ; def s[2:3] 9285; GFX940-NEXT: ;;#ASMEND 9286; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s0 9287; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 9288; GFX940-NEXT: ;;#ASMSTART 9289; GFX940-NEXT: ; use s[8:9] 9290; GFX940-NEXT: ;;#ASMEND 9291; GFX940-NEXT: s_setpc_b64 s[30:31] 9292 %vec0 = call <4 x i16> asm "; def $0", "=s"() 9293 %vec1 = call <4 x i16> asm "; def $0", "=s"() 9294 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, 
i32 2> 9295 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 9296 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 2, i32 0> 9297 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 9298 ret void 9299} 9300 9301define void @s_shuffle_v4i16_v3i16__5_5_3_0() { 9302; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_5_3_0: 9303; GFX900: ; %bb.0: 9304; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9305; GFX900-NEXT: ;;#ASMSTART 9306; GFX900-NEXT: ; def s[4:5] 9307; GFX900-NEXT: ;;#ASMEND 9308; GFX900-NEXT: ;;#ASMSTART 9309; GFX900-NEXT: ; def s[6:7] 9310; GFX900-NEXT: ;;#ASMEND 9311; GFX900-NEXT: s_pack_ll_b32_b16 s9, s6, s4 9312; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s7 9313; GFX900-NEXT: ;;#ASMSTART 9314; GFX900-NEXT: ; use s[8:9] 9315; GFX900-NEXT: ;;#ASMEND 9316; GFX900-NEXT: s_setpc_b64 s[30:31] 9317; 9318; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_5_3_0: 9319; GFX90A: ; %bb.0: 9320; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9321; GFX90A-NEXT: ;;#ASMSTART 9322; GFX90A-NEXT: ; def s[4:5] 9323; GFX90A-NEXT: ;;#ASMEND 9324; GFX90A-NEXT: ;;#ASMSTART 9325; GFX90A-NEXT: ; def s[6:7] 9326; GFX90A-NEXT: ;;#ASMEND 9327; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s6, s4 9328; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s7 9329; GFX90A-NEXT: ;;#ASMSTART 9330; GFX90A-NEXT: ; use s[8:9] 9331; GFX90A-NEXT: ;;#ASMEND 9332; GFX90A-NEXT: s_setpc_b64 s[30:31] 9333; 9334; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_3_0: 9335; GFX940: ; %bb.0: 9336; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9337; GFX940-NEXT: ;;#ASMSTART 9338; GFX940-NEXT: ; def s[0:1] 9339; GFX940-NEXT: ;;#ASMEND 9340; GFX940-NEXT: ;;#ASMSTART 9341; GFX940-NEXT: ; def s[2:3] 9342; GFX940-NEXT: ;;#ASMEND 9343; GFX940-NEXT: s_pack_ll_b32_b16 s9, s2, s0 9344; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 9345; GFX940-NEXT: ;;#ASMSTART 9346; GFX940-NEXT: ; use s[8:9] 9347; GFX940-NEXT: ;;#ASMEND 9348; GFX940-NEXT: 
s_setpc_b64 s[30:31] 9349 %vec0 = call <4 x i16> asm "; def $0", "=s"() 9350 %vec1 = call <4 x i16> asm "; def $0", "=s"() 9351 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 9352 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 9353 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 3, i32 0> 9354 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 9355 ret void 9356} 9357 9358define void @s_shuffle_v4i16_v3i16__5_5_4_0() { 9359; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_5_4_0: 9360; GFX900: ; %bb.0: 9361; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9362; GFX900-NEXT: ;;#ASMSTART 9363; GFX900-NEXT: ; def s[4:5] 9364; GFX900-NEXT: ;;#ASMEND 9365; GFX900-NEXT: ;;#ASMSTART 9366; GFX900-NEXT: ; def s[6:7] 9367; GFX900-NEXT: ;;#ASMEND 9368; GFX900-NEXT: s_lshr_b32 s5, s6, 16 9369; GFX900-NEXT: s_pack_ll_b32_b16 s9, s5, s4 9370; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s7 9371; GFX900-NEXT: ;;#ASMSTART 9372; GFX900-NEXT: ; use s[8:9] 9373; GFX900-NEXT: ;;#ASMEND 9374; GFX900-NEXT: s_setpc_b64 s[30:31] 9375; 9376; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_5_4_0: 9377; GFX90A: ; %bb.0: 9378; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9379; GFX90A-NEXT: ;;#ASMSTART 9380; GFX90A-NEXT: ; def s[4:5] 9381; GFX90A-NEXT: ;;#ASMEND 9382; GFX90A-NEXT: ;;#ASMSTART 9383; GFX90A-NEXT: ; def s[6:7] 9384; GFX90A-NEXT: ;;#ASMEND 9385; GFX90A-NEXT: s_lshr_b32 s5, s6, 16 9386; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s5, s4 9387; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s7 9388; GFX90A-NEXT: ;;#ASMSTART 9389; GFX90A-NEXT: ; use s[8:9] 9390; GFX90A-NEXT: ;;#ASMEND 9391; GFX90A-NEXT: s_setpc_b64 s[30:31] 9392; 9393; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_4_0: 9394; GFX940: ; %bb.0: 9395; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9396; GFX940-NEXT: ;;#ASMSTART 9397; GFX940-NEXT: ; def s[0:1] 9398; GFX940-NEXT: ;;#ASMEND 9399; 
GFX940-NEXT: ;;#ASMSTART 9400; GFX940-NEXT: ; def s[2:3] 9401; GFX940-NEXT: ;;#ASMEND 9402; GFX940-NEXT: s_lshr_b32 s1, s2, 16 9403; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s0 9404; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 9405; GFX940-NEXT: ;;#ASMSTART 9406; GFX940-NEXT: ; use s[8:9] 9407; GFX940-NEXT: ;;#ASMEND 9408; GFX940-NEXT: s_setpc_b64 s[30:31] 9409 %vec0 = call <4 x i16> asm "; def $0", "=s"() 9410 %vec1 = call <4 x i16> asm "; def $0", "=s"() 9411 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 9412 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 9413 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 4, i32 0> 9414 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 9415 ret void 9416} 9417 9418define void @s_shuffle_v4i16_v3i16__u_1_1_1() { 9419; GFX9-LABEL: s_shuffle_v4i16_v3i16__u_1_1_1: 9420; GFX9: ; %bb.0: 9421; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9422; GFX9-NEXT: ;;#ASMSTART 9423; GFX9-NEXT: ; def s[8:9] 9424; GFX9-NEXT: ;;#ASMEND 9425; GFX9-NEXT: s_pack_hh_b32_b16 s9, s8, s8 9426; GFX9-NEXT: ;;#ASMSTART 9427; GFX9-NEXT: ; use s[8:9] 9428; GFX9-NEXT: ;;#ASMEND 9429; GFX9-NEXT: s_setpc_b64 s[30:31] 9430 %vec0 = call <4 x i16> asm "; def $0", "=s"() 9431 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 9432 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> <i32 poison, i32 1, i32 1, i32 1> 9433 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 9434 ret void 9435} 9436 9437define void @s_shuffle_v4i16_v3i16__0_1_1_1() { 9438; GFX9-LABEL: s_shuffle_v4i16_v3i16__0_1_1_1: 9439; GFX9: ; %bb.0: 9440; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9441; GFX9-NEXT: ;;#ASMSTART 9442; GFX9-NEXT: ; def s[8:9] 9443; GFX9-NEXT: ;;#ASMEND 9444; GFX9-NEXT: s_pack_hh_b32_b16 s9, s8, s8 9445; GFX9-NEXT: ;;#ASMSTART 9446; 
GFX9-NEXT: ; use s[8:9] 9447; GFX9-NEXT: ;;#ASMEND 9448; GFX9-NEXT: s_setpc_b64 s[30:31] 9449 %vec0 = call <4 x i16> asm "; def $0", "=s"() 9450 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 9451 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 1> 9452 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 9453 ret void 9454} 9455 9456define void @s_shuffle_v4i16_v3i16__1_1_1_1() { 9457; GFX900-LABEL: s_shuffle_v4i16_v3i16__1_1_1_1: 9458; GFX900: ; %bb.0: 9459; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9460; GFX900-NEXT: ;;#ASMSTART 9461; GFX900-NEXT: ; def s[4:5] 9462; GFX900-NEXT: ;;#ASMEND 9463; GFX900-NEXT: s_pack_hh_b32_b16 s8, s4, s4 9464; GFX900-NEXT: s_mov_b32 s9, s8 9465; GFX900-NEXT: ;;#ASMSTART 9466; GFX900-NEXT: ; use s[8:9] 9467; GFX900-NEXT: ;;#ASMEND 9468; GFX900-NEXT: s_setpc_b64 s[30:31] 9469; 9470; GFX90A-LABEL: s_shuffle_v4i16_v3i16__1_1_1_1: 9471; GFX90A: ; %bb.0: 9472; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9473; GFX90A-NEXT: ;;#ASMSTART 9474; GFX90A-NEXT: ; def s[4:5] 9475; GFX90A-NEXT: ;;#ASMEND 9476; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s4, s4 9477; GFX90A-NEXT: s_mov_b32 s9, s8 9478; GFX90A-NEXT: ;;#ASMSTART 9479; GFX90A-NEXT: ; use s[8:9] 9480; GFX90A-NEXT: ;;#ASMEND 9481; GFX90A-NEXT: s_setpc_b64 s[30:31] 9482; 9483; GFX940-LABEL: s_shuffle_v4i16_v3i16__1_1_1_1: 9484; GFX940: ; %bb.0: 9485; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9486; GFX940-NEXT: ;;#ASMSTART 9487; GFX940-NEXT: ; def s[0:1] 9488; GFX940-NEXT: ;;#ASMEND 9489; GFX940-NEXT: s_pack_hh_b32_b16 s8, s0, s0 9490; GFX940-NEXT: s_mov_b32 s9, s8 9491; GFX940-NEXT: ;;#ASMSTART 9492; GFX940-NEXT: ; use s[8:9] 9493; GFX940-NEXT: ;;#ASMEND 9494; GFX940-NEXT: s_setpc_b64 s[30:31] 9495 %vec0 = call <4 x i16> asm "; def $0", "=s"() 9496 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 9497 %shuf = 
shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1> 9498 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 9499 ret void 9500} 9501 9502define void @s_shuffle_v4i16_v3i16__2_1_1_1() { 9503; GFX900-LABEL: s_shuffle_v4i16_v3i16__2_1_1_1: 9504; GFX900: ; %bb.0: 9505; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9506; GFX900-NEXT: ;;#ASMSTART 9507; GFX900-NEXT: ; def s[4:5] 9508; GFX900-NEXT: ;;#ASMEND 9509; GFX900-NEXT: s_pack_lh_b32_b16 s8, s5, s4 9510; GFX900-NEXT: s_pack_hh_b32_b16 s9, s4, s4 9511; GFX900-NEXT: ;;#ASMSTART 9512; GFX900-NEXT: ; use s[8:9] 9513; GFX900-NEXT: ;;#ASMEND 9514; GFX900-NEXT: s_setpc_b64 s[30:31] 9515; 9516; GFX90A-LABEL: s_shuffle_v4i16_v3i16__2_1_1_1: 9517; GFX90A: ; %bb.0: 9518; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9519; GFX90A-NEXT: ;;#ASMSTART 9520; GFX90A-NEXT: ; def s[4:5] 9521; GFX90A-NEXT: ;;#ASMEND 9522; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s5, s4 9523; GFX90A-NEXT: s_pack_hh_b32_b16 s9, s4, s4 9524; GFX90A-NEXT: ;;#ASMSTART 9525; GFX90A-NEXT: ; use s[8:9] 9526; GFX90A-NEXT: ;;#ASMEND 9527; GFX90A-NEXT: s_setpc_b64 s[30:31] 9528; 9529; GFX940-LABEL: s_shuffle_v4i16_v3i16__2_1_1_1: 9530; GFX940: ; %bb.0: 9531; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9532; GFX940-NEXT: ;;#ASMSTART 9533; GFX940-NEXT: ; def s[0:1] 9534; GFX940-NEXT: ;;#ASMEND 9535; GFX940-NEXT: s_pack_lh_b32_b16 s8, s1, s0 9536; GFX940-NEXT: s_pack_hh_b32_b16 s9, s0, s0 9537; GFX940-NEXT: ;;#ASMSTART 9538; GFX940-NEXT: ; use s[8:9] 9539; GFX940-NEXT: ;;#ASMEND 9540; GFX940-NEXT: s_setpc_b64 s[30:31] 9541 %vec0 = call <4 x i16> asm "; def $0", "=s"() 9542 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 9543 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> <i32 2, i32 1, i32 1, i32 1> 9544 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 9545 ret void 9546} 9547 9548define void 
@s_shuffle_v4i16_v3i16__3_1_1_1() { 9549; GFX9-LABEL: s_shuffle_v4i16_v3i16__3_1_1_1: 9550; GFX9: ; %bb.0: 9551; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9552; GFX9-NEXT: ;;#ASMSTART 9553; GFX9-NEXT: ; def s[8:9] 9554; GFX9-NEXT: ;;#ASMEND 9555; GFX9-NEXT: s_pack_hh_b32_b16 s9, s8, s8 9556; GFX9-NEXT: ;;#ASMSTART 9557; GFX9-NEXT: ; use s[8:9] 9558; GFX9-NEXT: ;;#ASMEND 9559; GFX9-NEXT: s_setpc_b64 s[30:31] 9560 %vec0 = call <4 x i16> asm "; def $0", "=s"() 9561 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 9562 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> <i32 3, i32 1, i32 1, i32 1> 9563 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 9564 ret void 9565} 9566 9567define void @s_shuffle_v4i16_v3i16__4_1_1_1() { 9568; GFX900-LABEL: s_shuffle_v4i16_v3i16__4_1_1_1: 9569; GFX900: ; %bb.0: 9570; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9571; GFX900-NEXT: ;;#ASMSTART 9572; GFX900-NEXT: ; def s[4:5] 9573; GFX900-NEXT: ;;#ASMEND 9574; GFX900-NEXT: ;;#ASMSTART 9575; GFX900-NEXT: ; def s[6:7] 9576; GFX900-NEXT: ;;#ASMEND 9577; GFX900-NEXT: s_pack_hh_b32_b16 s8, s6, s4 9578; GFX900-NEXT: s_pack_hh_b32_b16 s9, s4, s4 9579; GFX900-NEXT: ;;#ASMSTART 9580; GFX900-NEXT: ; use s[8:9] 9581; GFX900-NEXT: ;;#ASMEND 9582; GFX900-NEXT: s_setpc_b64 s[30:31] 9583; 9584; GFX90A-LABEL: s_shuffle_v4i16_v3i16__4_1_1_1: 9585; GFX90A: ; %bb.0: 9586; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9587; GFX90A-NEXT: ;;#ASMSTART 9588; GFX90A-NEXT: ; def s[4:5] 9589; GFX90A-NEXT: ;;#ASMEND 9590; GFX90A-NEXT: ;;#ASMSTART 9591; GFX90A-NEXT: ; def s[6:7] 9592; GFX90A-NEXT: ;;#ASMEND 9593; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s6, s4 9594; GFX90A-NEXT: s_pack_hh_b32_b16 s9, s4, s4 9595; GFX90A-NEXT: ;;#ASMSTART 9596; GFX90A-NEXT: ; use s[8:9] 9597; GFX90A-NEXT: ;;#ASMEND 9598; GFX90A-NEXT: s_setpc_b64 s[30:31] 9599; 9600; GFX940-LABEL: s_shuffle_v4i16_v3i16__4_1_1_1: 9601; GFX940: ; %bb.0: 
9602; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9603; GFX940-NEXT: ;;#ASMSTART 9604; GFX940-NEXT: ; def s[0:1] 9605; GFX940-NEXT: ;;#ASMEND 9606; GFX940-NEXT: ;;#ASMSTART 9607; GFX940-NEXT: ; def s[2:3] 9608; GFX940-NEXT: ;;#ASMEND 9609; GFX940-NEXT: s_pack_hh_b32_b16 s8, s2, s0 9610; GFX940-NEXT: s_pack_hh_b32_b16 s9, s0, s0 9611; GFX940-NEXT: ;;#ASMSTART 9612; GFX940-NEXT: ; use s[8:9] 9613; GFX940-NEXT: ;;#ASMEND 9614; GFX940-NEXT: s_setpc_b64 s[30:31] 9615 %vec0 = call <4 x i16> asm "; def $0", "=s"() 9616 %vec1 = call <4 x i16> asm "; def $0", "=s"() 9617 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 9618 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 9619 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 4, i32 1, i32 1, i32 1> 9620 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 9621 ret void 9622} 9623 9624define void @s_shuffle_v4i16_v3i16__5_1_1_1() { 9625; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_1_1_1: 9626; GFX900: ; %bb.0: 9627; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9628; GFX900-NEXT: ;;#ASMSTART 9629; GFX900-NEXT: ; def s[4:5] 9630; GFX900-NEXT: ;;#ASMEND 9631; GFX900-NEXT: ;;#ASMSTART 9632; GFX900-NEXT: ; def s[6:7] 9633; GFX900-NEXT: ;;#ASMEND 9634; GFX900-NEXT: s_pack_lh_b32_b16 s8, s7, s4 9635; GFX900-NEXT: s_pack_hh_b32_b16 s9, s4, s4 9636; GFX900-NEXT: ;;#ASMSTART 9637; GFX900-NEXT: ; use s[8:9] 9638; GFX900-NEXT: ;;#ASMEND 9639; GFX900-NEXT: s_setpc_b64 s[30:31] 9640; 9641; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_1_1_1: 9642; GFX90A: ; %bb.0: 9643; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9644; GFX90A-NEXT: ;;#ASMSTART 9645; GFX90A-NEXT: ; def s[4:5] 9646; GFX90A-NEXT: ;;#ASMEND 9647; GFX90A-NEXT: ;;#ASMSTART 9648; GFX90A-NEXT: ; def s[6:7] 9649; GFX90A-NEXT: ;;#ASMEND 9650; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s7, s4 9651; GFX90A-NEXT: s_pack_hh_b32_b16 s9, s4, s4 9652; 
GFX90A-NEXT: ;;#ASMSTART 9653; GFX90A-NEXT: ; use s[8:9] 9654; GFX90A-NEXT: ;;#ASMEND 9655; GFX90A-NEXT: s_setpc_b64 s[30:31] 9656; 9657; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_1_1_1: 9658; GFX940: ; %bb.0: 9659; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9660; GFX940-NEXT: ;;#ASMSTART 9661; GFX940-NEXT: ; def s[0:1] 9662; GFX940-NEXT: ;;#ASMEND 9663; GFX940-NEXT: ;;#ASMSTART 9664; GFX940-NEXT: ; def s[2:3] 9665; GFX940-NEXT: ;;#ASMEND 9666; GFX940-NEXT: s_pack_lh_b32_b16 s8, s3, s0 9667; GFX940-NEXT: s_pack_hh_b32_b16 s9, s0, s0 9668; GFX940-NEXT: ;;#ASMSTART 9669; GFX940-NEXT: ; use s[8:9] 9670; GFX940-NEXT: ;;#ASMEND 9671; GFX940-NEXT: s_setpc_b64 s[30:31] 9672 %vec0 = call <4 x i16> asm "; def $0", "=s"() 9673 %vec1 = call <4 x i16> asm "; def $0", "=s"() 9674 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 9675 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 9676 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 1, i32 1, i32 1> 9677 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 9678 ret void 9679} 9680 9681define void @s_shuffle_v4i16_v3i16__5_u_1_1() { 9682; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_u_1_1: 9683; GFX900: ; %bb.0: 9684; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9685; GFX900-NEXT: ;;#ASMSTART 9686; GFX900-NEXT: ; def s[4:5] 9687; GFX900-NEXT: ;;#ASMEND 9688; GFX900-NEXT: ;;#ASMSTART 9689; GFX900-NEXT: ; def s[6:7] 9690; GFX900-NEXT: ;;#ASMEND 9691; GFX900-NEXT: s_pack_hh_b32_b16 s9, s4, s4 9692; GFX900-NEXT: s_mov_b32 s8, s7 9693; GFX900-NEXT: ;;#ASMSTART 9694; GFX900-NEXT: ; use s[8:9] 9695; GFX900-NEXT: ;;#ASMEND 9696; GFX900-NEXT: s_setpc_b64 s[30:31] 9697; 9698; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_u_1_1: 9699; GFX90A: ; %bb.0: 9700; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9701; GFX90A-NEXT: ;;#ASMSTART 9702; GFX90A-NEXT: ; def s[4:5] 9703; GFX90A-NEXT: ;;#ASMEND 
9704; GFX90A-NEXT: ;;#ASMSTART 9705; GFX90A-NEXT: ; def s[6:7] 9706; GFX90A-NEXT: ;;#ASMEND 9707; GFX90A-NEXT: s_pack_hh_b32_b16 s9, s4, s4 9708; GFX90A-NEXT: s_mov_b32 s8, s7 9709; GFX90A-NEXT: ;;#ASMSTART 9710; GFX90A-NEXT: ; use s[8:9] 9711; GFX90A-NEXT: ;;#ASMEND 9712; GFX90A-NEXT: s_setpc_b64 s[30:31] 9713; 9714; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_u_1_1: 9715; GFX940: ; %bb.0: 9716; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9717; GFX940-NEXT: ;;#ASMSTART 9718; GFX940-NEXT: ; def s[0:1] 9719; GFX940-NEXT: ;;#ASMEND 9720; GFX940-NEXT: ;;#ASMSTART 9721; GFX940-NEXT: ; def s[2:3] 9722; GFX940-NEXT: ;;#ASMEND 9723; GFX940-NEXT: s_pack_hh_b32_b16 s9, s0, s0 9724; GFX940-NEXT: s_mov_b32 s8, s3 9725; GFX940-NEXT: ;;#ASMSTART 9726; GFX940-NEXT: ; use s[8:9] 9727; GFX940-NEXT: ;;#ASMEND 9728; GFX940-NEXT: s_setpc_b64 s[30:31] 9729 %vec0 = call <4 x i16> asm "; def $0", "=s"() 9730 %vec1 = call <4 x i16> asm "; def $0", "=s"() 9731 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 9732 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 9733 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 poison, i32 1, i32 1> 9734 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 9735 ret void 9736} 9737 9738define void @s_shuffle_v4i16_v3i16__5_0_1_1() { 9739; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_0_1_1: 9740; GFX900: ; %bb.0: 9741; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9742; GFX900-NEXT: ;;#ASMSTART 9743; GFX900-NEXT: ; def s[4:5] 9744; GFX900-NEXT: ;;#ASMEND 9745; GFX900-NEXT: ;;#ASMSTART 9746; GFX900-NEXT: ; def s[6:7] 9747; GFX900-NEXT: ;;#ASMEND 9748; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s4 9749; GFX900-NEXT: s_pack_hh_b32_b16 s9, s4, s4 9750; GFX900-NEXT: ;;#ASMSTART 9751; GFX900-NEXT: ; use s[8:9] 9752; GFX900-NEXT: ;;#ASMEND 9753; GFX900-NEXT: s_setpc_b64 s[30:31] 9754; 9755; GFX90A-LABEL: 
s_shuffle_v4i16_v3i16__5_0_1_1: 9756; GFX90A: ; %bb.0: 9757; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9758; GFX90A-NEXT: ;;#ASMSTART 9759; GFX90A-NEXT: ; def s[4:5] 9760; GFX90A-NEXT: ;;#ASMEND 9761; GFX90A-NEXT: ;;#ASMSTART 9762; GFX90A-NEXT: ; def s[6:7] 9763; GFX90A-NEXT: ;;#ASMEND 9764; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s4 9765; GFX90A-NEXT: s_pack_hh_b32_b16 s9, s4, s4 9766; GFX90A-NEXT: ;;#ASMSTART 9767; GFX90A-NEXT: ; use s[8:9] 9768; GFX90A-NEXT: ;;#ASMEND 9769; GFX90A-NEXT: s_setpc_b64 s[30:31] 9770; 9771; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_0_1_1: 9772; GFX940: ; %bb.0: 9773; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9774; GFX940-NEXT: ;;#ASMSTART 9775; GFX940-NEXT: ; def s[0:1] 9776; GFX940-NEXT: ;;#ASMEND 9777; GFX940-NEXT: ;;#ASMSTART 9778; GFX940-NEXT: ; def s[2:3] 9779; GFX940-NEXT: ;;#ASMEND 9780; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 9781; GFX940-NEXT: s_pack_hh_b32_b16 s9, s0, s0 9782; GFX940-NEXT: ;;#ASMSTART 9783; GFX940-NEXT: ; use s[8:9] 9784; GFX940-NEXT: ;;#ASMEND 9785; GFX940-NEXT: s_setpc_b64 s[30:31] 9786 %vec0 = call <4 x i16> asm "; def $0", "=s"() 9787 %vec1 = call <4 x i16> asm "; def $0", "=s"() 9788 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 9789 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 9790 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 0, i32 1, i32 1> 9791 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 9792 ret void 9793} 9794 9795define void @s_shuffle_v4i16_v3i16__5_2_1_1() { 9796; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_2_1_1: 9797; GFX900: ; %bb.0: 9798; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9799; GFX900-NEXT: ;;#ASMSTART 9800; GFX900-NEXT: ; def s[4:5] 9801; GFX900-NEXT: ;;#ASMEND 9802; GFX900-NEXT: ;;#ASMSTART 9803; GFX900-NEXT: ; def s[6:7] 9804; GFX900-NEXT: ;;#ASMEND 9805; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s5 
9806; GFX900-NEXT: s_pack_hh_b32_b16 s9, s4, s4 9807; GFX900-NEXT: ;;#ASMSTART 9808; GFX900-NEXT: ; use s[8:9] 9809; GFX900-NEXT: ;;#ASMEND 9810; GFX900-NEXT: s_setpc_b64 s[30:31] 9811; 9812; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_2_1_1: 9813; GFX90A: ; %bb.0: 9814; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9815; GFX90A-NEXT: ;;#ASMSTART 9816; GFX90A-NEXT: ; def s[4:5] 9817; GFX90A-NEXT: ;;#ASMEND 9818; GFX90A-NEXT: ;;#ASMSTART 9819; GFX90A-NEXT: ; def s[6:7] 9820; GFX90A-NEXT: ;;#ASMEND 9821; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s5 9822; GFX90A-NEXT: s_pack_hh_b32_b16 s9, s4, s4 9823; GFX90A-NEXT: ;;#ASMSTART 9824; GFX90A-NEXT: ; use s[8:9] 9825; GFX90A-NEXT: ;;#ASMEND 9826; GFX90A-NEXT: s_setpc_b64 s[30:31] 9827; 9828; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_2_1_1: 9829; GFX940: ; %bb.0: 9830; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9831; GFX940-NEXT: ;;#ASMSTART 9832; GFX940-NEXT: ; def s[0:1] 9833; GFX940-NEXT: ;;#ASMEND 9834; GFX940-NEXT: ;;#ASMSTART 9835; GFX940-NEXT: ; def s[2:3] 9836; GFX940-NEXT: ;;#ASMEND 9837; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s1 9838; GFX940-NEXT: s_pack_hh_b32_b16 s9, s0, s0 9839; GFX940-NEXT: ;;#ASMSTART 9840; GFX940-NEXT: ; use s[8:9] 9841; GFX940-NEXT: ;;#ASMEND 9842; GFX940-NEXT: s_setpc_b64 s[30:31] 9843 %vec0 = call <4 x i16> asm "; def $0", "=s"() 9844 %vec1 = call <4 x i16> asm "; def $0", "=s"() 9845 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 9846 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 9847 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 2, i32 1, i32 1> 9848 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 9849 ret void 9850} 9851 9852define void @s_shuffle_v4i16_v3i16__5_3_1_1() { 9853; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_3_1_1: 9854; GFX900: ; %bb.0: 9855; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9856; GFX900-NEXT: 
;;#ASMSTART 9857; GFX900-NEXT: ; def s[4:5] 9858; GFX900-NEXT: ;;#ASMEND 9859; GFX900-NEXT: ;;#ASMSTART 9860; GFX900-NEXT: ; def s[6:7] 9861; GFX900-NEXT: ;;#ASMEND 9862; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s6 9863; GFX900-NEXT: s_pack_hh_b32_b16 s9, s4, s4 9864; GFX900-NEXT: ;;#ASMSTART 9865; GFX900-NEXT: ; use s[8:9] 9866; GFX900-NEXT: ;;#ASMEND 9867; GFX900-NEXT: s_setpc_b64 s[30:31] 9868; 9869; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_3_1_1: 9870; GFX90A: ; %bb.0: 9871; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9872; GFX90A-NEXT: ;;#ASMSTART 9873; GFX90A-NEXT: ; def s[4:5] 9874; GFX90A-NEXT: ;;#ASMEND 9875; GFX90A-NEXT: ;;#ASMSTART 9876; GFX90A-NEXT: ; def s[6:7] 9877; GFX90A-NEXT: ;;#ASMEND 9878; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s6 9879; GFX90A-NEXT: s_pack_hh_b32_b16 s9, s4, s4 9880; GFX90A-NEXT: ;;#ASMSTART 9881; GFX90A-NEXT: ; use s[8:9] 9882; GFX90A-NEXT: ;;#ASMEND 9883; GFX90A-NEXT: s_setpc_b64 s[30:31] 9884; 9885; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_3_1_1: 9886; GFX940: ; %bb.0: 9887; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9888; GFX940-NEXT: ;;#ASMSTART 9889; GFX940-NEXT: ; def s[0:1] 9890; GFX940-NEXT: ;;#ASMEND 9891; GFX940-NEXT: ;;#ASMSTART 9892; GFX940-NEXT: ; def s[2:3] 9893; GFX940-NEXT: ;;#ASMEND 9894; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s2 9895; GFX940-NEXT: s_pack_hh_b32_b16 s9, s0, s0 9896; GFX940-NEXT: ;;#ASMSTART 9897; GFX940-NEXT: ; use s[8:9] 9898; GFX940-NEXT: ;;#ASMEND 9899; GFX940-NEXT: s_setpc_b64 s[30:31] 9900 %vec0 = call <4 x i16> asm "; def $0", "=s"() 9901 %vec1 = call <4 x i16> asm "; def $0", "=s"() 9902 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 9903 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 9904 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 3, i32 1, i32 1> 9905 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 9906 ret void 9907} 9908 
9909define void @s_shuffle_v4i16_v3i16__5_4_1_1() { 9910; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_4_1_1: 9911; GFX900: ; %bb.0: 9912; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9913; GFX900-NEXT: ;;#ASMSTART 9914; GFX900-NEXT: ; def s[4:5] 9915; GFX900-NEXT: ;;#ASMEND 9916; GFX900-NEXT: ;;#ASMSTART 9917; GFX900-NEXT: ; def s[6:7] 9918; GFX900-NEXT: ;;#ASMEND 9919; GFX900-NEXT: s_pack_lh_b32_b16 s8, s7, s6 9920; GFX900-NEXT: s_pack_hh_b32_b16 s9, s4, s4 9921; GFX900-NEXT: ;;#ASMSTART 9922; GFX900-NEXT: ; use s[8:9] 9923; GFX900-NEXT: ;;#ASMEND 9924; GFX900-NEXT: s_setpc_b64 s[30:31] 9925; 9926; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_4_1_1: 9927; GFX90A: ; %bb.0: 9928; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9929; GFX90A-NEXT: ;;#ASMSTART 9930; GFX90A-NEXT: ; def s[4:5] 9931; GFX90A-NEXT: ;;#ASMEND 9932; GFX90A-NEXT: ;;#ASMSTART 9933; GFX90A-NEXT: ; def s[6:7] 9934; GFX90A-NEXT: ;;#ASMEND 9935; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s7, s6 9936; GFX90A-NEXT: s_pack_hh_b32_b16 s9, s4, s4 9937; GFX90A-NEXT: ;;#ASMSTART 9938; GFX90A-NEXT: ; use s[8:9] 9939; GFX90A-NEXT: ;;#ASMEND 9940; GFX90A-NEXT: s_setpc_b64 s[30:31] 9941; 9942; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_4_1_1: 9943; GFX940: ; %bb.0: 9944; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9945; GFX940-NEXT: ;;#ASMSTART 9946; GFX940-NEXT: ; def s[0:1] 9947; GFX940-NEXT: ;;#ASMEND 9948; GFX940-NEXT: ;;#ASMSTART 9949; GFX940-NEXT: ; def s[2:3] 9950; GFX940-NEXT: ;;#ASMEND 9951; GFX940-NEXT: s_pack_lh_b32_b16 s8, s3, s2 9952; GFX940-NEXT: s_pack_hh_b32_b16 s9, s0, s0 9953; GFX940-NEXT: ;;#ASMSTART 9954; GFX940-NEXT: ; use s[8:9] 9955; GFX940-NEXT: ;;#ASMEND 9956; GFX940-NEXT: s_setpc_b64 s[30:31] 9957 %vec0 = call <4 x i16> asm "; def $0", "=s"() 9958 %vec1 = call <4 x i16> asm "; def $0", "=s"() 9959 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 9960 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 
2> 9961 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 4, i32 1, i32 1> 9962 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 9963 ret void 9964} 9965 9966define void @s_shuffle_v4i16_v3i16__5_5_1_1() { 9967; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_5_1_1: 9968; GFX900: ; %bb.0: 9969; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9970; GFX900-NEXT: ;;#ASMSTART 9971; GFX900-NEXT: ; def s[4:5] 9972; GFX900-NEXT: ;;#ASMEND 9973; GFX900-NEXT: ;;#ASMSTART 9974; GFX900-NEXT: ; def s[6:7] 9975; GFX900-NEXT: ;;#ASMEND 9976; GFX900-NEXT: s_pack_hh_b32_b16 s9, s4, s4 9977; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s7 9978; GFX900-NEXT: ;;#ASMSTART 9979; GFX900-NEXT: ; use s[8:9] 9980; GFX900-NEXT: ;;#ASMEND 9981; GFX900-NEXT: s_setpc_b64 s[30:31] 9982; 9983; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_5_1_1: 9984; GFX90A: ; %bb.0: 9985; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9986; GFX90A-NEXT: ;;#ASMSTART 9987; GFX90A-NEXT: ; def s[4:5] 9988; GFX90A-NEXT: ;;#ASMEND 9989; GFX90A-NEXT: ;;#ASMSTART 9990; GFX90A-NEXT: ; def s[6:7] 9991; GFX90A-NEXT: ;;#ASMEND 9992; GFX90A-NEXT: s_pack_hh_b32_b16 s9, s4, s4 9993; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s7 9994; GFX90A-NEXT: ;;#ASMSTART 9995; GFX90A-NEXT: ; use s[8:9] 9996; GFX90A-NEXT: ;;#ASMEND 9997; GFX90A-NEXT: s_setpc_b64 s[30:31] 9998; 9999; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_1_1: 10000; GFX940: ; %bb.0: 10001; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10002; GFX940-NEXT: ;;#ASMSTART 10003; GFX940-NEXT: ; def s[0:1] 10004; GFX940-NEXT: ;;#ASMEND 10005; GFX940-NEXT: ;;#ASMSTART 10006; GFX940-NEXT: ; def s[2:3] 10007; GFX940-NEXT: ;;#ASMEND 10008; GFX940-NEXT: s_pack_hh_b32_b16 s9, s0, s0 10009; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 10010; GFX940-NEXT: ;;#ASMSTART 10011; GFX940-NEXT: ; use s[8:9] 10012; GFX940-NEXT: ;;#ASMEND 10013; GFX940-NEXT: s_setpc_b64 s[30:31] 10014 %vec0 = call <4 x i16> asm "; def $0", "=s"() 10015 %vec1 = call <4 x 
i16> asm "; def $0", "=s"() 10016 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 10017 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 10018 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 1, i32 1> 10019 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 10020 ret void 10021} 10022 10023define void @s_shuffle_v4i16_v3i16__5_5_u_1() { 10024; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_5_u_1: 10025; GFX900: ; %bb.0: 10026; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10027; GFX900-NEXT: ;;#ASMSTART 10028; GFX900-NEXT: ; def s[4:5] 10029; GFX900-NEXT: ;;#ASMEND 10030; GFX900-NEXT: ;;#ASMSTART 10031; GFX900-NEXT: ; def s[6:7] 10032; GFX900-NEXT: ;;#ASMEND 10033; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s7 10034; GFX900-NEXT: s_mov_b32 s9, s4 10035; GFX900-NEXT: ;;#ASMSTART 10036; GFX900-NEXT: ; use s[8:9] 10037; GFX900-NEXT: ;;#ASMEND 10038; GFX900-NEXT: s_setpc_b64 s[30:31] 10039; 10040; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_5_u_1: 10041; GFX90A: ; %bb.0: 10042; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10043; GFX90A-NEXT: ;;#ASMSTART 10044; GFX90A-NEXT: ; def s[4:5] 10045; GFX90A-NEXT: ;;#ASMEND 10046; GFX90A-NEXT: ;;#ASMSTART 10047; GFX90A-NEXT: ; def s[6:7] 10048; GFX90A-NEXT: ;;#ASMEND 10049; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s7 10050; GFX90A-NEXT: s_mov_b32 s9, s4 10051; GFX90A-NEXT: ;;#ASMSTART 10052; GFX90A-NEXT: ; use s[8:9] 10053; GFX90A-NEXT: ;;#ASMEND 10054; GFX90A-NEXT: s_setpc_b64 s[30:31] 10055; 10056; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_u_1: 10057; GFX940: ; %bb.0: 10058; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10059; GFX940-NEXT: ;;#ASMSTART 10060; GFX940-NEXT: ; def s[0:1] 10061; GFX940-NEXT: ;;#ASMEND 10062; GFX940-NEXT: ;;#ASMSTART 10063; GFX940-NEXT: ; def s[2:3] 10064; GFX940-NEXT: ;;#ASMEND 10065; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 10066; GFX940-NEXT: 
s_mov_b32 s9, s0 10067; GFX940-NEXT: ;;#ASMSTART 10068; GFX940-NEXT: ; use s[8:9] 10069; GFX940-NEXT: ;;#ASMEND 10070; GFX940-NEXT: s_setpc_b64 s[30:31] 10071 %vec0 = call <4 x i16> asm "; def $0", "=s"() 10072 %vec1 = call <4 x i16> asm "; def $0", "=s"() 10073 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 10074 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 10075 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 poison, i32 1> 10076 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 10077 ret void 10078} 10079 10080define void @s_shuffle_v4i16_v3i16__5_5_0_1() { 10081; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_5_0_1: 10082; GFX900: ; %bb.0: 10083; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10084; GFX900-NEXT: ;;#ASMSTART 10085; GFX900-NEXT: ; def s[4:5] 10086; GFX900-NEXT: ;;#ASMEND 10087; GFX900-NEXT: ;;#ASMSTART 10088; GFX900-NEXT: ; def s[6:7] 10089; GFX900-NEXT: ;;#ASMEND 10090; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s7 10091; GFX900-NEXT: s_mov_b32 s9, s4 10092; GFX900-NEXT: ;;#ASMSTART 10093; GFX900-NEXT: ; use s[8:9] 10094; GFX900-NEXT: ;;#ASMEND 10095; GFX900-NEXT: s_setpc_b64 s[30:31] 10096; 10097; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_5_0_1: 10098; GFX90A: ; %bb.0: 10099; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10100; GFX90A-NEXT: ;;#ASMSTART 10101; GFX90A-NEXT: ; def s[4:5] 10102; GFX90A-NEXT: ;;#ASMEND 10103; GFX90A-NEXT: ;;#ASMSTART 10104; GFX90A-NEXT: ; def s[6:7] 10105; GFX90A-NEXT: ;;#ASMEND 10106; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s7 10107; GFX90A-NEXT: s_mov_b32 s9, s4 10108; GFX90A-NEXT: ;;#ASMSTART 10109; GFX90A-NEXT: ; use s[8:9] 10110; GFX90A-NEXT: ;;#ASMEND 10111; GFX90A-NEXT: s_setpc_b64 s[30:31] 10112; 10113; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_0_1: 10114; GFX940: ; %bb.0: 10115; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10116; GFX940-NEXT: 
;;#ASMSTART 10117; GFX940-NEXT: ; def s[0:1] 10118; GFX940-NEXT: ;;#ASMEND 10119; GFX940-NEXT: ;;#ASMSTART 10120; GFX940-NEXT: ; def s[2:3] 10121; GFX940-NEXT: ;;#ASMEND 10122; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 10123; GFX940-NEXT: s_mov_b32 s9, s0 10124; GFX940-NEXT: ;;#ASMSTART 10125; GFX940-NEXT: ; use s[8:9] 10126; GFX940-NEXT: ;;#ASMEND 10127; GFX940-NEXT: s_setpc_b64 s[30:31] 10128 %vec0 = call <4 x i16> asm "; def $0", "=s"() 10129 %vec1 = call <4 x i16> asm "; def $0", "=s"() 10130 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 10131 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 10132 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 0, i32 1> 10133 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 10134 ret void 10135} 10136 10137define void @s_shuffle_v4i16_v3i16__5_5_2_1() { 10138; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_5_2_1: 10139; GFX900: ; %bb.0: 10140; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10141; GFX900-NEXT: ;;#ASMSTART 10142; GFX900-NEXT: ; def s[4:5] 10143; GFX900-NEXT: ;;#ASMEND 10144; GFX900-NEXT: ;;#ASMSTART 10145; GFX900-NEXT: ; def s[6:7] 10146; GFX900-NEXT: ;;#ASMEND 10147; GFX900-NEXT: s_pack_lh_b32_b16 s9, s5, s4 10148; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s7 10149; GFX900-NEXT: ;;#ASMSTART 10150; GFX900-NEXT: ; use s[8:9] 10151; GFX900-NEXT: ;;#ASMEND 10152; GFX900-NEXT: s_setpc_b64 s[30:31] 10153; 10154; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_5_2_1: 10155; GFX90A: ; %bb.0: 10156; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10157; GFX90A-NEXT: ;;#ASMSTART 10158; GFX90A-NEXT: ; def s[4:5] 10159; GFX90A-NEXT: ;;#ASMEND 10160; GFX90A-NEXT: ;;#ASMSTART 10161; GFX90A-NEXT: ; def s[6:7] 10162; GFX90A-NEXT: ;;#ASMEND 10163; GFX90A-NEXT: s_pack_lh_b32_b16 s9, s5, s4 10164; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s7 10165; GFX90A-NEXT: ;;#ASMSTART 10166; 
GFX90A-NEXT: ; use s[8:9] 10167; GFX90A-NEXT: ;;#ASMEND 10168; GFX90A-NEXT: s_setpc_b64 s[30:31] 10169; 10170; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_2_1: 10171; GFX940: ; %bb.0: 10172; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10173; GFX940-NEXT: ;;#ASMSTART 10174; GFX940-NEXT: ; def s[0:1] 10175; GFX940-NEXT: ;;#ASMEND 10176; GFX940-NEXT: ;;#ASMSTART 10177; GFX940-NEXT: ; def s[2:3] 10178; GFX940-NEXT: ;;#ASMEND 10179; GFX940-NEXT: s_pack_lh_b32_b16 s9, s1, s0 10180; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 10181; GFX940-NEXT: ;;#ASMSTART 10182; GFX940-NEXT: ; use s[8:9] 10183; GFX940-NEXT: ;;#ASMEND 10184; GFX940-NEXT: s_setpc_b64 s[30:31] 10185 %vec0 = call <4 x i16> asm "; def $0", "=s"() 10186 %vec1 = call <4 x i16> asm "; def $0", "=s"() 10187 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 10188 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 10189 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 2, i32 1> 10190 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 10191 ret void 10192} 10193 10194define void @s_shuffle_v4i16_v3i16__5_5_3_1() { 10195; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_5_3_1: 10196; GFX900: ; %bb.0: 10197; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10198; GFX900-NEXT: ;;#ASMSTART 10199; GFX900-NEXT: ; def s[4:5] 10200; GFX900-NEXT: ;;#ASMEND 10201; GFX900-NEXT: ;;#ASMSTART 10202; GFX900-NEXT: ; def s[6:7] 10203; GFX900-NEXT: ;;#ASMEND 10204; GFX900-NEXT: s_pack_lh_b32_b16 s9, s6, s4 10205; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s7 10206; GFX900-NEXT: ;;#ASMSTART 10207; GFX900-NEXT: ; use s[8:9] 10208; GFX900-NEXT: ;;#ASMEND 10209; GFX900-NEXT: s_setpc_b64 s[30:31] 10210; 10211; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_5_3_1: 10212; GFX90A: ; %bb.0: 10213; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10214; GFX90A-NEXT: ;;#ASMSTART 10215; GFX90A-NEXT: ; def s[4:5] 
10216; GFX90A-NEXT: ;;#ASMEND 10217; GFX90A-NEXT: ;;#ASMSTART 10218; GFX90A-NEXT: ; def s[6:7] 10219; GFX90A-NEXT: ;;#ASMEND 10220; GFX90A-NEXT: s_pack_lh_b32_b16 s9, s6, s4 10221; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s7 10222; GFX90A-NEXT: ;;#ASMSTART 10223; GFX90A-NEXT: ; use s[8:9] 10224; GFX90A-NEXT: ;;#ASMEND 10225; GFX90A-NEXT: s_setpc_b64 s[30:31] 10226; 10227; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_3_1: 10228; GFX940: ; %bb.0: 10229; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10230; GFX940-NEXT: ;;#ASMSTART 10231; GFX940-NEXT: ; def s[0:1] 10232; GFX940-NEXT: ;;#ASMEND 10233; GFX940-NEXT: ;;#ASMSTART 10234; GFX940-NEXT: ; def s[2:3] 10235; GFX940-NEXT: ;;#ASMEND 10236; GFX940-NEXT: s_pack_lh_b32_b16 s9, s2, s0 10237; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 10238; GFX940-NEXT: ;;#ASMSTART 10239; GFX940-NEXT: ; use s[8:9] 10240; GFX940-NEXT: ;;#ASMEND 10241; GFX940-NEXT: s_setpc_b64 s[30:31] 10242 %vec0 = call <4 x i16> asm "; def $0", "=s"() 10243 %vec1 = call <4 x i16> asm "; def $0", "=s"() 10244 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 10245 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 10246 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 3, i32 1> 10247 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 10248 ret void 10249} 10250 10251define void @s_shuffle_v4i16_v3i16__5_5_4_1() { 10252; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_5_4_1: 10253; GFX900: ; %bb.0: 10254; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10255; GFX900-NEXT: ;;#ASMSTART 10256; GFX900-NEXT: ; def s[4:5] 10257; GFX900-NEXT: ;;#ASMEND 10258; GFX900-NEXT: ;;#ASMSTART 10259; GFX900-NEXT: ; def s[6:7] 10260; GFX900-NEXT: ;;#ASMEND 10261; GFX900-NEXT: s_pack_hh_b32_b16 s9, s6, s4 10262; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s7 10263; GFX900-NEXT: ;;#ASMSTART 10264; GFX900-NEXT: ; use s[8:9] 10265; 
GFX900-NEXT: ;;#ASMEND 10266; GFX900-NEXT: s_setpc_b64 s[30:31] 10267; 10268; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_5_4_1: 10269; GFX90A: ; %bb.0: 10270; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10271; GFX90A-NEXT: ;;#ASMSTART 10272; GFX90A-NEXT: ; def s[4:5] 10273; GFX90A-NEXT: ;;#ASMEND 10274; GFX90A-NEXT: ;;#ASMSTART 10275; GFX90A-NEXT: ; def s[6:7] 10276; GFX90A-NEXT: ;;#ASMEND 10277; GFX90A-NEXT: s_pack_hh_b32_b16 s9, s6, s4 10278; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s7 10279; GFX90A-NEXT: ;;#ASMSTART 10280; GFX90A-NEXT: ; use s[8:9] 10281; GFX90A-NEXT: ;;#ASMEND 10282; GFX90A-NEXT: s_setpc_b64 s[30:31] 10283; 10284; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_4_1: 10285; GFX940: ; %bb.0: 10286; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10287; GFX940-NEXT: ;;#ASMSTART 10288; GFX940-NEXT: ; def s[0:1] 10289; GFX940-NEXT: ;;#ASMEND 10290; GFX940-NEXT: ;;#ASMSTART 10291; GFX940-NEXT: ; def s[2:3] 10292; GFX940-NEXT: ;;#ASMEND 10293; GFX940-NEXT: s_pack_hh_b32_b16 s9, s2, s0 10294; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 10295; GFX940-NEXT: ;;#ASMSTART 10296; GFX940-NEXT: ; use s[8:9] 10297; GFX940-NEXT: ;;#ASMEND 10298; GFX940-NEXT: s_setpc_b64 s[30:31] 10299 %vec0 = call <4 x i16> asm "; def $0", "=s"() 10300 %vec1 = call <4 x i16> asm "; def $0", "=s"() 10301 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 10302 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 10303 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 4, i32 1> 10304 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 10305 ret void 10306} 10307 10308define void @s_shuffle_v4i16_v3i16__u_2_2_2() { 10309; GFX900-LABEL: s_shuffle_v4i16_v3i16__u_2_2_2: 10310; GFX900: ; %bb.0: 10311; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10312; GFX900-NEXT: ;;#ASMSTART 10313; GFX900-NEXT: ; def s[4:5] 10314; GFX900-NEXT: ;;#ASMEND 
10315; GFX900-NEXT: s_pack_ll_b32_b16 s9, s5, s5 10316; GFX900-NEXT: s_lshl_b32 s8, s5, 16 10317; GFX900-NEXT: ;;#ASMSTART 10318; GFX900-NEXT: ; use s[8:9] 10319; GFX900-NEXT: ;;#ASMEND 10320; GFX900-NEXT: s_setpc_b64 s[30:31] 10321; 10322; GFX90A-LABEL: s_shuffle_v4i16_v3i16__u_2_2_2: 10323; GFX90A: ; %bb.0: 10324; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10325; GFX90A-NEXT: ;;#ASMSTART 10326; GFX90A-NEXT: ; def s[4:5] 10327; GFX90A-NEXT: ;;#ASMEND 10328; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s5, s5 10329; GFX90A-NEXT: s_lshl_b32 s8, s5, 16 10330; GFX90A-NEXT: ;;#ASMSTART 10331; GFX90A-NEXT: ; use s[8:9] 10332; GFX90A-NEXT: ;;#ASMEND 10333; GFX90A-NEXT: s_setpc_b64 s[30:31] 10334; 10335; GFX940-LABEL: s_shuffle_v4i16_v3i16__u_2_2_2: 10336; GFX940: ; %bb.0: 10337; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10338; GFX940-NEXT: ;;#ASMSTART 10339; GFX940-NEXT: ; def s[0:1] 10340; GFX940-NEXT: ;;#ASMEND 10341; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 10342; GFX940-NEXT: s_lshl_b32 s8, s1, 16 10343; GFX940-NEXT: ;;#ASMSTART 10344; GFX940-NEXT: ; use s[8:9] 10345; GFX940-NEXT: ;;#ASMEND 10346; GFX940-NEXT: s_setpc_b64 s[30:31] 10347 %vec0 = call <4 x i16> asm "; def $0", "=s"() 10348 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 10349 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> <i32 poison, i32 2, i32 2, i32 2> 10350 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 10351 ret void 10352} 10353 10354define void @s_shuffle_v4i16_v3i16__0_2_2_2() { 10355; GFX900-LABEL: s_shuffle_v4i16_v3i16__0_2_2_2: 10356; GFX900: ; %bb.0: 10357; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10358; GFX900-NEXT: ;;#ASMSTART 10359; GFX900-NEXT: ; def s[4:5] 10360; GFX900-NEXT: ;;#ASMEND 10361; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s5 10362; GFX900-NEXT: s_pack_ll_b32_b16 s9, s5, s5 10363; GFX900-NEXT: ;;#ASMSTART 10364; GFX900-NEXT: ; use s[8:9] 10365; GFX900-NEXT: ;;#ASMEND 
10366; GFX900-NEXT: s_setpc_b64 s[30:31] 10367; 10368; GFX90A-LABEL: s_shuffle_v4i16_v3i16__0_2_2_2: 10369; GFX90A: ; %bb.0: 10370; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10371; GFX90A-NEXT: ;;#ASMSTART 10372; GFX90A-NEXT: ; def s[4:5] 10373; GFX90A-NEXT: ;;#ASMEND 10374; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s5 10375; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s5, s5 10376; GFX90A-NEXT: ;;#ASMSTART 10377; GFX90A-NEXT: ; use s[8:9] 10378; GFX90A-NEXT: ;;#ASMEND 10379; GFX90A-NEXT: s_setpc_b64 s[30:31] 10380; 10381; GFX940-LABEL: s_shuffle_v4i16_v3i16__0_2_2_2: 10382; GFX940: ; %bb.0: 10383; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10384; GFX940-NEXT: ;;#ASMSTART 10385; GFX940-NEXT: ; def s[0:1] 10386; GFX940-NEXT: ;;#ASMEND 10387; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 10388; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 10389; GFX940-NEXT: ;;#ASMSTART 10390; GFX940-NEXT: ; use s[8:9] 10391; GFX940-NEXT: ;;#ASMEND 10392; GFX940-NEXT: s_setpc_b64 s[30:31] 10393 %vec0 = call <4 x i16> asm "; def $0", "=s"() 10394 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 10395 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> <i32 0, i32 2, i32 2, i32 2> 10396 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 10397 ret void 10398} 10399 10400define void @s_shuffle_v4i16_v3i16__1_2_2_2() { 10401; GFX900-LABEL: s_shuffle_v4i16_v3i16__1_2_2_2: 10402; GFX900: ; %bb.0: 10403; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10404; GFX900-NEXT: ;;#ASMSTART 10405; GFX900-NEXT: ; def s[4:5] 10406; GFX900-NEXT: ;;#ASMEND 10407; GFX900-NEXT: s_lshr_b32 s4, s4, 16 10408; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s5 10409; GFX900-NEXT: s_pack_ll_b32_b16 s9, s5, s5 10410; GFX900-NEXT: ;;#ASMSTART 10411; GFX900-NEXT: ; use s[8:9] 10412; GFX900-NEXT: ;;#ASMEND 10413; GFX900-NEXT: s_setpc_b64 s[30:31] 10414; 10415; GFX90A-LABEL: s_shuffle_v4i16_v3i16__1_2_2_2: 10416; GFX90A: ; %bb.0: 10417; 
GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10418; GFX90A-NEXT: ;;#ASMSTART 10419; GFX90A-NEXT: ; def s[4:5] 10420; GFX90A-NEXT: ;;#ASMEND 10421; GFX90A-NEXT: s_lshr_b32 s4, s4, 16 10422; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s5 10423; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s5, s5 10424; GFX90A-NEXT: ;;#ASMSTART 10425; GFX90A-NEXT: ; use s[8:9] 10426; GFX90A-NEXT: ;;#ASMEND 10427; GFX90A-NEXT: s_setpc_b64 s[30:31] 10428; 10429; GFX940-LABEL: s_shuffle_v4i16_v3i16__1_2_2_2: 10430; GFX940: ; %bb.0: 10431; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10432; GFX940-NEXT: ;;#ASMSTART 10433; GFX940-NEXT: ; def s[0:1] 10434; GFX940-NEXT: ;;#ASMEND 10435; GFX940-NEXT: s_lshr_b32 s0, s0, 16 10436; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 10437; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 10438; GFX940-NEXT: ;;#ASMSTART 10439; GFX940-NEXT: ; use s[8:9] 10440; GFX940-NEXT: ;;#ASMEND 10441; GFX940-NEXT: s_setpc_b64 s[30:31] 10442 %vec0 = call <4 x i16> asm "; def $0", "=s"() 10443 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 10444 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> <i32 1, i32 2, i32 2, i32 2> 10445 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 10446 ret void 10447} 10448 10449define void @s_shuffle_v4i16_v3i16__2_2_2_2() { 10450; GFX900-LABEL: s_shuffle_v4i16_v3i16__2_2_2_2: 10451; GFX900: ; %bb.0: 10452; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10453; GFX900-NEXT: ;;#ASMSTART 10454; GFX900-NEXT: ; def s[4:5] 10455; GFX900-NEXT: ;;#ASMEND 10456; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s5 10457; GFX900-NEXT: s_mov_b32 s9, s8 10458; GFX900-NEXT: ;;#ASMSTART 10459; GFX900-NEXT: ; use s[8:9] 10460; GFX900-NEXT: ;;#ASMEND 10461; GFX900-NEXT: s_setpc_b64 s[30:31] 10462; 10463; GFX90A-LABEL: s_shuffle_v4i16_v3i16__2_2_2_2: 10464; GFX90A: ; %bb.0: 10465; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10466; GFX90A-NEXT: ;;#ASMSTART 10467; GFX90A-NEXT: ; 
def s[4:5] 10468; GFX90A-NEXT: ;;#ASMEND 10469; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s5 10470; GFX90A-NEXT: s_mov_b32 s9, s8 10471; GFX90A-NEXT: ;;#ASMSTART 10472; GFX90A-NEXT: ; use s[8:9] 10473; GFX90A-NEXT: ;;#ASMEND 10474; GFX90A-NEXT: s_setpc_b64 s[30:31] 10475; 10476; GFX940-LABEL: s_shuffle_v4i16_v3i16__2_2_2_2: 10477; GFX940: ; %bb.0: 10478; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10479; GFX940-NEXT: ;;#ASMSTART 10480; GFX940-NEXT: ; def s[0:1] 10481; GFX940-NEXT: ;;#ASMEND 10482; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 10483; GFX940-NEXT: s_mov_b32 s9, s8 10484; GFX940-NEXT: ;;#ASMSTART 10485; GFX940-NEXT: ; use s[8:9] 10486; GFX940-NEXT: ;;#ASMEND 10487; GFX940-NEXT: s_setpc_b64 s[30:31] 10488 %vec0 = call <4 x i16> asm "; def $0", "=s"() 10489 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 10490 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2> 10491 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 10492 ret void 10493} 10494 10495define void @s_shuffle_v4i16_v3i16__3_2_2_2() { 10496; GFX900-LABEL: s_shuffle_v4i16_v3i16__3_2_2_2: 10497; GFX900: ; %bb.0: 10498; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10499; GFX900-NEXT: ;;#ASMSTART 10500; GFX900-NEXT: ; def s[4:5] 10501; GFX900-NEXT: ;;#ASMEND 10502; GFX900-NEXT: s_pack_ll_b32_b16 s9, s5, s5 10503; GFX900-NEXT: s_lshl_b32 s8, s5, 16 10504; GFX900-NEXT: ;;#ASMSTART 10505; GFX900-NEXT: ; use s[8:9] 10506; GFX900-NEXT: ;;#ASMEND 10507; GFX900-NEXT: s_setpc_b64 s[30:31] 10508; 10509; GFX90A-LABEL: s_shuffle_v4i16_v3i16__3_2_2_2: 10510; GFX90A: ; %bb.0: 10511; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10512; GFX90A-NEXT: ;;#ASMSTART 10513; GFX90A-NEXT: ; def s[4:5] 10514; GFX90A-NEXT: ;;#ASMEND 10515; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s5, s5 10516; GFX90A-NEXT: s_lshl_b32 s8, s5, 16 10517; GFX90A-NEXT: ;;#ASMSTART 10518; GFX90A-NEXT: ; use s[8:9] 10519; 
GFX90A-NEXT: ;;#ASMEND 10520; GFX90A-NEXT: s_setpc_b64 s[30:31] 10521; 10522; GFX940-LABEL: s_shuffle_v4i16_v3i16__3_2_2_2: 10523; GFX940: ; %bb.0: 10524; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10525; GFX940-NEXT: ;;#ASMSTART 10526; GFX940-NEXT: ; def s[0:1] 10527; GFX940-NEXT: ;;#ASMEND 10528; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 10529; GFX940-NEXT: s_lshl_b32 s8, s1, 16 10530; GFX940-NEXT: ;;#ASMSTART 10531; GFX940-NEXT: ; use s[8:9] 10532; GFX940-NEXT: ;;#ASMEND 10533; GFX940-NEXT: s_setpc_b64 s[30:31] 10534 %vec0 = call <4 x i16> asm "; def $0", "=s"() 10535 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 10536 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> <i32 3, i32 2, i32 2, i32 2> 10537 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 10538 ret void 10539} 10540 10541define void @s_shuffle_v4i16_v3i16__4_2_2_2() { 10542; GFX900-LABEL: s_shuffle_v4i16_v3i16__4_2_2_2: 10543; GFX900: ; %bb.0: 10544; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10545; GFX900-NEXT: ;;#ASMSTART 10546; GFX900-NEXT: ; def s[4:5] 10547; GFX900-NEXT: ;;#ASMEND 10548; GFX900-NEXT: ;;#ASMSTART 10549; GFX900-NEXT: ; def s[6:7] 10550; GFX900-NEXT: ;;#ASMEND 10551; GFX900-NEXT: s_lshr_b32 s4, s6, 16 10552; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s5 10553; GFX900-NEXT: s_pack_ll_b32_b16 s9, s5, s5 10554; GFX900-NEXT: ;;#ASMSTART 10555; GFX900-NEXT: ; use s[8:9] 10556; GFX900-NEXT: ;;#ASMEND 10557; GFX900-NEXT: s_setpc_b64 s[30:31] 10558; 10559; GFX90A-LABEL: s_shuffle_v4i16_v3i16__4_2_2_2: 10560; GFX90A: ; %bb.0: 10561; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10562; GFX90A-NEXT: ;;#ASMSTART 10563; GFX90A-NEXT: ; def s[4:5] 10564; GFX90A-NEXT: ;;#ASMEND 10565; GFX90A-NEXT: ;;#ASMSTART 10566; GFX90A-NEXT: ; def s[6:7] 10567; GFX90A-NEXT: ;;#ASMEND 10568; GFX90A-NEXT: s_lshr_b32 s4, s6, 16 10569; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s5 10570; GFX90A-NEXT: 
s_pack_ll_b32_b16 s9, s5, s5 10571; GFX90A-NEXT: ;;#ASMSTART 10572; GFX90A-NEXT: ; use s[8:9] 10573; GFX90A-NEXT: ;;#ASMEND 10574; GFX90A-NEXT: s_setpc_b64 s[30:31] 10575; 10576; GFX940-LABEL: s_shuffle_v4i16_v3i16__4_2_2_2: 10577; GFX940: ; %bb.0: 10578; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10579; GFX940-NEXT: ;;#ASMSTART 10580; GFX940-NEXT: ; def s[0:1] 10581; GFX940-NEXT: ;;#ASMEND 10582; GFX940-NEXT: ;;#ASMSTART 10583; GFX940-NEXT: ; def s[2:3] 10584; GFX940-NEXT: ;;#ASMEND 10585; GFX940-NEXT: s_lshr_b32 s0, s2, 16 10586; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 10587; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 10588; GFX940-NEXT: ;;#ASMSTART 10589; GFX940-NEXT: ; use s[8:9] 10590; GFX940-NEXT: ;;#ASMEND 10591; GFX940-NEXT: s_setpc_b64 s[30:31] 10592 %vec0 = call <4 x i16> asm "; def $0", "=s"() 10593 %vec1 = call <4 x i16> asm "; def $0", "=s"() 10594 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 10595 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 10596 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 4, i32 2, i32 2, i32 2> 10597 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 10598 ret void 10599} 10600 10601define void @s_shuffle_v4i16_v3i16__5_2_2_2() { 10602; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_2_2_2: 10603; GFX900: ; %bb.0: 10604; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10605; GFX900-NEXT: ;;#ASMSTART 10606; GFX900-NEXT: ; def s[4:5] 10607; GFX900-NEXT: ;;#ASMEND 10608; GFX900-NEXT: ;;#ASMSTART 10609; GFX900-NEXT: ; def s[6:7] 10610; GFX900-NEXT: ;;#ASMEND 10611; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s5 10612; GFX900-NEXT: s_pack_ll_b32_b16 s9, s5, s5 10613; GFX900-NEXT: ;;#ASMSTART 10614; GFX900-NEXT: ; use s[8:9] 10615; GFX900-NEXT: ;;#ASMEND 10616; GFX900-NEXT: s_setpc_b64 s[30:31] 10617; 10618; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_2_2_2: 10619; GFX90A: ; %bb.0: 10620; 
GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10621; GFX90A-NEXT: ;;#ASMSTART 10622; GFX90A-NEXT: ; def s[4:5] 10623; GFX90A-NEXT: ;;#ASMEND 10624; GFX90A-NEXT: ;;#ASMSTART 10625; GFX90A-NEXT: ; def s[6:7] 10626; GFX90A-NEXT: ;;#ASMEND 10627; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s5 10628; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s5, s5 10629; GFX90A-NEXT: ;;#ASMSTART 10630; GFX90A-NEXT: ; use s[8:9] 10631; GFX90A-NEXT: ;;#ASMEND 10632; GFX90A-NEXT: s_setpc_b64 s[30:31] 10633; 10634; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_2_2_2: 10635; GFX940: ; %bb.0: 10636; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10637; GFX940-NEXT: ;;#ASMSTART 10638; GFX940-NEXT: ; def s[0:1] 10639; GFX940-NEXT: ;;#ASMEND 10640; GFX940-NEXT: ;;#ASMSTART 10641; GFX940-NEXT: ; def s[2:3] 10642; GFX940-NEXT: ;;#ASMEND 10643; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s1 10644; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 10645; GFX940-NEXT: ;;#ASMSTART 10646; GFX940-NEXT: ; use s[8:9] 10647; GFX940-NEXT: ;;#ASMEND 10648; GFX940-NEXT: s_setpc_b64 s[30:31] 10649 %vec0 = call <4 x i16> asm "; def $0", "=s"() 10650 %vec1 = call <4 x i16> asm "; def $0", "=s"() 10651 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 10652 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 10653 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 2, i32 2, i32 2> 10654 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 10655 ret void 10656} 10657 10658define void @s_shuffle_v4i16_v3i16__5_u_2_2() { 10659; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_u_2_2: 10660; GFX900: ; %bb.0: 10661; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10662; GFX900-NEXT: ;;#ASMSTART 10663; GFX900-NEXT: ; def s[4:5] 10664; GFX900-NEXT: ;;#ASMEND 10665; GFX900-NEXT: ;;#ASMSTART 10666; GFX900-NEXT: ; def s[6:7] 10667; GFX900-NEXT: ;;#ASMEND 10668; GFX900-NEXT: s_pack_ll_b32_b16 s9, s5, s5 10669; 
GFX900-NEXT: s_mov_b32 s8, s7 10670; GFX900-NEXT: ;;#ASMSTART 10671; GFX900-NEXT: ; use s[8:9] 10672; GFX900-NEXT: ;;#ASMEND 10673; GFX900-NEXT: s_setpc_b64 s[30:31] 10674; 10675; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_u_2_2: 10676; GFX90A: ; %bb.0: 10677; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10678; GFX90A-NEXT: ;;#ASMSTART 10679; GFX90A-NEXT: ; def s[4:5] 10680; GFX90A-NEXT: ;;#ASMEND 10681; GFX90A-NEXT: ;;#ASMSTART 10682; GFX90A-NEXT: ; def s[6:7] 10683; GFX90A-NEXT: ;;#ASMEND 10684; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s5, s5 10685; GFX90A-NEXT: s_mov_b32 s8, s7 10686; GFX90A-NEXT: ;;#ASMSTART 10687; GFX90A-NEXT: ; use s[8:9] 10688; GFX90A-NEXT: ;;#ASMEND 10689; GFX90A-NEXT: s_setpc_b64 s[30:31] 10690; 10691; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_u_2_2: 10692; GFX940: ; %bb.0: 10693; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10694; GFX940-NEXT: ;;#ASMSTART 10695; GFX940-NEXT: ; def s[0:1] 10696; GFX940-NEXT: ;;#ASMEND 10697; GFX940-NEXT: ;;#ASMSTART 10698; GFX940-NEXT: ; def s[2:3] 10699; GFX940-NEXT: ;;#ASMEND 10700; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 10701; GFX940-NEXT: s_mov_b32 s8, s3 10702; GFX940-NEXT: ;;#ASMSTART 10703; GFX940-NEXT: ; use s[8:9] 10704; GFX940-NEXT: ;;#ASMEND 10705; GFX940-NEXT: s_setpc_b64 s[30:31] 10706 %vec0 = call <4 x i16> asm "; def $0", "=s"() 10707 %vec1 = call <4 x i16> asm "; def $0", "=s"() 10708 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 10709 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 10710 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 poison, i32 2, i32 2> 10711 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 10712 ret void 10713} 10714 10715define void @s_shuffle_v4i16_v3i16__5_0_2_2() { 10716; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_0_2_2: 10717; GFX900: ; %bb.0: 10718; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10719; 
GFX900-NEXT: ;;#ASMSTART 10720; GFX900-NEXT: ; def s[4:5] 10721; GFX900-NEXT: ;;#ASMEND 10722; GFX900-NEXT: ;;#ASMSTART 10723; GFX900-NEXT: ; def s[6:7] 10724; GFX900-NEXT: ;;#ASMEND 10725; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s4 10726; GFX900-NEXT: s_pack_ll_b32_b16 s9, s5, s5 10727; GFX900-NEXT: ;;#ASMSTART 10728; GFX900-NEXT: ; use s[8:9] 10729; GFX900-NEXT: ;;#ASMEND 10730; GFX900-NEXT: s_setpc_b64 s[30:31] 10731; 10732; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_0_2_2: 10733; GFX90A: ; %bb.0: 10734; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10735; GFX90A-NEXT: ;;#ASMSTART 10736; GFX90A-NEXT: ; def s[4:5] 10737; GFX90A-NEXT: ;;#ASMEND 10738; GFX90A-NEXT: ;;#ASMSTART 10739; GFX90A-NEXT: ; def s[6:7] 10740; GFX90A-NEXT: ;;#ASMEND 10741; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s4 10742; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s5, s5 10743; GFX90A-NEXT: ;;#ASMSTART 10744; GFX90A-NEXT: ; use s[8:9] 10745; GFX90A-NEXT: ;;#ASMEND 10746; GFX90A-NEXT: s_setpc_b64 s[30:31] 10747; 10748; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_0_2_2: 10749; GFX940: ; %bb.0: 10750; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10751; GFX940-NEXT: ;;#ASMSTART 10752; GFX940-NEXT: ; def s[0:1] 10753; GFX940-NEXT: ;;#ASMEND 10754; GFX940-NEXT: ;;#ASMSTART 10755; GFX940-NEXT: ; def s[2:3] 10756; GFX940-NEXT: ;;#ASMEND 10757; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 10758; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 10759; GFX940-NEXT: ;;#ASMSTART 10760; GFX940-NEXT: ; use s[8:9] 10761; GFX940-NEXT: ;;#ASMEND 10762; GFX940-NEXT: s_setpc_b64 s[30:31] 10763 %vec0 = call <4 x i16> asm "; def $0", "=s"() 10764 %vec1 = call <4 x i16> asm "; def $0", "=s"() 10765 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 10766 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 10767 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 0, i32 2, i32 2> 10768 call void asm sideeffect "; 
use $0", "{s[8:9]}"(<4 x i16> %shuf) 10769 ret void 10770} 10771 10772define void @s_shuffle_v4i16_v3i16__5_1_2_2() { 10773; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_1_2_2: 10774; GFX900: ; %bb.0: 10775; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10776; GFX900-NEXT: ;;#ASMSTART 10777; GFX900-NEXT: ; def s[4:5] 10778; GFX900-NEXT: ;;#ASMEND 10779; GFX900-NEXT: ;;#ASMSTART 10780; GFX900-NEXT: ; def s[6:7] 10781; GFX900-NEXT: ;;#ASMEND 10782; GFX900-NEXT: s_pack_lh_b32_b16 s8, s7, s4 10783; GFX900-NEXT: s_pack_ll_b32_b16 s9, s5, s5 10784; GFX900-NEXT: ;;#ASMSTART 10785; GFX900-NEXT: ; use s[8:9] 10786; GFX900-NEXT: ;;#ASMEND 10787; GFX900-NEXT: s_setpc_b64 s[30:31] 10788; 10789; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_1_2_2: 10790; GFX90A: ; %bb.0: 10791; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10792; GFX90A-NEXT: ;;#ASMSTART 10793; GFX90A-NEXT: ; def s[4:5] 10794; GFX90A-NEXT: ;;#ASMEND 10795; GFX90A-NEXT: ;;#ASMSTART 10796; GFX90A-NEXT: ; def s[6:7] 10797; GFX90A-NEXT: ;;#ASMEND 10798; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s7, s4 10799; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s5, s5 10800; GFX90A-NEXT: ;;#ASMSTART 10801; GFX90A-NEXT: ; use s[8:9] 10802; GFX90A-NEXT: ;;#ASMEND 10803; GFX90A-NEXT: s_setpc_b64 s[30:31] 10804; 10805; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_1_2_2: 10806; GFX940: ; %bb.0: 10807; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10808; GFX940-NEXT: ;;#ASMSTART 10809; GFX940-NEXT: ; def s[0:1] 10810; GFX940-NEXT: ;;#ASMEND 10811; GFX940-NEXT: ;;#ASMSTART 10812; GFX940-NEXT: ; def s[2:3] 10813; GFX940-NEXT: ;;#ASMEND 10814; GFX940-NEXT: s_pack_lh_b32_b16 s8, s3, s0 10815; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 10816; GFX940-NEXT: ;;#ASMSTART 10817; GFX940-NEXT: ; use s[8:9] 10818; GFX940-NEXT: ;;#ASMEND 10819; GFX940-NEXT: s_setpc_b64 s[30:31] 10820 %vec0 = call <4 x i16> asm "; def $0", "=s"() 10821 %vec1 = call <4 x i16> asm "; def $0", "=s"() 10822 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> 
<i32 0, i32 1, i32 2> 10823 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 10824 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 1, i32 2, i32 2> 10825 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 10826 ret void 10827} 10828 10829define void @s_shuffle_v4i16_v3i16__5_3_2_2() { 10830; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_3_2_2: 10831; GFX900: ; %bb.0: 10832; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10833; GFX900-NEXT: ;;#ASMSTART 10834; GFX900-NEXT: ; def s[4:5] 10835; GFX900-NEXT: ;;#ASMEND 10836; GFX900-NEXT: ;;#ASMSTART 10837; GFX900-NEXT: ; def s[6:7] 10838; GFX900-NEXT: ;;#ASMEND 10839; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s6 10840; GFX900-NEXT: s_pack_ll_b32_b16 s9, s5, s5 10841; GFX900-NEXT: ;;#ASMSTART 10842; GFX900-NEXT: ; use s[8:9] 10843; GFX900-NEXT: ;;#ASMEND 10844; GFX900-NEXT: s_setpc_b64 s[30:31] 10845; 10846; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_3_2_2: 10847; GFX90A: ; %bb.0: 10848; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10849; GFX90A-NEXT: ;;#ASMSTART 10850; GFX90A-NEXT: ; def s[4:5] 10851; GFX90A-NEXT: ;;#ASMEND 10852; GFX90A-NEXT: ;;#ASMSTART 10853; GFX90A-NEXT: ; def s[6:7] 10854; GFX90A-NEXT: ;;#ASMEND 10855; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s6 10856; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s5, s5 10857; GFX90A-NEXT: ;;#ASMSTART 10858; GFX90A-NEXT: ; use s[8:9] 10859; GFX90A-NEXT: ;;#ASMEND 10860; GFX90A-NEXT: s_setpc_b64 s[30:31] 10861; 10862; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_3_2_2: 10863; GFX940: ; %bb.0: 10864; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10865; GFX940-NEXT: ;;#ASMSTART 10866; GFX940-NEXT: ; def s[0:1] 10867; GFX940-NEXT: ;;#ASMEND 10868; GFX940-NEXT: ;;#ASMSTART 10869; GFX940-NEXT: ; def s[2:3] 10870; GFX940-NEXT: ;;#ASMEND 10871; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s2 10872; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 10873; GFX940-NEXT: ;;#ASMSTART 10874; GFX940-NEXT: ; 
use s[8:9] 10875; GFX940-NEXT: ;;#ASMEND 10876; GFX940-NEXT: s_setpc_b64 s[30:31] 10877 %vec0 = call <4 x i16> asm "; def $0", "=s"() 10878 %vec1 = call <4 x i16> asm "; def $0", "=s"() 10879 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 10880 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 10881 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 3, i32 2, i32 2> 10882 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 10883 ret void 10884} 10885 10886define void @s_shuffle_v4i16_v3i16__5_4_2_2() { 10887; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_4_2_2: 10888; GFX900: ; %bb.0: 10889; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10890; GFX900-NEXT: ;;#ASMSTART 10891; GFX900-NEXT: ; def s[4:5] 10892; GFX900-NEXT: ;;#ASMEND 10893; GFX900-NEXT: ;;#ASMSTART 10894; GFX900-NEXT: ; def s[6:7] 10895; GFX900-NEXT: ;;#ASMEND 10896; GFX900-NEXT: s_pack_lh_b32_b16 s8, s7, s6 10897; GFX900-NEXT: s_pack_ll_b32_b16 s9, s5, s5 10898; GFX900-NEXT: ;;#ASMSTART 10899; GFX900-NEXT: ; use s[8:9] 10900; GFX900-NEXT: ;;#ASMEND 10901; GFX900-NEXT: s_setpc_b64 s[30:31] 10902; 10903; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_4_2_2: 10904; GFX90A: ; %bb.0: 10905; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10906; GFX90A-NEXT: ;;#ASMSTART 10907; GFX90A-NEXT: ; def s[4:5] 10908; GFX90A-NEXT: ;;#ASMEND 10909; GFX90A-NEXT: ;;#ASMSTART 10910; GFX90A-NEXT: ; def s[6:7] 10911; GFX90A-NEXT: ;;#ASMEND 10912; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s7, s6 10913; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s5, s5 10914; GFX90A-NEXT: ;;#ASMSTART 10915; GFX90A-NEXT: ; use s[8:9] 10916; GFX90A-NEXT: ;;#ASMEND 10917; GFX90A-NEXT: s_setpc_b64 s[30:31] 10918; 10919; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_4_2_2: 10920; GFX940: ; %bb.0: 10921; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10922; GFX940-NEXT: ;;#ASMSTART 10923; GFX940-NEXT: ; def s[0:1] 10924; 
GFX940-NEXT: ;;#ASMEND 10925; GFX940-NEXT: ;;#ASMSTART 10926; GFX940-NEXT: ; def s[2:3] 10927; GFX940-NEXT: ;;#ASMEND 10928; GFX940-NEXT: s_pack_lh_b32_b16 s8, s3, s2 10929; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 10930; GFX940-NEXT: ;;#ASMSTART 10931; GFX940-NEXT: ; use s[8:9] 10932; GFX940-NEXT: ;;#ASMEND 10933; GFX940-NEXT: s_setpc_b64 s[30:31] 10934 %vec0 = call <4 x i16> asm "; def $0", "=s"() 10935 %vec1 = call <4 x i16> asm "; def $0", "=s"() 10936 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 10937 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 10938 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 4, i32 2, i32 2> 10939 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 10940 ret void 10941} 10942 10943define void @s_shuffle_v4i16_v3i16__5_5_2_2() { 10944; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_5_2_2: 10945; GFX900: ; %bb.0: 10946; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10947; GFX900-NEXT: ;;#ASMSTART 10948; GFX900-NEXT: ; def s[4:5] 10949; GFX900-NEXT: ;;#ASMEND 10950; GFX900-NEXT: ;;#ASMSTART 10951; GFX900-NEXT: ; def s[6:7] 10952; GFX900-NEXT: ;;#ASMEND 10953; GFX900-NEXT: s_pack_ll_b32_b16 s9, s5, s5 10954; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s7 10955; GFX900-NEXT: ;;#ASMSTART 10956; GFX900-NEXT: ; use s[8:9] 10957; GFX900-NEXT: ;;#ASMEND 10958; GFX900-NEXT: s_setpc_b64 s[30:31] 10959; 10960; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_5_2_2: 10961; GFX90A: ; %bb.0: 10962; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10963; GFX90A-NEXT: ;;#ASMSTART 10964; GFX90A-NEXT: ; def s[4:5] 10965; GFX90A-NEXT: ;;#ASMEND 10966; GFX90A-NEXT: ;;#ASMSTART 10967; GFX90A-NEXT: ; def s[6:7] 10968; GFX90A-NEXT: ;;#ASMEND 10969; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s5, s5 10970; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s7 10971; GFX90A-NEXT: ;;#ASMSTART 10972; GFX90A-NEXT: ; use s[8:9] 10973; GFX90A-NEXT: 
;;#ASMEND 10974; GFX90A-NEXT: s_setpc_b64 s[30:31] 10975; 10976; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_2_2: 10977; GFX940: ; %bb.0: 10978; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10979; GFX940-NEXT: ;;#ASMSTART 10980; GFX940-NEXT: ; def s[0:1] 10981; GFX940-NEXT: ;;#ASMEND 10982; GFX940-NEXT: ;;#ASMSTART 10983; GFX940-NEXT: ; def s[2:3] 10984; GFX940-NEXT: ;;#ASMEND 10985; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 10986; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 10987; GFX940-NEXT: ;;#ASMSTART 10988; GFX940-NEXT: ; use s[8:9] 10989; GFX940-NEXT: ;;#ASMEND 10990; GFX940-NEXT: s_setpc_b64 s[30:31] 10991 %vec0 = call <4 x i16> asm "; def $0", "=s"() 10992 %vec1 = call <4 x i16> asm "; def $0", "=s"() 10993 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 10994 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 10995 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 2, i32 2> 10996 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 10997 ret void 10998} 10999 11000define void @s_shuffle_v4i16_v3i16__5_5_u_2() { 11001; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_5_u_2: 11002; GFX900: ; %bb.0: 11003; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11004; GFX900-NEXT: ;;#ASMSTART 11005; GFX900-NEXT: ; def s[4:5] 11006; GFX900-NEXT: ;;#ASMEND 11007; GFX900-NEXT: ;;#ASMSTART 11008; GFX900-NEXT: ; def s[6:7] 11009; GFX900-NEXT: ;;#ASMEND 11010; GFX900-NEXT: s_lshl_b32 s9, s5, 16 11011; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s7 11012; GFX900-NEXT: ;;#ASMSTART 11013; GFX900-NEXT: ; use s[8:9] 11014; GFX900-NEXT: ;;#ASMEND 11015; GFX900-NEXT: s_setpc_b64 s[30:31] 11016; 11017; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_5_u_2: 11018; GFX90A: ; %bb.0: 11019; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11020; GFX90A-NEXT: ;;#ASMSTART 11021; GFX90A-NEXT: ; def s[4:5] 11022; GFX90A-NEXT: ;;#ASMEND 11023; GFX90A-NEXT: 
;;#ASMSTART 11024; GFX90A-NEXT: ; def s[6:7] 11025; GFX90A-NEXT: ;;#ASMEND 11026; GFX90A-NEXT: s_lshl_b32 s9, s5, 16 11027; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s7 11028; GFX90A-NEXT: ;;#ASMSTART 11029; GFX90A-NEXT: ; use s[8:9] 11030; GFX90A-NEXT: ;;#ASMEND 11031; GFX90A-NEXT: s_setpc_b64 s[30:31] 11032; 11033; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_u_2: 11034; GFX940: ; %bb.0: 11035; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11036; GFX940-NEXT: ;;#ASMSTART 11037; GFX940-NEXT: ; def s[0:1] 11038; GFX940-NEXT: ;;#ASMEND 11039; GFX940-NEXT: ;;#ASMSTART 11040; GFX940-NEXT: ; def s[2:3] 11041; GFX940-NEXT: ;;#ASMEND 11042; GFX940-NEXT: s_lshl_b32 s9, s1, 16 11043; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 11044; GFX940-NEXT: ;;#ASMSTART 11045; GFX940-NEXT: ; use s[8:9] 11046; GFX940-NEXT: ;;#ASMEND 11047; GFX940-NEXT: s_setpc_b64 s[30:31] 11048 %vec0 = call <4 x i16> asm "; def $0", "=s"() 11049 %vec1 = call <4 x i16> asm "; def $0", "=s"() 11050 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 11051 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 11052 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 poison, i32 2> 11053 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 11054 ret void 11055} 11056 11057define void @s_shuffle_v4i16_v3i16__5_5_0_2() { 11058; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_5_0_2: 11059; GFX900: ; %bb.0: 11060; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11061; GFX900-NEXT: ;;#ASMSTART 11062; GFX900-NEXT: ; def s[4:5] 11063; GFX900-NEXT: ;;#ASMEND 11064; GFX900-NEXT: ;;#ASMSTART 11065; GFX900-NEXT: ; def s[6:7] 11066; GFX900-NEXT: ;;#ASMEND 11067; GFX900-NEXT: s_pack_ll_b32_b16 s9, s4, s5 11068; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s7 11069; GFX900-NEXT: ;;#ASMSTART 11070; GFX900-NEXT: ; use s[8:9] 11071; GFX900-NEXT: ;;#ASMEND 11072; GFX900-NEXT: s_setpc_b64 s[30:31] 
11073; 11074; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_5_0_2: 11075; GFX90A: ; %bb.0: 11076; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11077; GFX90A-NEXT: ;;#ASMSTART 11078; GFX90A-NEXT: ; def s[4:5] 11079; GFX90A-NEXT: ;;#ASMEND 11080; GFX90A-NEXT: ;;#ASMSTART 11081; GFX90A-NEXT: ; def s[6:7] 11082; GFX90A-NEXT: ;;#ASMEND 11083; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s4, s5 11084; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s7 11085; GFX90A-NEXT: ;;#ASMSTART 11086; GFX90A-NEXT: ; use s[8:9] 11087; GFX90A-NEXT: ;;#ASMEND 11088; GFX90A-NEXT: s_setpc_b64 s[30:31] 11089; 11090; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_0_2: 11091; GFX940: ; %bb.0: 11092; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11093; GFX940-NEXT: ;;#ASMSTART 11094; GFX940-NEXT: ; def s[0:1] 11095; GFX940-NEXT: ;;#ASMEND 11096; GFX940-NEXT: ;;#ASMSTART 11097; GFX940-NEXT: ; def s[2:3] 11098; GFX940-NEXT: ;;#ASMEND 11099; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s1 11100; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 11101; GFX940-NEXT: ;;#ASMSTART 11102; GFX940-NEXT: ; use s[8:9] 11103; GFX940-NEXT: ;;#ASMEND 11104; GFX940-NEXT: s_setpc_b64 s[30:31] 11105 %vec0 = call <4 x i16> asm "; def $0", "=s"() 11106 %vec1 = call <4 x i16> asm "; def $0", "=s"() 11107 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 11108 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 11109 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 0, i32 2> 11110 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 11111 ret void 11112} 11113 11114define void @s_shuffle_v4i16_v3i16__5_5_1_2() { 11115; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_5_1_2: 11116; GFX900: ; %bb.0: 11117; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11118; GFX900-NEXT: ;;#ASMSTART 11119; GFX900-NEXT: ; def s[4:5] 11120; GFX900-NEXT: ;;#ASMEND 11121; GFX900-NEXT: s_lshr_b32 s4, s4, 16 11122; GFX900-NEXT: 
;;#ASMSTART 11123; GFX900-NEXT: ; def s[6:7] 11124; GFX900-NEXT: ;;#ASMEND 11125; GFX900-NEXT: s_pack_ll_b32_b16 s9, s4, s5 11126; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s7 11127; GFX900-NEXT: ;;#ASMSTART 11128; GFX900-NEXT: ; use s[8:9] 11129; GFX900-NEXT: ;;#ASMEND 11130; GFX900-NEXT: s_setpc_b64 s[30:31] 11131; 11132; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_5_1_2: 11133; GFX90A: ; %bb.0: 11134; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11135; GFX90A-NEXT: ;;#ASMSTART 11136; GFX90A-NEXT: ; def s[4:5] 11137; GFX90A-NEXT: ;;#ASMEND 11138; GFX90A-NEXT: s_lshr_b32 s4, s4, 16 11139; GFX90A-NEXT: ;;#ASMSTART 11140; GFX90A-NEXT: ; def s[6:7] 11141; GFX90A-NEXT: ;;#ASMEND 11142; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s4, s5 11143; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s7 11144; GFX90A-NEXT: ;;#ASMSTART 11145; GFX90A-NEXT: ; use s[8:9] 11146; GFX90A-NEXT: ;;#ASMEND 11147; GFX90A-NEXT: s_setpc_b64 s[30:31] 11148; 11149; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_1_2: 11150; GFX940: ; %bb.0: 11151; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11152; GFX940-NEXT: ;;#ASMSTART 11153; GFX940-NEXT: ; def s[0:1] 11154; GFX940-NEXT: ;;#ASMEND 11155; GFX940-NEXT: s_lshr_b32 s0, s0, 16 11156; GFX940-NEXT: ;;#ASMSTART 11157; GFX940-NEXT: ; def s[2:3] 11158; GFX940-NEXT: ;;#ASMEND 11159; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s1 11160; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 11161; GFX940-NEXT: ;;#ASMSTART 11162; GFX940-NEXT: ; use s[8:9] 11163; GFX940-NEXT: ;;#ASMEND 11164; GFX940-NEXT: s_setpc_b64 s[30:31] 11165 %vec0 = call <4 x i16> asm "; def $0", "=s"() 11166 %vec1 = call <4 x i16> asm "; def $0", "=s"() 11167 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 11168 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 11169 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 1, i32 2> 11170 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 
x i16> %shuf) 11171 ret void 11172} 11173 11174define void @s_shuffle_v4i16_v3i16__5_5_3_2() { 11175; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_5_3_2: 11176; GFX900: ; %bb.0: 11177; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11178; GFX900-NEXT: ;;#ASMSTART 11179; GFX900-NEXT: ; def s[4:5] 11180; GFX900-NEXT: ;;#ASMEND 11181; GFX900-NEXT: ;;#ASMSTART 11182; GFX900-NEXT: ; def s[6:7] 11183; GFX900-NEXT: ;;#ASMEND 11184; GFX900-NEXT: s_pack_ll_b32_b16 s9, s6, s5 11185; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s7 11186; GFX900-NEXT: ;;#ASMSTART 11187; GFX900-NEXT: ; use s[8:9] 11188; GFX900-NEXT: ;;#ASMEND 11189; GFX900-NEXT: s_setpc_b64 s[30:31] 11190; 11191; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_5_3_2: 11192; GFX90A: ; %bb.0: 11193; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11194; GFX90A-NEXT: ;;#ASMSTART 11195; GFX90A-NEXT: ; def s[4:5] 11196; GFX90A-NEXT: ;;#ASMEND 11197; GFX90A-NEXT: ;;#ASMSTART 11198; GFX90A-NEXT: ; def s[6:7] 11199; GFX90A-NEXT: ;;#ASMEND 11200; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s6, s5 11201; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s7 11202; GFX90A-NEXT: ;;#ASMSTART 11203; GFX90A-NEXT: ; use s[8:9] 11204; GFX90A-NEXT: ;;#ASMEND 11205; GFX90A-NEXT: s_setpc_b64 s[30:31] 11206; 11207; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_3_2: 11208; GFX940: ; %bb.0: 11209; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11210; GFX940-NEXT: ;;#ASMSTART 11211; GFX940-NEXT: ; def s[0:1] 11212; GFX940-NEXT: ;;#ASMEND 11213; GFX940-NEXT: ;;#ASMSTART 11214; GFX940-NEXT: ; def s[2:3] 11215; GFX940-NEXT: ;;#ASMEND 11216; GFX940-NEXT: s_pack_ll_b32_b16 s9, s2, s1 11217; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 11218; GFX940-NEXT: ;;#ASMSTART 11219; GFX940-NEXT: ; use s[8:9] 11220; GFX940-NEXT: ;;#ASMEND 11221; GFX940-NEXT: s_setpc_b64 s[30:31] 11222 %vec0 = call <4 x i16> asm "; def $0", "=s"() 11223 %vec1 = call <4 x i16> asm "; def $0", "=s"() 11224 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 
11225 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 11226 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 3, i32 2> 11227 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 11228 ret void 11229} 11230 11231define void @s_shuffle_v4i16_v3i16__5_5_4_2() { 11232; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_5_4_2: 11233; GFX900: ; %bb.0: 11234; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11235; GFX900-NEXT: ;;#ASMSTART 11236; GFX900-NEXT: ; def s[4:5] 11237; GFX900-NEXT: ;;#ASMEND 11238; GFX900-NEXT: ;;#ASMSTART 11239; GFX900-NEXT: ; def s[6:7] 11240; GFX900-NEXT: ;;#ASMEND 11241; GFX900-NEXT: s_lshr_b32 s4, s6, 16 11242; GFX900-NEXT: s_pack_ll_b32_b16 s9, s4, s5 11243; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s7 11244; GFX900-NEXT: ;;#ASMSTART 11245; GFX900-NEXT: ; use s[8:9] 11246; GFX900-NEXT: ;;#ASMEND 11247; GFX900-NEXT: s_setpc_b64 s[30:31] 11248; 11249; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_5_4_2: 11250; GFX90A: ; %bb.0: 11251; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11252; GFX90A-NEXT: ;;#ASMSTART 11253; GFX90A-NEXT: ; def s[4:5] 11254; GFX90A-NEXT: ;;#ASMEND 11255; GFX90A-NEXT: ;;#ASMSTART 11256; GFX90A-NEXT: ; def s[6:7] 11257; GFX90A-NEXT: ;;#ASMEND 11258; GFX90A-NEXT: s_lshr_b32 s4, s6, 16 11259; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s4, s5 11260; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s7 11261; GFX90A-NEXT: ;;#ASMSTART 11262; GFX90A-NEXT: ; use s[8:9] 11263; GFX90A-NEXT: ;;#ASMEND 11264; GFX90A-NEXT: s_setpc_b64 s[30:31] 11265; 11266; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_4_2: 11267; GFX940: ; %bb.0: 11268; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11269; GFX940-NEXT: ;;#ASMSTART 11270; GFX940-NEXT: ; def s[0:1] 11271; GFX940-NEXT: ;;#ASMEND 11272; GFX940-NEXT: ;;#ASMSTART 11273; GFX940-NEXT: ; def s[2:3] 11274; GFX940-NEXT: ;;#ASMEND 11275; GFX940-NEXT: s_lshr_b32 s0, s2, 16 11276; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s1 
11277; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 11278; GFX940-NEXT: ;;#ASMSTART 11279; GFX940-NEXT: ; use s[8:9] 11280; GFX940-NEXT: ;;#ASMEND 11281; GFX940-NEXT: s_setpc_b64 s[30:31] 11282 %vec0 = call <4 x i16> asm "; def $0", "=s"() 11283 %vec1 = call <4 x i16> asm "; def $0", "=s"() 11284 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 11285 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 11286 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 4, i32 2> 11287 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 11288 ret void 11289} 11290 11291define void @s_shuffle_v4i16_v3i16__u_3_3_3() { 11292; GFX9-LABEL: s_shuffle_v4i16_v3i16__u_3_3_3: 11293; GFX9: ; %bb.0: 11294; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11295; GFX9-NEXT: ;;#ASMSTART 11296; GFX9-NEXT: ; use s[8:9] 11297; GFX9-NEXT: ;;#ASMEND 11298; GFX9-NEXT: s_setpc_b64 s[30:31] 11299 %vec0 = call <4 x i16> asm "; def $0", "=s"() 11300 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 11301 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> <i32 poison, i32 3, i32 3, i32 3> 11302 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 11303 ret void 11304} 11305 11306define void @s_shuffle_v4i16_v3i16__0_3_3_3() { 11307; GFX900-LABEL: s_shuffle_v4i16_v3i16__0_3_3_3: 11308; GFX900: ; %bb.0: 11309; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11310; GFX900-NEXT: ;;#ASMSTART 11311; GFX900-NEXT: ; def s[8:9] 11312; GFX900-NEXT: ;;#ASMEND 11313; GFX900-NEXT: ;;#ASMSTART 11314; GFX900-NEXT: ; use s[8:9] 11315; GFX900-NEXT: ;;#ASMEND 11316; GFX900-NEXT: s_setpc_b64 s[30:31] 11317; 11318; GFX90A-LABEL: s_shuffle_v4i16_v3i16__0_3_3_3: 11319; GFX90A: ; %bb.0: 11320; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11321; GFX90A-NEXT: ;;#ASMSTART 11322; GFX90A-NEXT: ; def s[8:9] 
11323; GFX90A-NEXT: ;;#ASMEND 11324; GFX90A-NEXT: ;;#ASMSTART 11325; GFX90A-NEXT: ; use s[8:9] 11326; GFX90A-NEXT: ;;#ASMEND 11327; GFX90A-NEXT: s_setpc_b64 s[30:31] 11328; 11329; GFX940-LABEL: s_shuffle_v4i16_v3i16__0_3_3_3: 11330; GFX940: ; %bb.0: 11331; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11332; GFX940-NEXT: ;;#ASMSTART 11333; GFX940-NEXT: ; def s[8:9] 11334; GFX940-NEXT: ;;#ASMEND 11335; GFX940-NEXT: s_nop 0 11336; GFX940-NEXT: ;;#ASMSTART 11337; GFX940-NEXT: ; use s[8:9] 11338; GFX940-NEXT: ;;#ASMEND 11339; GFX940-NEXT: s_setpc_b64 s[30:31] 11340 %vec0 = call <4 x i16> asm "; def $0", "=s"() 11341 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 11342 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> <i32 0, i32 3, i32 3, i32 3> 11343 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 11344 ret void 11345} 11346 11347define void @s_shuffle_v4i16_v3i16__1_3_3_3() { 11348; GFX900-LABEL: s_shuffle_v4i16_v3i16__1_3_3_3: 11349; GFX900: ; %bb.0: 11350; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11351; GFX900-NEXT: ;;#ASMSTART 11352; GFX900-NEXT: ; def s[4:5] 11353; GFX900-NEXT: ;;#ASMEND 11354; GFX900-NEXT: s_lshr_b32 s8, s4, 16 11355; GFX900-NEXT: ;;#ASMSTART 11356; GFX900-NEXT: ; use s[8:9] 11357; GFX900-NEXT: ;;#ASMEND 11358; GFX900-NEXT: s_setpc_b64 s[30:31] 11359; 11360; GFX90A-LABEL: s_shuffle_v4i16_v3i16__1_3_3_3: 11361; GFX90A: ; %bb.0: 11362; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11363; GFX90A-NEXT: ;;#ASMSTART 11364; GFX90A-NEXT: ; def s[4:5] 11365; GFX90A-NEXT: ;;#ASMEND 11366; GFX90A-NEXT: s_lshr_b32 s8, s4, 16 11367; GFX90A-NEXT: ;;#ASMSTART 11368; GFX90A-NEXT: ; use s[8:9] 11369; GFX90A-NEXT: ;;#ASMEND 11370; GFX90A-NEXT: s_setpc_b64 s[30:31] 11371; 11372; GFX940-LABEL: s_shuffle_v4i16_v3i16__1_3_3_3: 11373; GFX940: ; %bb.0: 11374; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11375; GFX940-NEXT: ;;#ASMSTART 11376; 
GFX940-NEXT: ; def s[0:1] 11377; GFX940-NEXT: ;;#ASMEND 11378; GFX940-NEXT: s_lshr_b32 s8, s0, 16 11379; GFX940-NEXT: ;;#ASMSTART 11380; GFX940-NEXT: ; use s[8:9] 11381; GFX940-NEXT: ;;#ASMEND 11382; GFX940-NEXT: s_setpc_b64 s[30:31] 11383 %vec0 = call <4 x i16> asm "; def $0", "=s"() 11384 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 11385 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> <i32 1, i32 3, i32 3, i32 3> 11386 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 11387 ret void 11388} 11389 11390define void @s_shuffle_v4i16_v3i16__2_3_3_3() { 11391; GFX900-LABEL: s_shuffle_v4i16_v3i16__2_3_3_3: 11392; GFX900: ; %bb.0: 11393; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11394; GFX900-NEXT: ;;#ASMSTART 11395; GFX900-NEXT: ; def s[4:5] 11396; GFX900-NEXT: ;;#ASMEND 11397; GFX900-NEXT: s_mov_b32 s8, s5 11398; GFX900-NEXT: ;;#ASMSTART 11399; GFX900-NEXT: ; use s[8:9] 11400; GFX900-NEXT: ;;#ASMEND 11401; GFX900-NEXT: s_setpc_b64 s[30:31] 11402; 11403; GFX90A-LABEL: s_shuffle_v4i16_v3i16__2_3_3_3: 11404; GFX90A: ; %bb.0: 11405; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11406; GFX90A-NEXT: ;;#ASMSTART 11407; GFX90A-NEXT: ; def s[4:5] 11408; GFX90A-NEXT: ;;#ASMEND 11409; GFX90A-NEXT: s_mov_b32 s8, s5 11410; GFX90A-NEXT: ;;#ASMSTART 11411; GFX90A-NEXT: ; use s[8:9] 11412; GFX90A-NEXT: ;;#ASMEND 11413; GFX90A-NEXT: s_setpc_b64 s[30:31] 11414; 11415; GFX940-LABEL: s_shuffle_v4i16_v3i16__2_3_3_3: 11416; GFX940: ; %bb.0: 11417; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11418; GFX940-NEXT: ;;#ASMSTART 11419; GFX940-NEXT: ; def s[0:1] 11420; GFX940-NEXT: ;;#ASMEND 11421; GFX940-NEXT: s_mov_b32 s8, s1 11422; GFX940-NEXT: ;;#ASMSTART 11423; GFX940-NEXT: ; use s[8:9] 11424; GFX940-NEXT: ;;#ASMEND 11425; GFX940-NEXT: s_setpc_b64 s[30:31] 11426 %vec0 = call <4 x i16> asm "; def $0", "=s"() 11427 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x 
i32> <i32 0, i32 1, i32 2> 11428 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> <i32 2, i32 3, i32 3, i32 3> 11429 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 11430 ret void 11431} 11432 11433define void @s_shuffle_v4i16_v3i16__3_3_3_3() { 11434; GFX9-LABEL: s_shuffle_v4i16_v3i16__3_3_3_3: 11435; GFX9: ; %bb.0: 11436; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11437; GFX9-NEXT: ;;#ASMSTART 11438; GFX9-NEXT: ; use s[8:9] 11439; GFX9-NEXT: ;;#ASMEND 11440; GFX9-NEXT: s_setpc_b64 s[30:31] 11441 %vec0 = call <4 x i16> asm "; def $0", "=s"() 11442 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 11443 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 11444 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 11445 ret void 11446} 11447 11448define void @s_shuffle_v4i16_v3i16__4_3_3_3() { 11449; GFX900-LABEL: s_shuffle_v4i16_v3i16__4_3_3_3: 11450; GFX900: ; %bb.0: 11451; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11452; GFX900-NEXT: ;;#ASMSTART 11453; GFX900-NEXT: ; def s[4:5] 11454; GFX900-NEXT: ;;#ASMEND 11455; GFX900-NEXT: s_lshr_b32 s5, s4, 16 11456; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4 11457; GFX900-NEXT: s_pack_ll_b32_b16 s9, s4, s4 11458; GFX900-NEXT: ;;#ASMSTART 11459; GFX900-NEXT: ; use s[8:9] 11460; GFX900-NEXT: ;;#ASMEND 11461; GFX900-NEXT: s_setpc_b64 s[30:31] 11462; 11463; GFX90A-LABEL: s_shuffle_v4i16_v3i16__4_3_3_3: 11464; GFX90A: ; %bb.0: 11465; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11466; GFX90A-NEXT: ;;#ASMSTART 11467; GFX90A-NEXT: ; def s[4:5] 11468; GFX90A-NEXT: ;;#ASMEND 11469; GFX90A-NEXT: s_lshr_b32 s5, s4, 16 11470; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4 11471; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s4, s4 11472; GFX90A-NEXT: ;;#ASMSTART 11473; GFX90A-NEXT: ; use s[8:9] 11474; GFX90A-NEXT: ;;#ASMEND 11475; GFX90A-NEXT: s_setpc_b64 s[30:31] 11476; 11477; 
GFX940-LABEL: s_shuffle_v4i16_v3i16__4_3_3_3: 11478; GFX940: ; %bb.0: 11479; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11480; GFX940-NEXT: ;;#ASMSTART 11481; GFX940-NEXT: ; def s[0:1] 11482; GFX940-NEXT: ;;#ASMEND 11483; GFX940-NEXT: s_lshr_b32 s1, s0, 16 11484; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 11485; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 11486; GFX940-NEXT: ;;#ASMSTART 11487; GFX940-NEXT: ; use s[8:9] 11488; GFX940-NEXT: ;;#ASMEND 11489; GFX940-NEXT: s_setpc_b64 s[30:31] 11490 %vec0 = call <4 x i16> asm "; def $0", "=s"() 11491 %vec1 = call <4 x i16> asm "; def $0", "=s"() 11492 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 11493 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 11494 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 4, i32 3, i32 3, i32 3> 11495 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 11496 ret void 11497} 11498 11499define void @s_shuffle_v4i16_v3i16__5_3_3_3() { 11500; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_3_3_3: 11501; GFX900: ; %bb.0: 11502; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11503; GFX900-NEXT: ;;#ASMSTART 11504; GFX900-NEXT: ; def s[4:5] 11505; GFX900-NEXT: ;;#ASMEND 11506; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4 11507; GFX900-NEXT: s_pack_ll_b32_b16 s9, s4, s4 11508; GFX900-NEXT: ;;#ASMSTART 11509; GFX900-NEXT: ; use s[8:9] 11510; GFX900-NEXT: ;;#ASMEND 11511; GFX900-NEXT: s_setpc_b64 s[30:31] 11512; 11513; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_3_3_3: 11514; GFX90A: ; %bb.0: 11515; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11516; GFX90A-NEXT: ;;#ASMSTART 11517; GFX90A-NEXT: ; def s[4:5] 11518; GFX90A-NEXT: ;;#ASMEND 11519; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4 11520; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s4, s4 11521; GFX90A-NEXT: ;;#ASMSTART 11522; GFX90A-NEXT: ; use s[8:9] 11523; GFX90A-NEXT: ;;#ASMEND 11524; GFX90A-NEXT: s_setpc_b64 
s[30:31] 11525; 11526; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_3_3_3: 11527; GFX940: ; %bb.0: 11528; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11529; GFX940-NEXT: ;;#ASMSTART 11530; GFX940-NEXT: ; def s[0:1] 11531; GFX940-NEXT: ;;#ASMEND 11532; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 11533; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 11534; GFX940-NEXT: ;;#ASMSTART 11535; GFX940-NEXT: ; use s[8:9] 11536; GFX940-NEXT: ;;#ASMEND 11537; GFX940-NEXT: s_setpc_b64 s[30:31] 11538 %vec0 = call <4 x i16> asm "; def $0", "=s"() 11539 %vec1 = call <4 x i16> asm "; def $0", "=s"() 11540 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 11541 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 11542 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 3, i32 3, i32 3> 11543 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 11544 ret void 11545} 11546 11547define void @s_shuffle_v4i16_v3i16__5_u_3_3() { 11548; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_u_3_3: 11549; GFX900: ; %bb.0: 11550; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11551; GFX900-NEXT: ;;#ASMSTART 11552; GFX900-NEXT: ; def s[4:5] 11553; GFX900-NEXT: ;;#ASMEND 11554; GFX900-NEXT: s_pack_ll_b32_b16 s9, s4, s4 11555; GFX900-NEXT: s_mov_b32 s8, s5 11556; GFX900-NEXT: ;;#ASMSTART 11557; GFX900-NEXT: ; use s[8:9] 11558; GFX900-NEXT: ;;#ASMEND 11559; GFX900-NEXT: s_setpc_b64 s[30:31] 11560; 11561; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_u_3_3: 11562; GFX90A: ; %bb.0: 11563; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11564; GFX90A-NEXT: ;;#ASMSTART 11565; GFX90A-NEXT: ; def s[4:5] 11566; GFX90A-NEXT: ;;#ASMEND 11567; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s4, s4 11568; GFX90A-NEXT: s_mov_b32 s8, s5 11569; GFX90A-NEXT: ;;#ASMSTART 11570; GFX90A-NEXT: ; use s[8:9] 11571; GFX90A-NEXT: ;;#ASMEND 11572; GFX90A-NEXT: s_setpc_b64 s[30:31] 11573; 11574; GFX940-LABEL: 
s_shuffle_v4i16_v3i16__5_u_3_3: 11575; GFX940: ; %bb.0: 11576; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11577; GFX940-NEXT: ;;#ASMSTART 11578; GFX940-NEXT: ; def s[0:1] 11579; GFX940-NEXT: ;;#ASMEND 11580; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 11581; GFX940-NEXT: s_mov_b32 s8, s1 11582; GFX940-NEXT: ;;#ASMSTART 11583; GFX940-NEXT: ; use s[8:9] 11584; GFX940-NEXT: ;;#ASMEND 11585; GFX940-NEXT: s_setpc_b64 s[30:31] 11586 %vec0 = call <4 x i16> asm "; def $0", "=s"() 11587 %vec1 = call <4 x i16> asm "; def $0", "=s"() 11588 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 11589 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 11590 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 poison, i32 3, i32 3> 11591 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 11592 ret void 11593} 11594 11595define void @s_shuffle_v4i16_v3i16__5_0_3_3() { 11596; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_0_3_3: 11597; GFX900: ; %bb.0: 11598; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11599; GFX900-NEXT: ;;#ASMSTART 11600; GFX900-NEXT: ; def s[4:5] 11601; GFX900-NEXT: ;;#ASMEND 11602; GFX900-NEXT: ;;#ASMSTART 11603; GFX900-NEXT: ; def s[6:7] 11604; GFX900-NEXT: ;;#ASMEND 11605; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s4 11606; GFX900-NEXT: s_pack_ll_b32_b16 s9, s6, s6 11607; GFX900-NEXT: ;;#ASMSTART 11608; GFX900-NEXT: ; use s[8:9] 11609; GFX900-NEXT: ;;#ASMEND 11610; GFX900-NEXT: s_setpc_b64 s[30:31] 11611; 11612; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_0_3_3: 11613; GFX90A: ; %bb.0: 11614; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11615; GFX90A-NEXT: ;;#ASMSTART 11616; GFX90A-NEXT: ; def s[4:5] 11617; GFX90A-NEXT: ;;#ASMEND 11618; GFX90A-NEXT: ;;#ASMSTART 11619; GFX90A-NEXT: ; def s[6:7] 11620; GFX90A-NEXT: ;;#ASMEND 11621; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s4 11622; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s6, s6 
11623; GFX90A-NEXT: ;;#ASMSTART 11624; GFX90A-NEXT: ; use s[8:9] 11625; GFX90A-NEXT: ;;#ASMEND 11626; GFX90A-NEXT: s_setpc_b64 s[30:31] 11627; 11628; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_0_3_3: 11629; GFX940: ; %bb.0: 11630; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11631; GFX940-NEXT: ;;#ASMSTART 11632; GFX940-NEXT: ; def s[0:1] 11633; GFX940-NEXT: ;;#ASMEND 11634; GFX940-NEXT: ;;#ASMSTART 11635; GFX940-NEXT: ; def s[2:3] 11636; GFX940-NEXT: ;;#ASMEND 11637; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 11638; GFX940-NEXT: s_pack_ll_b32_b16 s9, s2, s2 11639; GFX940-NEXT: ;;#ASMSTART 11640; GFX940-NEXT: ; use s[8:9] 11641; GFX940-NEXT: ;;#ASMEND 11642; GFX940-NEXT: s_setpc_b64 s[30:31] 11643 %vec0 = call <4 x i16> asm "; def $0", "=s"() 11644 %vec1 = call <4 x i16> asm "; def $0", "=s"() 11645 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 11646 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 11647 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 0, i32 3, i32 3> 11648 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 11649 ret void 11650} 11651 11652define void @s_shuffle_v4i16_v3i16__5_1_3_3() { 11653; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_1_3_3: 11654; GFX900: ; %bb.0: 11655; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11656; GFX900-NEXT: ;;#ASMSTART 11657; GFX900-NEXT: ; def s[4:5] 11658; GFX900-NEXT: ;;#ASMEND 11659; GFX900-NEXT: ;;#ASMSTART 11660; GFX900-NEXT: ; def s[6:7] 11661; GFX900-NEXT: ;;#ASMEND 11662; GFX900-NEXT: s_pack_lh_b32_b16 s8, s7, s4 11663; GFX900-NEXT: s_pack_ll_b32_b16 s9, s6, s6 11664; GFX900-NEXT: ;;#ASMSTART 11665; GFX900-NEXT: ; use s[8:9] 11666; GFX900-NEXT: ;;#ASMEND 11667; GFX900-NEXT: s_setpc_b64 s[30:31] 11668; 11669; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_1_3_3: 11670; GFX90A: ; %bb.0: 11671; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11672; GFX90A-NEXT: 
;;#ASMSTART 11673; GFX90A-NEXT: ; def s[4:5] 11674; GFX90A-NEXT: ;;#ASMEND 11675; GFX90A-NEXT: ;;#ASMSTART 11676; GFX90A-NEXT: ; def s[6:7] 11677; GFX90A-NEXT: ;;#ASMEND 11678; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s7, s4 11679; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s6, s6 11680; GFX90A-NEXT: ;;#ASMSTART 11681; GFX90A-NEXT: ; use s[8:9] 11682; GFX90A-NEXT: ;;#ASMEND 11683; GFX90A-NEXT: s_setpc_b64 s[30:31] 11684; 11685; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_1_3_3: 11686; GFX940: ; %bb.0: 11687; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11688; GFX940-NEXT: ;;#ASMSTART 11689; GFX940-NEXT: ; def s[0:1] 11690; GFX940-NEXT: ;;#ASMEND 11691; GFX940-NEXT: ;;#ASMSTART 11692; GFX940-NEXT: ; def s[2:3] 11693; GFX940-NEXT: ;;#ASMEND 11694; GFX940-NEXT: s_pack_lh_b32_b16 s8, s3, s0 11695; GFX940-NEXT: s_pack_ll_b32_b16 s9, s2, s2 11696; GFX940-NEXT: ;;#ASMSTART 11697; GFX940-NEXT: ; use s[8:9] 11698; GFX940-NEXT: ;;#ASMEND 11699; GFX940-NEXT: s_setpc_b64 s[30:31] 11700 %vec0 = call <4 x i16> asm "; def $0", "=s"() 11701 %vec1 = call <4 x i16> asm "; def $0", "=s"() 11702 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 11703 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 11704 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 1, i32 3, i32 3> 11705 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 11706 ret void 11707} 11708 11709define void @s_shuffle_v4i16_v3i16__5_2_3_3() { 11710; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_2_3_3: 11711; GFX900: ; %bb.0: 11712; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11713; GFX900-NEXT: ;;#ASMSTART 11714; GFX900-NEXT: ; def s[4:5] 11715; GFX900-NEXT: ;;#ASMEND 11716; GFX900-NEXT: ;;#ASMSTART 11717; GFX900-NEXT: ; def s[6:7] 11718; GFX900-NEXT: ;;#ASMEND 11719; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s5 11720; GFX900-NEXT: s_pack_ll_b32_b16 s9, s6, s6 11721; GFX900-NEXT: ;;#ASMSTART 
11722; GFX900-NEXT: ; use s[8:9] 11723; GFX900-NEXT: ;;#ASMEND 11724; GFX900-NEXT: s_setpc_b64 s[30:31] 11725; 11726; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_2_3_3: 11727; GFX90A: ; %bb.0: 11728; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11729; GFX90A-NEXT: ;;#ASMSTART 11730; GFX90A-NEXT: ; def s[4:5] 11731; GFX90A-NEXT: ;;#ASMEND 11732; GFX90A-NEXT: ;;#ASMSTART 11733; GFX90A-NEXT: ; def s[6:7] 11734; GFX90A-NEXT: ;;#ASMEND 11735; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s5 11736; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s6, s6 11737; GFX90A-NEXT: ;;#ASMSTART 11738; GFX90A-NEXT: ; use s[8:9] 11739; GFX90A-NEXT: ;;#ASMEND 11740; GFX90A-NEXT: s_setpc_b64 s[30:31] 11741; 11742; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_2_3_3: 11743; GFX940: ; %bb.0: 11744; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11745; GFX940-NEXT: ;;#ASMSTART 11746; GFX940-NEXT: ; def s[0:1] 11747; GFX940-NEXT: ;;#ASMEND 11748; GFX940-NEXT: ;;#ASMSTART 11749; GFX940-NEXT: ; def s[2:3] 11750; GFX940-NEXT: ;;#ASMEND 11751; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s1 11752; GFX940-NEXT: s_pack_ll_b32_b16 s9, s2, s2 11753; GFX940-NEXT: ;;#ASMSTART 11754; GFX940-NEXT: ; use s[8:9] 11755; GFX940-NEXT: ;;#ASMEND 11756; GFX940-NEXT: s_setpc_b64 s[30:31] 11757 %vec0 = call <4 x i16> asm "; def $0", "=s"() 11758 %vec1 = call <4 x i16> asm "; def $0", "=s"() 11759 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 11760 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 11761 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 2, i32 3, i32 3> 11762 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 11763 ret void 11764} 11765 11766define void @s_shuffle_v4i16_v3i16__5_4_3_3() { 11767; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_4_3_3: 11768; GFX900: ; %bb.0: 11769; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11770; GFX900-NEXT: ;;#ASMSTART 11771; GFX900-NEXT: ; def 
s[4:5] 11772; GFX900-NEXT: ;;#ASMEND 11773; GFX900-NEXT: s_pack_lh_b32_b16 s8, s5, s4 11774; GFX900-NEXT: s_pack_ll_b32_b16 s9, s4, s4 11775; GFX900-NEXT: ;;#ASMSTART 11776; GFX900-NEXT: ; use s[8:9] 11777; GFX900-NEXT: ;;#ASMEND 11778; GFX900-NEXT: s_setpc_b64 s[30:31] 11779; 11780; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_4_3_3: 11781; GFX90A: ; %bb.0: 11782; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11783; GFX90A-NEXT: ;;#ASMSTART 11784; GFX90A-NEXT: ; def s[4:5] 11785; GFX90A-NEXT: ;;#ASMEND 11786; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s5, s4 11787; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s4, s4 11788; GFX90A-NEXT: ;;#ASMSTART 11789; GFX90A-NEXT: ; use s[8:9] 11790; GFX90A-NEXT: ;;#ASMEND 11791; GFX90A-NEXT: s_setpc_b64 s[30:31] 11792; 11793; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_4_3_3: 11794; GFX940: ; %bb.0: 11795; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11796; GFX940-NEXT: ;;#ASMSTART 11797; GFX940-NEXT: ; def s[0:1] 11798; GFX940-NEXT: ;;#ASMEND 11799; GFX940-NEXT: s_pack_lh_b32_b16 s8, s1, s0 11800; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 11801; GFX940-NEXT: ;;#ASMSTART 11802; GFX940-NEXT: ; use s[8:9] 11803; GFX940-NEXT: ;;#ASMEND 11804; GFX940-NEXT: s_setpc_b64 s[30:31] 11805 %vec0 = call <4 x i16> asm "; def $0", "=s"() 11806 %vec1 = call <4 x i16> asm "; def $0", "=s"() 11807 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 11808 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 11809 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 4, i32 3, i32 3> 11810 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 11811 ret void 11812} 11813 11814define void @s_shuffle_v4i16_v3i16__5_5_3_3() { 11815; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_5_3_3: 11816; GFX900: ; %bb.0: 11817; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11818; GFX900-NEXT: ;;#ASMSTART 11819; GFX900-NEXT: ; def s[4:5] 11820; 
GFX900-NEXT: ;;#ASMEND 11821; GFX900-NEXT: s_pack_ll_b32_b16 s9, s4, s4 11822; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s5 11823; GFX900-NEXT: ;;#ASMSTART 11824; GFX900-NEXT: ; use s[8:9] 11825; GFX900-NEXT: ;;#ASMEND 11826; GFX900-NEXT: s_setpc_b64 s[30:31] 11827; 11828; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_5_3_3: 11829; GFX90A: ; %bb.0: 11830; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11831; GFX90A-NEXT: ;;#ASMSTART 11832; GFX90A-NEXT: ; def s[4:5] 11833; GFX90A-NEXT: ;;#ASMEND 11834; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s4, s4 11835; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s5 11836; GFX90A-NEXT: ;;#ASMSTART 11837; GFX90A-NEXT: ; use s[8:9] 11838; GFX90A-NEXT: ;;#ASMEND 11839; GFX90A-NEXT: s_setpc_b64 s[30:31] 11840; 11841; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_3_3: 11842; GFX940: ; %bb.0: 11843; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11844; GFX940-NEXT: ;;#ASMSTART 11845; GFX940-NEXT: ; def s[0:1] 11846; GFX940-NEXT: ;;#ASMEND 11847; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 11848; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 11849; GFX940-NEXT: ;;#ASMSTART 11850; GFX940-NEXT: ; use s[8:9] 11851; GFX940-NEXT: ;;#ASMEND 11852; GFX940-NEXT: s_setpc_b64 s[30:31] 11853 %vec0 = call <4 x i16> asm "; def $0", "=s"() 11854 %vec1 = call <4 x i16> asm "; def $0", "=s"() 11855 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 11856 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 11857 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 3, i32 3> 11858 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 11859 ret void 11860} 11861 11862define void @s_shuffle_v4i16_v3i16__5_5_u_3() { 11863; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_5_u_3: 11864; GFX900: ; %bb.0: 11865; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11866; GFX900-NEXT: ;;#ASMSTART 11867; GFX900-NEXT: ; def s[4:5] 11868; GFX900-NEXT: ;;#ASMEND 
11869; GFX900-NEXT: s_lshl_b32 s9, s4, 16 11870; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s5 11871; GFX900-NEXT: ;;#ASMSTART 11872; GFX900-NEXT: ; use s[8:9] 11873; GFX900-NEXT: ;;#ASMEND 11874; GFX900-NEXT: s_setpc_b64 s[30:31] 11875; 11876; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_5_u_3: 11877; GFX90A: ; %bb.0: 11878; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11879; GFX90A-NEXT: ;;#ASMSTART 11880; GFX90A-NEXT: ; def s[4:5] 11881; GFX90A-NEXT: ;;#ASMEND 11882; GFX90A-NEXT: s_lshl_b32 s9, s4, 16 11883; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s5 11884; GFX90A-NEXT: ;;#ASMSTART 11885; GFX90A-NEXT: ; use s[8:9] 11886; GFX90A-NEXT: ;;#ASMEND 11887; GFX90A-NEXT: s_setpc_b64 s[30:31] 11888; 11889; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_u_3: 11890; GFX940: ; %bb.0: 11891; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11892; GFX940-NEXT: ;;#ASMSTART 11893; GFX940-NEXT: ; def s[0:1] 11894; GFX940-NEXT: ;;#ASMEND 11895; GFX940-NEXT: s_lshl_b32 s9, s0, 16 11896; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 11897; GFX940-NEXT: ;;#ASMSTART 11898; GFX940-NEXT: ; use s[8:9] 11899; GFX940-NEXT: ;;#ASMEND 11900; GFX940-NEXT: s_setpc_b64 s[30:31] 11901 %vec0 = call <4 x i16> asm "; def $0", "=s"() 11902 %vec1 = call <4 x i16> asm "; def $0", "=s"() 11903 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 11904 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 11905 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 poison, i32 3> 11906 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 11907 ret void 11908} 11909 11910define void @s_shuffle_v4i16_v3i16__5_5_0_3() { 11911; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_5_0_3: 11912; GFX900: ; %bb.0: 11913; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11914; GFX900-NEXT: ;;#ASMSTART 11915; GFX900-NEXT: ; def s[4:5] 11916; GFX900-NEXT: ;;#ASMEND 11917; GFX900-NEXT: ;;#ASMSTART 11918; 
GFX900-NEXT: ; def s[6:7] 11919; GFX900-NEXT: ;;#ASMEND 11920; GFX900-NEXT: s_pack_ll_b32_b16 s9, s4, s6 11921; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s7 11922; GFX900-NEXT: ;;#ASMSTART 11923; GFX900-NEXT: ; use s[8:9] 11924; GFX900-NEXT: ;;#ASMEND 11925; GFX900-NEXT: s_setpc_b64 s[30:31] 11926; 11927; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_5_0_3: 11928; GFX90A: ; %bb.0: 11929; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11930; GFX90A-NEXT: ;;#ASMSTART 11931; GFX90A-NEXT: ; def s[4:5] 11932; GFX90A-NEXT: ;;#ASMEND 11933; GFX90A-NEXT: ;;#ASMSTART 11934; GFX90A-NEXT: ; def s[6:7] 11935; GFX90A-NEXT: ;;#ASMEND 11936; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s4, s6 11937; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s7 11938; GFX90A-NEXT: ;;#ASMSTART 11939; GFX90A-NEXT: ; use s[8:9] 11940; GFX90A-NEXT: ;;#ASMEND 11941; GFX90A-NEXT: s_setpc_b64 s[30:31] 11942; 11943; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_0_3: 11944; GFX940: ; %bb.0: 11945; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11946; GFX940-NEXT: ;;#ASMSTART 11947; GFX940-NEXT: ; def s[0:1] 11948; GFX940-NEXT: ;;#ASMEND 11949; GFX940-NEXT: ;;#ASMSTART 11950; GFX940-NEXT: ; def s[2:3] 11951; GFX940-NEXT: ;;#ASMEND 11952; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s2 11953; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 11954; GFX940-NEXT: ;;#ASMSTART 11955; GFX940-NEXT: ; use s[8:9] 11956; GFX940-NEXT: ;;#ASMEND 11957; GFX940-NEXT: s_setpc_b64 s[30:31] 11958 %vec0 = call <4 x i16> asm "; def $0", "=s"() 11959 %vec1 = call <4 x i16> asm "; def $0", "=s"() 11960 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 11961 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 11962 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 0, i32 3> 11963 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 11964 ret void 11965} 11966 11967define void @s_shuffle_v4i16_v3i16__5_5_1_3() { 11968; 
GFX900-LABEL: s_shuffle_v4i16_v3i16__5_5_1_3: 11969; GFX900: ; %bb.0: 11970; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11971; GFX900-NEXT: ;;#ASMSTART 11972; GFX900-NEXT: ; def s[4:5] 11973; GFX900-NEXT: ;;#ASMEND 11974; GFX900-NEXT: s_lshr_b32 s4, s4, 16 11975; GFX900-NEXT: ;;#ASMSTART 11976; GFX900-NEXT: ; def s[6:7] 11977; GFX900-NEXT: ;;#ASMEND 11978; GFX900-NEXT: s_pack_ll_b32_b16 s9, s4, s6 11979; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s7 11980; GFX900-NEXT: ;;#ASMSTART 11981; GFX900-NEXT: ; use s[8:9] 11982; GFX900-NEXT: ;;#ASMEND 11983; GFX900-NEXT: s_setpc_b64 s[30:31] 11984; 11985; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_5_1_3: 11986; GFX90A: ; %bb.0: 11987; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11988; GFX90A-NEXT: ;;#ASMSTART 11989; GFX90A-NEXT: ; def s[4:5] 11990; GFX90A-NEXT: ;;#ASMEND 11991; GFX90A-NEXT: s_lshr_b32 s4, s4, 16 11992; GFX90A-NEXT: ;;#ASMSTART 11993; GFX90A-NEXT: ; def s[6:7] 11994; GFX90A-NEXT: ;;#ASMEND 11995; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s4, s6 11996; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s7 11997; GFX90A-NEXT: ;;#ASMSTART 11998; GFX90A-NEXT: ; use s[8:9] 11999; GFX90A-NEXT: ;;#ASMEND 12000; GFX90A-NEXT: s_setpc_b64 s[30:31] 12001; 12002; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_1_3: 12003; GFX940: ; %bb.0: 12004; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12005; GFX940-NEXT: ;;#ASMSTART 12006; GFX940-NEXT: ; def s[0:1] 12007; GFX940-NEXT: ;;#ASMEND 12008; GFX940-NEXT: s_lshr_b32 s0, s0, 16 12009; GFX940-NEXT: ;;#ASMSTART 12010; GFX940-NEXT: ; def s[2:3] 12011; GFX940-NEXT: ;;#ASMEND 12012; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s2 12013; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 12014; GFX940-NEXT: ;;#ASMSTART 12015; GFX940-NEXT: ; use s[8:9] 12016; GFX940-NEXT: ;;#ASMEND 12017; GFX940-NEXT: s_setpc_b64 s[30:31] 12018 %vec0 = call <4 x i16> asm "; def $0", "=s"() 12019 %vec1 = call <4 x i16> asm "; def $0", "=s"() 12020 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> 
<i32 0, i32 1, i32 2> 12021 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 12022 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 1, i32 3> 12023 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 12024 ret void 12025} 12026 12027define void @s_shuffle_v4i16_v3i16__5_5_2_3() { 12028; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_5_2_3: 12029; GFX900: ; %bb.0: 12030; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12031; GFX900-NEXT: ;;#ASMSTART 12032; GFX900-NEXT: ; def s[4:5] 12033; GFX900-NEXT: ;;#ASMEND 12034; GFX900-NEXT: ;;#ASMSTART 12035; GFX900-NEXT: ; def s[6:7] 12036; GFX900-NEXT: ;;#ASMEND 12037; GFX900-NEXT: s_pack_ll_b32_b16 s9, s5, s6 12038; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s7 12039; GFX900-NEXT: ;;#ASMSTART 12040; GFX900-NEXT: ; use s[8:9] 12041; GFX900-NEXT: ;;#ASMEND 12042; GFX900-NEXT: s_setpc_b64 s[30:31] 12043; 12044; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_5_2_3: 12045; GFX90A: ; %bb.0: 12046; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12047; GFX90A-NEXT: ;;#ASMSTART 12048; GFX90A-NEXT: ; def s[4:5] 12049; GFX90A-NEXT: ;;#ASMEND 12050; GFX90A-NEXT: ;;#ASMSTART 12051; GFX90A-NEXT: ; def s[6:7] 12052; GFX90A-NEXT: ;;#ASMEND 12053; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s5, s6 12054; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s7 12055; GFX90A-NEXT: ;;#ASMSTART 12056; GFX90A-NEXT: ; use s[8:9] 12057; GFX90A-NEXT: ;;#ASMEND 12058; GFX90A-NEXT: s_setpc_b64 s[30:31] 12059; 12060; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_2_3: 12061; GFX940: ; %bb.0: 12062; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12063; GFX940-NEXT: ;;#ASMSTART 12064; GFX940-NEXT: ; def s[0:1] 12065; GFX940-NEXT: ;;#ASMEND 12066; GFX940-NEXT: ;;#ASMSTART 12067; GFX940-NEXT: ; def s[2:3] 12068; GFX940-NEXT: ;;#ASMEND 12069; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s2 12070; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 12071; GFX940-NEXT: ;;#ASMSTART 12072; GFX940-NEXT: ; 
use s[8:9] 12073; GFX940-NEXT: ;;#ASMEND 12074; GFX940-NEXT: s_setpc_b64 s[30:31] 12075 %vec0 = call <4 x i16> asm "; def $0", "=s"() 12076 %vec1 = call <4 x i16> asm "; def $0", "=s"() 12077 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 12078 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 12079 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 2, i32 3> 12080 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 12081 ret void 12082} 12083 12084define void @s_shuffle_v4i16_v3i16__5_5_4_3() { 12085; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_5_4_3: 12086; GFX900: ; %bb.0: 12087; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12088; GFX900-NEXT: ;;#ASMSTART 12089; GFX900-NEXT: ; def s[4:5] 12090; GFX900-NEXT: ;;#ASMEND 12091; GFX900-NEXT: s_lshr_b32 s6, s4, 16 12092; GFX900-NEXT: s_pack_ll_b32_b16 s9, s6, s4 12093; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s5 12094; GFX900-NEXT: ;;#ASMSTART 12095; GFX900-NEXT: ; use s[8:9] 12096; GFX900-NEXT: ;;#ASMEND 12097; GFX900-NEXT: s_setpc_b64 s[30:31] 12098; 12099; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_5_4_3: 12100; GFX90A: ; %bb.0: 12101; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12102; GFX90A-NEXT: ;;#ASMSTART 12103; GFX90A-NEXT: ; def s[4:5] 12104; GFX90A-NEXT: ;;#ASMEND 12105; GFX90A-NEXT: s_lshr_b32 s6, s4, 16 12106; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s6, s4 12107; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s5 12108; GFX90A-NEXT: ;;#ASMSTART 12109; GFX90A-NEXT: ; use s[8:9] 12110; GFX90A-NEXT: ;;#ASMEND 12111; GFX90A-NEXT: s_setpc_b64 s[30:31] 12112; 12113; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_4_3: 12114; GFX940: ; %bb.0: 12115; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12116; GFX940-NEXT: ;;#ASMSTART 12117; GFX940-NEXT: ; def s[0:1] 12118; GFX940-NEXT: ;;#ASMEND 12119; GFX940-NEXT: s_lshr_b32 s2, s0, 16 12120; GFX940-NEXT: s_pack_ll_b32_b16 s9, s2, s0 
12121; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 12122; GFX940-NEXT: ;;#ASMSTART 12123; GFX940-NEXT: ; use s[8:9] 12124; GFX940-NEXT: ;;#ASMEND 12125; GFX940-NEXT: s_setpc_b64 s[30:31] 12126 %vec0 = call <4 x i16> asm "; def $0", "=s"() 12127 %vec1 = call <4 x i16> asm "; def $0", "=s"() 12128 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 12129 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 12130 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 4, i32 3> 12131 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 12132 ret void 12133} 12134 12135define void @s_shuffle_v4i16_v3i16__u_4_4_4() { 12136; GFX9-LABEL: s_shuffle_v4i16_v3i16__u_4_4_4: 12137; GFX9: ; %bb.0: 12138; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12139; GFX9-NEXT: ;;#ASMSTART 12140; GFX9-NEXT: ; def s[8:9] 12141; GFX9-NEXT: ;;#ASMEND 12142; GFX9-NEXT: s_pack_hh_b32_b16 s9, s8, s8 12143; GFX9-NEXT: ;;#ASMSTART 12144; GFX9-NEXT: ; use s[8:9] 12145; GFX9-NEXT: ;;#ASMEND 12146; GFX9-NEXT: s_setpc_b64 s[30:31] 12147 %vec0 = call <4 x i16> asm "; def $0", "=s"() 12148 %vec1 = call <4 x i16> asm "; def $0", "=s"() 12149 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 12150 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 12151 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 poison, i32 4, i32 4, i32 4> 12152 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 12153 ret void 12154} 12155 12156define void @s_shuffle_v4i16_v3i16__0_4_4_4() { 12157; GFX900-LABEL: s_shuffle_v4i16_v3i16__0_4_4_4: 12158; GFX900: ; %bb.0: 12159; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12160; GFX900-NEXT: ;;#ASMSTART 12161; GFX900-NEXT: ; def s[4:5] 12162; GFX900-NEXT: ;;#ASMEND 12163; GFX900-NEXT: ;;#ASMSTART 12164; GFX900-NEXT: 
; def s[6:7] 12165; GFX900-NEXT: ;;#ASMEND 12166; GFX900-NEXT: s_pack_lh_b32_b16 s8, s4, s6 12167; GFX900-NEXT: s_pack_hh_b32_b16 s9, s6, s6 12168; GFX900-NEXT: ;;#ASMSTART 12169; GFX900-NEXT: ; use s[8:9] 12170; GFX900-NEXT: ;;#ASMEND 12171; GFX900-NEXT: s_setpc_b64 s[30:31] 12172; 12173; GFX90A-LABEL: s_shuffle_v4i16_v3i16__0_4_4_4: 12174; GFX90A: ; %bb.0: 12175; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12176; GFX90A-NEXT: ;;#ASMSTART 12177; GFX90A-NEXT: ; def s[4:5] 12178; GFX90A-NEXT: ;;#ASMEND 12179; GFX90A-NEXT: ;;#ASMSTART 12180; GFX90A-NEXT: ; def s[6:7] 12181; GFX90A-NEXT: ;;#ASMEND 12182; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s4, s6 12183; GFX90A-NEXT: s_pack_hh_b32_b16 s9, s6, s6 12184; GFX90A-NEXT: ;;#ASMSTART 12185; GFX90A-NEXT: ; use s[8:9] 12186; GFX90A-NEXT: ;;#ASMEND 12187; GFX90A-NEXT: s_setpc_b64 s[30:31] 12188; 12189; GFX940-LABEL: s_shuffle_v4i16_v3i16__0_4_4_4: 12190; GFX940: ; %bb.0: 12191; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12192; GFX940-NEXT: ;;#ASMSTART 12193; GFX940-NEXT: ; def s[0:1] 12194; GFX940-NEXT: ;;#ASMEND 12195; GFX940-NEXT: ;;#ASMSTART 12196; GFX940-NEXT: ; def s[2:3] 12197; GFX940-NEXT: ;;#ASMEND 12198; GFX940-NEXT: s_pack_lh_b32_b16 s8, s0, s2 12199; GFX940-NEXT: s_pack_hh_b32_b16 s9, s2, s2 12200; GFX940-NEXT: ;;#ASMSTART 12201; GFX940-NEXT: ; use s[8:9] 12202; GFX940-NEXT: ;;#ASMEND 12203; GFX940-NEXT: s_setpc_b64 s[30:31] 12204 %vec0 = call <4 x i16> asm "; def $0", "=s"() 12205 %vec1 = call <4 x i16> asm "; def $0", "=s"() 12206 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 12207 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 12208 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 0, i32 4, i32 4, i32 4> 12209 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 12210 ret void 12211} 12212 12213define void @s_shuffle_v4i16_v3i16__1_4_4_4() { 12214; GFX900-LABEL: 
s_shuffle_v4i16_v3i16__1_4_4_4: 12215; GFX900: ; %bb.0: 12216; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12217; GFX900-NEXT: ;;#ASMSTART 12218; GFX900-NEXT: ; def s[4:5] 12219; GFX900-NEXT: ;;#ASMEND 12220; GFX900-NEXT: ;;#ASMSTART 12221; GFX900-NEXT: ; def s[6:7] 12222; GFX900-NEXT: ;;#ASMEND 12223; GFX900-NEXT: s_pack_hh_b32_b16 s8, s4, s6 12224; GFX900-NEXT: s_pack_hh_b32_b16 s9, s6, s6 12225; GFX900-NEXT: ;;#ASMSTART 12226; GFX900-NEXT: ; use s[8:9] 12227; GFX900-NEXT: ;;#ASMEND 12228; GFX900-NEXT: s_setpc_b64 s[30:31] 12229; 12230; GFX90A-LABEL: s_shuffle_v4i16_v3i16__1_4_4_4: 12231; GFX90A: ; %bb.0: 12232; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12233; GFX90A-NEXT: ;;#ASMSTART 12234; GFX90A-NEXT: ; def s[4:5] 12235; GFX90A-NEXT: ;;#ASMEND 12236; GFX90A-NEXT: ;;#ASMSTART 12237; GFX90A-NEXT: ; def s[6:7] 12238; GFX90A-NEXT: ;;#ASMEND 12239; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s4, s6 12240; GFX90A-NEXT: s_pack_hh_b32_b16 s9, s6, s6 12241; GFX90A-NEXT: ;;#ASMSTART 12242; GFX90A-NEXT: ; use s[8:9] 12243; GFX90A-NEXT: ;;#ASMEND 12244; GFX90A-NEXT: s_setpc_b64 s[30:31] 12245; 12246; GFX940-LABEL: s_shuffle_v4i16_v3i16__1_4_4_4: 12247; GFX940: ; %bb.0: 12248; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12249; GFX940-NEXT: ;;#ASMSTART 12250; GFX940-NEXT: ; def s[0:1] 12251; GFX940-NEXT: ;;#ASMEND 12252; GFX940-NEXT: ;;#ASMSTART 12253; GFX940-NEXT: ; def s[2:3] 12254; GFX940-NEXT: ;;#ASMEND 12255; GFX940-NEXT: s_pack_hh_b32_b16 s8, s0, s2 12256; GFX940-NEXT: s_pack_hh_b32_b16 s9, s2, s2 12257; GFX940-NEXT: ;;#ASMSTART 12258; GFX940-NEXT: ; use s[8:9] 12259; GFX940-NEXT: ;;#ASMEND 12260; GFX940-NEXT: s_setpc_b64 s[30:31] 12261 %vec0 = call <4 x i16> asm "; def $0", "=s"() 12262 %vec1 = call <4 x i16> asm "; def $0", "=s"() 12263 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 12264 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 12265 %shuf = 
shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 1, i32 4, i32 4, i32 4> 12266 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 12267 ret void 12268} 12269 12270define void @s_shuffle_v4i16_v3i16__2_4_4_4() { 12271; GFX900-LABEL: s_shuffle_v4i16_v3i16__2_4_4_4: 12272; GFX900: ; %bb.0: 12273; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12274; GFX900-NEXT: ;;#ASMSTART 12275; GFX900-NEXT: ; def s[4:5] 12276; GFX900-NEXT: ;;#ASMEND 12277; GFX900-NEXT: ;;#ASMSTART 12278; GFX900-NEXT: ; def s[6:7] 12279; GFX900-NEXT: ;;#ASMEND 12280; GFX900-NEXT: s_pack_lh_b32_b16 s8, s5, s6 12281; GFX900-NEXT: s_pack_hh_b32_b16 s9, s6, s6 12282; GFX900-NEXT: ;;#ASMSTART 12283; GFX900-NEXT: ; use s[8:9] 12284; GFX900-NEXT: ;;#ASMEND 12285; GFX900-NEXT: s_setpc_b64 s[30:31] 12286; 12287; GFX90A-LABEL: s_shuffle_v4i16_v3i16__2_4_4_4: 12288; GFX90A: ; %bb.0: 12289; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12290; GFX90A-NEXT: ;;#ASMSTART 12291; GFX90A-NEXT: ; def s[4:5] 12292; GFX90A-NEXT: ;;#ASMEND 12293; GFX90A-NEXT: ;;#ASMSTART 12294; GFX90A-NEXT: ; def s[6:7] 12295; GFX90A-NEXT: ;;#ASMEND 12296; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s5, s6 12297; GFX90A-NEXT: s_pack_hh_b32_b16 s9, s6, s6 12298; GFX90A-NEXT: ;;#ASMSTART 12299; GFX90A-NEXT: ; use s[8:9] 12300; GFX90A-NEXT: ;;#ASMEND 12301; GFX90A-NEXT: s_setpc_b64 s[30:31] 12302; 12303; GFX940-LABEL: s_shuffle_v4i16_v3i16__2_4_4_4: 12304; GFX940: ; %bb.0: 12305; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12306; GFX940-NEXT: ;;#ASMSTART 12307; GFX940-NEXT: ; def s[0:1] 12308; GFX940-NEXT: ;;#ASMEND 12309; GFX940-NEXT: ;;#ASMSTART 12310; GFX940-NEXT: ; def s[2:3] 12311; GFX940-NEXT: ;;#ASMEND 12312; GFX940-NEXT: s_pack_lh_b32_b16 s8, s1, s2 12313; GFX940-NEXT: s_pack_hh_b32_b16 s9, s2, s2 12314; GFX940-NEXT: ;;#ASMSTART 12315; GFX940-NEXT: ; use s[8:9] 12316; GFX940-NEXT: ;;#ASMEND 12317; GFX940-NEXT: s_setpc_b64 s[30:31] 12318 %vec0 = call <4 x i16> asm "; def $0", "=s"() 
12319 %vec1 = call <4 x i16> asm "; def $0", "=s"() 12320 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 12321 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 12322 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 2, i32 4, i32 4, i32 4> 12323 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 12324 ret void 12325} 12326 12327define void @s_shuffle_v4i16_v3i16__3_4_4_4() { 12328; GFX9-LABEL: s_shuffle_v4i16_v3i16__3_4_4_4: 12329; GFX9: ; %bb.0: 12330; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12331; GFX9-NEXT: ;;#ASMSTART 12332; GFX9-NEXT: ; def s[8:9] 12333; GFX9-NEXT: ;;#ASMEND 12334; GFX9-NEXT: s_pack_hh_b32_b16 s9, s8, s8 12335; GFX9-NEXT: ;;#ASMSTART 12336; GFX9-NEXT: ; use s[8:9] 12337; GFX9-NEXT: ;;#ASMEND 12338; GFX9-NEXT: s_setpc_b64 s[30:31] 12339 %vec0 = call <4 x i16> asm "; def $0", "=s"() 12340 %vec1 = call <4 x i16> asm "; def $0", "=s"() 12341 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 12342 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 12343 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 3, i32 4, i32 4, i32 4> 12344 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 12345 ret void 12346} 12347 12348define void @s_shuffle_v4i16_v3i16__4_4_4_4() { 12349; GFX900-LABEL: s_shuffle_v4i16_v3i16__4_4_4_4: 12350; GFX900: ; %bb.0: 12351; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12352; GFX900-NEXT: ;;#ASMSTART 12353; GFX900-NEXT: ; def s[4:5] 12354; GFX900-NEXT: ;;#ASMEND 12355; GFX900-NEXT: s_pack_hh_b32_b16 s8, s4, s4 12356; GFX900-NEXT: s_mov_b32 s9, s8 12357; GFX900-NEXT: ;;#ASMSTART 12358; GFX900-NEXT: ; use s[8:9] 12359; GFX900-NEXT: ;;#ASMEND 12360; GFX900-NEXT: s_setpc_b64 s[30:31] 12361; 12362; GFX90A-LABEL: s_shuffle_v4i16_v3i16__4_4_4_4: 12363; 
GFX90A: ; %bb.0: 12364; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12365; GFX90A-NEXT: ;;#ASMSTART 12366; GFX90A-NEXT: ; def s[4:5] 12367; GFX90A-NEXT: ;;#ASMEND 12368; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s4, s4 12369; GFX90A-NEXT: s_mov_b32 s9, s8 12370; GFX90A-NEXT: ;;#ASMSTART 12371; GFX90A-NEXT: ; use s[8:9] 12372; GFX90A-NEXT: ;;#ASMEND 12373; GFX90A-NEXT: s_setpc_b64 s[30:31] 12374; 12375; GFX940-LABEL: s_shuffle_v4i16_v3i16__4_4_4_4: 12376; GFX940: ; %bb.0: 12377; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12378; GFX940-NEXT: ;;#ASMSTART 12379; GFX940-NEXT: ; def s[0:1] 12380; GFX940-NEXT: ;;#ASMEND 12381; GFX940-NEXT: s_pack_hh_b32_b16 s8, s0, s0 12382; GFX940-NEXT: s_mov_b32 s9, s8 12383; GFX940-NEXT: ;;#ASMSTART 12384; GFX940-NEXT: ; use s[8:9] 12385; GFX940-NEXT: ;;#ASMEND 12386; GFX940-NEXT: s_setpc_b64 s[30:31] 12387 %vec0 = call <4 x i16> asm "; def $0", "=s"() 12388 %vec1 = call <4 x i16> asm "; def $0", "=s"() 12389 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 12390 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 12391 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 4, i32 4, i32 4, i32 4> 12392 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 12393 ret void 12394} 12395 12396define void @s_shuffle_v4i16_v3i16__5_4_4_4() { 12397; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_4_4_4: 12398; GFX900: ; %bb.0: 12399; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12400; GFX900-NEXT: ;;#ASMSTART 12401; GFX900-NEXT: ; def s[4:5] 12402; GFX900-NEXT: ;;#ASMEND 12403; GFX900-NEXT: s_pack_lh_b32_b16 s8, s5, s4 12404; GFX900-NEXT: s_pack_hh_b32_b16 s9, s4, s4 12405; GFX900-NEXT: ;;#ASMSTART 12406; GFX900-NEXT: ; use s[8:9] 12407; GFX900-NEXT: ;;#ASMEND 12408; GFX900-NEXT: s_setpc_b64 s[30:31] 12409; 12410; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_4_4_4: 12411; GFX90A: ; %bb.0: 12412; GFX90A-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) 12413; GFX90A-NEXT: ;;#ASMSTART 12414; GFX90A-NEXT: ; def s[4:5] 12415; GFX90A-NEXT: ;;#ASMEND 12416; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s5, s4 12417; GFX90A-NEXT: s_pack_hh_b32_b16 s9, s4, s4 12418; GFX90A-NEXT: ;;#ASMSTART 12419; GFX90A-NEXT: ; use s[8:9] 12420; GFX90A-NEXT: ;;#ASMEND 12421; GFX90A-NEXT: s_setpc_b64 s[30:31] 12422; 12423; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_4_4_4: 12424; GFX940: ; %bb.0: 12425; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12426; GFX940-NEXT: ;;#ASMSTART 12427; GFX940-NEXT: ; def s[0:1] 12428; GFX940-NEXT: ;;#ASMEND 12429; GFX940-NEXT: s_pack_lh_b32_b16 s8, s1, s0 12430; GFX940-NEXT: s_pack_hh_b32_b16 s9, s0, s0 12431; GFX940-NEXT: ;;#ASMSTART 12432; GFX940-NEXT: ; use s[8:9] 12433; GFX940-NEXT: ;;#ASMEND 12434; GFX940-NEXT: s_setpc_b64 s[30:31] 12435 %vec0 = call <4 x i16> asm "; def $0", "=s"() 12436 %vec1 = call <4 x i16> asm "; def $0", "=s"() 12437 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 12438 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 12439 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 4, i32 4, i32 4> 12440 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 12441 ret void 12442} 12443 12444define void @s_shuffle_v4i16_v3i16__5_u_4_4() { 12445; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_u_4_4: 12446; GFX900: ; %bb.0: 12447; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12448; GFX900-NEXT: ;;#ASMSTART 12449; GFX900-NEXT: ; def s[4:5] 12450; GFX900-NEXT: ;;#ASMEND 12451; GFX900-NEXT: s_pack_hh_b32_b16 s9, s4, s4 12452; GFX900-NEXT: s_mov_b32 s8, s5 12453; GFX900-NEXT: ;;#ASMSTART 12454; GFX900-NEXT: ; use s[8:9] 12455; GFX900-NEXT: ;;#ASMEND 12456; GFX900-NEXT: s_setpc_b64 s[30:31] 12457; 12458; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_u_4_4: 12459; GFX90A: ; %bb.0: 12460; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 
12461; GFX90A-NEXT: ;;#ASMSTART 12462; GFX90A-NEXT: ; def s[4:5] 12463; GFX90A-NEXT: ;;#ASMEND 12464; GFX90A-NEXT: s_pack_hh_b32_b16 s9, s4, s4 12465; GFX90A-NEXT: s_mov_b32 s8, s5 12466; GFX90A-NEXT: ;;#ASMSTART 12467; GFX90A-NEXT: ; use s[8:9] 12468; GFX90A-NEXT: ;;#ASMEND 12469; GFX90A-NEXT: s_setpc_b64 s[30:31] 12470; 12471; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_u_4_4: 12472; GFX940: ; %bb.0: 12473; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12474; GFX940-NEXT: ;;#ASMSTART 12475; GFX940-NEXT: ; def s[0:1] 12476; GFX940-NEXT: ;;#ASMEND 12477; GFX940-NEXT: s_pack_hh_b32_b16 s9, s0, s0 12478; GFX940-NEXT: s_mov_b32 s8, s1 12479; GFX940-NEXT: ;;#ASMSTART 12480; GFX940-NEXT: ; use s[8:9] 12481; GFX940-NEXT: ;;#ASMEND 12482; GFX940-NEXT: s_setpc_b64 s[30:31] 12483 %vec0 = call <4 x i16> asm "; def $0", "=s"() 12484 %vec1 = call <4 x i16> asm "; def $0", "=s"() 12485 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 12486 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 12487 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 poison, i32 4, i32 4> 12488 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 12489 ret void 12490} 12491 12492define void @s_shuffle_v4i16_v3i16__5_0_4_4() { 12493; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_0_4_4: 12494; GFX900: ; %bb.0: 12495; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12496; GFX900-NEXT: ;;#ASMSTART 12497; GFX900-NEXT: ; def s[4:5] 12498; GFX900-NEXT: ;;#ASMEND 12499; GFX900-NEXT: ;;#ASMSTART 12500; GFX900-NEXT: ; def s[6:7] 12501; GFX900-NEXT: ;;#ASMEND 12502; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s4 12503; GFX900-NEXT: s_pack_hh_b32_b16 s9, s6, s6 12504; GFX900-NEXT: ;;#ASMSTART 12505; GFX900-NEXT: ; use s[8:9] 12506; GFX900-NEXT: ;;#ASMEND 12507; GFX900-NEXT: s_setpc_b64 s[30:31] 12508; 12509; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_0_4_4: 12510; GFX90A: ; %bb.0: 12511; 
GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12512; GFX90A-NEXT: ;;#ASMSTART 12513; GFX90A-NEXT: ; def s[4:5] 12514; GFX90A-NEXT: ;;#ASMEND 12515; GFX90A-NEXT: ;;#ASMSTART 12516; GFX90A-NEXT: ; def s[6:7] 12517; GFX90A-NEXT: ;;#ASMEND 12518; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s4 12519; GFX90A-NEXT: s_pack_hh_b32_b16 s9, s6, s6 12520; GFX90A-NEXT: ;;#ASMSTART 12521; GFX90A-NEXT: ; use s[8:9] 12522; GFX90A-NEXT: ;;#ASMEND 12523; GFX90A-NEXT: s_setpc_b64 s[30:31] 12524; 12525; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_0_4_4: 12526; GFX940: ; %bb.0: 12527; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12528; GFX940-NEXT: ;;#ASMSTART 12529; GFX940-NEXT: ; def s[0:1] 12530; GFX940-NEXT: ;;#ASMEND 12531; GFX940-NEXT: ;;#ASMSTART 12532; GFX940-NEXT: ; def s[2:3] 12533; GFX940-NEXT: ;;#ASMEND 12534; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 12535; GFX940-NEXT: s_pack_hh_b32_b16 s9, s2, s2 12536; GFX940-NEXT: ;;#ASMSTART 12537; GFX940-NEXT: ; use s[8:9] 12538; GFX940-NEXT: ;;#ASMEND 12539; GFX940-NEXT: s_setpc_b64 s[30:31] 12540 %vec0 = call <4 x i16> asm "; def $0", "=s"() 12541 %vec1 = call <4 x i16> asm "; def $0", "=s"() 12542 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 12543 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 12544 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 0, i32 4, i32 4> 12545 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 12546 ret void 12547} 12548 12549define void @s_shuffle_v4i16_v3i16__5_1_4_4() { 12550; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_1_4_4: 12551; GFX900: ; %bb.0: 12552; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12553; GFX900-NEXT: ;;#ASMSTART 12554; GFX900-NEXT: ; def s[4:5] 12555; GFX900-NEXT: ;;#ASMEND 12556; GFX900-NEXT: ;;#ASMSTART 12557; GFX900-NEXT: ; def s[6:7] 12558; GFX900-NEXT: ;;#ASMEND 12559; GFX900-NEXT: s_pack_lh_b32_b16 s8, s7, s4 12560; 
GFX900-NEXT: s_pack_hh_b32_b16 s9, s6, s6 12561; GFX900-NEXT: ;;#ASMSTART 12562; GFX900-NEXT: ; use s[8:9] 12563; GFX900-NEXT: ;;#ASMEND 12564; GFX900-NEXT: s_setpc_b64 s[30:31] 12565; 12566; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_1_4_4: 12567; GFX90A: ; %bb.0: 12568; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12569; GFX90A-NEXT: ;;#ASMSTART 12570; GFX90A-NEXT: ; def s[4:5] 12571; GFX90A-NEXT: ;;#ASMEND 12572; GFX90A-NEXT: ;;#ASMSTART 12573; GFX90A-NEXT: ; def s[6:7] 12574; GFX90A-NEXT: ;;#ASMEND 12575; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s7, s4 12576; GFX90A-NEXT: s_pack_hh_b32_b16 s9, s6, s6 12577; GFX90A-NEXT: ;;#ASMSTART 12578; GFX90A-NEXT: ; use s[8:9] 12579; GFX90A-NEXT: ;;#ASMEND 12580; GFX90A-NEXT: s_setpc_b64 s[30:31] 12581; 12582; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_1_4_4: 12583; GFX940: ; %bb.0: 12584; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12585; GFX940-NEXT: ;;#ASMSTART 12586; GFX940-NEXT: ; def s[0:1] 12587; GFX940-NEXT: ;;#ASMEND 12588; GFX940-NEXT: ;;#ASMSTART 12589; GFX940-NEXT: ; def s[2:3] 12590; GFX940-NEXT: ;;#ASMEND 12591; GFX940-NEXT: s_pack_lh_b32_b16 s8, s3, s0 12592; GFX940-NEXT: s_pack_hh_b32_b16 s9, s2, s2 12593; GFX940-NEXT: ;;#ASMSTART 12594; GFX940-NEXT: ; use s[8:9] 12595; GFX940-NEXT: ;;#ASMEND 12596; GFX940-NEXT: s_setpc_b64 s[30:31] 12597 %vec0 = call <4 x i16> asm "; def $0", "=s"() 12598 %vec1 = call <4 x i16> asm "; def $0", "=s"() 12599 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 12600 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 12601 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 1, i32 4, i32 4> 12602 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 12603 ret void 12604} 12605 12606define void @s_shuffle_v4i16_v3i16__5_2_4_4() { 12607; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_2_4_4: 12608; GFX900: ; %bb.0: 12609; GFX900-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) 12610; GFX900-NEXT: ;;#ASMSTART 12611; GFX900-NEXT: ; def s[4:5] 12612; GFX900-NEXT: ;;#ASMEND 12613; GFX900-NEXT: ;;#ASMSTART 12614; GFX900-NEXT: ; def s[6:7] 12615; GFX900-NEXT: ;;#ASMEND 12616; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s5 12617; GFX900-NEXT: s_pack_hh_b32_b16 s9, s6, s6 12618; GFX900-NEXT: ;;#ASMSTART 12619; GFX900-NEXT: ; use s[8:9] 12620; GFX900-NEXT: ;;#ASMEND 12621; GFX900-NEXT: s_setpc_b64 s[30:31] 12622; 12623; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_2_4_4: 12624; GFX90A: ; %bb.0: 12625; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12626; GFX90A-NEXT: ;;#ASMSTART 12627; GFX90A-NEXT: ; def s[4:5] 12628; GFX90A-NEXT: ;;#ASMEND 12629; GFX90A-NEXT: ;;#ASMSTART 12630; GFX90A-NEXT: ; def s[6:7] 12631; GFX90A-NEXT: ;;#ASMEND 12632; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s5 12633; GFX90A-NEXT: s_pack_hh_b32_b16 s9, s6, s6 12634; GFX90A-NEXT: ;;#ASMSTART 12635; GFX90A-NEXT: ; use s[8:9] 12636; GFX90A-NEXT: ;;#ASMEND 12637; GFX90A-NEXT: s_setpc_b64 s[30:31] 12638; 12639; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_2_4_4: 12640; GFX940: ; %bb.0: 12641; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12642; GFX940-NEXT: ;;#ASMSTART 12643; GFX940-NEXT: ; def s[0:1] 12644; GFX940-NEXT: ;;#ASMEND 12645; GFX940-NEXT: ;;#ASMSTART 12646; GFX940-NEXT: ; def s[2:3] 12647; GFX940-NEXT: ;;#ASMEND 12648; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s1 12649; GFX940-NEXT: s_pack_hh_b32_b16 s9, s2, s2 12650; GFX940-NEXT: ;;#ASMSTART 12651; GFX940-NEXT: ; use s[8:9] 12652; GFX940-NEXT: ;;#ASMEND 12653; GFX940-NEXT: s_setpc_b64 s[30:31] 12654 %vec0 = call <4 x i16> asm "; def $0", "=s"() 12655 %vec1 = call <4 x i16> asm "; def $0", "=s"() 12656 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 12657 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 12658 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 2, i32 4, i32 4> 12659 
call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 12660 ret void 12661} 12662 12663define void @s_shuffle_v4i16_v3i16__5_3_4_4() { 12664; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_3_4_4: 12665; GFX900: ; %bb.0: 12666; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12667; GFX900-NEXT: ;;#ASMSTART 12668; GFX900-NEXT: ; def s[4:5] 12669; GFX900-NEXT: ;;#ASMEND 12670; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4 12671; GFX900-NEXT: s_pack_hh_b32_b16 s9, s4, s4 12672; GFX900-NEXT: ;;#ASMSTART 12673; GFX900-NEXT: ; use s[8:9] 12674; GFX900-NEXT: ;;#ASMEND 12675; GFX900-NEXT: s_setpc_b64 s[30:31] 12676; 12677; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_3_4_4: 12678; GFX90A: ; %bb.0: 12679; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12680; GFX90A-NEXT: ;;#ASMSTART 12681; GFX90A-NEXT: ; def s[4:5] 12682; GFX90A-NEXT: ;;#ASMEND 12683; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4 12684; GFX90A-NEXT: s_pack_hh_b32_b16 s9, s4, s4 12685; GFX90A-NEXT: ;;#ASMSTART 12686; GFX90A-NEXT: ; use s[8:9] 12687; GFX90A-NEXT: ;;#ASMEND 12688; GFX90A-NEXT: s_setpc_b64 s[30:31] 12689; 12690; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_3_4_4: 12691; GFX940: ; %bb.0: 12692; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12693; GFX940-NEXT: ;;#ASMSTART 12694; GFX940-NEXT: ; def s[0:1] 12695; GFX940-NEXT: ;;#ASMEND 12696; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 12697; GFX940-NEXT: s_pack_hh_b32_b16 s9, s0, s0 12698; GFX940-NEXT: ;;#ASMSTART 12699; GFX940-NEXT: ; use s[8:9] 12700; GFX940-NEXT: ;;#ASMEND 12701; GFX940-NEXT: s_setpc_b64 s[30:31] 12702 %vec0 = call <4 x i16> asm "; def $0", "=s"() 12703 %vec1 = call <4 x i16> asm "; def $0", "=s"() 12704 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 12705 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 12706 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 3, i32 4, i32 4> 12707 call void asm 
sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 12708 ret void 12709} 12710 12711define void @s_shuffle_v4i16_v3i16__5_5_4_4() { 12712; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_5_4_4: 12713; GFX900: ; %bb.0: 12714; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12715; GFX900-NEXT: ;;#ASMSTART 12716; GFX900-NEXT: ; def s[4:5] 12717; GFX900-NEXT: ;;#ASMEND 12718; GFX900-NEXT: s_pack_hh_b32_b16 s9, s4, s4 12719; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s5 12720; GFX900-NEXT: ;;#ASMSTART 12721; GFX900-NEXT: ; use s[8:9] 12722; GFX900-NEXT: ;;#ASMEND 12723; GFX900-NEXT: s_setpc_b64 s[30:31] 12724; 12725; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_5_4_4: 12726; GFX90A: ; %bb.0: 12727; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12728; GFX90A-NEXT: ;;#ASMSTART 12729; GFX90A-NEXT: ; def s[4:5] 12730; GFX90A-NEXT: ;;#ASMEND 12731; GFX90A-NEXT: s_pack_hh_b32_b16 s9, s4, s4 12732; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s5 12733; GFX90A-NEXT: ;;#ASMSTART 12734; GFX90A-NEXT: ; use s[8:9] 12735; GFX90A-NEXT: ;;#ASMEND 12736; GFX90A-NEXT: s_setpc_b64 s[30:31] 12737; 12738; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_4_4: 12739; GFX940: ; %bb.0: 12740; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12741; GFX940-NEXT: ;;#ASMSTART 12742; GFX940-NEXT: ; def s[0:1] 12743; GFX940-NEXT: ;;#ASMEND 12744; GFX940-NEXT: s_pack_hh_b32_b16 s9, s0, s0 12745; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 12746; GFX940-NEXT: ;;#ASMSTART 12747; GFX940-NEXT: ; use s[8:9] 12748; GFX940-NEXT: ;;#ASMEND 12749; GFX940-NEXT: s_setpc_b64 s[30:31] 12750 %vec0 = call <4 x i16> asm "; def $0", "=s"() 12751 %vec1 = call <4 x i16> asm "; def $0", "=s"() 12752 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 12753 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 12754 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 4, i32 4> 12755 call void asm sideeffect "; use $0", 
"{s[8:9]}"(<4 x i16> %shuf) 12756 ret void 12757} 12758 12759define void @s_shuffle_v4i16_v3i16__5_5_u_4() { 12760; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_5_u_4: 12761; GFX900: ; %bb.0: 12762; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12763; GFX900-NEXT: ;;#ASMSTART 12764; GFX900-NEXT: ; def s[4:5] 12765; GFX900-NEXT: ;;#ASMEND 12766; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s5 12767; GFX900-NEXT: s_mov_b32 s9, s4 12768; GFX900-NEXT: ;;#ASMSTART 12769; GFX900-NEXT: ; use s[8:9] 12770; GFX900-NEXT: ;;#ASMEND 12771; GFX900-NEXT: s_setpc_b64 s[30:31] 12772; 12773; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_5_u_4: 12774; GFX90A: ; %bb.0: 12775; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12776; GFX90A-NEXT: ;;#ASMSTART 12777; GFX90A-NEXT: ; def s[4:5] 12778; GFX90A-NEXT: ;;#ASMEND 12779; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s5 12780; GFX90A-NEXT: s_mov_b32 s9, s4 12781; GFX90A-NEXT: ;;#ASMSTART 12782; GFX90A-NEXT: ; use s[8:9] 12783; GFX90A-NEXT: ;;#ASMEND 12784; GFX90A-NEXT: s_setpc_b64 s[30:31] 12785; 12786; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_u_4: 12787; GFX940: ; %bb.0: 12788; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12789; GFX940-NEXT: ;;#ASMSTART 12790; GFX940-NEXT: ; def s[0:1] 12791; GFX940-NEXT: ;;#ASMEND 12792; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 12793; GFX940-NEXT: s_mov_b32 s9, s0 12794; GFX940-NEXT: ;;#ASMSTART 12795; GFX940-NEXT: ; use s[8:9] 12796; GFX940-NEXT: ;;#ASMEND 12797; GFX940-NEXT: s_setpc_b64 s[30:31] 12798 %vec0 = call <4 x i16> asm "; def $0", "=s"() 12799 %vec1 = call <4 x i16> asm "; def $0", "=s"() 12800 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 12801 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 12802 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 poison, i32 4> 12803 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 12804 ret void 12805} 
12806 12807define void @s_shuffle_v4i16_v3i16__5_5_0_4() { 12808; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_5_0_4: 12809; GFX900: ; %bb.0: 12810; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12811; GFX900-NEXT: ;;#ASMSTART 12812; GFX900-NEXT: ; def s[4:5] 12813; GFX900-NEXT: ;;#ASMEND 12814; GFX900-NEXT: ;;#ASMSTART 12815; GFX900-NEXT: ; def s[6:7] 12816; GFX900-NEXT: ;;#ASMEND 12817; GFX900-NEXT: s_pack_lh_b32_b16 s9, s4, s6 12818; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s7 12819; GFX900-NEXT: ;;#ASMSTART 12820; GFX900-NEXT: ; use s[8:9] 12821; GFX900-NEXT: ;;#ASMEND 12822; GFX900-NEXT: s_setpc_b64 s[30:31] 12823; 12824; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_5_0_4: 12825; GFX90A: ; %bb.0: 12826; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12827; GFX90A-NEXT: ;;#ASMSTART 12828; GFX90A-NEXT: ; def s[4:5] 12829; GFX90A-NEXT: ;;#ASMEND 12830; GFX90A-NEXT: ;;#ASMSTART 12831; GFX90A-NEXT: ; def s[6:7] 12832; GFX90A-NEXT: ;;#ASMEND 12833; GFX90A-NEXT: s_pack_lh_b32_b16 s9, s4, s6 12834; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s7 12835; GFX90A-NEXT: ;;#ASMSTART 12836; GFX90A-NEXT: ; use s[8:9] 12837; GFX90A-NEXT: ;;#ASMEND 12838; GFX90A-NEXT: s_setpc_b64 s[30:31] 12839; 12840; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_0_4: 12841; GFX940: ; %bb.0: 12842; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12843; GFX940-NEXT: ;;#ASMSTART 12844; GFX940-NEXT: ; def s[0:1] 12845; GFX940-NEXT: ;;#ASMEND 12846; GFX940-NEXT: ;;#ASMSTART 12847; GFX940-NEXT: ; def s[2:3] 12848; GFX940-NEXT: ;;#ASMEND 12849; GFX940-NEXT: s_pack_lh_b32_b16 s9, s0, s2 12850; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 12851; GFX940-NEXT: ;;#ASMSTART 12852; GFX940-NEXT: ; use s[8:9] 12853; GFX940-NEXT: ;;#ASMEND 12854; GFX940-NEXT: s_setpc_b64 s[30:31] 12855 %vec0 = call <4 x i16> asm "; def $0", "=s"() 12856 %vec1 = call <4 x i16> asm "; def $0", "=s"() 12857 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 12858 %extract31 = shufflevector <4 x 
i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 12859 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 0, i32 4> 12860 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 12861 ret void 12862} 12863 12864define void @s_shuffle_v4i16_v3i16__5_5_1_4() { 12865; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_5_1_4: 12866; GFX900: ; %bb.0: 12867; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12868; GFX900-NEXT: ;;#ASMSTART 12869; GFX900-NEXT: ; def s[4:5] 12870; GFX900-NEXT: ;;#ASMEND 12871; GFX900-NEXT: ;;#ASMSTART 12872; GFX900-NEXT: ; def s[6:7] 12873; GFX900-NEXT: ;;#ASMEND 12874; GFX900-NEXT: s_pack_hh_b32_b16 s9, s4, s6 12875; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s7 12876; GFX900-NEXT: ;;#ASMSTART 12877; GFX900-NEXT: ; use s[8:9] 12878; GFX900-NEXT: ;;#ASMEND 12879; GFX900-NEXT: s_setpc_b64 s[30:31] 12880; 12881; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_5_1_4: 12882; GFX90A: ; %bb.0: 12883; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12884; GFX90A-NEXT: ;;#ASMSTART 12885; GFX90A-NEXT: ; def s[4:5] 12886; GFX90A-NEXT: ;;#ASMEND 12887; GFX90A-NEXT: ;;#ASMSTART 12888; GFX90A-NEXT: ; def s[6:7] 12889; GFX90A-NEXT: ;;#ASMEND 12890; GFX90A-NEXT: s_pack_hh_b32_b16 s9, s4, s6 12891; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s7 12892; GFX90A-NEXT: ;;#ASMSTART 12893; GFX90A-NEXT: ; use s[8:9] 12894; GFX90A-NEXT: ;;#ASMEND 12895; GFX90A-NEXT: s_setpc_b64 s[30:31] 12896; 12897; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_1_4: 12898; GFX940: ; %bb.0: 12899; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12900; GFX940-NEXT: ;;#ASMSTART 12901; GFX940-NEXT: ; def s[0:1] 12902; GFX940-NEXT: ;;#ASMEND 12903; GFX940-NEXT: ;;#ASMSTART 12904; GFX940-NEXT: ; def s[2:3] 12905; GFX940-NEXT: ;;#ASMEND 12906; GFX940-NEXT: s_pack_hh_b32_b16 s9, s0, s2 12907; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 12908; GFX940-NEXT: ;;#ASMSTART 12909; GFX940-NEXT: ; use s[8:9] 12910; GFX940-NEXT: ;;#ASMEND 12911; GFX940-NEXT: 
s_setpc_b64 s[30:31] 12912 %vec0 = call <4 x i16> asm "; def $0", "=s"() 12913 %vec1 = call <4 x i16> asm "; def $0", "=s"() 12914 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 12915 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 12916 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 1, i32 4> 12917 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 12918 ret void 12919} 12920 12921define void @s_shuffle_v4i16_v3i16__5_5_2_4() { 12922; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_5_2_4: 12923; GFX900: ; %bb.0: 12924; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12925; GFX900-NEXT: ;;#ASMSTART 12926; GFX900-NEXT: ; def s[4:5] 12927; GFX900-NEXT: ;;#ASMEND 12928; GFX900-NEXT: ;;#ASMSTART 12929; GFX900-NEXT: ; def s[6:7] 12930; GFX900-NEXT: ;;#ASMEND 12931; GFX900-NEXT: s_pack_lh_b32_b16 s9, s5, s6 12932; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s7 12933; GFX900-NEXT: ;;#ASMSTART 12934; GFX900-NEXT: ; use s[8:9] 12935; GFX900-NEXT: ;;#ASMEND 12936; GFX900-NEXT: s_setpc_b64 s[30:31] 12937; 12938; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_5_2_4: 12939; GFX90A: ; %bb.0: 12940; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12941; GFX90A-NEXT: ;;#ASMSTART 12942; GFX90A-NEXT: ; def s[4:5] 12943; GFX90A-NEXT: ;;#ASMEND 12944; GFX90A-NEXT: ;;#ASMSTART 12945; GFX90A-NEXT: ; def s[6:7] 12946; GFX90A-NEXT: ;;#ASMEND 12947; GFX90A-NEXT: s_pack_lh_b32_b16 s9, s5, s6 12948; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s7 12949; GFX90A-NEXT: ;;#ASMSTART 12950; GFX90A-NEXT: ; use s[8:9] 12951; GFX90A-NEXT: ;;#ASMEND 12952; GFX90A-NEXT: s_setpc_b64 s[30:31] 12953; 12954; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_2_4: 12955; GFX940: ; %bb.0: 12956; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12957; GFX940-NEXT: ;;#ASMSTART 12958; GFX940-NEXT: ; def s[0:1] 12959; GFX940-NEXT: ;;#ASMEND 12960; GFX940-NEXT: ;;#ASMSTART 12961; 
GFX940-NEXT: ; def s[2:3] 12962; GFX940-NEXT: ;;#ASMEND 12963; GFX940-NEXT: s_pack_lh_b32_b16 s9, s1, s2 12964; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 12965; GFX940-NEXT: ;;#ASMSTART 12966; GFX940-NEXT: ; use s[8:9] 12967; GFX940-NEXT: ;;#ASMEND 12968; GFX940-NEXT: s_setpc_b64 s[30:31] 12969 %vec0 = call <4 x i16> asm "; def $0", "=s"() 12970 %vec1 = call <4 x i16> asm "; def $0", "=s"() 12971 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 12972 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 12973 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 2, i32 4> 12974 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 12975 ret void 12976} 12977 12978define void @s_shuffle_v4i16_v3i16__5_5_3_4() { 12979; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_5_3_4: 12980; GFX900: ; %bb.0: 12981; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12982; GFX900-NEXT: ;;#ASMSTART 12983; GFX900-NEXT: ; def s[4:5] 12984; GFX900-NEXT: ;;#ASMEND 12985; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s5 12986; GFX900-NEXT: s_mov_b32 s9, s4 12987; GFX900-NEXT: ;;#ASMSTART 12988; GFX900-NEXT: ; use s[8:9] 12989; GFX900-NEXT: ;;#ASMEND 12990; GFX900-NEXT: s_setpc_b64 s[30:31] 12991; 12992; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_5_3_4: 12993; GFX90A: ; %bb.0: 12994; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12995; GFX90A-NEXT: ;;#ASMSTART 12996; GFX90A-NEXT: ; def s[4:5] 12997; GFX90A-NEXT: ;;#ASMEND 12998; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s5 12999; GFX90A-NEXT: s_mov_b32 s9, s4 13000; GFX90A-NEXT: ;;#ASMSTART 13001; GFX90A-NEXT: ; use s[8:9] 13002; GFX90A-NEXT: ;;#ASMEND 13003; GFX90A-NEXT: s_setpc_b64 s[30:31] 13004; 13005; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_3_4: 13006; GFX940: ; %bb.0: 13007; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13008; GFX940-NEXT: ;;#ASMSTART 13009; GFX940-NEXT: ; def s[0:1] 13010; GFX940-NEXT: 
;;#ASMEND 13011; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 13012; GFX940-NEXT: s_mov_b32 s9, s0 13013; GFX940-NEXT: ;;#ASMSTART 13014; GFX940-NEXT: ; use s[8:9] 13015; GFX940-NEXT: ;;#ASMEND 13016; GFX940-NEXT: s_setpc_b64 s[30:31] 13017 %vec0 = call <4 x i16> asm "; def $0", "=s"() 13018 %vec1 = call <4 x i16> asm "; def $0", "=s"() 13019 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 13020 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 13021 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 3, i32 4> 13022 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 13023 ret void 13024} 13025 13026define void @s_shuffle_v4i16_v3i16__u_5_5_5() { 13027; GFX900-LABEL: s_shuffle_v4i16_v3i16__u_5_5_5: 13028; GFX900: ; %bb.0: 13029; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13030; GFX900-NEXT: ;;#ASMSTART 13031; GFX900-NEXT: ; def s[4:5] 13032; GFX900-NEXT: ;;#ASMEND 13033; GFX900-NEXT: s_pack_ll_b32_b16 s9, s5, s5 13034; GFX900-NEXT: s_lshl_b32 s8, s5, 16 13035; GFX900-NEXT: ;;#ASMSTART 13036; GFX900-NEXT: ; use s[8:9] 13037; GFX900-NEXT: ;;#ASMEND 13038; GFX900-NEXT: s_setpc_b64 s[30:31] 13039; 13040; GFX90A-LABEL: s_shuffle_v4i16_v3i16__u_5_5_5: 13041; GFX90A: ; %bb.0: 13042; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13043; GFX90A-NEXT: ;;#ASMSTART 13044; GFX90A-NEXT: ; def s[4:5] 13045; GFX90A-NEXT: ;;#ASMEND 13046; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s5, s5 13047; GFX90A-NEXT: s_lshl_b32 s8, s5, 16 13048; GFX90A-NEXT: ;;#ASMSTART 13049; GFX90A-NEXT: ; use s[8:9] 13050; GFX90A-NEXT: ;;#ASMEND 13051; GFX90A-NEXT: s_setpc_b64 s[30:31] 13052; 13053; GFX940-LABEL: s_shuffle_v4i16_v3i16__u_5_5_5: 13054; GFX940: ; %bb.0: 13055; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13056; GFX940-NEXT: ;;#ASMSTART 13057; GFX940-NEXT: ; def s[0:1] 13058; GFX940-NEXT: ;;#ASMEND 13059; GFX940-NEXT: s_pack_ll_b32_b16 
s9, s1, s1 13060; GFX940-NEXT: s_lshl_b32 s8, s1, 16 13061; GFX940-NEXT: ;;#ASMSTART 13062; GFX940-NEXT: ; use s[8:9] 13063; GFX940-NEXT: ;;#ASMEND 13064; GFX940-NEXT: s_setpc_b64 s[30:31] 13065 %vec0 = call <4 x i16> asm "; def $0", "=s"() 13066 %vec1 = call <4 x i16> asm "; def $0", "=s"() 13067 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 13068 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 13069 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 poison, i32 5, i32 5, i32 5> 13070 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 13071 ret void 13072} 13073 13074define void @s_shuffle_v4i16_v3i16__0_5_5_5() { 13075; GFX900-LABEL: s_shuffle_v4i16_v3i16__0_5_5_5: 13076; GFX900: ; %bb.0: 13077; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13078; GFX900-NEXT: ;;#ASMSTART 13079; GFX900-NEXT: ; def s[4:5] 13080; GFX900-NEXT: ;;#ASMEND 13081; GFX900-NEXT: ;;#ASMSTART 13082; GFX900-NEXT: ; def s[6:7] 13083; GFX900-NEXT: ;;#ASMEND 13084; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s7 13085; GFX900-NEXT: s_pack_ll_b32_b16 s9, s7, s7 13086; GFX900-NEXT: ;;#ASMSTART 13087; GFX900-NEXT: ; use s[8:9] 13088; GFX900-NEXT: ;;#ASMEND 13089; GFX900-NEXT: s_setpc_b64 s[30:31] 13090; 13091; GFX90A-LABEL: s_shuffle_v4i16_v3i16__0_5_5_5: 13092; GFX90A: ; %bb.0: 13093; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13094; GFX90A-NEXT: ;;#ASMSTART 13095; GFX90A-NEXT: ; def s[4:5] 13096; GFX90A-NEXT: ;;#ASMEND 13097; GFX90A-NEXT: ;;#ASMSTART 13098; GFX90A-NEXT: ; def s[6:7] 13099; GFX90A-NEXT: ;;#ASMEND 13100; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s7 13101; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s7, s7 13102; GFX90A-NEXT: ;;#ASMSTART 13103; GFX90A-NEXT: ; use s[8:9] 13104; GFX90A-NEXT: ;;#ASMEND 13105; GFX90A-NEXT: s_setpc_b64 s[30:31] 13106; 13107; GFX940-LABEL: s_shuffle_v4i16_v3i16__0_5_5_5: 13108; GFX940: ; %bb.0: 13109; GFX940-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13110; GFX940-NEXT: ;;#ASMSTART 13111; GFX940-NEXT: ; def s[0:1] 13112; GFX940-NEXT: ;;#ASMEND 13113; GFX940-NEXT: ;;#ASMSTART 13114; GFX940-NEXT: ; def s[2:3] 13115; GFX940-NEXT: ;;#ASMEND 13116; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3 13117; GFX940-NEXT: s_pack_ll_b32_b16 s9, s3, s3 13118; GFX940-NEXT: ;;#ASMSTART 13119; GFX940-NEXT: ; use s[8:9] 13120; GFX940-NEXT: ;;#ASMEND 13121; GFX940-NEXT: s_setpc_b64 s[30:31] 13122 %vec0 = call <4 x i16> asm "; def $0", "=s"() 13123 %vec1 = call <4 x i16> asm "; def $0", "=s"() 13124 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 13125 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 13126 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 0, i32 5, i32 5, i32 5> 13127 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 13128 ret void 13129} 13130 13131define void @s_shuffle_v4i16_v3i16__1_5_5_5() { 13132; GFX900-LABEL: s_shuffle_v4i16_v3i16__1_5_5_5: 13133; GFX900: ; %bb.0: 13134; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13135; GFX900-NEXT: ;;#ASMSTART 13136; GFX900-NEXT: ; def s[4:5] 13137; GFX900-NEXT: ;;#ASMEND 13138; GFX900-NEXT: s_lshr_b32 s4, s4, 16 13139; GFX900-NEXT: ;;#ASMSTART 13140; GFX900-NEXT: ; def s[6:7] 13141; GFX900-NEXT: ;;#ASMEND 13142; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s7 13143; GFX900-NEXT: s_pack_ll_b32_b16 s9, s7, s7 13144; GFX900-NEXT: ;;#ASMSTART 13145; GFX900-NEXT: ; use s[8:9] 13146; GFX900-NEXT: ;;#ASMEND 13147; GFX900-NEXT: s_setpc_b64 s[30:31] 13148; 13149; GFX90A-LABEL: s_shuffle_v4i16_v3i16__1_5_5_5: 13150; GFX90A: ; %bb.0: 13151; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13152; GFX90A-NEXT: ;;#ASMSTART 13153; GFX90A-NEXT: ; def s[4:5] 13154; GFX90A-NEXT: ;;#ASMEND 13155; GFX90A-NEXT: s_lshr_b32 s4, s4, 16 13156; GFX90A-NEXT: ;;#ASMSTART 13157; GFX90A-NEXT: ; def s[6:7] 13158; GFX90A-NEXT: 
;;#ASMEND 13159; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s7 13160; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s7, s7 13161; GFX90A-NEXT: ;;#ASMSTART 13162; GFX90A-NEXT: ; use s[8:9] 13163; GFX90A-NEXT: ;;#ASMEND 13164; GFX90A-NEXT: s_setpc_b64 s[30:31] 13165; 13166; GFX940-LABEL: s_shuffle_v4i16_v3i16__1_5_5_5: 13167; GFX940: ; %bb.0: 13168; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13169; GFX940-NEXT: ;;#ASMSTART 13170; GFX940-NEXT: ; def s[0:1] 13171; GFX940-NEXT: ;;#ASMEND 13172; GFX940-NEXT: s_lshr_b32 s0, s0, 16 13173; GFX940-NEXT: ;;#ASMSTART 13174; GFX940-NEXT: ; def s[2:3] 13175; GFX940-NEXT: ;;#ASMEND 13176; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3 13177; GFX940-NEXT: s_pack_ll_b32_b16 s9, s3, s3 13178; GFX940-NEXT: ;;#ASMSTART 13179; GFX940-NEXT: ; use s[8:9] 13180; GFX940-NEXT: ;;#ASMEND 13181; GFX940-NEXT: s_setpc_b64 s[30:31] 13182 %vec0 = call <4 x i16> asm "; def $0", "=s"() 13183 %vec1 = call <4 x i16> asm "; def $0", "=s"() 13184 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 13185 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 13186 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 1, i32 5, i32 5, i32 5> 13187 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 13188 ret void 13189} 13190 13191define void @s_shuffle_v4i16_v3i16__2_5_5_5() { 13192; GFX900-LABEL: s_shuffle_v4i16_v3i16__2_5_5_5: 13193; GFX900: ; %bb.0: 13194; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13195; GFX900-NEXT: ;;#ASMSTART 13196; GFX900-NEXT: ; def s[4:5] 13197; GFX900-NEXT: ;;#ASMEND 13198; GFX900-NEXT: ;;#ASMSTART 13199; GFX900-NEXT: ; def s[6:7] 13200; GFX900-NEXT: ;;#ASMEND 13201; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s7 13202; GFX900-NEXT: s_pack_ll_b32_b16 s9, s7, s7 13203; GFX900-NEXT: ;;#ASMSTART 13204; GFX900-NEXT: ; use s[8:9] 13205; GFX900-NEXT: ;;#ASMEND 13206; GFX900-NEXT: s_setpc_b64 s[30:31] 13207; 13208; 
GFX90A-LABEL: s_shuffle_v4i16_v3i16__2_5_5_5: 13209; GFX90A: ; %bb.0: 13210; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13211; GFX90A-NEXT: ;;#ASMSTART 13212; GFX90A-NEXT: ; def s[4:5] 13213; GFX90A-NEXT: ;;#ASMEND 13214; GFX90A-NEXT: ;;#ASMSTART 13215; GFX90A-NEXT: ; def s[6:7] 13216; GFX90A-NEXT: ;;#ASMEND 13217; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s7 13218; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s7, s7 13219; GFX90A-NEXT: ;;#ASMSTART 13220; GFX90A-NEXT: ; use s[8:9] 13221; GFX90A-NEXT: ;;#ASMEND 13222; GFX90A-NEXT: s_setpc_b64 s[30:31] 13223; 13224; GFX940-LABEL: s_shuffle_v4i16_v3i16__2_5_5_5: 13225; GFX940: ; %bb.0: 13226; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13227; GFX940-NEXT: ;;#ASMSTART 13228; GFX940-NEXT: ; def s[0:1] 13229; GFX940-NEXT: ;;#ASMEND 13230; GFX940-NEXT: ;;#ASMSTART 13231; GFX940-NEXT: ; def s[2:3] 13232; GFX940-NEXT: ;;#ASMEND 13233; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s3 13234; GFX940-NEXT: s_pack_ll_b32_b16 s9, s3, s3 13235; GFX940-NEXT: ;;#ASMSTART 13236; GFX940-NEXT: ; use s[8:9] 13237; GFX940-NEXT: ;;#ASMEND 13238; GFX940-NEXT: s_setpc_b64 s[30:31] 13239 %vec0 = call <4 x i16> asm "; def $0", "=s"() 13240 %vec1 = call <4 x i16> asm "; def $0", "=s"() 13241 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 13242 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 13243 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 2, i32 5, i32 5, i32 5> 13244 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 13245 ret void 13246} 13247 13248define void @s_shuffle_v4i16_v3i16__3_5_5_5() { 13249; GFX900-LABEL: s_shuffle_v4i16_v3i16__3_5_5_5: 13250; GFX900: ; %bb.0: 13251; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13252; GFX900-NEXT: ;;#ASMSTART 13253; GFX900-NEXT: ; def s[4:5] 13254; GFX900-NEXT: ;;#ASMEND 13255; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s5 13256; GFX900-NEXT: 
s_pack_ll_b32_b16 s9, s5, s5 13257; GFX900-NEXT: ;;#ASMSTART 13258; GFX900-NEXT: ; use s[8:9] 13259; GFX900-NEXT: ;;#ASMEND 13260; GFX900-NEXT: s_setpc_b64 s[30:31] 13261; 13262; GFX90A-LABEL: s_shuffle_v4i16_v3i16__3_5_5_5: 13263; GFX90A: ; %bb.0: 13264; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13265; GFX90A-NEXT: ;;#ASMSTART 13266; GFX90A-NEXT: ; def s[4:5] 13267; GFX90A-NEXT: ;;#ASMEND 13268; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s5 13269; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s5, s5 13270; GFX90A-NEXT: ;;#ASMSTART 13271; GFX90A-NEXT: ; use s[8:9] 13272; GFX90A-NEXT: ;;#ASMEND 13273; GFX90A-NEXT: s_setpc_b64 s[30:31] 13274; 13275; GFX940-LABEL: s_shuffle_v4i16_v3i16__3_5_5_5: 13276; GFX940: ; %bb.0: 13277; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13278; GFX940-NEXT: ;;#ASMSTART 13279; GFX940-NEXT: ; def s[0:1] 13280; GFX940-NEXT: ;;#ASMEND 13281; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 13282; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 13283; GFX940-NEXT: ;;#ASMSTART 13284; GFX940-NEXT: ; use s[8:9] 13285; GFX940-NEXT: ;;#ASMEND 13286; GFX940-NEXT: s_setpc_b64 s[30:31] 13287 %vec0 = call <4 x i16> asm "; def $0", "=s"() 13288 %vec1 = call <4 x i16> asm "; def $0", "=s"() 13289 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 13290 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 13291 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 3, i32 5, i32 5, i32 5> 13292 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 13293 ret void 13294} 13295 13296define void @s_shuffle_v4i16_v3i16__4_5_5_5() { 13297; GFX900-LABEL: s_shuffle_v4i16_v3i16__4_5_5_5: 13298; GFX900: ; %bb.0: 13299; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13300; GFX900-NEXT: ;;#ASMSTART 13301; GFX900-NEXT: ; def s[4:5] 13302; GFX900-NEXT: ;;#ASMEND 13303; GFX900-NEXT: s_lshr_b32 s4, s4, 16 13304; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s5 
13305; GFX900-NEXT: s_pack_ll_b32_b16 s9, s5, s5 13306; GFX900-NEXT: ;;#ASMSTART 13307; GFX900-NEXT: ; use s[8:9] 13308; GFX900-NEXT: ;;#ASMEND 13309; GFX900-NEXT: s_setpc_b64 s[30:31] 13310; 13311; GFX90A-LABEL: s_shuffle_v4i16_v3i16__4_5_5_5: 13312; GFX90A: ; %bb.0: 13313; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13314; GFX90A-NEXT: ;;#ASMSTART 13315; GFX90A-NEXT: ; def s[4:5] 13316; GFX90A-NEXT: ;;#ASMEND 13317; GFX90A-NEXT: s_lshr_b32 s4, s4, 16 13318; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s5 13319; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s5, s5 13320; GFX90A-NEXT: ;;#ASMSTART 13321; GFX90A-NEXT: ; use s[8:9] 13322; GFX90A-NEXT: ;;#ASMEND 13323; GFX90A-NEXT: s_setpc_b64 s[30:31] 13324; 13325; GFX940-LABEL: s_shuffle_v4i16_v3i16__4_5_5_5: 13326; GFX940: ; %bb.0: 13327; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13328; GFX940-NEXT: ;;#ASMSTART 13329; GFX940-NEXT: ; def s[0:1] 13330; GFX940-NEXT: ;;#ASMEND 13331; GFX940-NEXT: s_lshr_b32 s0, s0, 16 13332; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 13333; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 13334; GFX940-NEXT: ;;#ASMSTART 13335; GFX940-NEXT: ; use s[8:9] 13336; GFX940-NEXT: ;;#ASMEND 13337; GFX940-NEXT: s_setpc_b64 s[30:31] 13338 %vec0 = call <4 x i16> asm "; def $0", "=s"() 13339 %vec1 = call <4 x i16> asm "; def $0", "=s"() 13340 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 13341 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 13342 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 4, i32 5, i32 5, i32 5> 13343 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 13344 ret void 13345} 13346 13347define void @s_shuffle_v4i16_v3i16__5_u_5_5() { 13348; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_u_5_5: 13349; GFX900: ; %bb.0: 13350; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13351; GFX900-NEXT: ;;#ASMSTART 13352; GFX900-NEXT: ; def s[4:5] 13353; 
GFX900-NEXT: ;;#ASMEND 13354; GFX900-NEXT: s_pack_ll_b32_b16 s9, s5, s5 13355; GFX900-NEXT: s_mov_b32 s8, s5 13356; GFX900-NEXT: ;;#ASMSTART 13357; GFX900-NEXT: ; use s[8:9] 13358; GFX900-NEXT: ;;#ASMEND 13359; GFX900-NEXT: s_setpc_b64 s[30:31] 13360; 13361; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_u_5_5: 13362; GFX90A: ; %bb.0: 13363; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13364; GFX90A-NEXT: ;;#ASMSTART 13365; GFX90A-NEXT: ; def s[4:5] 13366; GFX90A-NEXT: ;;#ASMEND 13367; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s5, s5 13368; GFX90A-NEXT: s_mov_b32 s8, s5 13369; GFX90A-NEXT: ;;#ASMSTART 13370; GFX90A-NEXT: ; use s[8:9] 13371; GFX90A-NEXT: ;;#ASMEND 13372; GFX90A-NEXT: s_setpc_b64 s[30:31] 13373; 13374; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_u_5_5: 13375; GFX940: ; %bb.0: 13376; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13377; GFX940-NEXT: ;;#ASMSTART 13378; GFX940-NEXT: ; def s[0:1] 13379; GFX940-NEXT: ;;#ASMEND 13380; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 13381; GFX940-NEXT: s_mov_b32 s8, s1 13382; GFX940-NEXT: ;;#ASMSTART 13383; GFX940-NEXT: ; use s[8:9] 13384; GFX940-NEXT: ;;#ASMEND 13385; GFX940-NEXT: s_setpc_b64 s[30:31] 13386 %vec0 = call <4 x i16> asm "; def $0", "=s"() 13387 %vec1 = call <4 x i16> asm "; def $0", "=s"() 13388 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 13389 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 13390 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 poison, i32 5, i32 5> 13391 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 13392 ret void 13393} 13394 13395define void @s_shuffle_v4i16_v3i16__5_0_5_5() { 13396; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_0_5_5: 13397; GFX900: ; %bb.0: 13398; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13399; GFX900-NEXT: ;;#ASMSTART 13400; GFX900-NEXT: ; def s[4:5] 13401; GFX900-NEXT: ;;#ASMEND 13402; GFX900-NEXT: 
;;#ASMSTART 13403; GFX900-NEXT: ; def s[6:7] 13404; GFX900-NEXT: ;;#ASMEND 13405; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s4 13406; GFX900-NEXT: s_pack_ll_b32_b16 s9, s7, s7 13407; GFX900-NEXT: ;;#ASMSTART 13408; GFX900-NEXT: ; use s[8:9] 13409; GFX900-NEXT: ;;#ASMEND 13410; GFX900-NEXT: s_setpc_b64 s[30:31] 13411; 13412; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_0_5_5: 13413; GFX90A: ; %bb.0: 13414; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13415; GFX90A-NEXT: ;;#ASMSTART 13416; GFX90A-NEXT: ; def s[4:5] 13417; GFX90A-NEXT: ;;#ASMEND 13418; GFX90A-NEXT: ;;#ASMSTART 13419; GFX90A-NEXT: ; def s[6:7] 13420; GFX90A-NEXT: ;;#ASMEND 13421; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s4 13422; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s7, s7 13423; GFX90A-NEXT: ;;#ASMSTART 13424; GFX90A-NEXT: ; use s[8:9] 13425; GFX90A-NEXT: ;;#ASMEND 13426; GFX90A-NEXT: s_setpc_b64 s[30:31] 13427; 13428; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_0_5_5: 13429; GFX940: ; %bb.0: 13430; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13431; GFX940-NEXT: ;;#ASMSTART 13432; GFX940-NEXT: ; def s[0:1] 13433; GFX940-NEXT: ;;#ASMEND 13434; GFX940-NEXT: ;;#ASMSTART 13435; GFX940-NEXT: ; def s[2:3] 13436; GFX940-NEXT: ;;#ASMEND 13437; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 13438; GFX940-NEXT: s_pack_ll_b32_b16 s9, s3, s3 13439; GFX940-NEXT: ;;#ASMSTART 13440; GFX940-NEXT: ; use s[8:9] 13441; GFX940-NEXT: ;;#ASMEND 13442; GFX940-NEXT: s_setpc_b64 s[30:31] 13443 %vec0 = call <4 x i16> asm "; def $0", "=s"() 13444 %vec1 = call <4 x i16> asm "; def $0", "=s"() 13445 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 13446 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 13447 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 0, i32 5, i32 5> 13448 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 13449 ret void 13450} 13451 13452define void 
@s_shuffle_v4i16_v3i16__5_1_5_5() { 13453; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_1_5_5: 13454; GFX900: ; %bb.0: 13455; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13456; GFX900-NEXT: ;;#ASMSTART 13457; GFX900-NEXT: ; def s[4:5] 13458; GFX900-NEXT: ;;#ASMEND 13459; GFX900-NEXT: ;;#ASMSTART 13460; GFX900-NEXT: ; def s[6:7] 13461; GFX900-NEXT: ;;#ASMEND 13462; GFX900-NEXT: s_pack_lh_b32_b16 s8, s7, s4 13463; GFX900-NEXT: s_pack_ll_b32_b16 s9, s7, s7 13464; GFX900-NEXT: ;;#ASMSTART 13465; GFX900-NEXT: ; use s[8:9] 13466; GFX900-NEXT: ;;#ASMEND 13467; GFX900-NEXT: s_setpc_b64 s[30:31] 13468; 13469; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_1_5_5: 13470; GFX90A: ; %bb.0: 13471; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13472; GFX90A-NEXT: ;;#ASMSTART 13473; GFX90A-NEXT: ; def s[4:5] 13474; GFX90A-NEXT: ;;#ASMEND 13475; GFX90A-NEXT: ;;#ASMSTART 13476; GFX90A-NEXT: ; def s[6:7] 13477; GFX90A-NEXT: ;;#ASMEND 13478; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s7, s4 13479; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s7, s7 13480; GFX90A-NEXT: ;;#ASMSTART 13481; GFX90A-NEXT: ; use s[8:9] 13482; GFX90A-NEXT: ;;#ASMEND 13483; GFX90A-NEXT: s_setpc_b64 s[30:31] 13484; 13485; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_1_5_5: 13486; GFX940: ; %bb.0: 13487; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13488; GFX940-NEXT: ;;#ASMSTART 13489; GFX940-NEXT: ; def s[0:1] 13490; GFX940-NEXT: ;;#ASMEND 13491; GFX940-NEXT: ;;#ASMSTART 13492; GFX940-NEXT: ; def s[2:3] 13493; GFX940-NEXT: ;;#ASMEND 13494; GFX940-NEXT: s_pack_lh_b32_b16 s8, s3, s0 13495; GFX940-NEXT: s_pack_ll_b32_b16 s9, s3, s3 13496; GFX940-NEXT: ;;#ASMSTART 13497; GFX940-NEXT: ; use s[8:9] 13498; GFX940-NEXT: ;;#ASMEND 13499; GFX940-NEXT: s_setpc_b64 s[30:31] 13500 %vec0 = call <4 x i16> asm "; def $0", "=s"() 13501 %vec1 = call <4 x i16> asm "; def $0", "=s"() 13502 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 13503 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> 
poison, <3 x i32> <i32 0, i32 1, i32 2> 13504 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 1, i32 5, i32 5> 13505 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 13506 ret void 13507} 13508 13509define void @s_shuffle_v4i16_v3i16__5_2_5_5() { 13510; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_2_5_5: 13511; GFX900: ; %bb.0: 13512; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13513; GFX900-NEXT: ;;#ASMSTART 13514; GFX900-NEXT: ; def s[4:5] 13515; GFX900-NEXT: ;;#ASMEND 13516; GFX900-NEXT: ;;#ASMSTART 13517; GFX900-NEXT: ; def s[6:7] 13518; GFX900-NEXT: ;;#ASMEND 13519; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s5 13520; GFX900-NEXT: s_pack_ll_b32_b16 s9, s7, s7 13521; GFX900-NEXT: ;;#ASMSTART 13522; GFX900-NEXT: ; use s[8:9] 13523; GFX900-NEXT: ;;#ASMEND 13524; GFX900-NEXT: s_setpc_b64 s[30:31] 13525; 13526; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_2_5_5: 13527; GFX90A: ; %bb.0: 13528; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13529; GFX90A-NEXT: ;;#ASMSTART 13530; GFX90A-NEXT: ; def s[4:5] 13531; GFX90A-NEXT: ;;#ASMEND 13532; GFX90A-NEXT: ;;#ASMSTART 13533; GFX90A-NEXT: ; def s[6:7] 13534; GFX90A-NEXT: ;;#ASMEND 13535; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s5 13536; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s7, s7 13537; GFX90A-NEXT: ;;#ASMSTART 13538; GFX90A-NEXT: ; use s[8:9] 13539; GFX90A-NEXT: ;;#ASMEND 13540; GFX90A-NEXT: s_setpc_b64 s[30:31] 13541; 13542; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_2_5_5: 13543; GFX940: ; %bb.0: 13544; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13545; GFX940-NEXT: ;;#ASMSTART 13546; GFX940-NEXT: ; def s[0:1] 13547; GFX940-NEXT: ;;#ASMEND 13548; GFX940-NEXT: ;;#ASMSTART 13549; GFX940-NEXT: ; def s[2:3] 13550; GFX940-NEXT: ;;#ASMEND 13551; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s1 13552; GFX940-NEXT: s_pack_ll_b32_b16 s9, s3, s3 13553; GFX940-NEXT: ;;#ASMSTART 13554; GFX940-NEXT: ; use s[8:9] 13555; GFX940-NEXT: ;;#ASMEND 13556; GFX940-NEXT: s_setpc_b64 s[30:31] 
13557 %vec0 = call <4 x i16> asm "; def $0", "=s"() 13558 %vec1 = call <4 x i16> asm "; def $0", "=s"() 13559 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 13560 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 13561 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 2, i32 5, i32 5> 13562 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 13563 ret void 13564} 13565 13566define void @s_shuffle_v4i16_v3i16__5_3_5_5() { 13567; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_3_5_5: 13568; GFX900: ; %bb.0: 13569; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13570; GFX900-NEXT: ;;#ASMSTART 13571; GFX900-NEXT: ; def s[4:5] 13572; GFX900-NEXT: ;;#ASMEND 13573; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4 13574; GFX900-NEXT: s_pack_ll_b32_b16 s9, s5, s5 13575; GFX900-NEXT: ;;#ASMSTART 13576; GFX900-NEXT: ; use s[8:9] 13577; GFX900-NEXT: ;;#ASMEND 13578; GFX900-NEXT: s_setpc_b64 s[30:31] 13579; 13580; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_3_5_5: 13581; GFX90A: ; %bb.0: 13582; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13583; GFX90A-NEXT: ;;#ASMSTART 13584; GFX90A-NEXT: ; def s[4:5] 13585; GFX90A-NEXT: ;;#ASMEND 13586; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4 13587; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s5, s5 13588; GFX90A-NEXT: ;;#ASMSTART 13589; GFX90A-NEXT: ; use s[8:9] 13590; GFX90A-NEXT: ;;#ASMEND 13591; GFX90A-NEXT: s_setpc_b64 s[30:31] 13592; 13593; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_3_5_5: 13594; GFX940: ; %bb.0: 13595; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13596; GFX940-NEXT: ;;#ASMSTART 13597; GFX940-NEXT: ; def s[0:1] 13598; GFX940-NEXT: ;;#ASMEND 13599; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 13600; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 13601; GFX940-NEXT: ;;#ASMSTART 13602; GFX940-NEXT: ; use s[8:9] 13603; GFX940-NEXT: ;;#ASMEND 13604; GFX940-NEXT: s_setpc_b64 s[30:31] 13605 %vec0 = call <4 
x i16> asm "; def $0", "=s"() 13606 %vec1 = call <4 x i16> asm "; def $0", "=s"() 13607 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 13608 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 13609 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 3, i32 5, i32 5> 13610 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 13611 ret void 13612} 13613 13614define void @s_shuffle_v4i16_v3i16__5_4_5_5() { 13615; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_4_5_5: 13616; GFX900: ; %bb.0: 13617; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13618; GFX900-NEXT: ;;#ASMSTART 13619; GFX900-NEXT: ; def s[4:5] 13620; GFX900-NEXT: ;;#ASMEND 13621; GFX900-NEXT: s_pack_lh_b32_b16 s8, s5, s4 13622; GFX900-NEXT: s_pack_ll_b32_b16 s9, s5, s5 13623; GFX900-NEXT: ;;#ASMSTART 13624; GFX900-NEXT: ; use s[8:9] 13625; GFX900-NEXT: ;;#ASMEND 13626; GFX900-NEXT: s_setpc_b64 s[30:31] 13627; 13628; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_4_5_5: 13629; GFX90A: ; %bb.0: 13630; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13631; GFX90A-NEXT: ;;#ASMSTART 13632; GFX90A-NEXT: ; def s[4:5] 13633; GFX90A-NEXT: ;;#ASMEND 13634; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s5, s4 13635; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s5, s5 13636; GFX90A-NEXT: ;;#ASMSTART 13637; GFX90A-NEXT: ; use s[8:9] 13638; GFX90A-NEXT: ;;#ASMEND 13639; GFX90A-NEXT: s_setpc_b64 s[30:31] 13640; 13641; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_4_5_5: 13642; GFX940: ; %bb.0: 13643; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13644; GFX940-NEXT: ;;#ASMSTART 13645; GFX940-NEXT: ; def s[0:1] 13646; GFX940-NEXT: ;;#ASMEND 13647; GFX940-NEXT: s_pack_lh_b32_b16 s8, s1, s0 13648; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 13649; GFX940-NEXT: ;;#ASMSTART 13650; GFX940-NEXT: ; use s[8:9] 13651; GFX940-NEXT: ;;#ASMEND 13652; GFX940-NEXT: s_setpc_b64 s[30:31] 13653 %vec0 = call <4 x i16> asm "; def $0", 
"=s"() 13654 %vec1 = call <4 x i16> asm "; def $0", "=s"() 13655 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 13656 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 13657 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 4, i32 5, i32 5> 13658 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 13659 ret void 13660} 13661 13662define void @s_shuffle_v4i16_v3i16__5_5_u_5() { 13663; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_5_u_5: 13664; GFX900: ; %bb.0: 13665; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13666; GFX900-NEXT: ;;#ASMSTART 13667; GFX900-NEXT: ; def s[4:5] 13668; GFX900-NEXT: ;;#ASMEND 13669; GFX900-NEXT: s_lshl_b32 s9, s5, 16 13670; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s5 13671; GFX900-NEXT: ;;#ASMSTART 13672; GFX900-NEXT: ; use s[8:9] 13673; GFX900-NEXT: ;;#ASMEND 13674; GFX900-NEXT: s_setpc_b64 s[30:31] 13675; 13676; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_5_u_5: 13677; GFX90A: ; %bb.0: 13678; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13679; GFX90A-NEXT: ;;#ASMSTART 13680; GFX90A-NEXT: ; def s[4:5] 13681; GFX90A-NEXT: ;;#ASMEND 13682; GFX90A-NEXT: s_lshl_b32 s9, s5, 16 13683; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s5 13684; GFX90A-NEXT: ;;#ASMSTART 13685; GFX90A-NEXT: ; use s[8:9] 13686; GFX90A-NEXT: ;;#ASMEND 13687; GFX90A-NEXT: s_setpc_b64 s[30:31] 13688; 13689; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_u_5: 13690; GFX940: ; %bb.0: 13691; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13692; GFX940-NEXT: ;;#ASMSTART 13693; GFX940-NEXT: ; def s[0:1] 13694; GFX940-NEXT: ;;#ASMEND 13695; GFX940-NEXT: s_lshl_b32 s9, s1, 16 13696; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 13697; GFX940-NEXT: ;;#ASMSTART 13698; GFX940-NEXT: ; use s[8:9] 13699; GFX940-NEXT: ;;#ASMEND 13700; GFX940-NEXT: s_setpc_b64 s[30:31] 13701 %vec0 = call <4 x i16> asm "; def $0", "=s"() 13702 %vec1 = call <4 x i16> asm "; 
def $0", "=s"() 13703 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 13704 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 13705 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 poison, i32 5> 13706 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 13707 ret void 13708} 13709 13710define void @s_shuffle_v4i16_v3i16__5_5_0_5() { 13711; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_5_0_5: 13712; GFX900: ; %bb.0: 13713; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13714; GFX900-NEXT: ;;#ASMSTART 13715; GFX900-NEXT: ; def s[4:5] 13716; GFX900-NEXT: ;;#ASMEND 13717; GFX900-NEXT: ;;#ASMSTART 13718; GFX900-NEXT: ; def s[6:7] 13719; GFX900-NEXT: ;;#ASMEND 13720; GFX900-NEXT: s_pack_ll_b32_b16 s9, s4, s7 13721; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s7 13722; GFX900-NEXT: ;;#ASMSTART 13723; GFX900-NEXT: ; use s[8:9] 13724; GFX900-NEXT: ;;#ASMEND 13725; GFX900-NEXT: s_setpc_b64 s[30:31] 13726; 13727; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_5_0_5: 13728; GFX90A: ; %bb.0: 13729; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13730; GFX90A-NEXT: ;;#ASMSTART 13731; GFX90A-NEXT: ; def s[4:5] 13732; GFX90A-NEXT: ;;#ASMEND 13733; GFX90A-NEXT: ;;#ASMSTART 13734; GFX90A-NEXT: ; def s[6:7] 13735; GFX90A-NEXT: ;;#ASMEND 13736; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s4, s7 13737; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s7 13738; GFX90A-NEXT: ;;#ASMSTART 13739; GFX90A-NEXT: ; use s[8:9] 13740; GFX90A-NEXT: ;;#ASMEND 13741; GFX90A-NEXT: s_setpc_b64 s[30:31] 13742; 13743; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_0_5: 13744; GFX940: ; %bb.0: 13745; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13746; GFX940-NEXT: ;;#ASMSTART 13747; GFX940-NEXT: ; def s[0:1] 13748; GFX940-NEXT: ;;#ASMEND 13749; GFX940-NEXT: ;;#ASMSTART 13750; GFX940-NEXT: ; def s[2:3] 13751; GFX940-NEXT: ;;#ASMEND 13752; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s3 
13753; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 13754; GFX940-NEXT: ;;#ASMSTART 13755; GFX940-NEXT: ; use s[8:9] 13756; GFX940-NEXT: ;;#ASMEND 13757; GFX940-NEXT: s_setpc_b64 s[30:31] 13758 %vec0 = call <4 x i16> asm "; def $0", "=s"() 13759 %vec1 = call <4 x i16> asm "; def $0", "=s"() 13760 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 13761 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 13762 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 0, i32 5> 13763 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 13764 ret void 13765} 13766 13767define void @s_shuffle_v4i16_v3i16__5_5_1_5() { 13768; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_5_1_5: 13769; GFX900: ; %bb.0: 13770; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13771; GFX900-NEXT: ;;#ASMSTART 13772; GFX900-NEXT: ; def s[4:5] 13773; GFX900-NEXT: ;;#ASMEND 13774; GFX900-NEXT: s_lshr_b32 s4, s4, 16 13775; GFX900-NEXT: ;;#ASMSTART 13776; GFX900-NEXT: ; def s[6:7] 13777; GFX900-NEXT: ;;#ASMEND 13778; GFX900-NEXT: s_pack_ll_b32_b16 s9, s4, s7 13779; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s7 13780; GFX900-NEXT: ;;#ASMSTART 13781; GFX900-NEXT: ; use s[8:9] 13782; GFX900-NEXT: ;;#ASMEND 13783; GFX900-NEXT: s_setpc_b64 s[30:31] 13784; 13785; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_5_1_5: 13786; GFX90A: ; %bb.0: 13787; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13788; GFX90A-NEXT: ;;#ASMSTART 13789; GFX90A-NEXT: ; def s[4:5] 13790; GFX90A-NEXT: ;;#ASMEND 13791; GFX90A-NEXT: s_lshr_b32 s4, s4, 16 13792; GFX90A-NEXT: ;;#ASMSTART 13793; GFX90A-NEXT: ; def s[6:7] 13794; GFX90A-NEXT: ;;#ASMEND 13795; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s4, s7 13796; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s7 13797; GFX90A-NEXT: ;;#ASMSTART 13798; GFX90A-NEXT: ; use s[8:9] 13799; GFX90A-NEXT: ;;#ASMEND 13800; GFX90A-NEXT: s_setpc_b64 s[30:31] 13801; 13802; GFX940-LABEL: 
s_shuffle_v4i16_v3i16__5_5_1_5: 13803; GFX940: ; %bb.0: 13804; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13805; GFX940-NEXT: ;;#ASMSTART 13806; GFX940-NEXT: ; def s[0:1] 13807; GFX940-NEXT: ;;#ASMEND 13808; GFX940-NEXT: s_lshr_b32 s0, s0, 16 13809; GFX940-NEXT: ;;#ASMSTART 13810; GFX940-NEXT: ; def s[2:3] 13811; GFX940-NEXT: ;;#ASMEND 13812; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s3 13813; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 13814; GFX940-NEXT: ;;#ASMSTART 13815; GFX940-NEXT: ; use s[8:9] 13816; GFX940-NEXT: ;;#ASMEND 13817; GFX940-NEXT: s_setpc_b64 s[30:31] 13818 %vec0 = call <4 x i16> asm "; def $0", "=s"() 13819 %vec1 = call <4 x i16> asm "; def $0", "=s"() 13820 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 13821 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 13822 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 1, i32 5> 13823 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 13824 ret void 13825} 13826 13827define void @s_shuffle_v4i16_v3i16__5_5_2_5() { 13828; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_5_2_5: 13829; GFX900: ; %bb.0: 13830; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13831; GFX900-NEXT: ;;#ASMSTART 13832; GFX900-NEXT: ; def s[4:5] 13833; GFX900-NEXT: ;;#ASMEND 13834; GFX900-NEXT: ;;#ASMSTART 13835; GFX900-NEXT: ; def s[6:7] 13836; GFX900-NEXT: ;;#ASMEND 13837; GFX900-NEXT: s_pack_ll_b32_b16 s9, s5, s7 13838; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s7 13839; GFX900-NEXT: ;;#ASMSTART 13840; GFX900-NEXT: ; use s[8:9] 13841; GFX900-NEXT: ;;#ASMEND 13842; GFX900-NEXT: s_setpc_b64 s[30:31] 13843; 13844; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_5_2_5: 13845; GFX90A: ; %bb.0: 13846; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13847; GFX90A-NEXT: ;;#ASMSTART 13848; GFX90A-NEXT: ; def s[4:5] 13849; GFX90A-NEXT: ;;#ASMEND 13850; GFX90A-NEXT: ;;#ASMSTART 13851; GFX90A-NEXT: 
; def s[6:7] 13852; GFX90A-NEXT: ;;#ASMEND 13853; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s5, s7 13854; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s7 13855; GFX90A-NEXT: ;;#ASMSTART 13856; GFX90A-NEXT: ; use s[8:9] 13857; GFX90A-NEXT: ;;#ASMEND 13858; GFX90A-NEXT: s_setpc_b64 s[30:31] 13859; 13860; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_2_5: 13861; GFX940: ; %bb.0: 13862; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13863; GFX940-NEXT: ;;#ASMSTART 13864; GFX940-NEXT: ; def s[0:1] 13865; GFX940-NEXT: ;;#ASMEND 13866; GFX940-NEXT: ;;#ASMSTART 13867; GFX940-NEXT: ; def s[2:3] 13868; GFX940-NEXT: ;;#ASMEND 13869; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s3 13870; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 13871; GFX940-NEXT: ;;#ASMSTART 13872; GFX940-NEXT: ; use s[8:9] 13873; GFX940-NEXT: ;;#ASMEND 13874; GFX940-NEXT: s_setpc_b64 s[30:31] 13875 %vec0 = call <4 x i16> asm "; def $0", "=s"() 13876 %vec1 = call <4 x i16> asm "; def $0", "=s"() 13877 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 13878 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 13879 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 2, i32 5> 13880 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 13881 ret void 13882} 13883 13884define void @s_shuffle_v4i16_v3i16__5_5_3_5() { 13885; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_5_3_5: 13886; GFX900: ; %bb.0: 13887; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13888; GFX900-NEXT: ;;#ASMSTART 13889; GFX900-NEXT: ; def s[4:5] 13890; GFX900-NEXT: ;;#ASMEND 13891; GFX900-NEXT: s_pack_ll_b32_b16 s9, s4, s5 13892; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s5 13893; GFX900-NEXT: ;;#ASMSTART 13894; GFX900-NEXT: ; use s[8:9] 13895; GFX900-NEXT: ;;#ASMEND 13896; GFX900-NEXT: s_setpc_b64 s[30:31] 13897; 13898; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_5_3_5: 13899; GFX90A: ; %bb.0: 13900; GFX90A-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) 13901; GFX90A-NEXT: ;;#ASMSTART 13902; GFX90A-NEXT: ; def s[4:5] 13903; GFX90A-NEXT: ;;#ASMEND 13904; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s4, s5 13905; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s5 13906; GFX90A-NEXT: ;;#ASMSTART 13907; GFX90A-NEXT: ; use s[8:9] 13908; GFX90A-NEXT: ;;#ASMEND 13909; GFX90A-NEXT: s_setpc_b64 s[30:31] 13910; 13911; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_3_5: 13912; GFX940: ; %bb.0: 13913; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13914; GFX940-NEXT: ;;#ASMSTART 13915; GFX940-NEXT: ; def s[0:1] 13916; GFX940-NEXT: ;;#ASMEND 13917; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s1 13918; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 13919; GFX940-NEXT: ;;#ASMSTART 13920; GFX940-NEXT: ; use s[8:9] 13921; GFX940-NEXT: ;;#ASMEND 13922; GFX940-NEXT: s_setpc_b64 s[30:31] 13923 %vec0 = call <4 x i16> asm "; def $0", "=s"() 13924 %vec1 = call <4 x i16> asm "; def $0", "=s"() 13925 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 13926 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 13927 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 3, i32 5> 13928 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 13929 ret void 13930} 13931 13932define void @s_shuffle_v4i16_v3i16__5_5_4_5() { 13933; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_5_4_5: 13934; GFX900: ; %bb.0: 13935; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13936; GFX900-NEXT: ;;#ASMSTART 13937; GFX900-NEXT: ; def s[4:5] 13938; GFX900-NEXT: ;;#ASMEND 13939; GFX900-NEXT: s_lshr_b32 s4, s4, 16 13940; GFX900-NEXT: s_pack_ll_b32_b16 s9, s4, s5 13941; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s5 13942; GFX900-NEXT: ;;#ASMSTART 13943; GFX900-NEXT: ; use s[8:9] 13944; GFX900-NEXT: ;;#ASMEND 13945; GFX900-NEXT: s_setpc_b64 s[30:31] 13946; 13947; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_5_4_5: 13948; GFX90A: ; %bb.0: 13949; GFX90A-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13950; GFX90A-NEXT: ;;#ASMSTART 13951; GFX90A-NEXT: ; def s[4:5] 13952; GFX90A-NEXT: ;;#ASMEND 13953; GFX90A-NEXT: s_lshr_b32 s4, s4, 16 13954; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s4, s5 13955; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s5 13956; GFX90A-NEXT: ;;#ASMSTART 13957; GFX90A-NEXT: ; use s[8:9] 13958; GFX90A-NEXT: ;;#ASMEND 13959; GFX90A-NEXT: s_setpc_b64 s[30:31] 13960; 13961; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_4_5: 13962; GFX940: ; %bb.0: 13963; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13964; GFX940-NEXT: ;;#ASMSTART 13965; GFX940-NEXT: ; def s[0:1] 13966; GFX940-NEXT: ;;#ASMEND 13967; GFX940-NEXT: s_lshr_b32 s0, s0, 16 13968; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s1 13969; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 13970; GFX940-NEXT: ;;#ASMSTART 13971; GFX940-NEXT: ; use s[8:9] 13972; GFX940-NEXT: ;;#ASMEND 13973; GFX940-NEXT: s_setpc_b64 s[30:31] 13974 %vec0 = call <4 x i16> asm "; def $0", "=s"() 13975 %vec1 = call <4 x i16> asm "; def $0", "=s"() 13976 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 13977 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 13978 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 4, i32 5> 13979 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 13980 ret void 13981} 13982;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: 13983; GFX90APLUS: {{.*}} 13984