; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX90A %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX940 %s

; The tests below share one pattern: one or two <4 x i16> values are produced
; by an inline-asm "def", narrowed to <3 x i16>, shuffled with the mask spelled
; out in the function name ("u" stands for a poison lane), and stored to %ptr.
; Mask indices 0-2 pick lanes of the first shuffle operand, 3-5 lanes of the
; second one.

; All-poison mask applied to a single source vector.
define void @v_shuffle_v3i16_v3i16__u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX9-LABEL: v_shuffle_v3i16_v3i16__u_u_u:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> poison
  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
  ret void
}

; Mask <0, poison, poison> on a single source (the second operand is poison).
define void @v_shuffle_v3i16_v3i16__0_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i16_v3i16__0_u_u:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    v_mov_b32_e32 v2, 0
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:1]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    global_store_short v2, v1, s[16:17] offset:4
; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i16_v3i16__0_u_u:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:1]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    global_store_short v2, v1, s[16:17] offset:4
; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v3i16_v3i16__0_u_u:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    v_mov_b32_e32 v2, 0
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:1]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    global_store_short v2, v1, s[0:1] offset:4 sc0 sc1
; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> <i32 0, i32 poison, i32 poison>
  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
  ret void
}

; Mask <1, poison, poison> on a single source (the second operand is poison).
define void @v_shuffle_v3i16_v3i16__1_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i16_v3i16__1_u_u:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:1]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_mov_b32_e32 v2, 0
; GFX900-NEXT:    v_alignbit_b32 v0, s4, v0, 16
; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i16_v3i16__1_u_u:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:1]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
; GFX90A-NEXT:    v_alignbit_b32 v0, s4, v0, 16
; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v3i16_v3i16__1_u_u:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:1]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    v_mov_b32_e32 v2, 0
; GFX940-NEXT:    v_alignbit_b32 v0, s0, v0, 16
; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> <i32 1, i32 poison, i32 poison>
  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
  ret void
}

; Mask <2, poison, poison> on a single source (the second operand is poison).
define void @v_shuffle_v3i16_v3i16__2_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i16_v3i16__2_u_u:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    v_mov_b32_e32 v2, 0
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:1]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    global_store_dword v2, v1, s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i16_v3i16__2_u_u:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:1]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    global_store_dword v2, v1, s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v3i16_v3i16__2_u_u:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    v_mov_b32_e32 v2, 0
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:1]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    global_store_dword v2, v1, s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> <i32 2, i32 poison, i32 poison>
  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
  ret void
}

; Mask <3, poison, poison>: index 3 addresses lane 0 of the second operand,
; which is poison in this function; the expected output contains no store.
define void @v_shuffle_v3i16_v3i16__3_u_u(ptr addrspace(1) inreg %ptr) {
; GFX9-LABEL: v_shuffle_v3i16_v3i16__3_u_u:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> <i32 3, i32 poison, i32 poison>
  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
  ret void
}

; Mask <4, poison, poison>: lane 1 of the second source (%extract31).
define void @v_shuffle_v3i16_v3i16__4_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i16_v3i16__4_u_u:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:1]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_mov_b32_e32 v2, 0
; GFX900-NEXT:    v_alignbit_b32 v0, s4, v0, 16
; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i16_v3i16__4_u_u:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:1]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
; GFX90A-NEXT:    v_alignbit_b32 v0, s4, v0, 16
; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v3i16_v3i16__4_u_u:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:1]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    v_mov_b32_e32 v2, 0
; GFX940-NEXT:    v_alignbit_b32 v0, s0, v0, 16
; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 4, i32 poison, i32 poison>
  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
  ret void
}

; Mask <5, poison, poison>: lane 2 of the second source (%extract31).
define void @v_shuffle_v3i16_v3i16__5_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i16_v3i16__5_u_u:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    v_mov_b32_e32 v2, 0
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:1]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    global_store_dword v2, v1, s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i16_v3i16__5_u_u:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:1]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    global_store_dword v2, v1, s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_u_u:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    v_mov_b32_e32 v2, 0
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:1]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    global_store_dword v2, v1, s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 poison, i32 poison>
  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
  ret void
}

; Mask <5, 0, poison>: lane 2 of the second source, then lane 0 of the first.
define void @v_shuffle_v3i16_v3i16__5_0_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i16_v3i16__5_0_u:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:1]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
; GFX900-NEXT:    v_mov_b32_e32 v3, 0
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[1:2]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_perm_b32 v0, v0, v2, s4
; GFX900-NEXT:    global_store_dword v3, v0, s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i16_v3i16__5_0_u:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:1]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[2:3]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_perm_b32 v0, v0, v3, s4
; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_0_u:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:1]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
; GFX940-NEXT:    v_mov_b32_e32 v4, 0
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[2:3]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_nop 0
; GFX940-NEXT:    v_perm_b32 v0, v0, v3, s2
; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 0, i32 poison>
  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
  ret void
}

; Mask <5, 1, poison>: lane 2 of the second source, then lane 1 of the first.
define void @v_shuffle_v3i16_v3i16__5_1_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i16_v3i16__5_1_u:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:1]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_mov_b32 s4, 0xffff
; GFX900-NEXT:    v_mov_b32_e32 v3, 0
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[1:2]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_bfi_b32 v0, s4, v2, v0
; GFX900-NEXT:    global_store_dword v3, v0, s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i16_v3i16__5_1_u:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:1]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_mov_b32 s4, 0xffff
; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[2:3]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_bfi_b32 v0, s4, v3, v0
; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_1_u:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:1]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_mov_b32 s2, 0xffff
; GFX940-NEXT:    v_mov_b32_e32 v4, 0
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[2:3]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_nop 0
; GFX940-NEXT:    v_bfi_b32 v0, s2, v3, v0
; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 1, i32 poison>
  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
  ret void
}

; Mask <5, 2, poison>: lane 2 of the second source, then lane 2 of the first.
define void @v_shuffle_v3i16_v3i16__5_2_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i16_v3i16__5_2_u:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:1]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
; GFX900-NEXT:    v_mov_b32_e32 v4, 0
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[2:3]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_perm_b32 v0, v1, v3, s4
; GFX900-NEXT:    global_store_dword v4, v0, s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i16_v3i16__5_2_u:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:1]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[2:3]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_perm_b32 v0, v1, v3, s4
; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_2_u:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:1]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
; GFX940-NEXT:    v_mov_b32_e32 v4, 0
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[2:3]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_nop 0
; GFX940-NEXT:    v_perm_b32 v0, v1, v3, s2
; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 2, i32 poison>
  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
  ret void
}

; Mask <5, 3, poison>: lanes 2 and 0 of the second source.
define void @v_shuffle_v3i16_v3i16__5_3_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i16_v3i16__5_3_u:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:1]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
; GFX900-NEXT:    v_mov_b32_e32 v2, 0
; GFX900-NEXT:    v_perm_b32 v0, v0, v1, s4
; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i16_v3i16__5_3_u:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:1]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
; GFX90A-NEXT:    v_perm_b32 v0, v0, v1, s4
; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_3_u:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:1]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
; GFX940-NEXT:    v_mov_b32_e32 v2, 0
; GFX940-NEXT:    v_perm_b32 v0, v0, v1, s2
; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 3, i32 poison>
  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
  ret void
}

; Mask <5, 4, poison>: lanes 2 and 1 of the second source.
define void @v_shuffle_v3i16_v3i16__5_4_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i16_v3i16__5_4_u:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:1]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_mov_b32 s4, 0xffff
; GFX900-NEXT:    v_mov_b32_e32 v2, 0
; GFX900-NEXT:    v_bfi_b32 v0, s4, v1, v0
; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i16_v3i16__5_4_u:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:1]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_mov_b32 s4, 0xffff
; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
; GFX90A-NEXT:    v_bfi_b32 v0, s4, v1, v0
; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_4_u:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:1]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_mov_b32 s2, 0xffff
; GFX940-NEXT:    v_mov_b32_e32 v2, 0
; GFX940-NEXT:    v_bfi_b32 v0, s2, v1, v0
; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 4, i32 poison>
  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
  ret void
}

; Mask <5, 5, poison>: lane 2 of the second source, twice.
define void @v_shuffle_v3i16_v3i16__5_5_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i16_v3i16__5_5_u:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:1]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
; GFX900-NEXT:    v_mov_b32_e32 v2, 0
; GFX900-NEXT:    v_perm_b32 v0, v1, v1, s4
; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i16_v3i16__5_5_u:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:1]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
; GFX90A-NEXT:    v_perm_b32 v0, v1, v1, s4
; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_5_u:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:1]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
; GFX940-NEXT:    v_mov_b32_e32 v2, 0
; GFX940-NEXT:    v_perm_b32 v0, v1, v1, s2
; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 5, i32 poison>
  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
  ret void
}

; Mask <5, 5, 0>: lane 2 of the second source twice, then lane 0 of the first.
define void @v_shuffle_v3i16_v3i16__5_5_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i16_v3i16__5_5_0:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:1]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_mov_b32_e32 v3, 0
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[1:2]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
; GFX900-NEXT:    v_perm_b32 v1, v2, v2, s4
; GFX900-NEXT:    global_store_short v3, v0, s[16:17] offset:4
; GFX900-NEXT:    global_store_dword v3, v1, s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i16_v3i16__5_5_0:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:1]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[2:3]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_perm_b32 v1, v3, v3, s4
; GFX90A-NEXT:    global_store_short v4, v0, s[16:17] offset:4
; GFX90A-NEXT:    global_store_dword v4, v1, s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_5_0:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    v_mov_b32_e32 v4, 0
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:1]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[2:3]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_nop 0
; GFX940-NEXT:    v_perm_b32 v1, v3, v3, s2
; GFX940-NEXT:    global_store_short v4, v0, s[0:1] offset:4 sc0 sc1
; GFX940-NEXT:    global_store_dword v4, v1, s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 5, i32 0>
  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
  ret void
}

; Mask <5, 5, 1>: lane 2 of the second source twice, then lane 1 of the first.
define void @v_shuffle_v3i16_v3i16__5_5_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i16_v3i16__5_5_1:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:1]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_mov_b32_e32 v3, 0
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[1:2]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
; GFX900-NEXT:    v_perm_b32 v1, v2, v2, s4
; GFX900-NEXT:    global_store_short_d16_hi v3, v0, s[16:17] offset:4
; GFX900-NEXT:    global_store_dword v3, v1, s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i16_v3i16__5_5_1:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:1]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[2:3]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_perm_b32 v1, v3, v3, s4
; GFX90A-NEXT:    global_store_short_d16_hi v4, v0, s[16:17] offset:4
; GFX90A-NEXT:    global_store_dword v4, v1, s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_5_1:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    v_mov_b32_e32 v4, 0
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:1]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[2:3]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_nop 0
; GFX940-NEXT:    v_perm_b32 v1, v3, v3, s2
; GFX940-NEXT:    global_store_short_d16_hi v4, v0, s[0:1] offset:4 sc0 sc1
; GFX940-NEXT:    global_store_dword v4, v1, s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 5, i32 1>
  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
  ret void
}

; Mask <5, 5, 2>: lane 2 of the second source twice, then lane 2 of the first.
define void @v_shuffle_v3i16_v3i16__5_5_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i16_v3i16__5_5_2:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    v_mov_b32_e32 v4, 0
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:1]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[2:3]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_perm_b32 v0, v3, v3, s4
; GFX900-NEXT:    global_store_short v4, v1, s[16:17] offset:4
; GFX900-NEXT:    global_store_dword v4, v0, s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i16_v3i16__5_5_2:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:1]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[2:3]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_perm_b32 v0, v3, v3, s4
; GFX90A-NEXT:    global_store_short v4, v1, s[16:17] offset:4
; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_5_2:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    v_mov_b32_e32 v4, 0
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:1]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[2:3]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_nop 0
; GFX940-NEXT:    v_perm_b32 v0, v3, v3, s2
; GFX940-NEXT:    global_store_short v4, v1, s[0:1] offset:4 sc0 sc1
; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 5, i32 2>
  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
  ret void
}

define void @v_shuffle_v3i16_v3i16__5_5_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i16_v3i16__5_5_3:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    v_mov_b32_e32 v2, 0
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:1]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
; GFX900-NEXT:    v_perm_b32 v1, v1, v1, s4
; GFX900-NEXT:    global_store_short v2, v0, s[16:17] offset:4
; GFX900-NEXT:    global_store_dword v2, v1, s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i16_v3i16__5_5_3:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:1]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
; GFX90A-NEXT:    v_perm_b32 v1, v1, v1, s4
; GFX90A-NEXT:    global_store_short v2, v0, s[16:17] offset:4
; GFX90A-NEXT:    global_store_dword v2, v1, s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_5_3:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    v_mov_b32_e32 v2, 0
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:1]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
; GFX940-NEXT:    v_perm_b32 v1, v1, v1, s2
; GFX940-NEXT:    global_store_short v2,
v0, s[0:1] offset:4 sc0 sc1 784; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 785; GFX940-NEXT: s_waitcnt vmcnt(0) 786; GFX940-NEXT: s_setpc_b64 s[30:31] 787 %vec0 = call <4 x i16> asm "; def $0", "=v"() 788 %vec1 = call <4 x i16> asm "; def $0", "=v"() 789 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 790 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 791 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 5, i32 3> 792 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 793 ret void 794} 795 796define void @v_shuffle_v3i16_v3i16__5_5_4(ptr addrspace(1) inreg %ptr) { 797; GFX900-LABEL: v_shuffle_v3i16_v3i16__5_5_4: 798; GFX900: ; %bb.0: 799; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 800; GFX900-NEXT: v_mov_b32_e32 v2, 0 801; GFX900-NEXT: ;;#ASMSTART 802; GFX900-NEXT: ; def v[0:1] 803; GFX900-NEXT: ;;#ASMEND 804; GFX900-NEXT: s_mov_b32 s4, 0x5040100 805; GFX900-NEXT: v_perm_b32 v1, v1, v1, s4 806; GFX900-NEXT: global_store_short_d16_hi v2, v0, s[16:17] offset:4 807; GFX900-NEXT: global_store_dword v2, v1, s[16:17] 808; GFX900-NEXT: s_waitcnt vmcnt(0) 809; GFX900-NEXT: s_setpc_b64 s[30:31] 810; 811; GFX90A-LABEL: v_shuffle_v3i16_v3i16__5_5_4: 812; GFX90A: ; %bb.0: 813; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 814; GFX90A-NEXT: v_mov_b32_e32 v2, 0 815; GFX90A-NEXT: ;;#ASMSTART 816; GFX90A-NEXT: ; def v[0:1] 817; GFX90A-NEXT: ;;#ASMEND 818; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 819; GFX90A-NEXT: v_perm_b32 v1, v1, v1, s4 820; GFX90A-NEXT: global_store_short_d16_hi v2, v0, s[16:17] offset:4 821; GFX90A-NEXT: global_store_dword v2, v1, s[16:17] 822; GFX90A-NEXT: s_waitcnt vmcnt(0) 823; GFX90A-NEXT: s_setpc_b64 s[30:31] 824; 825; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_5_4: 826; GFX940: ; %bb.0: 827; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 828; GFX940-NEXT: v_mov_b32_e32 v2, 0 829; GFX940-NEXT: 
;;#ASMSTART 830; GFX940-NEXT: ; def v[0:1] 831; GFX940-NEXT: ;;#ASMEND 832; GFX940-NEXT: s_mov_b32 s2, 0x5040100 833; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 834; GFX940-NEXT: global_store_short_d16_hi v2, v0, s[0:1] offset:4 sc0 sc1 835; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 836; GFX940-NEXT: s_waitcnt vmcnt(0) 837; GFX940-NEXT: s_setpc_b64 s[30:31] 838 %vec0 = call <4 x i16> asm "; def $0", "=v"() 839 %vec1 = call <4 x i16> asm "; def $0", "=v"() 840 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 841 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 842 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 5, i32 4> 843 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 844 ret void 845} 846 847define void @v_shuffle_v3i16_v3i16__5_5_5(ptr addrspace(1) inreg %ptr) { 848; GFX900-LABEL: v_shuffle_v3i16_v3i16__5_5_5: 849; GFX900: ; %bb.0: 850; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 851; GFX900-NEXT: v_mov_b32_e32 v2, 0 852; GFX900-NEXT: ;;#ASMSTART 853; GFX900-NEXT: ; def v[0:1] 854; GFX900-NEXT: ;;#ASMEND 855; GFX900-NEXT: s_mov_b32 s4, 0x5040100 856; GFX900-NEXT: v_perm_b32 v0, v1, v1, s4 857; GFX900-NEXT: global_store_short v2, v1, s[16:17] offset:4 858; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 859; GFX900-NEXT: s_waitcnt vmcnt(0) 860; GFX900-NEXT: s_setpc_b64 s[30:31] 861; 862; GFX90A-LABEL: v_shuffle_v3i16_v3i16__5_5_5: 863; GFX90A: ; %bb.0: 864; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 865; GFX90A-NEXT: v_mov_b32_e32 v2, 0 866; GFX90A-NEXT: ;;#ASMSTART 867; GFX90A-NEXT: ; def v[0:1] 868; GFX90A-NEXT: ;;#ASMEND 869; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 870; GFX90A-NEXT: v_perm_b32 v0, v1, v1, s4 871; GFX90A-NEXT: global_store_short v2, v1, s[16:17] offset:4 872; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 873; GFX90A-NEXT: s_waitcnt vmcnt(0) 874; GFX90A-NEXT: s_setpc_b64 s[30:31] 
875; 876; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_5_5: 877; GFX940: ; %bb.0: 878; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 879; GFX940-NEXT: v_mov_b32_e32 v2, 0 880; GFX940-NEXT: ;;#ASMSTART 881; GFX940-NEXT: ; def v[0:1] 882; GFX940-NEXT: ;;#ASMEND 883; GFX940-NEXT: s_mov_b32 s2, 0x5040100 884; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 885; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 886; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 887; GFX940-NEXT: s_waitcnt vmcnt(0) 888; GFX940-NEXT: s_setpc_b64 s[30:31] 889 %vec0 = call <4 x i16> asm "; def $0", "=v"() 890 %vec1 = call <4 x i16> asm "; def $0", "=v"() 891 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 892 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 893 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 5, i32 5> 894 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 895 ret void 896} 897 898define void @v_shuffle_v3i16_v3i16__u_0_0(ptr addrspace(1) inreg %ptr) { 899; GFX900-LABEL: v_shuffle_v3i16_v3i16__u_0_0: 900; GFX900: ; %bb.0: 901; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 902; GFX900-NEXT: v_mov_b32_e32 v2, 0 903; GFX900-NEXT: ;;#ASMSTART 904; GFX900-NEXT: ; def v[0:1] 905; GFX900-NEXT: ;;#ASMEND 906; GFX900-NEXT: v_lshlrev_b32_e32 v1, 16, v0 907; GFX900-NEXT: global_store_short v2, v0, s[16:17] offset:4 908; GFX900-NEXT: global_store_dword v2, v1, s[16:17] 909; GFX900-NEXT: s_waitcnt vmcnt(0) 910; GFX900-NEXT: s_setpc_b64 s[30:31] 911; 912; GFX90A-LABEL: v_shuffle_v3i16_v3i16__u_0_0: 913; GFX90A: ; %bb.0: 914; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 915; GFX90A-NEXT: v_mov_b32_e32 v2, 0 916; GFX90A-NEXT: ;;#ASMSTART 917; GFX90A-NEXT: ; def v[0:1] 918; GFX90A-NEXT: ;;#ASMEND 919; GFX90A-NEXT: v_lshlrev_b32_e32 v1, 16, v0 920; GFX90A-NEXT: global_store_short v2, v0, s[16:17] offset:4 921; GFX90A-NEXT: 
global_store_dword v2, v1, s[16:17] 922; GFX90A-NEXT: s_waitcnt vmcnt(0) 923; GFX90A-NEXT: s_setpc_b64 s[30:31] 924; 925; GFX940-LABEL: v_shuffle_v3i16_v3i16__u_0_0: 926; GFX940: ; %bb.0: 927; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 928; GFX940-NEXT: v_mov_b32_e32 v2, 0 929; GFX940-NEXT: ;;#ASMSTART 930; GFX940-NEXT: ; def v[0:1] 931; GFX940-NEXT: ;;#ASMEND 932; GFX940-NEXT: s_nop 0 933; GFX940-NEXT: v_lshlrev_b32_e32 v1, 16, v0 934; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 935; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 936; GFX940-NEXT: s_waitcnt vmcnt(0) 937; GFX940-NEXT: s_setpc_b64 s[30:31] 938 %vec0 = call <4 x i16> asm "; def $0", "=v"() 939 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 940 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> <i32 poison, i32 0, i32 0> 941 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 942 ret void 943} 944 945define void @v_shuffle_v3i16_v3i16__0_0_0(ptr addrspace(1) inreg %ptr) { 946; GFX900-LABEL: v_shuffle_v3i16_v3i16__0_0_0: 947; GFX900: ; %bb.0: 948; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 949; GFX900-NEXT: v_mov_b32_e32 v2, 0 950; GFX900-NEXT: ;;#ASMSTART 951; GFX900-NEXT: ; def v[0:1] 952; GFX900-NEXT: ;;#ASMEND 953; GFX900-NEXT: s_mov_b32 s4, 0x5040100 954; GFX900-NEXT: v_perm_b32 v1, v0, v0, s4 955; GFX900-NEXT: global_store_short v2, v0, s[16:17] offset:4 956; GFX900-NEXT: global_store_dword v2, v1, s[16:17] 957; GFX900-NEXT: s_waitcnt vmcnt(0) 958; GFX900-NEXT: s_setpc_b64 s[30:31] 959; 960; GFX90A-LABEL: v_shuffle_v3i16_v3i16__0_0_0: 961; GFX90A: ; %bb.0: 962; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 963; GFX90A-NEXT: v_mov_b32_e32 v2, 0 964; GFX90A-NEXT: ;;#ASMSTART 965; GFX90A-NEXT: ; def v[0:1] 966; GFX90A-NEXT: ;;#ASMEND 967; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 968; GFX90A-NEXT: v_perm_b32 v1, v0, v0, s4 969; GFX90A-NEXT: global_store_short v2, v0, s[16:17] 
offset:4 970; GFX90A-NEXT: global_store_dword v2, v1, s[16:17] 971; GFX90A-NEXT: s_waitcnt vmcnt(0) 972; GFX90A-NEXT: s_setpc_b64 s[30:31] 973; 974; GFX940-LABEL: v_shuffle_v3i16_v3i16__0_0_0: 975; GFX940: ; %bb.0: 976; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 977; GFX940-NEXT: v_mov_b32_e32 v2, 0 978; GFX940-NEXT: ;;#ASMSTART 979; GFX940-NEXT: ; def v[0:1] 980; GFX940-NEXT: ;;#ASMEND 981; GFX940-NEXT: s_mov_b32 s2, 0x5040100 982; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 983; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 984; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 985; GFX940-NEXT: s_waitcnt vmcnt(0) 986; GFX940-NEXT: s_setpc_b64 s[30:31] 987 %vec0 = call <4 x i16> asm "; def $0", "=v"() 988 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 989 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> zeroinitializer 990 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 991 ret void 992} 993 994define void @v_shuffle_v3i16_v3i16__1_0_0(ptr addrspace(1) inreg %ptr) { 995; GFX900-LABEL: v_shuffle_v3i16_v3i16__1_0_0: 996; GFX900: ; %bb.0: 997; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 998; GFX900-NEXT: v_mov_b32_e32 v2, 0 999; GFX900-NEXT: ;;#ASMSTART 1000; GFX900-NEXT: ; def v[0:1] 1001; GFX900-NEXT: ;;#ASMEND 1002; GFX900-NEXT: v_alignbit_b32 v1, v0, v0, 16 1003; GFX900-NEXT: global_store_short v2, v0, s[16:17] offset:4 1004; GFX900-NEXT: global_store_dword v2, v1, s[16:17] 1005; GFX900-NEXT: s_waitcnt vmcnt(0) 1006; GFX900-NEXT: s_setpc_b64 s[30:31] 1007; 1008; GFX90A-LABEL: v_shuffle_v3i16_v3i16__1_0_0: 1009; GFX90A: ; %bb.0: 1010; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1011; GFX90A-NEXT: v_mov_b32_e32 v2, 0 1012; GFX90A-NEXT: ;;#ASMSTART 1013; GFX90A-NEXT: ; def v[0:1] 1014; GFX90A-NEXT: ;;#ASMEND 1015; GFX90A-NEXT: v_alignbit_b32 v1, v0, v0, 16 1016; GFX90A-NEXT: global_store_short v2, v0, s[16:17] offset:4 1017; GFX90A-NEXT: 
global_store_dword v2, v1, s[16:17] 1018; GFX90A-NEXT: s_waitcnt vmcnt(0) 1019; GFX90A-NEXT: s_setpc_b64 s[30:31] 1020; 1021; GFX940-LABEL: v_shuffle_v3i16_v3i16__1_0_0: 1022; GFX940: ; %bb.0: 1023; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1024; GFX940-NEXT: v_mov_b32_e32 v2, 0 1025; GFX940-NEXT: ;;#ASMSTART 1026; GFX940-NEXT: ; def v[0:1] 1027; GFX940-NEXT: ;;#ASMEND 1028; GFX940-NEXT: s_nop 0 1029; GFX940-NEXT: v_alignbit_b32 v1, v0, v0, 16 1030; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 1031; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 1032; GFX940-NEXT: s_waitcnt vmcnt(0) 1033; GFX940-NEXT: s_setpc_b64 s[30:31] 1034 %vec0 = call <4 x i16> asm "; def $0", "=v"() 1035 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 1036 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> <i32 1, i32 0, i32 0> 1037 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 1038 ret void 1039} 1040 1041define void @v_shuffle_v3i16_v3i16__2_0_0(ptr addrspace(1) inreg %ptr) { 1042; GFX900-LABEL: v_shuffle_v3i16_v3i16__2_0_0: 1043; GFX900: ; %bb.0: 1044; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1045; GFX900-NEXT: v_mov_b32_e32 v2, 0 1046; GFX900-NEXT: ;;#ASMSTART 1047; GFX900-NEXT: ; def v[0:1] 1048; GFX900-NEXT: ;;#ASMEND 1049; GFX900-NEXT: s_mov_b32 s4, 0x5040100 1050; GFX900-NEXT: v_perm_b32 v1, v0, v1, s4 1051; GFX900-NEXT: global_store_short v2, v0, s[16:17] offset:4 1052; GFX900-NEXT: global_store_dword v2, v1, s[16:17] 1053; GFX900-NEXT: s_waitcnt vmcnt(0) 1054; GFX900-NEXT: s_setpc_b64 s[30:31] 1055; 1056; GFX90A-LABEL: v_shuffle_v3i16_v3i16__2_0_0: 1057; GFX90A: ; %bb.0: 1058; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1059; GFX90A-NEXT: v_mov_b32_e32 v2, 0 1060; GFX90A-NEXT: ;;#ASMSTART 1061; GFX90A-NEXT: ; def v[0:1] 1062; GFX90A-NEXT: ;;#ASMEND 1063; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 1064; GFX90A-NEXT: v_perm_b32 v1, v0, v1, s4 1065; GFX90A-NEXT: 
global_store_short v2, v0, s[16:17] offset:4 1066; GFX90A-NEXT: global_store_dword v2, v1, s[16:17] 1067; GFX90A-NEXT: s_waitcnt vmcnt(0) 1068; GFX90A-NEXT: s_setpc_b64 s[30:31] 1069; 1070; GFX940-LABEL: v_shuffle_v3i16_v3i16__2_0_0: 1071; GFX940: ; %bb.0: 1072; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1073; GFX940-NEXT: v_mov_b32_e32 v2, 0 1074; GFX940-NEXT: ;;#ASMSTART 1075; GFX940-NEXT: ; def v[0:1] 1076; GFX940-NEXT: ;;#ASMEND 1077; GFX940-NEXT: s_mov_b32 s2, 0x5040100 1078; GFX940-NEXT: v_perm_b32 v1, v0, v1, s2 1079; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 1080; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 1081; GFX940-NEXT: s_waitcnt vmcnt(0) 1082; GFX940-NEXT: s_setpc_b64 s[30:31] 1083 %vec0 = call <4 x i16> asm "; def $0", "=v"() 1084 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 1085 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> <i32 2, i32 0, i32 0> 1086 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 1087 ret void 1088} 1089 1090define void @v_shuffle_v3i16_v3i16__3_0_0(ptr addrspace(1) inreg %ptr) { 1091; GFX900-LABEL: v_shuffle_v3i16_v3i16__3_0_0: 1092; GFX900: ; %bb.0: 1093; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1094; GFX900-NEXT: v_mov_b32_e32 v2, 0 1095; GFX900-NEXT: ;;#ASMSTART 1096; GFX900-NEXT: ; def v[0:1] 1097; GFX900-NEXT: ;;#ASMEND 1098; GFX900-NEXT: v_lshlrev_b32_e32 v1, 16, v0 1099; GFX900-NEXT: global_store_short v2, v0, s[16:17] offset:4 1100; GFX900-NEXT: global_store_dword v2, v1, s[16:17] 1101; GFX900-NEXT: s_waitcnt vmcnt(0) 1102; GFX900-NEXT: s_setpc_b64 s[30:31] 1103; 1104; GFX90A-LABEL: v_shuffle_v3i16_v3i16__3_0_0: 1105; GFX90A: ; %bb.0: 1106; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1107; GFX90A-NEXT: v_mov_b32_e32 v2, 0 1108; GFX90A-NEXT: ;;#ASMSTART 1109; GFX90A-NEXT: ; def v[0:1] 1110; GFX90A-NEXT: ;;#ASMEND 1111; GFX90A-NEXT: v_lshlrev_b32_e32 v1, 16, v0 1112; GFX90A-NEXT: 
global_store_short v2, v0, s[16:17] offset:4 1113; GFX90A-NEXT: global_store_dword v2, v1, s[16:17] 1114; GFX90A-NEXT: s_waitcnt vmcnt(0) 1115; GFX90A-NEXT: s_setpc_b64 s[30:31] 1116; 1117; GFX940-LABEL: v_shuffle_v3i16_v3i16__3_0_0: 1118; GFX940: ; %bb.0: 1119; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1120; GFX940-NEXT: v_mov_b32_e32 v2, 0 1121; GFX940-NEXT: ;;#ASMSTART 1122; GFX940-NEXT: ; def v[0:1] 1123; GFX940-NEXT: ;;#ASMEND 1124; GFX940-NEXT: s_nop 0 1125; GFX940-NEXT: v_lshlrev_b32_e32 v1, 16, v0 1126; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 1127; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 1128; GFX940-NEXT: s_waitcnt vmcnt(0) 1129; GFX940-NEXT: s_setpc_b64 s[30:31] 1130 %vec0 = call <4 x i16> asm "; def $0", "=v"() 1131 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 1132 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> <i32 3, i32 0, i32 0> 1133 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 1134 ret void 1135} 1136 1137define void @v_shuffle_v3i16_v3i16__4_0_0(ptr addrspace(1) inreg %ptr) { 1138; GFX900-LABEL: v_shuffle_v3i16_v3i16__4_0_0: 1139; GFX900: ; %bb.0: 1140; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1141; GFX900-NEXT: ;;#ASMSTART 1142; GFX900-NEXT: ; def v[0:1] 1143; GFX900-NEXT: ;;#ASMEND 1144; GFX900-NEXT: v_mov_b32_e32 v3, 0 1145; GFX900-NEXT: ;;#ASMSTART 1146; GFX900-NEXT: ; def v[1:2] 1147; GFX900-NEXT: ;;#ASMEND 1148; GFX900-NEXT: v_alignbit_b32 v1, v0, v1, 16 1149; GFX900-NEXT: global_store_short v3, v0, s[16:17] offset:4 1150; GFX900-NEXT: global_store_dword v3, v1, s[16:17] 1151; GFX900-NEXT: s_waitcnt vmcnt(0) 1152; GFX900-NEXT: s_setpc_b64 s[30:31] 1153; 1154; GFX90A-LABEL: v_shuffle_v3i16_v3i16__4_0_0: 1155; GFX90A: ; %bb.0: 1156; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1157; GFX90A-NEXT: v_mov_b32_e32 v4, 0 1158; GFX90A-NEXT: ;;#ASMSTART 1159; GFX90A-NEXT: ; def v[0:1] 1160; GFX90A-NEXT: 
;;#ASMEND 1161; GFX90A-NEXT: ;;#ASMSTART 1162; GFX90A-NEXT: ; def v[2:3] 1163; GFX90A-NEXT: ;;#ASMEND 1164; GFX90A-NEXT: v_alignbit_b32 v1, v0, v2, 16 1165; GFX90A-NEXT: global_store_short v4, v0, s[16:17] offset:4 1166; GFX90A-NEXT: global_store_dword v4, v1, s[16:17] 1167; GFX90A-NEXT: s_waitcnt vmcnt(0) 1168; GFX90A-NEXT: s_setpc_b64 s[30:31] 1169; 1170; GFX940-LABEL: v_shuffle_v3i16_v3i16__4_0_0: 1171; GFX940: ; %bb.0: 1172; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1173; GFX940-NEXT: v_mov_b32_e32 v4, 0 1174; GFX940-NEXT: ;;#ASMSTART 1175; GFX940-NEXT: ; def v[0:1] 1176; GFX940-NEXT: ;;#ASMEND 1177; GFX940-NEXT: ;;#ASMSTART 1178; GFX940-NEXT: ; def v[2:3] 1179; GFX940-NEXT: ;;#ASMEND 1180; GFX940-NEXT: s_nop 0 1181; GFX940-NEXT: v_alignbit_b32 v1, v0, v2, 16 1182; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 1183; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 1184; GFX940-NEXT: s_waitcnt vmcnt(0) 1185; GFX940-NEXT: s_setpc_b64 s[30:31] 1186 %vec0 = call <4 x i16> asm "; def $0", "=v"() 1187 %vec1 = call <4 x i16> asm "; def $0", "=v"() 1188 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 1189 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 1190 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 4, i32 0, i32 0> 1191 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 1192 ret void 1193} 1194 1195define void @v_shuffle_v3i16_v3i16__5_0_0(ptr addrspace(1) inreg %ptr) { 1196; GFX900-LABEL: v_shuffle_v3i16_v3i16__5_0_0: 1197; GFX900: ; %bb.0: 1198; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1199; GFX900-NEXT: ;;#ASMSTART 1200; GFX900-NEXT: ; def v[0:1] 1201; GFX900-NEXT: ;;#ASMEND 1202; GFX900-NEXT: v_mov_b32_e32 v3, 0 1203; GFX900-NEXT: ;;#ASMSTART 1204; GFX900-NEXT: ; def v[1:2] 1205; GFX900-NEXT: ;;#ASMEND 1206; GFX900-NEXT: s_mov_b32 s4, 0x5040100 1207; GFX900-NEXT: v_perm_b32 v1, v0, v2, s4 
1208; GFX900-NEXT: global_store_short v3, v0, s[16:17] offset:4 1209; GFX900-NEXT: global_store_dword v3, v1, s[16:17] 1210; GFX900-NEXT: s_waitcnt vmcnt(0) 1211; GFX900-NEXT: s_setpc_b64 s[30:31] 1212; 1213; GFX90A-LABEL: v_shuffle_v3i16_v3i16__5_0_0: 1214; GFX90A: ; %bb.0: 1215; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1216; GFX90A-NEXT: v_mov_b32_e32 v4, 0 1217; GFX90A-NEXT: ;;#ASMSTART 1218; GFX90A-NEXT: ; def v[0:1] 1219; GFX90A-NEXT: ;;#ASMEND 1220; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 1221; GFX90A-NEXT: ;;#ASMSTART 1222; GFX90A-NEXT: ; def v[2:3] 1223; GFX90A-NEXT: ;;#ASMEND 1224; GFX90A-NEXT: v_perm_b32 v1, v0, v3, s4 1225; GFX90A-NEXT: global_store_short v4, v0, s[16:17] offset:4 1226; GFX90A-NEXT: global_store_dword v4, v1, s[16:17] 1227; GFX90A-NEXT: s_waitcnt vmcnt(0) 1228; GFX90A-NEXT: s_setpc_b64 s[30:31] 1229; 1230; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_0_0: 1231; GFX940: ; %bb.0: 1232; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1233; GFX940-NEXT: v_mov_b32_e32 v4, 0 1234; GFX940-NEXT: ;;#ASMSTART 1235; GFX940-NEXT: ; def v[0:1] 1236; GFX940-NEXT: ;;#ASMEND 1237; GFX940-NEXT: s_mov_b32 s2, 0x5040100 1238; GFX940-NEXT: ;;#ASMSTART 1239; GFX940-NEXT: ; def v[2:3] 1240; GFX940-NEXT: ;;#ASMEND 1241; GFX940-NEXT: s_nop 0 1242; GFX940-NEXT: v_perm_b32 v1, v0, v3, s2 1243; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 1244; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 1245; GFX940-NEXT: s_waitcnt vmcnt(0) 1246; GFX940-NEXT: s_setpc_b64 s[30:31] 1247 %vec0 = call <4 x i16> asm "; def $0", "=v"() 1248 %vec1 = call <4 x i16> asm "; def $0", "=v"() 1249 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 1250 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 1251 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 0, i32 0> 1252 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 1253 ret 
void 1254} 1255 1256define void @v_shuffle_v3i16_v3i16__5_u_0(ptr addrspace(1) inreg %ptr) { 1257; GFX900-LABEL: v_shuffle_v3i16_v3i16__5_u_0: 1258; GFX900: ; %bb.0: 1259; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1260; GFX900-NEXT: v_mov_b32_e32 v3, 0 1261; GFX900-NEXT: ;;#ASMSTART 1262; GFX900-NEXT: ; def v[0:1] 1263; GFX900-NEXT: ;;#ASMEND 1264; GFX900-NEXT: ;;#ASMSTART 1265; GFX900-NEXT: ; def v[1:2] 1266; GFX900-NEXT: ;;#ASMEND 1267; GFX900-NEXT: global_store_short v3, v0, s[16:17] offset:4 1268; GFX900-NEXT: global_store_dword v3, v2, s[16:17] 1269; GFX900-NEXT: s_waitcnt vmcnt(0) 1270; GFX900-NEXT: s_setpc_b64 s[30:31] 1271; 1272; GFX90A-LABEL: v_shuffle_v3i16_v3i16__5_u_0: 1273; GFX90A: ; %bb.0: 1274; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1275; GFX90A-NEXT: v_mov_b32_e32 v4, 0 1276; GFX90A-NEXT: ;;#ASMSTART 1277; GFX90A-NEXT: ; def v[0:1] 1278; GFX90A-NEXT: ;;#ASMEND 1279; GFX90A-NEXT: ;;#ASMSTART 1280; GFX90A-NEXT: ; def v[2:3] 1281; GFX90A-NEXT: ;;#ASMEND 1282; GFX90A-NEXT: global_store_short v4, v0, s[16:17] offset:4 1283; GFX90A-NEXT: global_store_dword v4, v3, s[16:17] 1284; GFX90A-NEXT: s_waitcnt vmcnt(0) 1285; GFX90A-NEXT: s_setpc_b64 s[30:31] 1286; 1287; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_u_0: 1288; GFX940: ; %bb.0: 1289; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1290; GFX940-NEXT: v_mov_b32_e32 v4, 0 1291; GFX940-NEXT: ;;#ASMSTART 1292; GFX940-NEXT: ; def v[0:1] 1293; GFX940-NEXT: ;;#ASMEND 1294; GFX940-NEXT: ;;#ASMSTART 1295; GFX940-NEXT: ; def v[2:3] 1296; GFX940-NEXT: ;;#ASMEND 1297; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 1298; GFX940-NEXT: global_store_dword v4, v3, s[0:1] sc0 sc1 1299; GFX940-NEXT: s_waitcnt vmcnt(0) 1300; GFX940-NEXT: s_setpc_b64 s[30:31] 1301 %vec0 = call <4 x i16> asm "; def $0", "=v"() 1302 %vec1 = call <4 x i16> asm "; def $0", "=v"() 1303 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 1304 %extract31 = 
shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 1305 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 poison, i32 0> 1306 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 1307 ret void 1308} 1309 1310define void @v_shuffle_v3i16_v3i16__5_1_0(ptr addrspace(1) inreg %ptr) { 1311; GFX900-LABEL: v_shuffle_v3i16_v3i16__5_1_0: 1312; GFX900: ; %bb.0: 1313; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1314; GFX900-NEXT: ;;#ASMSTART 1315; GFX900-NEXT: ; def v[0:1] 1316; GFX900-NEXT: ;;#ASMEND 1317; GFX900-NEXT: v_mov_b32_e32 v3, 0 1318; GFX900-NEXT: ;;#ASMSTART 1319; GFX900-NEXT: ; def v[1:2] 1320; GFX900-NEXT: ;;#ASMEND 1321; GFX900-NEXT: s_mov_b32 s4, 0xffff 1322; GFX900-NEXT: v_bfi_b32 v1, s4, v2, v0 1323; GFX900-NEXT: global_store_short v3, v0, s[16:17] offset:4 1324; GFX900-NEXT: global_store_dword v3, v1, s[16:17] 1325; GFX900-NEXT: s_waitcnt vmcnt(0) 1326; GFX900-NEXT: s_setpc_b64 s[30:31] 1327; 1328; GFX90A-LABEL: v_shuffle_v3i16_v3i16__5_1_0: 1329; GFX90A: ; %bb.0: 1330; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1331; GFX90A-NEXT: v_mov_b32_e32 v4, 0 1332; GFX90A-NEXT: ;;#ASMSTART 1333; GFX90A-NEXT: ; def v[0:1] 1334; GFX90A-NEXT: ;;#ASMEND 1335; GFX90A-NEXT: s_mov_b32 s4, 0xffff 1336; GFX90A-NEXT: ;;#ASMSTART 1337; GFX90A-NEXT: ; def v[2:3] 1338; GFX90A-NEXT: ;;#ASMEND 1339; GFX90A-NEXT: v_bfi_b32 v1, s4, v3, v0 1340; GFX90A-NEXT: global_store_short v4, v0, s[16:17] offset:4 1341; GFX90A-NEXT: global_store_dword v4, v1, s[16:17] 1342; GFX90A-NEXT: s_waitcnt vmcnt(0) 1343; GFX90A-NEXT: s_setpc_b64 s[30:31] 1344; 1345; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_1_0: 1346; GFX940: ; %bb.0: 1347; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1348; GFX940-NEXT: v_mov_b32_e32 v4, 0 1349; GFX940-NEXT: ;;#ASMSTART 1350; GFX940-NEXT: ; def v[0:1] 1351; GFX940-NEXT: ;;#ASMEND 1352; GFX940-NEXT: s_mov_b32 s2, 0xffff 1353; GFX940-NEXT: ;;#ASMSTART 1354; GFX940-NEXT: ; def v[2:3] 
1355; GFX940-NEXT: ;;#ASMEND 1356; GFX940-NEXT: s_nop 0 1357; GFX940-NEXT: v_bfi_b32 v1, s2, v3, v0 1358; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 1359; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 1360; GFX940-NEXT: s_waitcnt vmcnt(0) 1361; GFX940-NEXT: s_setpc_b64 s[30:31] 1362 %vec0 = call <4 x i16> asm "; def $0", "=v"() 1363 %vec1 = call <4 x i16> asm "; def $0", "=v"() 1364 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 1365 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 1366 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 1, i32 0> 1367 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 1368 ret void 1369} 1370 1371define void @v_shuffle_v3i16_v3i16__5_2_0(ptr addrspace(1) inreg %ptr) { 1372; GFX900-LABEL: v_shuffle_v3i16_v3i16__5_2_0: 1373; GFX900: ; %bb.0: 1374; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1375; GFX900-NEXT: v_mov_b32_e32 v4, 0 1376; GFX900-NEXT: ;;#ASMSTART 1377; GFX900-NEXT: ; def v[0:1] 1378; GFX900-NEXT: ;;#ASMEND 1379; GFX900-NEXT: s_mov_b32 s4, 0x5040100 1380; GFX900-NEXT: ;;#ASMSTART 1381; GFX900-NEXT: ; def v[2:3] 1382; GFX900-NEXT: ;;#ASMEND 1383; GFX900-NEXT: v_perm_b32 v1, v1, v3, s4 1384; GFX900-NEXT: global_store_short v4, v0, s[16:17] offset:4 1385; GFX900-NEXT: global_store_dword v4, v1, s[16:17] 1386; GFX900-NEXT: s_waitcnt vmcnt(0) 1387; GFX900-NEXT: s_setpc_b64 s[30:31] 1388; 1389; GFX90A-LABEL: v_shuffle_v3i16_v3i16__5_2_0: 1390; GFX90A: ; %bb.0: 1391; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1392; GFX90A-NEXT: v_mov_b32_e32 v4, 0 1393; GFX90A-NEXT: ;;#ASMSTART 1394; GFX90A-NEXT: ; def v[0:1] 1395; GFX90A-NEXT: ;;#ASMEND 1396; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 1397; GFX90A-NEXT: ;;#ASMSTART 1398; GFX90A-NEXT: ; def v[2:3] 1399; GFX90A-NEXT: ;;#ASMEND 1400; GFX90A-NEXT: v_perm_b32 v1, v1, v3, s4 1401; GFX90A-NEXT: global_store_short 
v4, v0, s[16:17] offset:4 1402; GFX90A-NEXT: global_store_dword v4, v1, s[16:17] 1403; GFX90A-NEXT: s_waitcnt vmcnt(0) 1404; GFX90A-NEXT: s_setpc_b64 s[30:31] 1405; 1406; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_2_0: 1407; GFX940: ; %bb.0: 1408; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1409; GFX940-NEXT: v_mov_b32_e32 v4, 0 1410; GFX940-NEXT: ;;#ASMSTART 1411; GFX940-NEXT: ; def v[0:1] 1412; GFX940-NEXT: ;;#ASMEND 1413; GFX940-NEXT: s_mov_b32 s2, 0x5040100 1414; GFX940-NEXT: ;;#ASMSTART 1415; GFX940-NEXT: ; def v[2:3] 1416; GFX940-NEXT: ;;#ASMEND 1417; GFX940-NEXT: s_nop 0 1418; GFX940-NEXT: v_perm_b32 v1, v1, v3, s2 1419; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 1420; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 1421; GFX940-NEXT: s_waitcnt vmcnt(0) 1422; GFX940-NEXT: s_setpc_b64 s[30:31] 1423 %vec0 = call <4 x i16> asm "; def $0", "=v"() 1424 %vec1 = call <4 x i16> asm "; def $0", "=v"() 1425 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 1426 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 1427 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 2, i32 0> 1428 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 1429 ret void 1430} 1431 1432define void @v_shuffle_v3i16_v3i16__5_3_0(ptr addrspace(1) inreg %ptr) { 1433; GFX900-LABEL: v_shuffle_v3i16_v3i16__5_3_0: 1434; GFX900: ; %bb.0: 1435; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1436; GFX900-NEXT: ;;#ASMSTART 1437; GFX900-NEXT: ; def v[0:1] 1438; GFX900-NEXT: ;;#ASMEND 1439; GFX900-NEXT: v_mov_b32_e32 v3, 0 1440; GFX900-NEXT: ;;#ASMSTART 1441; GFX900-NEXT: ; def v[1:2] 1442; GFX900-NEXT: ;;#ASMEND 1443; GFX900-NEXT: s_mov_b32 s4, 0x5040100 1444; GFX900-NEXT: v_perm_b32 v1, v1, v2, s4 1445; GFX900-NEXT: global_store_short v3, v0, s[16:17] offset:4 1446; GFX900-NEXT: global_store_dword v3, v1, s[16:17] 1447; GFX900-NEXT: s_waitcnt 
vmcnt(0) 1448; GFX900-NEXT: s_setpc_b64 s[30:31] 1449; 1450; GFX90A-LABEL: v_shuffle_v3i16_v3i16__5_3_0: 1451; GFX90A: ; %bb.0: 1452; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1453; GFX90A-NEXT: v_mov_b32_e32 v4, 0 1454; GFX90A-NEXT: ;;#ASMSTART 1455; GFX90A-NEXT: ; def v[0:1] 1456; GFX90A-NEXT: ;;#ASMEND 1457; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 1458; GFX90A-NEXT: ;;#ASMSTART 1459; GFX90A-NEXT: ; def v[2:3] 1460; GFX90A-NEXT: ;;#ASMEND 1461; GFX90A-NEXT: v_perm_b32 v1, v2, v3, s4 1462; GFX90A-NEXT: global_store_short v4, v0, s[16:17] offset:4 1463; GFX90A-NEXT: global_store_dword v4, v1, s[16:17] 1464; GFX90A-NEXT: s_waitcnt vmcnt(0) 1465; GFX90A-NEXT: s_setpc_b64 s[30:31] 1466; 1467; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_3_0: 1468; GFX940: ; %bb.0: 1469; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1470; GFX940-NEXT: v_mov_b32_e32 v4, 0 1471; GFX940-NEXT: ;;#ASMSTART 1472; GFX940-NEXT: ; def v[0:1] 1473; GFX940-NEXT: ;;#ASMEND 1474; GFX940-NEXT: s_mov_b32 s2, 0x5040100 1475; GFX940-NEXT: ;;#ASMSTART 1476; GFX940-NEXT: ; def v[2:3] 1477; GFX940-NEXT: ;;#ASMEND 1478; GFX940-NEXT: s_nop 0 1479; GFX940-NEXT: v_perm_b32 v1, v2, v3, s2 1480; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 1481; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 1482; GFX940-NEXT: s_waitcnt vmcnt(0) 1483; GFX940-NEXT: s_setpc_b64 s[30:31] 1484 %vec0 = call <4 x i16> asm "; def $0", "=v"() 1485 %vec1 = call <4 x i16> asm "; def $0", "=v"() 1486 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 1487 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 1488 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 3, i32 0> 1489 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 1490 ret void 1491} 1492 1493define void @v_shuffle_v3i16_v3i16__5_4_0(ptr addrspace(1) inreg %ptr) { 1494; GFX900-LABEL: v_shuffle_v3i16_v3i16__5_4_0: 1495; 
GFX900: ; %bb.0: 1496; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1497; GFX900-NEXT: ;;#ASMSTART 1498; GFX900-NEXT: ; def v[0:1] 1499; GFX900-NEXT: ;;#ASMEND 1500; GFX900-NEXT: v_mov_b32_e32 v3, 0 1501; GFX900-NEXT: ;;#ASMSTART 1502; GFX900-NEXT: ; def v[1:2] 1503; GFX900-NEXT: ;;#ASMEND 1504; GFX900-NEXT: s_mov_b32 s4, 0xffff 1505; GFX900-NEXT: v_bfi_b32 v1, s4, v2, v1 1506; GFX900-NEXT: global_store_short v3, v0, s[16:17] offset:4 1507; GFX900-NEXT: global_store_dword v3, v1, s[16:17] 1508; GFX900-NEXT: s_waitcnt vmcnt(0) 1509; GFX900-NEXT: s_setpc_b64 s[30:31] 1510; 1511; GFX90A-LABEL: v_shuffle_v3i16_v3i16__5_4_0: 1512; GFX90A: ; %bb.0: 1513; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1514; GFX90A-NEXT: v_mov_b32_e32 v4, 0 1515; GFX90A-NEXT: ;;#ASMSTART 1516; GFX90A-NEXT: ; def v[0:1] 1517; GFX90A-NEXT: ;;#ASMEND 1518; GFX90A-NEXT: s_mov_b32 s4, 0xffff 1519; GFX90A-NEXT: ;;#ASMSTART 1520; GFX90A-NEXT: ; def v[2:3] 1521; GFX90A-NEXT: ;;#ASMEND 1522; GFX90A-NEXT: v_bfi_b32 v1, s4, v3, v2 1523; GFX90A-NEXT: global_store_short v4, v0, s[16:17] offset:4 1524; GFX90A-NEXT: global_store_dword v4, v1, s[16:17] 1525; GFX90A-NEXT: s_waitcnt vmcnt(0) 1526; GFX90A-NEXT: s_setpc_b64 s[30:31] 1527; 1528; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_4_0: 1529; GFX940: ; %bb.0: 1530; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1531; GFX940-NEXT: v_mov_b32_e32 v4, 0 1532; GFX940-NEXT: ;;#ASMSTART 1533; GFX940-NEXT: ; def v[0:1] 1534; GFX940-NEXT: ;;#ASMEND 1535; GFX940-NEXT: s_mov_b32 s2, 0xffff 1536; GFX940-NEXT: ;;#ASMSTART 1537; GFX940-NEXT: ; def v[2:3] 1538; GFX940-NEXT: ;;#ASMEND 1539; GFX940-NEXT: s_nop 0 1540; GFX940-NEXT: v_bfi_b32 v1, s2, v3, v2 1541; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 1542; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 1543; GFX940-NEXT: s_waitcnt vmcnt(0) 1544; GFX940-NEXT: s_setpc_b64 s[30:31] 1545 %vec0 = call <4 x i16> asm "; def $0", "=v"() 1546 %vec1 = call <4 x i16> asm "; def $0", 
"=v"() 1547 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 1548 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 1549 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 4, i32 0> 1550 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 1551 ret void 1552} 1553 1554define void @v_shuffle_v3i16_v3i16__u_1_1(ptr addrspace(1) inreg %ptr) { 1555; GFX900-LABEL: v_shuffle_v3i16_v3i16__u_1_1: 1556; GFX900: ; %bb.0: 1557; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1558; GFX900-NEXT: v_mov_b32_e32 v2, 0 1559; GFX900-NEXT: ;;#ASMSTART 1560; GFX900-NEXT: ; def v[0:1] 1561; GFX900-NEXT: ;;#ASMEND 1562; GFX900-NEXT: global_store_short_d16_hi v2, v0, s[16:17] offset:4 1563; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 1564; GFX900-NEXT: s_waitcnt vmcnt(0) 1565; GFX900-NEXT: s_setpc_b64 s[30:31] 1566; 1567; GFX90A-LABEL: v_shuffle_v3i16_v3i16__u_1_1: 1568; GFX90A: ; %bb.0: 1569; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1570; GFX90A-NEXT: v_mov_b32_e32 v2, 0 1571; GFX90A-NEXT: ;;#ASMSTART 1572; GFX90A-NEXT: ; def v[0:1] 1573; GFX90A-NEXT: ;;#ASMEND 1574; GFX90A-NEXT: global_store_short_d16_hi v2, v0, s[16:17] offset:4 1575; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 1576; GFX90A-NEXT: s_waitcnt vmcnt(0) 1577; GFX90A-NEXT: s_setpc_b64 s[30:31] 1578; 1579; GFX940-LABEL: v_shuffle_v3i16_v3i16__u_1_1: 1580; GFX940: ; %bb.0: 1581; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1582; GFX940-NEXT: v_mov_b32_e32 v2, 0 1583; GFX940-NEXT: ;;#ASMSTART 1584; GFX940-NEXT: ; def v[0:1] 1585; GFX940-NEXT: ;;#ASMEND 1586; GFX940-NEXT: global_store_short_d16_hi v2, v0, s[0:1] offset:4 sc0 sc1 1587; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 1588; GFX940-NEXT: s_waitcnt vmcnt(0) 1589; GFX940-NEXT: s_setpc_b64 s[30:31] 1590 %vec0 = call <4 x i16> asm "; def $0", "=v"() 1591 %extract3 = shufflevector <4 x i16> %vec0, <4 x 
i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 1592 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> <i32 poison, i32 1, i32 1> 1593 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 1594 ret void 1595} 1596 1597define void @v_shuffle_v3i16_v3i16__0_1_1(ptr addrspace(1) inreg %ptr) { 1598; GFX900-LABEL: v_shuffle_v3i16_v3i16__0_1_1: 1599; GFX900: ; %bb.0: 1600; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1601; GFX900-NEXT: v_mov_b32_e32 v2, 0 1602; GFX900-NEXT: ;;#ASMSTART 1603; GFX900-NEXT: ; def v[0:1] 1604; GFX900-NEXT: ;;#ASMEND 1605; GFX900-NEXT: global_store_short_d16_hi v2, v0, s[16:17] offset:4 1606; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 1607; GFX900-NEXT: s_waitcnt vmcnt(0) 1608; GFX900-NEXT: s_setpc_b64 s[30:31] 1609; 1610; GFX90A-LABEL: v_shuffle_v3i16_v3i16__0_1_1: 1611; GFX90A: ; %bb.0: 1612; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1613; GFX90A-NEXT: v_mov_b32_e32 v2, 0 1614; GFX90A-NEXT: ;;#ASMSTART 1615; GFX90A-NEXT: ; def v[0:1] 1616; GFX90A-NEXT: ;;#ASMEND 1617; GFX90A-NEXT: global_store_short_d16_hi v2, v0, s[16:17] offset:4 1618; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 1619; GFX90A-NEXT: s_waitcnt vmcnt(0) 1620; GFX90A-NEXT: s_setpc_b64 s[30:31] 1621; 1622; GFX940-LABEL: v_shuffle_v3i16_v3i16__0_1_1: 1623; GFX940: ; %bb.0: 1624; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1625; GFX940-NEXT: v_mov_b32_e32 v2, 0 1626; GFX940-NEXT: ;;#ASMSTART 1627; GFX940-NEXT: ; def v[0:1] 1628; GFX940-NEXT: ;;#ASMEND 1629; GFX940-NEXT: global_store_short_d16_hi v2, v0, s[0:1] offset:4 sc0 sc1 1630; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 1631; GFX940-NEXT: s_waitcnt vmcnt(0) 1632; GFX940-NEXT: s_setpc_b64 s[30:31] 1633 %vec0 = call <4 x i16> asm "; def $0", "=v"() 1634 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 1635 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> <i32 0, i32 1, i32 1> 1636 store <3 x 
i16> %shuf, ptr addrspace(1) %ptr, align 8 1637 ret void 1638} 1639 1640define void @v_shuffle_v3i16_v3i16__1_1_1(ptr addrspace(1) inreg %ptr) { 1641; GFX900-LABEL: v_shuffle_v3i16_v3i16__1_1_1: 1642; GFX900: ; %bb.0: 1643; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1644; GFX900-NEXT: ;;#ASMSTART 1645; GFX900-NEXT: ; def v[0:1] 1646; GFX900-NEXT: ;;#ASMEND 1647; GFX900-NEXT: s_mov_b32 s4, 0x7060302 1648; GFX900-NEXT: v_mov_b32_e32 v2, 0 1649; GFX900-NEXT: v_perm_b32 v1, v0, v0, s4 1650; GFX900-NEXT: v_lshrrev_b32_e32 v0, 16, v0 1651; GFX900-NEXT: global_store_dword v2, v1, s[16:17] 1652; GFX900-NEXT: global_store_short v2, v0, s[16:17] offset:4 1653; GFX900-NEXT: s_waitcnt vmcnt(0) 1654; GFX900-NEXT: s_setpc_b64 s[30:31] 1655; 1656; GFX90A-LABEL: v_shuffle_v3i16_v3i16__1_1_1: 1657; GFX90A: ; %bb.0: 1658; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1659; GFX90A-NEXT: ;;#ASMSTART 1660; GFX90A-NEXT: ; def v[0:1] 1661; GFX90A-NEXT: ;;#ASMEND 1662; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 1663; GFX90A-NEXT: v_mov_b32_e32 v2, 0 1664; GFX90A-NEXT: v_perm_b32 v1, v0, v0, s4 1665; GFX90A-NEXT: v_lshrrev_b32_e32 v0, 16, v0 1666; GFX90A-NEXT: global_store_dword v2, v1, s[16:17] 1667; GFX90A-NEXT: global_store_short v2, v0, s[16:17] offset:4 1668; GFX90A-NEXT: s_waitcnt vmcnt(0) 1669; GFX90A-NEXT: s_setpc_b64 s[30:31] 1670; 1671; GFX940-LABEL: v_shuffle_v3i16_v3i16__1_1_1: 1672; GFX940: ; %bb.0: 1673; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1674; GFX940-NEXT: ;;#ASMSTART 1675; GFX940-NEXT: ; def v[0:1] 1676; GFX940-NEXT: ;;#ASMEND 1677; GFX940-NEXT: s_mov_b32 s2, 0x7060302 1678; GFX940-NEXT: v_mov_b32_e32 v2, 0 1679; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 1680; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 1681; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 1682; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 1683; GFX940-NEXT: s_waitcnt vmcnt(0) 1684; GFX940-NEXT: s_setpc_b64 s[30:31] 1685 %vec0 = call <4 x i16> asm "; def 
$0", "=v"() 1686 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 1687 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> <i32 1, i32 1, i32 1> 1688 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 1689 ret void 1690} 1691 1692define void @v_shuffle_v3i16_v3i16__2_1_1(ptr addrspace(1) inreg %ptr) { 1693; GFX900-LABEL: v_shuffle_v3i16_v3i16__2_1_1: 1694; GFX900: ; %bb.0: 1695; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1696; GFX900-NEXT: ;;#ASMSTART 1697; GFX900-NEXT: ; def v[0:1] 1698; GFX900-NEXT: ;;#ASMEND 1699; GFX900-NEXT: s_mov_b32 s4, 0xffff 1700; GFX900-NEXT: v_mov_b32_e32 v2, 0 1701; GFX900-NEXT: v_bfi_b32 v1, s4, v1, v0 1702; GFX900-NEXT: v_lshrrev_b32_e32 v0, 16, v0 1703; GFX900-NEXT: global_store_short v2, v0, s[16:17] offset:4 1704; GFX900-NEXT: global_store_dword v2, v1, s[16:17] 1705; GFX900-NEXT: s_waitcnt vmcnt(0) 1706; GFX900-NEXT: s_setpc_b64 s[30:31] 1707; 1708; GFX90A-LABEL: v_shuffle_v3i16_v3i16__2_1_1: 1709; GFX90A: ; %bb.0: 1710; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1711; GFX90A-NEXT: ;;#ASMSTART 1712; GFX90A-NEXT: ; def v[0:1] 1713; GFX90A-NEXT: ;;#ASMEND 1714; GFX90A-NEXT: s_mov_b32 s4, 0xffff 1715; GFX90A-NEXT: v_mov_b32_e32 v2, 0 1716; GFX90A-NEXT: v_bfi_b32 v1, s4, v1, v0 1717; GFX90A-NEXT: v_lshrrev_b32_e32 v0, 16, v0 1718; GFX90A-NEXT: global_store_short v2, v0, s[16:17] offset:4 1719; GFX90A-NEXT: global_store_dword v2, v1, s[16:17] 1720; GFX90A-NEXT: s_waitcnt vmcnt(0) 1721; GFX90A-NEXT: s_setpc_b64 s[30:31] 1722; 1723; GFX940-LABEL: v_shuffle_v3i16_v3i16__2_1_1: 1724; GFX940: ; %bb.0: 1725; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1726; GFX940-NEXT: ;;#ASMSTART 1727; GFX940-NEXT: ; def v[0:1] 1728; GFX940-NEXT: ;;#ASMEND 1729; GFX940-NEXT: s_mov_b32 s2, 0xffff 1730; GFX940-NEXT: v_mov_b32_e32 v2, 0 1731; GFX940-NEXT: v_bfi_b32 v1, s2, v1, v0 1732; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 1733; GFX940-NEXT: global_store_short 
v2, v0, s[0:1] offset:4 sc0 sc1 1734; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 1735; GFX940-NEXT: s_waitcnt vmcnt(0) 1736; GFX940-NEXT: s_setpc_b64 s[30:31] 1737 %vec0 = call <4 x i16> asm "; def $0", "=v"() 1738 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 1739 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> <i32 2, i32 1, i32 1> 1740 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 1741 ret void 1742} 1743 1744define void @v_shuffle_v3i16_v3i16__3_1_1(ptr addrspace(1) inreg %ptr) { 1745; GFX900-LABEL: v_shuffle_v3i16_v3i16__3_1_1: 1746; GFX900: ; %bb.0: 1747; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1748; GFX900-NEXT: v_mov_b32_e32 v2, 0 1749; GFX900-NEXT: ;;#ASMSTART 1750; GFX900-NEXT: ; def v[0:1] 1751; GFX900-NEXT: ;;#ASMEND 1752; GFX900-NEXT: global_store_short_d16_hi v2, v0, s[16:17] offset:4 1753; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 1754; GFX900-NEXT: s_waitcnt vmcnt(0) 1755; GFX900-NEXT: s_setpc_b64 s[30:31] 1756; 1757; GFX90A-LABEL: v_shuffle_v3i16_v3i16__3_1_1: 1758; GFX90A: ; %bb.0: 1759; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1760; GFX90A-NEXT: v_mov_b32_e32 v2, 0 1761; GFX90A-NEXT: ;;#ASMSTART 1762; GFX90A-NEXT: ; def v[0:1] 1763; GFX90A-NEXT: ;;#ASMEND 1764; GFX90A-NEXT: global_store_short_d16_hi v2, v0, s[16:17] offset:4 1765; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 1766; GFX90A-NEXT: s_waitcnt vmcnt(0) 1767; GFX90A-NEXT: s_setpc_b64 s[30:31] 1768; 1769; GFX940-LABEL: v_shuffle_v3i16_v3i16__3_1_1: 1770; GFX940: ; %bb.0: 1771; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1772; GFX940-NEXT: v_mov_b32_e32 v2, 0 1773; GFX940-NEXT: ;;#ASMSTART 1774; GFX940-NEXT: ; def v[0:1] 1775; GFX940-NEXT: ;;#ASMEND 1776; GFX940-NEXT: global_store_short_d16_hi v2, v0, s[0:1] offset:4 sc0 sc1 1777; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 1778; GFX940-NEXT: s_waitcnt vmcnt(0) 1779; GFX940-NEXT: s_setpc_b64 
s[30:31] 1780 %vec0 = call <4 x i16> asm "; def $0", "=v"() 1781 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 1782 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> <i32 3, i32 1, i32 1> 1783 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 1784 ret void 1785} 1786 1787define void @v_shuffle_v3i16_v3i16__4_1_1(ptr addrspace(1) inreg %ptr) { 1788; GFX900-LABEL: v_shuffle_v3i16_v3i16__4_1_1: 1789; GFX900: ; %bb.0: 1790; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1791; GFX900-NEXT: ;;#ASMSTART 1792; GFX900-NEXT: ; def v[0:1] 1793; GFX900-NEXT: ;;#ASMEND 1794; GFX900-NEXT: ;;#ASMSTART 1795; GFX900-NEXT: ; def v[1:2] 1796; GFX900-NEXT: ;;#ASMEND 1797; GFX900-NEXT: s_mov_b32 s4, 0x7060302 1798; GFX900-NEXT: v_mov_b32_e32 v3, 0 1799; GFX900-NEXT: v_perm_b32 v1, v0, v1, s4 1800; GFX900-NEXT: v_lshrrev_b32_e32 v0, 16, v0 1801; GFX900-NEXT: global_store_dword v3, v1, s[16:17] 1802; GFX900-NEXT: global_store_short v3, v0, s[16:17] offset:4 1803; GFX900-NEXT: s_waitcnt vmcnt(0) 1804; GFX900-NEXT: s_setpc_b64 s[30:31] 1805; 1806; GFX90A-LABEL: v_shuffle_v3i16_v3i16__4_1_1: 1807; GFX90A: ; %bb.0: 1808; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1809; GFX90A-NEXT: ;;#ASMSTART 1810; GFX90A-NEXT: ; def v[0:1] 1811; GFX90A-NEXT: ;;#ASMEND 1812; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 1813; GFX90A-NEXT: v_mov_b32_e32 v4, 0 1814; GFX90A-NEXT: ;;#ASMSTART 1815; GFX90A-NEXT: ; def v[2:3] 1816; GFX90A-NEXT: ;;#ASMEND 1817; GFX90A-NEXT: v_perm_b32 v1, v0, v2, s4 1818; GFX90A-NEXT: v_lshrrev_b32_e32 v0, 16, v0 1819; GFX90A-NEXT: global_store_dword v4, v1, s[16:17] 1820; GFX90A-NEXT: global_store_short v4, v0, s[16:17] offset:4 1821; GFX90A-NEXT: s_waitcnt vmcnt(0) 1822; GFX90A-NEXT: s_setpc_b64 s[30:31] 1823; 1824; GFX940-LABEL: v_shuffle_v3i16_v3i16__4_1_1: 1825; GFX940: ; %bb.0: 1826; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1827; GFX940-NEXT: ;;#ASMSTART 1828; GFX940-NEXT: ; def v[0:1] 
1829; GFX940-NEXT: ;;#ASMEND 1830; GFX940-NEXT: s_mov_b32 s2, 0x7060302 1831; GFX940-NEXT: v_mov_b32_e32 v4, 0 1832; GFX940-NEXT: ;;#ASMSTART 1833; GFX940-NEXT: ; def v[2:3] 1834; GFX940-NEXT: ;;#ASMEND 1835; GFX940-NEXT: s_nop 0 1836; GFX940-NEXT: v_perm_b32 v1, v0, v2, s2 1837; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 1838; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 1839; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 1840; GFX940-NEXT: s_waitcnt vmcnt(0) 1841; GFX940-NEXT: s_setpc_b64 s[30:31] 1842 %vec0 = call <4 x i16> asm "; def $0", "=v"() 1843 %vec1 = call <4 x i16> asm "; def $0", "=v"() 1844 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 1845 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 1846 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 4, i32 1, i32 1> 1847 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 1848 ret void 1849} 1850 1851define void @v_shuffle_v3i16_v3i16__5_1_1(ptr addrspace(1) inreg %ptr) { 1852; GFX900-LABEL: v_shuffle_v3i16_v3i16__5_1_1: 1853; GFX900: ; %bb.0: 1854; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1855; GFX900-NEXT: ;;#ASMSTART 1856; GFX900-NEXT: ; def v[0:1] 1857; GFX900-NEXT: ;;#ASMEND 1858; GFX900-NEXT: ;;#ASMSTART 1859; GFX900-NEXT: ; def v[1:2] 1860; GFX900-NEXT: ;;#ASMEND 1861; GFX900-NEXT: s_mov_b32 s4, 0xffff 1862; GFX900-NEXT: v_mov_b32_e32 v3, 0 1863; GFX900-NEXT: v_bfi_b32 v1, s4, v2, v0 1864; GFX900-NEXT: v_lshrrev_b32_e32 v0, 16, v0 1865; GFX900-NEXT: global_store_short v3, v0, s[16:17] offset:4 1866; GFX900-NEXT: global_store_dword v3, v1, s[16:17] 1867; GFX900-NEXT: s_waitcnt vmcnt(0) 1868; GFX900-NEXT: s_setpc_b64 s[30:31] 1869; 1870; GFX90A-LABEL: v_shuffle_v3i16_v3i16__5_1_1: 1871; GFX90A: ; %bb.0: 1872; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1873; GFX90A-NEXT: ;;#ASMSTART 1874; GFX90A-NEXT: ; def v[0:1] 1875; GFX90A-NEXT: 
;;#ASMEND 1876; GFX90A-NEXT: s_mov_b32 s4, 0xffff 1877; GFX90A-NEXT: v_mov_b32_e32 v4, 0 1878; GFX90A-NEXT: ;;#ASMSTART 1879; GFX90A-NEXT: ; def v[2:3] 1880; GFX90A-NEXT: ;;#ASMEND 1881; GFX90A-NEXT: v_bfi_b32 v1, s4, v3, v0 1882; GFX90A-NEXT: v_lshrrev_b32_e32 v0, 16, v0 1883; GFX90A-NEXT: global_store_short v4, v0, s[16:17] offset:4 1884; GFX90A-NEXT: global_store_dword v4, v1, s[16:17] 1885; GFX90A-NEXT: s_waitcnt vmcnt(0) 1886; GFX90A-NEXT: s_setpc_b64 s[30:31] 1887; 1888; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_1_1: 1889; GFX940: ; %bb.0: 1890; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1891; GFX940-NEXT: ;;#ASMSTART 1892; GFX940-NEXT: ; def v[0:1] 1893; GFX940-NEXT: ;;#ASMEND 1894; GFX940-NEXT: s_mov_b32 s2, 0xffff 1895; GFX940-NEXT: v_mov_b32_e32 v4, 0 1896; GFX940-NEXT: ;;#ASMSTART 1897; GFX940-NEXT: ; def v[2:3] 1898; GFX940-NEXT: ;;#ASMEND 1899; GFX940-NEXT: s_nop 0 1900; GFX940-NEXT: v_bfi_b32 v1, s2, v3, v0 1901; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 1902; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 1903; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 1904; GFX940-NEXT: s_waitcnt vmcnt(0) 1905; GFX940-NEXT: s_setpc_b64 s[30:31] 1906 %vec0 = call <4 x i16> asm "; def $0", "=v"() 1907 %vec1 = call <4 x i16> asm "; def $0", "=v"() 1908 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 1909 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 1910 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 1, i32 1> 1911 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 1912 ret void 1913} 1914 1915define void @v_shuffle_v3i16_v3i16__5_u_1(ptr addrspace(1) inreg %ptr) { 1916; GFX900-LABEL: v_shuffle_v3i16_v3i16__5_u_1: 1917; GFX900: ; %bb.0: 1918; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1919; GFX900-NEXT: v_mov_b32_e32 v3, 0 1920; GFX900-NEXT: ;;#ASMSTART 1921; GFX900-NEXT: ; def v[0:1] 
1922; GFX900-NEXT: ;;#ASMEND 1923; GFX900-NEXT: ;;#ASMSTART 1924; GFX900-NEXT: ; def v[1:2] 1925; GFX900-NEXT: ;;#ASMEND 1926; GFX900-NEXT: global_store_short_d16_hi v3, v0, s[16:17] offset:4 1927; GFX900-NEXT: global_store_dword v3, v2, s[16:17] 1928; GFX900-NEXT: s_waitcnt vmcnt(0) 1929; GFX900-NEXT: s_setpc_b64 s[30:31] 1930; 1931; GFX90A-LABEL: v_shuffle_v3i16_v3i16__5_u_1: 1932; GFX90A: ; %bb.0: 1933; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1934; GFX90A-NEXT: v_mov_b32_e32 v4, 0 1935; GFX90A-NEXT: ;;#ASMSTART 1936; GFX90A-NEXT: ; def v[0:1] 1937; GFX90A-NEXT: ;;#ASMEND 1938; GFX90A-NEXT: ;;#ASMSTART 1939; GFX90A-NEXT: ; def v[2:3] 1940; GFX90A-NEXT: ;;#ASMEND 1941; GFX90A-NEXT: global_store_short_d16_hi v4, v0, s[16:17] offset:4 1942; GFX90A-NEXT: global_store_dword v4, v3, s[16:17] 1943; GFX90A-NEXT: s_waitcnt vmcnt(0) 1944; GFX90A-NEXT: s_setpc_b64 s[30:31] 1945; 1946; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_u_1: 1947; GFX940: ; %bb.0: 1948; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1949; GFX940-NEXT: v_mov_b32_e32 v4, 0 1950; GFX940-NEXT: ;;#ASMSTART 1951; GFX940-NEXT: ; def v[0:1] 1952; GFX940-NEXT: ;;#ASMEND 1953; GFX940-NEXT: ;;#ASMSTART 1954; GFX940-NEXT: ; def v[2:3] 1955; GFX940-NEXT: ;;#ASMEND 1956; GFX940-NEXT: global_store_short_d16_hi v4, v0, s[0:1] offset:4 sc0 sc1 1957; GFX940-NEXT: global_store_dword v4, v3, s[0:1] sc0 sc1 1958; GFX940-NEXT: s_waitcnt vmcnt(0) 1959; GFX940-NEXT: s_setpc_b64 s[30:31] 1960 %vec0 = call <4 x i16> asm "; def $0", "=v"() 1961 %vec1 = call <4 x i16> asm "; def $0", "=v"() 1962 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 1963 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 1964 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 poison, i32 1> 1965 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 1966 ret void 1967} 1968 1969define void 
@v_shuffle_v3i16_v3i16__5_0_1(ptr addrspace(1) inreg %ptr) { 1970; GFX900-LABEL: v_shuffle_v3i16_v3i16__5_0_1: 1971; GFX900: ; %bb.0: 1972; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1973; GFX900-NEXT: ;;#ASMSTART 1974; GFX900-NEXT: ; def v[0:1] 1975; GFX900-NEXT: ;;#ASMEND 1976; GFX900-NEXT: v_mov_b32_e32 v3, 0 1977; GFX900-NEXT: ;;#ASMSTART 1978; GFX900-NEXT: ; def v[1:2] 1979; GFX900-NEXT: ;;#ASMEND 1980; GFX900-NEXT: s_mov_b32 s4, 0x5040100 1981; GFX900-NEXT: v_perm_b32 v1, v0, v2, s4 1982; GFX900-NEXT: global_store_short_d16_hi v3, v0, s[16:17] offset:4 1983; GFX900-NEXT: global_store_dword v3, v1, s[16:17] 1984; GFX900-NEXT: s_waitcnt vmcnt(0) 1985; GFX900-NEXT: s_setpc_b64 s[30:31] 1986; 1987; GFX90A-LABEL: v_shuffle_v3i16_v3i16__5_0_1: 1988; GFX90A: ; %bb.0: 1989; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1990; GFX90A-NEXT: v_mov_b32_e32 v4, 0 1991; GFX90A-NEXT: ;;#ASMSTART 1992; GFX90A-NEXT: ; def v[0:1] 1993; GFX90A-NEXT: ;;#ASMEND 1994; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 1995; GFX90A-NEXT: ;;#ASMSTART 1996; GFX90A-NEXT: ; def v[2:3] 1997; GFX90A-NEXT: ;;#ASMEND 1998; GFX90A-NEXT: v_perm_b32 v1, v0, v3, s4 1999; GFX90A-NEXT: global_store_short_d16_hi v4, v0, s[16:17] offset:4 2000; GFX90A-NEXT: global_store_dword v4, v1, s[16:17] 2001; GFX90A-NEXT: s_waitcnt vmcnt(0) 2002; GFX90A-NEXT: s_setpc_b64 s[30:31] 2003; 2004; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_0_1: 2005; GFX940: ; %bb.0: 2006; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2007; GFX940-NEXT: v_mov_b32_e32 v4, 0 2008; GFX940-NEXT: ;;#ASMSTART 2009; GFX940-NEXT: ; def v[0:1] 2010; GFX940-NEXT: ;;#ASMEND 2011; GFX940-NEXT: s_mov_b32 s2, 0x5040100 2012; GFX940-NEXT: ;;#ASMSTART 2013; GFX940-NEXT: ; def v[2:3] 2014; GFX940-NEXT: ;;#ASMEND 2015; GFX940-NEXT: s_nop 0 2016; GFX940-NEXT: v_perm_b32 v1, v0, v3, s2 2017; GFX940-NEXT: global_store_short_d16_hi v4, v0, s[0:1] offset:4 sc0 sc1 2018; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 2019; GFX940-NEXT: 
s_waitcnt vmcnt(0) 2020; GFX940-NEXT: s_setpc_b64 s[30:31] 2021 %vec0 = call <4 x i16> asm "; def $0", "=v"() 2022 %vec1 = call <4 x i16> asm "; def $0", "=v"() 2023 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 2024 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 2025 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 0, i32 1> 2026 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 2027 ret void 2028} 2029 2030define void @v_shuffle_v3i16_v3i16__5_2_1(ptr addrspace(1) inreg %ptr) { 2031; GFX900-LABEL: v_shuffle_v3i16_v3i16__5_2_1: 2032; GFX900: ; %bb.0: 2033; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2034; GFX900-NEXT: v_mov_b32_e32 v4, 0 2035; GFX900-NEXT: ;;#ASMSTART 2036; GFX900-NEXT: ; def v[0:1] 2037; GFX900-NEXT: ;;#ASMEND 2038; GFX900-NEXT: s_mov_b32 s4, 0x5040100 2039; GFX900-NEXT: ;;#ASMSTART 2040; GFX900-NEXT: ; def v[2:3] 2041; GFX900-NEXT: ;;#ASMEND 2042; GFX900-NEXT: v_perm_b32 v1, v1, v3, s4 2043; GFX900-NEXT: global_store_short_d16_hi v4, v0, s[16:17] offset:4 2044; GFX900-NEXT: global_store_dword v4, v1, s[16:17] 2045; GFX900-NEXT: s_waitcnt vmcnt(0) 2046; GFX900-NEXT: s_setpc_b64 s[30:31] 2047; 2048; GFX90A-LABEL: v_shuffle_v3i16_v3i16__5_2_1: 2049; GFX90A: ; %bb.0: 2050; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2051; GFX90A-NEXT: v_mov_b32_e32 v4, 0 2052; GFX90A-NEXT: ;;#ASMSTART 2053; GFX90A-NEXT: ; def v[0:1] 2054; GFX90A-NEXT: ;;#ASMEND 2055; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 2056; GFX90A-NEXT: ;;#ASMSTART 2057; GFX90A-NEXT: ; def v[2:3] 2058; GFX90A-NEXT: ;;#ASMEND 2059; GFX90A-NEXT: v_perm_b32 v1, v1, v3, s4 2060; GFX90A-NEXT: global_store_short_d16_hi v4, v0, s[16:17] offset:4 2061; GFX90A-NEXT: global_store_dword v4, v1, s[16:17] 2062; GFX90A-NEXT: s_waitcnt vmcnt(0) 2063; GFX90A-NEXT: s_setpc_b64 s[30:31] 2064; 2065; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_2_1: 2066; GFX940: ; %bb.0: 
2067; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2068; GFX940-NEXT: v_mov_b32_e32 v4, 0 2069; GFX940-NEXT: ;;#ASMSTART 2070; GFX940-NEXT: ; def v[0:1] 2071; GFX940-NEXT: ;;#ASMEND 2072; GFX940-NEXT: s_mov_b32 s2, 0x5040100 2073; GFX940-NEXT: ;;#ASMSTART 2074; GFX940-NEXT: ; def v[2:3] 2075; GFX940-NEXT: ;;#ASMEND 2076; GFX940-NEXT: s_nop 0 2077; GFX940-NEXT: v_perm_b32 v1, v1, v3, s2 2078; GFX940-NEXT: global_store_short_d16_hi v4, v0, s[0:1] offset:4 sc0 sc1 2079; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 2080; GFX940-NEXT: s_waitcnt vmcnt(0) 2081; GFX940-NEXT: s_setpc_b64 s[30:31] 2082 %vec0 = call <4 x i16> asm "; def $0", "=v"() 2083 %vec1 = call <4 x i16> asm "; def $0", "=v"() 2084 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 2085 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 2086 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 2, i32 1> 2087 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 2088 ret void 2089} 2090 2091define void @v_shuffle_v3i16_v3i16__5_3_1(ptr addrspace(1) inreg %ptr) { 2092; GFX900-LABEL: v_shuffle_v3i16_v3i16__5_3_1: 2093; GFX900: ; %bb.0: 2094; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2095; GFX900-NEXT: ;;#ASMSTART 2096; GFX900-NEXT: ; def v[0:1] 2097; GFX900-NEXT: ;;#ASMEND 2098; GFX900-NEXT: v_mov_b32_e32 v3, 0 2099; GFX900-NEXT: ;;#ASMSTART 2100; GFX900-NEXT: ; def v[1:2] 2101; GFX900-NEXT: ;;#ASMEND 2102; GFX900-NEXT: s_mov_b32 s4, 0x5040100 2103; GFX900-NEXT: v_perm_b32 v1, v1, v2, s4 2104; GFX900-NEXT: global_store_short_d16_hi v3, v0, s[16:17] offset:4 2105; GFX900-NEXT: global_store_dword v3, v1, s[16:17] 2106; GFX900-NEXT: s_waitcnt vmcnt(0) 2107; GFX900-NEXT: s_setpc_b64 s[30:31] 2108; 2109; GFX90A-LABEL: v_shuffle_v3i16_v3i16__5_3_1: 2110; GFX90A: ; %bb.0: 2111; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2112; GFX90A-NEXT: v_mov_b32_e32 v4, 0 
2113; GFX90A-NEXT: ;;#ASMSTART 2114; GFX90A-NEXT: ; def v[0:1] 2115; GFX90A-NEXT: ;;#ASMEND 2116; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 2117; GFX90A-NEXT: ;;#ASMSTART 2118; GFX90A-NEXT: ; def v[2:3] 2119; GFX90A-NEXT: ;;#ASMEND 2120; GFX90A-NEXT: v_perm_b32 v1, v2, v3, s4 2121; GFX90A-NEXT: global_store_short_d16_hi v4, v0, s[16:17] offset:4 2122; GFX90A-NEXT: global_store_dword v4, v1, s[16:17] 2123; GFX90A-NEXT: s_waitcnt vmcnt(0) 2124; GFX90A-NEXT: s_setpc_b64 s[30:31] 2125; 2126; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_3_1: 2127; GFX940: ; %bb.0: 2128; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2129; GFX940-NEXT: v_mov_b32_e32 v4, 0 2130; GFX940-NEXT: ;;#ASMSTART 2131; GFX940-NEXT: ; def v[0:1] 2132; GFX940-NEXT: ;;#ASMEND 2133; GFX940-NEXT: s_mov_b32 s2, 0x5040100 2134; GFX940-NEXT: ;;#ASMSTART 2135; GFX940-NEXT: ; def v[2:3] 2136; GFX940-NEXT: ;;#ASMEND 2137; GFX940-NEXT: s_nop 0 2138; GFX940-NEXT: v_perm_b32 v1, v2, v3, s2 2139; GFX940-NEXT: global_store_short_d16_hi v4, v0, s[0:1] offset:4 sc0 sc1 2140; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 2141; GFX940-NEXT: s_waitcnt vmcnt(0) 2142; GFX940-NEXT: s_setpc_b64 s[30:31] 2143 %vec0 = call <4 x i16> asm "; def $0", "=v"() 2144 %vec1 = call <4 x i16> asm "; def $0", "=v"() 2145 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 2146 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 2147 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 3, i32 1> 2148 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 2149 ret void 2150} 2151 2152define void @v_shuffle_v3i16_v3i16__5_4_1(ptr addrspace(1) inreg %ptr) { 2153; GFX900-LABEL: v_shuffle_v3i16_v3i16__5_4_1: 2154; GFX900: ; %bb.0: 2155; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2156; GFX900-NEXT: ;;#ASMSTART 2157; GFX900-NEXT: ; def v[0:1] 2158; GFX900-NEXT: ;;#ASMEND 2159; GFX900-NEXT: v_mov_b32_e32 v3, 0 2160; 
GFX900-NEXT: ;;#ASMSTART 2161; GFX900-NEXT: ; def v[1:2] 2162; GFX900-NEXT: ;;#ASMEND 2163; GFX900-NEXT: s_mov_b32 s4, 0xffff 2164; GFX900-NEXT: v_bfi_b32 v1, s4, v2, v1 2165; GFX900-NEXT: global_store_short_d16_hi v3, v0, s[16:17] offset:4 2166; GFX900-NEXT: global_store_dword v3, v1, s[16:17] 2167; GFX900-NEXT: s_waitcnt vmcnt(0) 2168; GFX900-NEXT: s_setpc_b64 s[30:31] 2169; 2170; GFX90A-LABEL: v_shuffle_v3i16_v3i16__5_4_1: 2171; GFX90A: ; %bb.0: 2172; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2173; GFX90A-NEXT: v_mov_b32_e32 v4, 0 2174; GFX90A-NEXT: ;;#ASMSTART 2175; GFX90A-NEXT: ; def v[0:1] 2176; GFX90A-NEXT: ;;#ASMEND 2177; GFX90A-NEXT: s_mov_b32 s4, 0xffff 2178; GFX90A-NEXT: ;;#ASMSTART 2179; GFX90A-NEXT: ; def v[2:3] 2180; GFX90A-NEXT: ;;#ASMEND 2181; GFX90A-NEXT: v_bfi_b32 v1, s4, v3, v2 2182; GFX90A-NEXT: global_store_short_d16_hi v4, v0, s[16:17] offset:4 2183; GFX90A-NEXT: global_store_dword v4, v1, s[16:17] 2184; GFX90A-NEXT: s_waitcnt vmcnt(0) 2185; GFX90A-NEXT: s_setpc_b64 s[30:31] 2186; 2187; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_4_1: 2188; GFX940: ; %bb.0: 2189; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2190; GFX940-NEXT: v_mov_b32_e32 v4, 0 2191; GFX940-NEXT: ;;#ASMSTART 2192; GFX940-NEXT: ; def v[0:1] 2193; GFX940-NEXT: ;;#ASMEND 2194; GFX940-NEXT: s_mov_b32 s2, 0xffff 2195; GFX940-NEXT: ;;#ASMSTART 2196; GFX940-NEXT: ; def v[2:3] 2197; GFX940-NEXT: ;;#ASMEND 2198; GFX940-NEXT: s_nop 0 2199; GFX940-NEXT: v_bfi_b32 v1, s2, v3, v2 2200; GFX940-NEXT: global_store_short_d16_hi v4, v0, s[0:1] offset:4 sc0 sc1 2201; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 2202; GFX940-NEXT: s_waitcnt vmcnt(0) 2203; GFX940-NEXT: s_setpc_b64 s[30:31] 2204 %vec0 = call <4 x i16> asm "; def $0", "=v"() 2205 %vec1 = call <4 x i16> asm "; def $0", "=v"() 2206 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 2207 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 
1, i32 2> 2208 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 4, i32 1> 2209 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 2210 ret void 2211} 2212 2213define void @v_shuffle_v3i16_v3i16__u_2_2(ptr addrspace(1) inreg %ptr) { 2214; GFX900-LABEL: v_shuffle_v3i16_v3i16__u_2_2: 2215; GFX900: ; %bb.0: 2216; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2217; GFX900-NEXT: v_mov_b32_e32 v2, 0 2218; GFX900-NEXT: ;;#ASMSTART 2219; GFX900-NEXT: ; def v[0:1] 2220; GFX900-NEXT: ;;#ASMEND 2221; GFX900-NEXT: v_lshlrev_b32_e32 v0, 16, v1 2222; GFX900-NEXT: global_store_short v2, v1, s[16:17] offset:4 2223; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 2224; GFX900-NEXT: s_waitcnt vmcnt(0) 2225; GFX900-NEXT: s_setpc_b64 s[30:31] 2226; 2227; GFX90A-LABEL: v_shuffle_v3i16_v3i16__u_2_2: 2228; GFX90A: ; %bb.0: 2229; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2230; GFX90A-NEXT: v_mov_b32_e32 v2, 0 2231; GFX90A-NEXT: ;;#ASMSTART 2232; GFX90A-NEXT: ; def v[0:1] 2233; GFX90A-NEXT: ;;#ASMEND 2234; GFX90A-NEXT: v_lshlrev_b32_e32 v0, 16, v1 2235; GFX90A-NEXT: global_store_short v2, v1, s[16:17] offset:4 2236; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 2237; GFX90A-NEXT: s_waitcnt vmcnt(0) 2238; GFX90A-NEXT: s_setpc_b64 s[30:31] 2239; 2240; GFX940-LABEL: v_shuffle_v3i16_v3i16__u_2_2: 2241; GFX940: ; %bb.0: 2242; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2243; GFX940-NEXT: v_mov_b32_e32 v2, 0 2244; GFX940-NEXT: ;;#ASMSTART 2245; GFX940-NEXT: ; def v[0:1] 2246; GFX940-NEXT: ;;#ASMEND 2247; GFX940-NEXT: s_nop 0 2248; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v1 2249; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 2250; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 2251; GFX940-NEXT: s_waitcnt vmcnt(0) 2252; GFX940-NEXT: s_setpc_b64 s[30:31] 2253 %vec0 = call <4 x i16> asm "; def $0", "=v"() 2254 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 
2255 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> <i32 poison, i32 2, i32 2> 2256 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 2257 ret void 2258} 2259 2260define void @v_shuffle_v3i16_v3i16__0_2_2(ptr addrspace(1) inreg %ptr) { 2261; GFX900-LABEL: v_shuffle_v3i16_v3i16__0_2_2: 2262; GFX900: ; %bb.0: 2263; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2264; GFX900-NEXT: v_mov_b32_e32 v2, 0 2265; GFX900-NEXT: ;;#ASMSTART 2266; GFX900-NEXT: ; def v[0:1] 2267; GFX900-NEXT: ;;#ASMEND 2268; GFX900-NEXT: s_mov_b32 s4, 0x5040100 2269; GFX900-NEXT: v_perm_b32 v0, v1, v0, s4 2270; GFX900-NEXT: global_store_short v2, v1, s[16:17] offset:4 2271; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 2272; GFX900-NEXT: s_waitcnt vmcnt(0) 2273; GFX900-NEXT: s_setpc_b64 s[30:31] 2274; 2275; GFX90A-LABEL: v_shuffle_v3i16_v3i16__0_2_2: 2276; GFX90A: ; %bb.0: 2277; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2278; GFX90A-NEXT: v_mov_b32_e32 v2, 0 2279; GFX90A-NEXT: ;;#ASMSTART 2280; GFX90A-NEXT: ; def v[0:1] 2281; GFX90A-NEXT: ;;#ASMEND 2282; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 2283; GFX90A-NEXT: v_perm_b32 v0, v1, v0, s4 2284; GFX90A-NEXT: global_store_short v2, v1, s[16:17] offset:4 2285; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 2286; GFX90A-NEXT: s_waitcnt vmcnt(0) 2287; GFX90A-NEXT: s_setpc_b64 s[30:31] 2288; 2289; GFX940-LABEL: v_shuffle_v3i16_v3i16__0_2_2: 2290; GFX940: ; %bb.0: 2291; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2292; GFX940-NEXT: v_mov_b32_e32 v2, 0 2293; GFX940-NEXT: ;;#ASMSTART 2294; GFX940-NEXT: ; def v[0:1] 2295; GFX940-NEXT: ;;#ASMEND 2296; GFX940-NEXT: s_mov_b32 s2, 0x5040100 2297; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2 2298; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 2299; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 2300; GFX940-NEXT: s_waitcnt vmcnt(0) 2301; GFX940-NEXT: s_setpc_b64 s[30:31] 2302 %vec0 = call <4 x i16> asm "; def $0", "=v"() 2303 %extract3 = 
shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 2304 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> <i32 0, i32 2, i32 2> 2305 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 2306 ret void 2307} 2308 2309define void @v_shuffle_v3i16_v3i16__1_2_2(ptr addrspace(1) inreg %ptr) { 2310; GFX900-LABEL: v_shuffle_v3i16_v3i16__1_2_2: 2311; GFX900: ; %bb.0: 2312; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2313; GFX900-NEXT: v_mov_b32_e32 v2, 0 2314; GFX900-NEXT: ;;#ASMSTART 2315; GFX900-NEXT: ; def v[0:1] 2316; GFX900-NEXT: ;;#ASMEND 2317; GFX900-NEXT: v_alignbit_b32 v0, v1, v0, 16 2318; GFX900-NEXT: global_store_short v2, v1, s[16:17] offset:4 2319; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 2320; GFX900-NEXT: s_waitcnt vmcnt(0) 2321; GFX900-NEXT: s_setpc_b64 s[30:31] 2322; 2323; GFX90A-LABEL: v_shuffle_v3i16_v3i16__1_2_2: 2324; GFX90A: ; %bb.0: 2325; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2326; GFX90A-NEXT: v_mov_b32_e32 v2, 0 2327; GFX90A-NEXT: ;;#ASMSTART 2328; GFX90A-NEXT: ; def v[0:1] 2329; GFX90A-NEXT: ;;#ASMEND 2330; GFX90A-NEXT: v_alignbit_b32 v0, v1, v0, 16 2331; GFX90A-NEXT: global_store_short v2, v1, s[16:17] offset:4 2332; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 2333; GFX90A-NEXT: s_waitcnt vmcnt(0) 2334; GFX90A-NEXT: s_setpc_b64 s[30:31] 2335; 2336; GFX940-LABEL: v_shuffle_v3i16_v3i16__1_2_2: 2337; GFX940: ; %bb.0: 2338; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2339; GFX940-NEXT: v_mov_b32_e32 v2, 0 2340; GFX940-NEXT: ;;#ASMSTART 2341; GFX940-NEXT: ; def v[0:1] 2342; GFX940-NEXT: ;;#ASMEND 2343; GFX940-NEXT: s_nop 0 2344; GFX940-NEXT: v_alignbit_b32 v0, v1, v0, 16 2345; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 2346; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 2347; GFX940-NEXT: s_waitcnt vmcnt(0) 2348; GFX940-NEXT: s_setpc_b64 s[30:31] 2349 %vec0 = call <4 x i16> asm "; def $0", "=v"() 2350 %extract3 = shufflevector 
<4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 2351 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> <i32 1, i32 2, i32 2> 2352 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 2353 ret void 2354} 2355 2356define void @v_shuffle_v3i16_v3i16__2_2_2(ptr addrspace(1) inreg %ptr) { 2357; GFX900-LABEL: v_shuffle_v3i16_v3i16__2_2_2: 2358; GFX900: ; %bb.0: 2359; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2360; GFX900-NEXT: v_mov_b32_e32 v2, 0 2361; GFX900-NEXT: ;;#ASMSTART 2362; GFX900-NEXT: ; def v[0:1] 2363; GFX900-NEXT: ;;#ASMEND 2364; GFX900-NEXT: s_mov_b32 s4, 0x5040100 2365; GFX900-NEXT: v_perm_b32 v0, v1, v1, s4 2366; GFX900-NEXT: global_store_short v2, v1, s[16:17] offset:4 2367; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 2368; GFX900-NEXT: s_waitcnt vmcnt(0) 2369; GFX900-NEXT: s_setpc_b64 s[30:31] 2370; 2371; GFX90A-LABEL: v_shuffle_v3i16_v3i16__2_2_2: 2372; GFX90A: ; %bb.0: 2373; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2374; GFX90A-NEXT: v_mov_b32_e32 v2, 0 2375; GFX90A-NEXT: ;;#ASMSTART 2376; GFX90A-NEXT: ; def v[0:1] 2377; GFX90A-NEXT: ;;#ASMEND 2378; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 2379; GFX90A-NEXT: v_perm_b32 v0, v1, v1, s4 2380; GFX90A-NEXT: global_store_short v2, v1, s[16:17] offset:4 2381; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 2382; GFX90A-NEXT: s_waitcnt vmcnt(0) 2383; GFX90A-NEXT: s_setpc_b64 s[30:31] 2384; 2385; GFX940-LABEL: v_shuffle_v3i16_v3i16__2_2_2: 2386; GFX940: ; %bb.0: 2387; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2388; GFX940-NEXT: v_mov_b32_e32 v2, 0 2389; GFX940-NEXT: ;;#ASMSTART 2390; GFX940-NEXT: ; def v[0:1] 2391; GFX940-NEXT: ;;#ASMEND 2392; GFX940-NEXT: s_mov_b32 s2, 0x5040100 2393; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 2394; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 2395; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 2396; GFX940-NEXT: s_waitcnt vmcnt(0) 2397; GFX940-NEXT: s_setpc_b64 s[30:31] 2398 
%vec0 = call <4 x i16> asm "; def $0", "=v"() 2399 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 2400 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> <i32 2, i32 2, i32 2> 2401 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 2402 ret void 2403} 2404 2405define void @v_shuffle_v3i16_v3i16__3_2_2(ptr addrspace(1) inreg %ptr) { 2406; GFX900-LABEL: v_shuffle_v3i16_v3i16__3_2_2: 2407; GFX900: ; %bb.0: 2408; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2409; GFX900-NEXT: v_mov_b32_e32 v2, 0 2410; GFX900-NEXT: ;;#ASMSTART 2411; GFX900-NEXT: ; def v[0:1] 2412; GFX900-NEXT: ;;#ASMEND 2413; GFX900-NEXT: v_lshlrev_b32_e32 v0, 16, v1 2414; GFX900-NEXT: global_store_short v2, v1, s[16:17] offset:4 2415; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 2416; GFX900-NEXT: s_waitcnt vmcnt(0) 2417; GFX900-NEXT: s_setpc_b64 s[30:31] 2418; 2419; GFX90A-LABEL: v_shuffle_v3i16_v3i16__3_2_2: 2420; GFX90A: ; %bb.0: 2421; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2422; GFX90A-NEXT: v_mov_b32_e32 v2, 0 2423; GFX90A-NEXT: ;;#ASMSTART 2424; GFX90A-NEXT: ; def v[0:1] 2425; GFX90A-NEXT: ;;#ASMEND 2426; GFX90A-NEXT: v_lshlrev_b32_e32 v0, 16, v1 2427; GFX90A-NEXT: global_store_short v2, v1, s[16:17] offset:4 2428; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 2429; GFX90A-NEXT: s_waitcnt vmcnt(0) 2430; GFX90A-NEXT: s_setpc_b64 s[30:31] 2431; 2432; GFX940-LABEL: v_shuffle_v3i16_v3i16__3_2_2: 2433; GFX940: ; %bb.0: 2434; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2435; GFX940-NEXT: v_mov_b32_e32 v2, 0 2436; GFX940-NEXT: ;;#ASMSTART 2437; GFX940-NEXT: ; def v[0:1] 2438; GFX940-NEXT: ;;#ASMEND 2439; GFX940-NEXT: s_nop 0 2440; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v1 2441; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 2442; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 2443; GFX940-NEXT: s_waitcnt vmcnt(0) 2444; GFX940-NEXT: s_setpc_b64 s[30:31] 2445 %vec0 = call <4 
x i16> asm "; def $0", "=v"() 2446 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 2447 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> <i32 3, i32 2, i32 2> 2448 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 2449 ret void 2450} 2451 2452define void @v_shuffle_v3i16_v3i16__4_2_2(ptr addrspace(1) inreg %ptr) { 2453; GFX900-LABEL: v_shuffle_v3i16_v3i16__4_2_2: 2454; GFX900: ; %bb.0: 2455; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2456; GFX900-NEXT: v_mov_b32_e32 v4, 0 2457; GFX900-NEXT: ;;#ASMSTART 2458; GFX900-NEXT: ; def v[0:1] 2459; GFX900-NEXT: ;;#ASMEND 2460; GFX900-NEXT: ;;#ASMSTART 2461; GFX900-NEXT: ; def v[2:3] 2462; GFX900-NEXT: ;;#ASMEND 2463; GFX900-NEXT: v_alignbit_b32 v0, v1, v2, 16 2464; GFX900-NEXT: global_store_short v4, v1, s[16:17] offset:4 2465; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 2466; GFX900-NEXT: s_waitcnt vmcnt(0) 2467; GFX900-NEXT: s_setpc_b64 s[30:31] 2468; 2469; GFX90A-LABEL: v_shuffle_v3i16_v3i16__4_2_2: 2470; GFX90A: ; %bb.0: 2471; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2472; GFX90A-NEXT: v_mov_b32_e32 v4, 0 2473; GFX90A-NEXT: ;;#ASMSTART 2474; GFX90A-NEXT: ; def v[0:1] 2475; GFX90A-NEXT: ;;#ASMEND 2476; GFX90A-NEXT: ;;#ASMSTART 2477; GFX90A-NEXT: ; def v[2:3] 2478; GFX90A-NEXT: ;;#ASMEND 2479; GFX90A-NEXT: v_alignbit_b32 v0, v1, v2, 16 2480; GFX90A-NEXT: global_store_short v4, v1, s[16:17] offset:4 2481; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 2482; GFX90A-NEXT: s_waitcnt vmcnt(0) 2483; GFX90A-NEXT: s_setpc_b64 s[30:31] 2484; 2485; GFX940-LABEL: v_shuffle_v3i16_v3i16__4_2_2: 2486; GFX940: ; %bb.0: 2487; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2488; GFX940-NEXT: v_mov_b32_e32 v4, 0 2489; GFX940-NEXT: ;;#ASMSTART 2490; GFX940-NEXT: ; def v[0:1] 2491; GFX940-NEXT: ;;#ASMEND 2492; GFX940-NEXT: ;;#ASMSTART 2493; GFX940-NEXT: ; def v[2:3] 2494; GFX940-NEXT: ;;#ASMEND 2495; GFX940-NEXT: s_nop 0 2496; 
GFX940-NEXT: v_alignbit_b32 v0, v1, v2, 16 2497; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 2498; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 2499; GFX940-NEXT: s_waitcnt vmcnt(0) 2500; GFX940-NEXT: s_setpc_b64 s[30:31] 2501 %vec0 = call <4 x i16> asm "; def $0", "=v"() 2502 %vec1 = call <4 x i16> asm "; def $0", "=v"() 2503 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 2504 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 2505 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 4, i32 2, i32 2> 2506 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 2507 ret void 2508} 2509 2510define void @v_shuffle_v3i16_v3i16__5_2_2(ptr addrspace(1) inreg %ptr) { 2511; GFX900-LABEL: v_shuffle_v3i16_v3i16__5_2_2: 2512; GFX900: ; %bb.0: 2513; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2514; GFX900-NEXT: v_mov_b32_e32 v4, 0 2515; GFX900-NEXT: ;;#ASMSTART 2516; GFX900-NEXT: ; def v[0:1] 2517; GFX900-NEXT: ;;#ASMEND 2518; GFX900-NEXT: s_mov_b32 s4, 0x5040100 2519; GFX900-NEXT: ;;#ASMSTART 2520; GFX900-NEXT: ; def v[2:3] 2521; GFX900-NEXT: ;;#ASMEND 2522; GFX900-NEXT: v_perm_b32 v0, v1, v3, s4 2523; GFX900-NEXT: global_store_short v4, v1, s[16:17] offset:4 2524; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 2525; GFX900-NEXT: s_waitcnt vmcnt(0) 2526; GFX900-NEXT: s_setpc_b64 s[30:31] 2527; 2528; GFX90A-LABEL: v_shuffle_v3i16_v3i16__5_2_2: 2529; GFX90A: ; %bb.0: 2530; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2531; GFX90A-NEXT: v_mov_b32_e32 v4, 0 2532; GFX90A-NEXT: ;;#ASMSTART 2533; GFX90A-NEXT: ; def v[0:1] 2534; GFX90A-NEXT: ;;#ASMEND 2535; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 2536; GFX90A-NEXT: ;;#ASMSTART 2537; GFX90A-NEXT: ; def v[2:3] 2538; GFX90A-NEXT: ;;#ASMEND 2539; GFX90A-NEXT: v_perm_b32 v0, v1, v3, s4 2540; GFX90A-NEXT: global_store_short v4, v1, s[16:17] offset:4 2541; GFX90A-NEXT: 
global_store_dword v4, v0, s[16:17] 2542; GFX90A-NEXT: s_waitcnt vmcnt(0) 2543; GFX90A-NEXT: s_setpc_b64 s[30:31] 2544; 2545; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_2_2: 2546; GFX940: ; %bb.0: 2547; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2548; GFX940-NEXT: v_mov_b32_e32 v4, 0 2549; GFX940-NEXT: ;;#ASMSTART 2550; GFX940-NEXT: ; def v[0:1] 2551; GFX940-NEXT: ;;#ASMEND 2552; GFX940-NEXT: s_mov_b32 s2, 0x5040100 2553; GFX940-NEXT: ;;#ASMSTART 2554; GFX940-NEXT: ; def v[2:3] 2555; GFX940-NEXT: ;;#ASMEND 2556; GFX940-NEXT: s_nop 0 2557; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 2558; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 2559; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 2560; GFX940-NEXT: s_waitcnt vmcnt(0) 2561; GFX940-NEXT: s_setpc_b64 s[30:31] 2562 %vec0 = call <4 x i16> asm "; def $0", "=v"() 2563 %vec1 = call <4 x i16> asm "; def $0", "=v"() 2564 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 2565 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 2566 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 2, i32 2> 2567 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 2568 ret void 2569} 2570 2571define void @v_shuffle_v3i16_v3i16__5_u_2(ptr addrspace(1) inreg %ptr) { 2572; GFX900-LABEL: v_shuffle_v3i16_v3i16__5_u_2: 2573; GFX900: ; %bb.0: 2574; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2575; GFX900-NEXT: v_mov_b32_e32 v4, 0 2576; GFX900-NEXT: ;;#ASMSTART 2577; GFX900-NEXT: ; def v[0:1] 2578; GFX900-NEXT: ;;#ASMEND 2579; GFX900-NEXT: ;;#ASMSTART 2580; GFX900-NEXT: ; def v[2:3] 2581; GFX900-NEXT: ;;#ASMEND 2582; GFX900-NEXT: global_store_short v4, v1, s[16:17] offset:4 2583; GFX900-NEXT: global_store_dword v4, v3, s[16:17] 2584; GFX900-NEXT: s_waitcnt vmcnt(0) 2585; GFX900-NEXT: s_setpc_b64 s[30:31] 2586; 2587; GFX90A-LABEL: v_shuffle_v3i16_v3i16__5_u_2: 2588; GFX90A: ; %bb.0: 2589; 
GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2590; GFX90A-NEXT: v_mov_b32_e32 v4, 0 2591; GFX90A-NEXT: ;;#ASMSTART 2592; GFX90A-NEXT: ; def v[0:1] 2593; GFX90A-NEXT: ;;#ASMEND 2594; GFX90A-NEXT: ;;#ASMSTART 2595; GFX90A-NEXT: ; def v[2:3] 2596; GFX90A-NEXT: ;;#ASMEND 2597; GFX90A-NEXT: global_store_short v4, v1, s[16:17] offset:4 2598; GFX90A-NEXT: global_store_dword v4, v3, s[16:17] 2599; GFX90A-NEXT: s_waitcnt vmcnt(0) 2600; GFX90A-NEXT: s_setpc_b64 s[30:31] 2601; 2602; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_u_2: 2603; GFX940: ; %bb.0: 2604; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2605; GFX940-NEXT: v_mov_b32_e32 v4, 0 2606; GFX940-NEXT: ;;#ASMSTART 2607; GFX940-NEXT: ; def v[0:1] 2608; GFX940-NEXT: ;;#ASMEND 2609; GFX940-NEXT: ;;#ASMSTART 2610; GFX940-NEXT: ; def v[2:3] 2611; GFX940-NEXT: ;;#ASMEND 2612; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 2613; GFX940-NEXT: global_store_dword v4, v3, s[0:1] sc0 sc1 2614; GFX940-NEXT: s_waitcnt vmcnt(0) 2615; GFX940-NEXT: s_setpc_b64 s[30:31] 2616 %vec0 = call <4 x i16> asm "; def $0", "=v"() 2617 %vec1 = call <4 x i16> asm "; def $0", "=v"() 2618 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 2619 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 2620 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 poison, i32 2> 2621 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 2622 ret void 2623} 2624 2625define void @v_shuffle_v3i16_v3i16__5_0_2(ptr addrspace(1) inreg %ptr) { 2626; GFX900-LABEL: v_shuffle_v3i16_v3i16__5_0_2: 2627; GFX900: ; %bb.0: 2628; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2629; GFX900-NEXT: v_mov_b32_e32 v4, 0 2630; GFX900-NEXT: ;;#ASMSTART 2631; GFX900-NEXT: ; def v[0:1] 2632; GFX900-NEXT: ;;#ASMEND 2633; GFX900-NEXT: s_mov_b32 s4, 0x5040100 2634; GFX900-NEXT: ;;#ASMSTART 2635; GFX900-NEXT: ; def v[2:3] 2636; GFX900-NEXT: 
;;#ASMEND 2637; GFX900-NEXT: v_perm_b32 v0, v0, v3, s4 2638; GFX900-NEXT: global_store_short v4, v1, s[16:17] offset:4 2639; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 2640; GFX900-NEXT: s_waitcnt vmcnt(0) 2641; GFX900-NEXT: s_setpc_b64 s[30:31] 2642; 2643; GFX90A-LABEL: v_shuffle_v3i16_v3i16__5_0_2: 2644; GFX90A: ; %bb.0: 2645; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2646; GFX90A-NEXT: v_mov_b32_e32 v4, 0 2647; GFX90A-NEXT: ;;#ASMSTART 2648; GFX90A-NEXT: ; def v[0:1] 2649; GFX90A-NEXT: ;;#ASMEND 2650; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 2651; GFX90A-NEXT: ;;#ASMSTART 2652; GFX90A-NEXT: ; def v[2:3] 2653; GFX90A-NEXT: ;;#ASMEND 2654; GFX90A-NEXT: v_perm_b32 v0, v0, v3, s4 2655; GFX90A-NEXT: global_store_short v4, v1, s[16:17] offset:4 2656; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 2657; GFX90A-NEXT: s_waitcnt vmcnt(0) 2658; GFX90A-NEXT: s_setpc_b64 s[30:31] 2659; 2660; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_0_2: 2661; GFX940: ; %bb.0: 2662; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2663; GFX940-NEXT: v_mov_b32_e32 v4, 0 2664; GFX940-NEXT: ;;#ASMSTART 2665; GFX940-NEXT: ; def v[0:1] 2666; GFX940-NEXT: ;;#ASMEND 2667; GFX940-NEXT: s_mov_b32 s2, 0x5040100 2668; GFX940-NEXT: ;;#ASMSTART 2669; GFX940-NEXT: ; def v[2:3] 2670; GFX940-NEXT: ;;#ASMEND 2671; GFX940-NEXT: s_nop 0 2672; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 2673; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 2674; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 2675; GFX940-NEXT: s_waitcnt vmcnt(0) 2676; GFX940-NEXT: s_setpc_b64 s[30:31] 2677 %vec0 = call <4 x i16> asm "; def $0", "=v"() 2678 %vec1 = call <4 x i16> asm "; def $0", "=v"() 2679 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 2680 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 2681 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 0, i32 2> 2682 store <3 x 
i16> %shuf, ptr addrspace(1) %ptr, align 8 2683 ret void 2684} 2685 2686define void @v_shuffle_v3i16_v3i16__5_1_2(ptr addrspace(1) inreg %ptr) { 2687; GFX900-LABEL: v_shuffle_v3i16_v3i16__5_1_2: 2688; GFX900: ; %bb.0: 2689; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2690; GFX900-NEXT: v_mov_b32_e32 v4, 0 2691; GFX900-NEXT: ;;#ASMSTART 2692; GFX900-NEXT: ; def v[0:1] 2693; GFX900-NEXT: ;;#ASMEND 2694; GFX900-NEXT: s_mov_b32 s4, 0xffff 2695; GFX900-NEXT: ;;#ASMSTART 2696; GFX900-NEXT: ; def v[2:3] 2697; GFX900-NEXT: ;;#ASMEND 2698; GFX900-NEXT: v_bfi_b32 v0, s4, v3, v0 2699; GFX900-NEXT: global_store_short v4, v1, s[16:17] offset:4 2700; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 2701; GFX900-NEXT: s_waitcnt vmcnt(0) 2702; GFX900-NEXT: s_setpc_b64 s[30:31] 2703; 2704; GFX90A-LABEL: v_shuffle_v3i16_v3i16__5_1_2: 2705; GFX90A: ; %bb.0: 2706; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2707; GFX90A-NEXT: v_mov_b32_e32 v4, 0 2708; GFX90A-NEXT: ;;#ASMSTART 2709; GFX90A-NEXT: ; def v[0:1] 2710; GFX90A-NEXT: ;;#ASMEND 2711; GFX90A-NEXT: s_mov_b32 s4, 0xffff 2712; GFX90A-NEXT: ;;#ASMSTART 2713; GFX90A-NEXT: ; def v[2:3] 2714; GFX90A-NEXT: ;;#ASMEND 2715; GFX90A-NEXT: v_bfi_b32 v0, s4, v3, v0 2716; GFX90A-NEXT: global_store_short v4, v1, s[16:17] offset:4 2717; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 2718; GFX90A-NEXT: s_waitcnt vmcnt(0) 2719; GFX90A-NEXT: s_setpc_b64 s[30:31] 2720; 2721; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_1_2: 2722; GFX940: ; %bb.0: 2723; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2724; GFX940-NEXT: v_mov_b32_e32 v4, 0 2725; GFX940-NEXT: ;;#ASMSTART 2726; GFX940-NEXT: ; def v[0:1] 2727; GFX940-NEXT: ;;#ASMEND 2728; GFX940-NEXT: s_mov_b32 s2, 0xffff 2729; GFX940-NEXT: ;;#ASMSTART 2730; GFX940-NEXT: ; def v[2:3] 2731; GFX940-NEXT: ;;#ASMEND 2732; GFX940-NEXT: s_nop 0 2733; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v0 2734; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 2735; GFX940-NEXT: 
global_store_dword v4, v0, s[0:1] sc0 sc1 2736; GFX940-NEXT: s_waitcnt vmcnt(0) 2737; GFX940-NEXT: s_setpc_b64 s[30:31] 2738 %vec0 = call <4 x i16> asm "; def $0", "=v"() 2739 %vec1 = call <4 x i16> asm "; def $0", "=v"() 2740 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 2741 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 2742 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 1, i32 2> 2743 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 2744 ret void 2745} 2746 2747define void @v_shuffle_v3i16_v3i16__5_3_2(ptr addrspace(1) inreg %ptr) { 2748; GFX900-LABEL: v_shuffle_v3i16_v3i16__5_3_2: 2749; GFX900: ; %bb.0: 2750; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2751; GFX900-NEXT: v_mov_b32_e32 v4, 0 2752; GFX900-NEXT: ;;#ASMSTART 2753; GFX900-NEXT: ; def v[0:1] 2754; GFX900-NEXT: ;;#ASMEND 2755; GFX900-NEXT: s_mov_b32 s4, 0x5040100 2756; GFX900-NEXT: ;;#ASMSTART 2757; GFX900-NEXT: ; def v[2:3] 2758; GFX900-NEXT: ;;#ASMEND 2759; GFX900-NEXT: v_perm_b32 v0, v2, v3, s4 2760; GFX900-NEXT: global_store_short v4, v1, s[16:17] offset:4 2761; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 2762; GFX900-NEXT: s_waitcnt vmcnt(0) 2763; GFX900-NEXT: s_setpc_b64 s[30:31] 2764; 2765; GFX90A-LABEL: v_shuffle_v3i16_v3i16__5_3_2: 2766; GFX90A: ; %bb.0: 2767; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2768; GFX90A-NEXT: v_mov_b32_e32 v4, 0 2769; GFX90A-NEXT: ;;#ASMSTART 2770; GFX90A-NEXT: ; def v[0:1] 2771; GFX90A-NEXT: ;;#ASMEND 2772; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 2773; GFX90A-NEXT: ;;#ASMSTART 2774; GFX90A-NEXT: ; def v[2:3] 2775; GFX90A-NEXT: ;;#ASMEND 2776; GFX90A-NEXT: v_perm_b32 v0, v2, v3, s4 2777; GFX90A-NEXT: global_store_short v4, v1, s[16:17] offset:4 2778; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 2779; GFX90A-NEXT: s_waitcnt vmcnt(0) 2780; GFX90A-NEXT: s_setpc_b64 s[30:31] 2781; 2782; GFX940-LABEL: 
v_shuffle_v3i16_v3i16__5_3_2: 2783; GFX940: ; %bb.0: 2784; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2785; GFX940-NEXT: v_mov_b32_e32 v4, 0 2786; GFX940-NEXT: ;;#ASMSTART 2787; GFX940-NEXT: ; def v[0:1] 2788; GFX940-NEXT: ;;#ASMEND 2789; GFX940-NEXT: s_mov_b32 s2, 0x5040100 2790; GFX940-NEXT: ;;#ASMSTART 2791; GFX940-NEXT: ; def v[2:3] 2792; GFX940-NEXT: ;;#ASMEND 2793; GFX940-NEXT: s_nop 0 2794; GFX940-NEXT: v_perm_b32 v0, v2, v3, s2 2795; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 2796; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 2797; GFX940-NEXT: s_waitcnt vmcnt(0) 2798; GFX940-NEXT: s_setpc_b64 s[30:31] 2799 %vec0 = call <4 x i16> asm "; def $0", "=v"() 2800 %vec1 = call <4 x i16> asm "; def $0", "=v"() 2801 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 2802 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 2803 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 3, i32 2> 2804 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 2805 ret void 2806} 2807 2808define void @v_shuffle_v3i16_v3i16__5_4_2(ptr addrspace(1) inreg %ptr) { 2809; GFX900-LABEL: v_shuffle_v3i16_v3i16__5_4_2: 2810; GFX900: ; %bb.0: 2811; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2812; GFX900-NEXT: v_mov_b32_e32 v4, 0 2813; GFX900-NEXT: ;;#ASMSTART 2814; GFX900-NEXT: ; def v[0:1] 2815; GFX900-NEXT: ;;#ASMEND 2816; GFX900-NEXT: s_mov_b32 s4, 0xffff 2817; GFX900-NEXT: ;;#ASMSTART 2818; GFX900-NEXT: ; def v[2:3] 2819; GFX900-NEXT: ;;#ASMEND 2820; GFX900-NEXT: v_bfi_b32 v0, s4, v3, v2 2821; GFX900-NEXT: global_store_short v4, v1, s[16:17] offset:4 2822; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 2823; GFX900-NEXT: s_waitcnt vmcnt(0) 2824; GFX900-NEXT: s_setpc_b64 s[30:31] 2825; 2826; GFX90A-LABEL: v_shuffle_v3i16_v3i16__5_4_2: 2827; GFX90A: ; %bb.0: 2828; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 
2829; GFX90A-NEXT: v_mov_b32_e32 v4, 0 2830; GFX90A-NEXT: ;;#ASMSTART 2831; GFX90A-NEXT: ; def v[0:1] 2832; GFX90A-NEXT: ;;#ASMEND 2833; GFX90A-NEXT: s_mov_b32 s4, 0xffff 2834; GFX90A-NEXT: ;;#ASMSTART 2835; GFX90A-NEXT: ; def v[2:3] 2836; GFX90A-NEXT: ;;#ASMEND 2837; GFX90A-NEXT: v_bfi_b32 v0, s4, v3, v2 2838; GFX90A-NEXT: global_store_short v4, v1, s[16:17] offset:4 2839; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 2840; GFX90A-NEXT: s_waitcnt vmcnt(0) 2841; GFX90A-NEXT: s_setpc_b64 s[30:31] 2842; 2843; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_4_2: 2844; GFX940: ; %bb.0: 2845; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2846; GFX940-NEXT: v_mov_b32_e32 v4, 0 2847; GFX940-NEXT: ;;#ASMSTART 2848; GFX940-NEXT: ; def v[0:1] 2849; GFX940-NEXT: ;;#ASMEND 2850; GFX940-NEXT: s_mov_b32 s2, 0xffff 2851; GFX940-NEXT: ;;#ASMSTART 2852; GFX940-NEXT: ; def v[2:3] 2853; GFX940-NEXT: ;;#ASMEND 2854; GFX940-NEXT: s_nop 0 2855; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v2 2856; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 2857; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 2858; GFX940-NEXT: s_waitcnt vmcnt(0) 2859; GFX940-NEXT: s_setpc_b64 s[30:31] 2860 %vec0 = call <4 x i16> asm "; def $0", "=v"() 2861 %vec1 = call <4 x i16> asm "; def $0", "=v"() 2862 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 2863 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 2864 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 4, i32 2> 2865 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 2866 ret void 2867} 2868 2869define void @v_shuffle_v3i16_v3i16__u_3_3(ptr addrspace(1) inreg %ptr) { 2870; GFX9-LABEL: v_shuffle_v3i16_v3i16__u_3_3: 2871; GFX9: ; %bb.0: 2872; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2873; GFX9-NEXT: s_setpc_b64 s[30:31] 2874 %vec0 = call <4 x i16> asm "; def $0", "=v"() 2875 %extract3 = shufflevector <4 x 
i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 2876 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> <i32 poison, i32 3, i32 3> 2877 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 2878 ret void 2879} 2880 2881define void @v_shuffle_v3i16_v3i16__0_3_3(ptr addrspace(1) inreg %ptr) { 2882; GFX900-LABEL: v_shuffle_v3i16_v3i16__0_3_3: 2883; GFX900: ; %bb.0: 2884; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2885; GFX900-NEXT: v_mov_b32_e32 v2, 0 2886; GFX900-NEXT: ;;#ASMSTART 2887; GFX900-NEXT: ; def v[0:1] 2888; GFX900-NEXT: ;;#ASMEND 2889; GFX900-NEXT: global_store_short v2, v1, s[16:17] offset:4 2890; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 2891; GFX900-NEXT: s_waitcnt vmcnt(0) 2892; GFX900-NEXT: s_setpc_b64 s[30:31] 2893; 2894; GFX90A-LABEL: v_shuffle_v3i16_v3i16__0_3_3: 2895; GFX90A: ; %bb.0: 2896; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2897; GFX90A-NEXT: v_mov_b32_e32 v2, 0 2898; GFX90A-NEXT: ;;#ASMSTART 2899; GFX90A-NEXT: ; def v[0:1] 2900; GFX90A-NEXT: ;;#ASMEND 2901; GFX90A-NEXT: global_store_short v2, v1, s[16:17] offset:4 2902; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 2903; GFX90A-NEXT: s_waitcnt vmcnt(0) 2904; GFX90A-NEXT: s_setpc_b64 s[30:31] 2905; 2906; GFX940-LABEL: v_shuffle_v3i16_v3i16__0_3_3: 2907; GFX940: ; %bb.0: 2908; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2909; GFX940-NEXT: v_mov_b32_e32 v2, 0 2910; GFX940-NEXT: ;;#ASMSTART 2911; GFX940-NEXT: ; def v[0:1] 2912; GFX940-NEXT: ;;#ASMEND 2913; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 2914; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 2915; GFX940-NEXT: s_waitcnt vmcnt(0) 2916; GFX940-NEXT: s_setpc_b64 s[30:31] 2917 %vec0 = call <4 x i16> asm "; def $0", "=v"() 2918 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 2919 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> <i32 0, i32 3, i32 3> 2920 store <3 x i16> 
%shuf, ptr addrspace(1) %ptr, align 8 2921 ret void 2922} 2923 2924define void @v_shuffle_v3i16_v3i16__1_3_3(ptr addrspace(1) inreg %ptr) { 2925; GFX900-LABEL: v_shuffle_v3i16_v3i16__1_3_3: 2926; GFX900: ; %bb.0: 2927; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2928; GFX900-NEXT: ;;#ASMSTART 2929; GFX900-NEXT: ; def v[0:1] 2930; GFX900-NEXT: ;;#ASMEND 2931; GFX900-NEXT: v_mov_b32_e32 v2, 0 2932; GFX900-NEXT: v_alignbit_b32 v0, s4, v0, 16 2933; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 2934; GFX900-NEXT: s_waitcnt vmcnt(0) 2935; GFX900-NEXT: s_setpc_b64 s[30:31] 2936; 2937; GFX90A-LABEL: v_shuffle_v3i16_v3i16__1_3_3: 2938; GFX90A: ; %bb.0: 2939; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2940; GFX90A-NEXT: ;;#ASMSTART 2941; GFX90A-NEXT: ; def v[0:1] 2942; GFX90A-NEXT: ;;#ASMEND 2943; GFX90A-NEXT: v_mov_b32_e32 v2, 0 2944; GFX90A-NEXT: v_alignbit_b32 v0, s4, v0, 16 2945; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 2946; GFX90A-NEXT: s_waitcnt vmcnt(0) 2947; GFX90A-NEXT: s_setpc_b64 s[30:31] 2948; 2949; GFX940-LABEL: v_shuffle_v3i16_v3i16__1_3_3: 2950; GFX940: ; %bb.0: 2951; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2952; GFX940-NEXT: ;;#ASMSTART 2953; GFX940-NEXT: ; def v[0:1] 2954; GFX940-NEXT: ;;#ASMEND 2955; GFX940-NEXT: v_mov_b32_e32 v2, 0 2956; GFX940-NEXT: v_alignbit_b32 v0, s0, v0, 16 2957; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 2958; GFX940-NEXT: s_waitcnt vmcnt(0) 2959; GFX940-NEXT: s_setpc_b64 s[30:31] 2960 %vec0 = call <4 x i16> asm "; def $0", "=v"() 2961 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 2962 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> <i32 1, i32 3, i32 3> 2963 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 2964 ret void 2965} 2966 2967define void @v_shuffle_v3i16_v3i16__2_3_3(ptr addrspace(1) inreg %ptr) { 2968; GFX900-LABEL: v_shuffle_v3i16_v3i16__2_3_3: 2969; GFX900: ; %bb.0: 2970; GFX900-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2971; GFX900-NEXT: v_mov_b32_e32 v2, 0 2972; GFX900-NEXT: ;;#ASMSTART 2973; GFX900-NEXT: ; def v[0:1] 2974; GFX900-NEXT: ;;#ASMEND 2975; GFX900-NEXT: global_store_dword v2, v1, s[16:17] 2976; GFX900-NEXT: s_waitcnt vmcnt(0) 2977; GFX900-NEXT: s_setpc_b64 s[30:31] 2978; 2979; GFX90A-LABEL: v_shuffle_v3i16_v3i16__2_3_3: 2980; GFX90A: ; %bb.0: 2981; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2982; GFX90A-NEXT: v_mov_b32_e32 v2, 0 2983; GFX90A-NEXT: ;;#ASMSTART 2984; GFX90A-NEXT: ; def v[0:1] 2985; GFX90A-NEXT: ;;#ASMEND 2986; GFX90A-NEXT: global_store_dword v2, v1, s[16:17] 2987; GFX90A-NEXT: s_waitcnt vmcnt(0) 2988; GFX90A-NEXT: s_setpc_b64 s[30:31] 2989; 2990; GFX940-LABEL: v_shuffle_v3i16_v3i16__2_3_3: 2991; GFX940: ; %bb.0: 2992; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2993; GFX940-NEXT: v_mov_b32_e32 v2, 0 2994; GFX940-NEXT: ;;#ASMSTART 2995; GFX940-NEXT: ; def v[0:1] 2996; GFX940-NEXT: ;;#ASMEND 2997; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 2998; GFX940-NEXT: s_waitcnt vmcnt(0) 2999; GFX940-NEXT: s_setpc_b64 s[30:31] 3000 %vec0 = call <4 x i16> asm "; def $0", "=v"() 3001 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 3002 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> <i32 2, i32 3, i32 3> 3003 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 3004 ret void 3005} 3006 3007define void @v_shuffle_v3i16_v3i16__3_3_3(ptr addrspace(1) inreg %ptr) { 3008; GFX9-LABEL: v_shuffle_v3i16_v3i16__3_3_3: 3009; GFX9: ; %bb.0: 3010; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3011; GFX9-NEXT: s_setpc_b64 s[30:31] 3012 %vec0 = call <4 x i16> asm "; def $0", "=v"() 3013 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 3014 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> <i32 3, i32 3, i32 3> 3015 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 3016 
ret void 3017} 3018 3019define void @v_shuffle_v3i16_v3i16__4_3_3(ptr addrspace(1) inreg %ptr) { 3020; GFX900-LABEL: v_shuffle_v3i16_v3i16__4_3_3: 3021; GFX900: ; %bb.0: 3022; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3023; GFX900-NEXT: v_mov_b32_e32 v2, 0 3024; GFX900-NEXT: ;;#ASMSTART 3025; GFX900-NEXT: ; def v[0:1] 3026; GFX900-NEXT: ;;#ASMEND 3027; GFX900-NEXT: v_alignbit_b32 v1, v0, v0, 16 3028; GFX900-NEXT: global_store_short v2, v0, s[16:17] offset:4 3029; GFX900-NEXT: global_store_dword v2, v1, s[16:17] 3030; GFX900-NEXT: s_waitcnt vmcnt(0) 3031; GFX900-NEXT: s_setpc_b64 s[30:31] 3032; 3033; GFX90A-LABEL: v_shuffle_v3i16_v3i16__4_3_3: 3034; GFX90A: ; %bb.0: 3035; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3036; GFX90A-NEXT: v_mov_b32_e32 v2, 0 3037; GFX90A-NEXT: ;;#ASMSTART 3038; GFX90A-NEXT: ; def v[0:1] 3039; GFX90A-NEXT: ;;#ASMEND 3040; GFX90A-NEXT: v_alignbit_b32 v1, v0, v0, 16 3041; GFX90A-NEXT: global_store_short v2, v0, s[16:17] offset:4 3042; GFX90A-NEXT: global_store_dword v2, v1, s[16:17] 3043; GFX90A-NEXT: s_waitcnt vmcnt(0) 3044; GFX90A-NEXT: s_setpc_b64 s[30:31] 3045; 3046; GFX940-LABEL: v_shuffle_v3i16_v3i16__4_3_3: 3047; GFX940: ; %bb.0: 3048; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3049; GFX940-NEXT: v_mov_b32_e32 v2, 0 3050; GFX940-NEXT: ;;#ASMSTART 3051; GFX940-NEXT: ; def v[0:1] 3052; GFX940-NEXT: ;;#ASMEND 3053; GFX940-NEXT: s_nop 0 3054; GFX940-NEXT: v_alignbit_b32 v1, v0, v0, 16 3055; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 3056; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 3057; GFX940-NEXT: s_waitcnt vmcnt(0) 3058; GFX940-NEXT: s_setpc_b64 s[30:31] 3059 %vec0 = call <4 x i16> asm "; def $0", "=v"() 3060 %vec1 = call <4 x i16> asm "; def $0", "=v"() 3061 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 3062 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 3063 %shuf = 
shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 4, i32 3, i32 3> 3064 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 3065 ret void 3066} 3067 3068define void @v_shuffle_v3i16_v3i16__5_3_3(ptr addrspace(1) inreg %ptr) { 3069; GFX900-LABEL: v_shuffle_v3i16_v3i16__5_3_3: 3070; GFX900: ; %bb.0: 3071; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3072; GFX900-NEXT: v_mov_b32_e32 v2, 0 3073; GFX900-NEXT: ;;#ASMSTART 3074; GFX900-NEXT: ; def v[0:1] 3075; GFX900-NEXT: ;;#ASMEND 3076; GFX900-NEXT: s_mov_b32 s4, 0x5040100 3077; GFX900-NEXT: v_perm_b32 v1, v0, v1, s4 3078; GFX900-NEXT: global_store_short v2, v0, s[16:17] offset:4 3079; GFX900-NEXT: global_store_dword v2, v1, s[16:17] 3080; GFX900-NEXT: s_waitcnt vmcnt(0) 3081; GFX900-NEXT: s_setpc_b64 s[30:31] 3082; 3083; GFX90A-LABEL: v_shuffle_v3i16_v3i16__5_3_3: 3084; GFX90A: ; %bb.0: 3085; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3086; GFX90A-NEXT: v_mov_b32_e32 v2, 0 3087; GFX90A-NEXT: ;;#ASMSTART 3088; GFX90A-NEXT: ; def v[0:1] 3089; GFX90A-NEXT: ;;#ASMEND 3090; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 3091; GFX90A-NEXT: v_perm_b32 v1, v0, v1, s4 3092; GFX90A-NEXT: global_store_short v2, v0, s[16:17] offset:4 3093; GFX90A-NEXT: global_store_dword v2, v1, s[16:17] 3094; GFX90A-NEXT: s_waitcnt vmcnt(0) 3095; GFX90A-NEXT: s_setpc_b64 s[30:31] 3096; 3097; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_3_3: 3098; GFX940: ; %bb.0: 3099; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3100; GFX940-NEXT: v_mov_b32_e32 v2, 0 3101; GFX940-NEXT: ;;#ASMSTART 3102; GFX940-NEXT: ; def v[0:1] 3103; GFX940-NEXT: ;;#ASMEND 3104; GFX940-NEXT: s_mov_b32 s2, 0x5040100 3105; GFX940-NEXT: v_perm_b32 v1, v0, v1, s2 3106; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 3107; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 3108; GFX940-NEXT: s_waitcnt vmcnt(0) 3109; GFX940-NEXT: s_setpc_b64 s[30:31] 3110 %vec0 = call <4 x i16> asm "; def $0", "=v"() 3111 %vec1 = call <4 x i16> 
asm "; def $0", "=v"() 3112 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 3113 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 3114 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 3, i32 3> 3115 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 3116 ret void 3117} 3118 3119define void @v_shuffle_v3i16_v3i16__5_u_3(ptr addrspace(1) inreg %ptr) { 3120; GFX900-LABEL: v_shuffle_v3i16_v3i16__5_u_3: 3121; GFX900: ; %bb.0: 3122; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3123; GFX900-NEXT: v_mov_b32_e32 v2, 0 3124; GFX900-NEXT: ;;#ASMSTART 3125; GFX900-NEXT: ; def v[0:1] 3126; GFX900-NEXT: ;;#ASMEND 3127; GFX900-NEXT: global_store_short v2, v0, s[16:17] offset:4 3128; GFX900-NEXT: global_store_dword v2, v1, s[16:17] 3129; GFX900-NEXT: s_waitcnt vmcnt(0) 3130; GFX900-NEXT: s_setpc_b64 s[30:31] 3131; 3132; GFX90A-LABEL: v_shuffle_v3i16_v3i16__5_u_3: 3133; GFX90A: ; %bb.0: 3134; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3135; GFX90A-NEXT: v_mov_b32_e32 v2, 0 3136; GFX90A-NEXT: ;;#ASMSTART 3137; GFX90A-NEXT: ; def v[0:1] 3138; GFX90A-NEXT: ;;#ASMEND 3139; GFX90A-NEXT: global_store_short v2, v0, s[16:17] offset:4 3140; GFX90A-NEXT: global_store_dword v2, v1, s[16:17] 3141; GFX90A-NEXT: s_waitcnt vmcnt(0) 3142; GFX90A-NEXT: s_setpc_b64 s[30:31] 3143; 3144; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_u_3: 3145; GFX940: ; %bb.0: 3146; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3147; GFX940-NEXT: v_mov_b32_e32 v2, 0 3148; GFX940-NEXT: ;;#ASMSTART 3149; GFX940-NEXT: ; def v[0:1] 3150; GFX940-NEXT: ;;#ASMEND 3151; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 3152; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 3153; GFX940-NEXT: s_waitcnt vmcnt(0) 3154; GFX940-NEXT: s_setpc_b64 s[30:31] 3155 %vec0 = call <4 x i16> asm "; def $0", "=v"() 3156 %vec1 = call <4 x i16> asm "; def $0", "=v"() 3157 
%extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 3158 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 3159 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 poison, i32 3> 3160 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 3161 ret void 3162} 3163 3164define void @v_shuffle_v3i16_v3i16__5_0_3(ptr addrspace(1) inreg %ptr) { 3165; GFX900-LABEL: v_shuffle_v3i16_v3i16__5_0_3: 3166; GFX900: ; %bb.0: 3167; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3168; GFX900-NEXT: ;;#ASMSTART 3169; GFX900-NEXT: ; def v[0:1] 3170; GFX900-NEXT: ;;#ASMEND 3171; GFX900-NEXT: v_mov_b32_e32 v3, 0 3172; GFX900-NEXT: ;;#ASMSTART 3173; GFX900-NEXT: ; def v[1:2] 3174; GFX900-NEXT: ;;#ASMEND 3175; GFX900-NEXT: s_mov_b32 s4, 0x5040100 3176; GFX900-NEXT: v_perm_b32 v0, v0, v2, s4 3177; GFX900-NEXT: global_store_short v3, v1, s[16:17] offset:4 3178; GFX900-NEXT: global_store_dword v3, v0, s[16:17] 3179; GFX900-NEXT: s_waitcnt vmcnt(0) 3180; GFX900-NEXT: s_setpc_b64 s[30:31] 3181; 3182; GFX90A-LABEL: v_shuffle_v3i16_v3i16__5_0_3: 3183; GFX90A: ; %bb.0: 3184; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3185; GFX90A-NEXT: v_mov_b32_e32 v4, 0 3186; GFX90A-NEXT: ;;#ASMSTART 3187; GFX90A-NEXT: ; def v[0:1] 3188; GFX90A-NEXT: ;;#ASMEND 3189; GFX90A-NEXT: ;;#ASMSTART 3190; GFX90A-NEXT: ; def v[2:3] 3191; GFX90A-NEXT: ;;#ASMEND 3192; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 3193; GFX90A-NEXT: v_perm_b32 v0, v0, v3, s4 3194; GFX90A-NEXT: global_store_short v4, v2, s[16:17] offset:4 3195; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 3196; GFX90A-NEXT: s_waitcnt vmcnt(0) 3197; GFX90A-NEXT: s_setpc_b64 s[30:31] 3198; 3199; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_0_3: 3200; GFX940: ; %bb.0: 3201; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3202; GFX940-NEXT: v_mov_b32_e32 v4, 0 3203; GFX940-NEXT: ;;#ASMSTART 3204; GFX940-NEXT: ; def v[0:1] 3205; 
GFX940-NEXT: ;;#ASMEND 3206; GFX940-NEXT: ;;#ASMSTART 3207; GFX940-NEXT: ; def v[2:3] 3208; GFX940-NEXT: ;;#ASMEND 3209; GFX940-NEXT: s_mov_b32 s2, 0x5040100 3210; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 3211; GFX940-NEXT: global_store_short v4, v2, s[0:1] offset:4 sc0 sc1 3212; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 3213; GFX940-NEXT: s_waitcnt vmcnt(0) 3214; GFX940-NEXT: s_setpc_b64 s[30:31] 3215 %vec0 = call <4 x i16> asm "; def $0", "=v"() 3216 %vec1 = call <4 x i16> asm "; def $0", "=v"() 3217 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 3218 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 3219 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 0, i32 3> 3220 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 3221 ret void 3222} 3223 3224define void @v_shuffle_v3i16_v3i16__5_1_3(ptr addrspace(1) inreg %ptr) { 3225; GFX900-LABEL: v_shuffle_v3i16_v3i16__5_1_3: 3226; GFX900: ; %bb.0: 3227; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3228; GFX900-NEXT: ;;#ASMSTART 3229; GFX900-NEXT: ; def v[0:1] 3230; GFX900-NEXT: ;;#ASMEND 3231; GFX900-NEXT: v_mov_b32_e32 v3, 0 3232; GFX900-NEXT: ;;#ASMSTART 3233; GFX900-NEXT: ; def v[1:2] 3234; GFX900-NEXT: ;;#ASMEND 3235; GFX900-NEXT: s_mov_b32 s4, 0xffff 3236; GFX900-NEXT: v_bfi_b32 v0, s4, v2, v0 3237; GFX900-NEXT: global_store_short v3, v1, s[16:17] offset:4 3238; GFX900-NEXT: global_store_dword v3, v0, s[16:17] 3239; GFX900-NEXT: s_waitcnt vmcnt(0) 3240; GFX900-NEXT: s_setpc_b64 s[30:31] 3241; 3242; GFX90A-LABEL: v_shuffle_v3i16_v3i16__5_1_3: 3243; GFX90A: ; %bb.0: 3244; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3245; GFX90A-NEXT: v_mov_b32_e32 v4, 0 3246; GFX90A-NEXT: ;;#ASMSTART 3247; GFX90A-NEXT: ; def v[0:1] 3248; GFX90A-NEXT: ;;#ASMEND 3249; GFX90A-NEXT: ;;#ASMSTART 3250; GFX90A-NEXT: ; def v[2:3] 3251; GFX90A-NEXT: ;;#ASMEND 3252; GFX90A-NEXT: s_mov_b32 
s4, 0xffff 3253; GFX90A-NEXT: v_bfi_b32 v0, s4, v3, v0 3254; GFX90A-NEXT: global_store_short v4, v2, s[16:17] offset:4 3255; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 3256; GFX90A-NEXT: s_waitcnt vmcnt(0) 3257; GFX90A-NEXT: s_setpc_b64 s[30:31] 3258; 3259; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_1_3: 3260; GFX940: ; %bb.0: 3261; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3262; GFX940-NEXT: v_mov_b32_e32 v4, 0 3263; GFX940-NEXT: ;;#ASMSTART 3264; GFX940-NEXT: ; def v[0:1] 3265; GFX940-NEXT: ;;#ASMEND 3266; GFX940-NEXT: ;;#ASMSTART 3267; GFX940-NEXT: ; def v[2:3] 3268; GFX940-NEXT: ;;#ASMEND 3269; GFX940-NEXT: s_mov_b32 s2, 0xffff 3270; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v0 3271; GFX940-NEXT: global_store_short v4, v2, s[0:1] offset:4 sc0 sc1 3272; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 3273; GFX940-NEXT: s_waitcnt vmcnt(0) 3274; GFX940-NEXT: s_setpc_b64 s[30:31] 3275 %vec0 = call <4 x i16> asm "; def $0", "=v"() 3276 %vec1 = call <4 x i16> asm "; def $0", "=v"() 3277 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 3278 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 3279 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 1, i32 3> 3280 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 3281 ret void 3282} 3283 3284define void @v_shuffle_v3i16_v3i16__5_2_3(ptr addrspace(1) inreg %ptr) { 3285; GFX900-LABEL: v_shuffle_v3i16_v3i16__5_2_3: 3286; GFX900: ; %bb.0: 3287; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3288; GFX900-NEXT: v_mov_b32_e32 v4, 0 3289; GFX900-NEXT: ;;#ASMSTART 3290; GFX900-NEXT: ; def v[0:1] 3291; GFX900-NEXT: ;;#ASMEND 3292; GFX900-NEXT: ;;#ASMSTART 3293; GFX900-NEXT: ; def v[2:3] 3294; GFX900-NEXT: ;;#ASMEND 3295; GFX900-NEXT: s_mov_b32 s4, 0x5040100 3296; GFX900-NEXT: v_perm_b32 v0, v1, v3, s4 3297; GFX900-NEXT: global_store_short v4, v2, s[16:17] offset:4 3298; GFX900-NEXT: 
global_store_dword v4, v0, s[16:17] 3299; GFX900-NEXT: s_waitcnt vmcnt(0) 3300; GFX900-NEXT: s_setpc_b64 s[30:31] 3301; 3302; GFX90A-LABEL: v_shuffle_v3i16_v3i16__5_2_3: 3303; GFX90A: ; %bb.0: 3304; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3305; GFX90A-NEXT: v_mov_b32_e32 v4, 0 3306; GFX90A-NEXT: ;;#ASMSTART 3307; GFX90A-NEXT: ; def v[0:1] 3308; GFX90A-NEXT: ;;#ASMEND 3309; GFX90A-NEXT: ;;#ASMSTART 3310; GFX90A-NEXT: ; def v[2:3] 3311; GFX90A-NEXT: ;;#ASMEND 3312; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 3313; GFX90A-NEXT: v_perm_b32 v0, v1, v3, s4 3314; GFX90A-NEXT: global_store_short v4, v2, s[16:17] offset:4 3315; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 3316; GFX90A-NEXT: s_waitcnt vmcnt(0) 3317; GFX90A-NEXT: s_setpc_b64 s[30:31] 3318; 3319; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_2_3: 3320; GFX940: ; %bb.0: 3321; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3322; GFX940-NEXT: v_mov_b32_e32 v4, 0 3323; GFX940-NEXT: ;;#ASMSTART 3324; GFX940-NEXT: ; def v[0:1] 3325; GFX940-NEXT: ;;#ASMEND 3326; GFX940-NEXT: ;;#ASMSTART 3327; GFX940-NEXT: ; def v[2:3] 3328; GFX940-NEXT: ;;#ASMEND 3329; GFX940-NEXT: s_mov_b32 s2, 0x5040100 3330; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 3331; GFX940-NEXT: global_store_short v4, v2, s[0:1] offset:4 sc0 sc1 3332; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 3333; GFX940-NEXT: s_waitcnt vmcnt(0) 3334; GFX940-NEXT: s_setpc_b64 s[30:31] 3335 %vec0 = call <4 x i16> asm "; def $0", "=v"() 3336 %vec1 = call <4 x i16> asm "; def $0", "=v"() 3337 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 3338 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 3339 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 2, i32 3> 3340 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 3341 ret void 3342} 3343 3344define void @v_shuffle_v3i16_v3i16__5_4_3(ptr addrspace(1) inreg %ptr) { 3345; GFX900-LABEL: 
v_shuffle_v3i16_v3i16__5_4_3: 3346; GFX900: ; %bb.0: 3347; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3348; GFX900-NEXT: v_mov_b32_e32 v2, 0 3349; GFX900-NEXT: ;;#ASMSTART 3350; GFX900-NEXT: ; def v[0:1] 3351; GFX900-NEXT: ;;#ASMEND 3352; GFX900-NEXT: s_mov_b32 s4, 0xffff 3353; GFX900-NEXT: v_bfi_b32 v1, s4, v1, v0 3354; GFX900-NEXT: global_store_short v2, v0, s[16:17] offset:4 3355; GFX900-NEXT: global_store_dword v2, v1, s[16:17] 3356; GFX900-NEXT: s_waitcnt vmcnt(0) 3357; GFX900-NEXT: s_setpc_b64 s[30:31] 3358; 3359; GFX90A-LABEL: v_shuffle_v3i16_v3i16__5_4_3: 3360; GFX90A: ; %bb.0: 3361; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3362; GFX90A-NEXT: v_mov_b32_e32 v2, 0 3363; GFX90A-NEXT: ;;#ASMSTART 3364; GFX90A-NEXT: ; def v[0:1] 3365; GFX90A-NEXT: ;;#ASMEND 3366; GFX90A-NEXT: s_mov_b32 s4, 0xffff 3367; GFX90A-NEXT: v_bfi_b32 v1, s4, v1, v0 3368; GFX90A-NEXT: global_store_short v2, v0, s[16:17] offset:4 3369; GFX90A-NEXT: global_store_dword v2, v1, s[16:17] 3370; GFX90A-NEXT: s_waitcnt vmcnt(0) 3371; GFX90A-NEXT: s_setpc_b64 s[30:31] 3372; 3373; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_4_3: 3374; GFX940: ; %bb.0: 3375; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3376; GFX940-NEXT: v_mov_b32_e32 v2, 0 3377; GFX940-NEXT: ;;#ASMSTART 3378; GFX940-NEXT: ; def v[0:1] 3379; GFX940-NEXT: ;;#ASMEND 3380; GFX940-NEXT: s_mov_b32 s2, 0xffff 3381; GFX940-NEXT: v_bfi_b32 v1, s2, v1, v0 3382; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 3383; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 3384; GFX940-NEXT: s_waitcnt vmcnt(0) 3385; GFX940-NEXT: s_setpc_b64 s[30:31] 3386 %vec0 = call <4 x i16> asm "; def $0", "=v"() 3387 %vec1 = call <4 x i16> asm "; def $0", "=v"() 3388 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 3389 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 3390 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> 
%extract31, <3 x i32> <i32 5, i32 4, i32 3> 3391 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 3392 ret void 3393} 3394 3395define void @v_shuffle_v3i16_v3i16__u_4_4(ptr addrspace(1) inreg %ptr) { 3396; GFX900-LABEL: v_shuffle_v3i16_v3i16__u_4_4: 3397; GFX900: ; %bb.0: 3398; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3399; GFX900-NEXT: v_mov_b32_e32 v2, 0 3400; GFX900-NEXT: ;;#ASMSTART 3401; GFX900-NEXT: ; def v[0:1] 3402; GFX900-NEXT: ;;#ASMEND 3403; GFX900-NEXT: global_store_short_d16_hi v2, v0, s[16:17] offset:4 3404; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 3405; GFX900-NEXT: s_waitcnt vmcnt(0) 3406; GFX900-NEXT: s_setpc_b64 s[30:31] 3407; 3408; GFX90A-LABEL: v_shuffle_v3i16_v3i16__u_4_4: 3409; GFX90A: ; %bb.0: 3410; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3411; GFX90A-NEXT: v_mov_b32_e32 v2, 0 3412; GFX90A-NEXT: ;;#ASMSTART 3413; GFX90A-NEXT: ; def v[0:1] 3414; GFX90A-NEXT: ;;#ASMEND 3415; GFX90A-NEXT: global_store_short_d16_hi v2, v0, s[16:17] offset:4 3416; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 3417; GFX90A-NEXT: s_waitcnt vmcnt(0) 3418; GFX90A-NEXT: s_setpc_b64 s[30:31] 3419; 3420; GFX940-LABEL: v_shuffle_v3i16_v3i16__u_4_4: 3421; GFX940: ; %bb.0: 3422; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3423; GFX940-NEXT: v_mov_b32_e32 v2, 0 3424; GFX940-NEXT: ;;#ASMSTART 3425; GFX940-NEXT: ; def v[0:1] 3426; GFX940-NEXT: ;;#ASMEND 3427; GFX940-NEXT: global_store_short_d16_hi v2, v0, s[0:1] offset:4 sc0 sc1 3428; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 3429; GFX940-NEXT: s_waitcnt vmcnt(0) 3430; GFX940-NEXT: s_setpc_b64 s[30:31] 3431 %vec0 = call <4 x i16> asm "; def $0", "=v"() 3432 %vec1 = call <4 x i16> asm "; def $0", "=v"() 3433 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 3434 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 3435 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, 
<3 x i32> <i32 poison, i32 4, i32 4> 3436 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 3437 ret void 3438} 3439 3440define void @v_shuffle_v3i16_v3i16__0_4_4(ptr addrspace(1) inreg %ptr) { 3441; GFX900-LABEL: v_shuffle_v3i16_v3i16__0_4_4: 3442; GFX900: ; %bb.0: 3443; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3444; GFX900-NEXT: ;;#ASMSTART 3445; GFX900-NEXT: ; def v[0:1] 3446; GFX900-NEXT: ;;#ASMEND 3447; GFX900-NEXT: s_mov_b32 s4, 0xffff 3448; GFX900-NEXT: v_mov_b32_e32 v3, 0 3449; GFX900-NEXT: ;;#ASMSTART 3450; GFX900-NEXT: ; def v[1:2] 3451; GFX900-NEXT: ;;#ASMEND 3452; GFX900-NEXT: v_bfi_b32 v0, s4, v0, v1 3453; GFX900-NEXT: global_store_dword v3, v0, s[16:17] 3454; GFX900-NEXT: v_lshrrev_b32_e32 v0, 16, v1 3455; GFX900-NEXT: global_store_short v3, v0, s[16:17] offset:4 3456; GFX900-NEXT: s_waitcnt vmcnt(0) 3457; GFX900-NEXT: s_setpc_b64 s[30:31] 3458; 3459; GFX90A-LABEL: v_shuffle_v3i16_v3i16__0_4_4: 3460; GFX90A: ; %bb.0: 3461; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3462; GFX90A-NEXT: ;;#ASMSTART 3463; GFX90A-NEXT: ; def v[0:1] 3464; GFX90A-NEXT: ;;#ASMEND 3465; GFX90A-NEXT: s_mov_b32 s4, 0xffff 3466; GFX90A-NEXT: v_mov_b32_e32 v4, 0 3467; GFX90A-NEXT: ;;#ASMSTART 3468; GFX90A-NEXT: ; def v[2:3] 3469; GFX90A-NEXT: ;;#ASMEND 3470; GFX90A-NEXT: v_bfi_b32 v0, s4, v0, v2 3471; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 3472; GFX90A-NEXT: v_lshrrev_b32_e32 v0, 16, v2 3473; GFX90A-NEXT: global_store_short v4, v0, s[16:17] offset:4 3474; GFX90A-NEXT: s_waitcnt vmcnt(0) 3475; GFX90A-NEXT: s_setpc_b64 s[30:31] 3476; 3477; GFX940-LABEL: v_shuffle_v3i16_v3i16__0_4_4: 3478; GFX940: ; %bb.0: 3479; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3480; GFX940-NEXT: ;;#ASMSTART 3481; GFX940-NEXT: ; def v[0:1] 3482; GFX940-NEXT: ;;#ASMEND 3483; GFX940-NEXT: s_mov_b32 s2, 0xffff 3484; GFX940-NEXT: v_mov_b32_e32 v4, 0 3485; GFX940-NEXT: ;;#ASMSTART 3486; GFX940-NEXT: ; def v[2:3] 3487; GFX940-NEXT: ;;#ASMEND 3488; GFX940-NEXT: s_nop 0 
3489; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v2 3490; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 3491; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v2 3492; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 3493; GFX940-NEXT: s_waitcnt vmcnt(0) 3494; GFX940-NEXT: s_setpc_b64 s[30:31] 3495 %vec0 = call <4 x i16> asm "; def $0", "=v"() 3496 %vec1 = call <4 x i16> asm "; def $0", "=v"() 3497 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 3498 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 3499 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 0, i32 4, i32 4> 3500 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 3501 ret void 3502} 3503 3504define void @v_shuffle_v3i16_v3i16__1_4_4(ptr addrspace(1) inreg %ptr) { 3505; GFX900-LABEL: v_shuffle_v3i16_v3i16__1_4_4: 3506; GFX900: ; %bb.0: 3507; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3508; GFX900-NEXT: ;;#ASMSTART 3509; GFX900-NEXT: ; def v[0:1] 3510; GFX900-NEXT: ;;#ASMEND 3511; GFX900-NEXT: s_mov_b32 s4, 0x7060302 3512; GFX900-NEXT: v_mov_b32_e32 v3, 0 3513; GFX900-NEXT: ;;#ASMSTART 3514; GFX900-NEXT: ; def v[1:2] 3515; GFX900-NEXT: ;;#ASMEND 3516; GFX900-NEXT: v_perm_b32 v0, v1, v0, s4 3517; GFX900-NEXT: global_store_dword v3, v0, s[16:17] 3518; GFX900-NEXT: v_lshrrev_b32_e32 v0, 16, v1 3519; GFX900-NEXT: global_store_short v3, v0, s[16:17] offset:4 3520; GFX900-NEXT: s_waitcnt vmcnt(0) 3521; GFX900-NEXT: s_setpc_b64 s[30:31] 3522; 3523; GFX90A-LABEL: v_shuffle_v3i16_v3i16__1_4_4: 3524; GFX90A: ; %bb.0: 3525; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3526; GFX90A-NEXT: ;;#ASMSTART 3527; GFX90A-NEXT: ; def v[0:1] 3528; GFX90A-NEXT: ;;#ASMEND 3529; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 3530; GFX90A-NEXT: v_mov_b32_e32 v4, 0 3531; GFX90A-NEXT: ;;#ASMSTART 3532; GFX90A-NEXT: ; def v[2:3] 3533; GFX90A-NEXT: ;;#ASMEND 3534; GFX90A-NEXT: v_perm_b32 v0, v2, v0, s4 
3535; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 3536; GFX90A-NEXT: v_lshrrev_b32_e32 v0, 16, v2 3537; GFX90A-NEXT: global_store_short v4, v0, s[16:17] offset:4 3538; GFX90A-NEXT: s_waitcnt vmcnt(0) 3539; GFX90A-NEXT: s_setpc_b64 s[30:31] 3540; 3541; GFX940-LABEL: v_shuffle_v3i16_v3i16__1_4_4: 3542; GFX940: ; %bb.0: 3543; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3544; GFX940-NEXT: ;;#ASMSTART 3545; GFX940-NEXT: ; def v[0:1] 3546; GFX940-NEXT: ;;#ASMEND 3547; GFX940-NEXT: s_mov_b32 s2, 0x7060302 3548; GFX940-NEXT: v_mov_b32_e32 v4, 0 3549; GFX940-NEXT: ;;#ASMSTART 3550; GFX940-NEXT: ; def v[2:3] 3551; GFX940-NEXT: ;;#ASMEND 3552; GFX940-NEXT: s_nop 0 3553; GFX940-NEXT: v_perm_b32 v0, v2, v0, s2 3554; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 3555; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v2 3556; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 3557; GFX940-NEXT: s_waitcnt vmcnt(0) 3558; GFX940-NEXT: s_setpc_b64 s[30:31] 3559 %vec0 = call <4 x i16> asm "; def $0", "=v"() 3560 %vec1 = call <4 x i16> asm "; def $0", "=v"() 3561 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 3562 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 3563 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 1, i32 4, i32 4> 3564 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 3565 ret void 3566} 3567 3568define void @v_shuffle_v3i16_v3i16__2_4_4(ptr addrspace(1) inreg %ptr) { 3569; GFX900-LABEL: v_shuffle_v3i16_v3i16__2_4_4: 3570; GFX900: ; %bb.0: 3571; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3572; GFX900-NEXT: ;;#ASMSTART 3573; GFX900-NEXT: ; def v[0:1] 3574; GFX900-NEXT: ;;#ASMEND 3575; GFX900-NEXT: s_mov_b32 s4, 0xffff 3576; GFX900-NEXT: v_mov_b32_e32 v4, 0 3577; GFX900-NEXT: ;;#ASMSTART 3578; GFX900-NEXT: ; def v[2:3] 3579; GFX900-NEXT: ;;#ASMEND 3580; GFX900-NEXT: v_bfi_b32 v0, s4, v1, v2 3581; GFX900-NEXT: 
v_lshrrev_b32_e32 v1, 16, v2 3582; GFX900-NEXT: global_store_short v4, v1, s[16:17] offset:4 3583; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 3584; GFX900-NEXT: s_waitcnt vmcnt(0) 3585; GFX900-NEXT: s_setpc_b64 s[30:31] 3586; 3587; GFX90A-LABEL: v_shuffle_v3i16_v3i16__2_4_4: 3588; GFX90A: ; %bb.0: 3589; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3590; GFX90A-NEXT: ;;#ASMSTART 3591; GFX90A-NEXT: ; def v[0:1] 3592; GFX90A-NEXT: ;;#ASMEND 3593; GFX90A-NEXT: s_mov_b32 s4, 0xffff 3594; GFX90A-NEXT: v_mov_b32_e32 v4, 0 3595; GFX90A-NEXT: ;;#ASMSTART 3596; GFX90A-NEXT: ; def v[2:3] 3597; GFX90A-NEXT: ;;#ASMEND 3598; GFX90A-NEXT: v_bfi_b32 v0, s4, v1, v2 3599; GFX90A-NEXT: v_lshrrev_b32_e32 v1, 16, v2 3600; GFX90A-NEXT: global_store_short v4, v1, s[16:17] offset:4 3601; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 3602; GFX90A-NEXT: s_waitcnt vmcnt(0) 3603; GFX90A-NEXT: s_setpc_b64 s[30:31] 3604; 3605; GFX940-LABEL: v_shuffle_v3i16_v3i16__2_4_4: 3606; GFX940: ; %bb.0: 3607; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3608; GFX940-NEXT: ;;#ASMSTART 3609; GFX940-NEXT: ; def v[0:1] 3610; GFX940-NEXT: ;;#ASMEND 3611; GFX940-NEXT: s_mov_b32 s2, 0xffff 3612; GFX940-NEXT: v_mov_b32_e32 v4, 0 3613; GFX940-NEXT: ;;#ASMSTART 3614; GFX940-NEXT: ; def v[2:3] 3615; GFX940-NEXT: ;;#ASMEND 3616; GFX940-NEXT: s_nop 0 3617; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v2 3618; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v2 3619; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 3620; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 3621; GFX940-NEXT: s_waitcnt vmcnt(0) 3622; GFX940-NEXT: s_setpc_b64 s[30:31] 3623 %vec0 = call <4 x i16> asm "; def $0", "=v"() 3624 %vec1 = call <4 x i16> asm "; def $0", "=v"() 3625 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 3626 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 3627 %shuf = shufflevector <3 x i16> %extract3, <3 x 
i16> %extract31, <3 x i32> <i32 2, i32 4, i32 4> 3628 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 3629 ret void 3630} 3631 3632define void @v_shuffle_v3i16_v3i16__3_4_4(ptr addrspace(1) inreg %ptr) { 3633; GFX900-LABEL: v_shuffle_v3i16_v3i16__3_4_4: 3634; GFX900: ; %bb.0: 3635; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3636; GFX900-NEXT: v_mov_b32_e32 v2, 0 3637; GFX900-NEXT: ;;#ASMSTART 3638; GFX900-NEXT: ; def v[0:1] 3639; GFX900-NEXT: ;;#ASMEND 3640; GFX900-NEXT: global_store_short_d16_hi v2, v0, s[16:17] offset:4 3641; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 3642; GFX900-NEXT: s_waitcnt vmcnt(0) 3643; GFX900-NEXT: s_setpc_b64 s[30:31] 3644; 3645; GFX90A-LABEL: v_shuffle_v3i16_v3i16__3_4_4: 3646; GFX90A: ; %bb.0: 3647; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3648; GFX90A-NEXT: v_mov_b32_e32 v2, 0 3649; GFX90A-NEXT: ;;#ASMSTART 3650; GFX90A-NEXT: ; def v[0:1] 3651; GFX90A-NEXT: ;;#ASMEND 3652; GFX90A-NEXT: global_store_short_d16_hi v2, v0, s[16:17] offset:4 3653; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 3654; GFX90A-NEXT: s_waitcnt vmcnt(0) 3655; GFX90A-NEXT: s_setpc_b64 s[30:31] 3656; 3657; GFX940-LABEL: v_shuffle_v3i16_v3i16__3_4_4: 3658; GFX940: ; %bb.0: 3659; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3660; GFX940-NEXT: v_mov_b32_e32 v2, 0 3661; GFX940-NEXT: ;;#ASMSTART 3662; GFX940-NEXT: ; def v[0:1] 3663; GFX940-NEXT: ;;#ASMEND 3664; GFX940-NEXT: global_store_short_d16_hi v2, v0, s[0:1] offset:4 sc0 sc1 3665; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 3666; GFX940-NEXT: s_waitcnt vmcnt(0) 3667; GFX940-NEXT: s_setpc_b64 s[30:31] 3668 %vec0 = call <4 x i16> asm "; def $0", "=v"() 3669 %vec1 = call <4 x i16> asm "; def $0", "=v"() 3670 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 3671 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 3672 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> 
%extract31, <3 x i32> <i32 3, i32 4, i32 4> 3673 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 3674 ret void 3675} 3676 3677define void @v_shuffle_v3i16_v3i16__4_4_4(ptr addrspace(1) inreg %ptr) { 3678; GFX900-LABEL: v_shuffle_v3i16_v3i16__4_4_4: 3679; GFX900: ; %bb.0: 3680; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3681; GFX900-NEXT: ;;#ASMSTART 3682; GFX900-NEXT: ; def v[0:1] 3683; GFX900-NEXT: ;;#ASMEND 3684; GFX900-NEXT: s_mov_b32 s4, 0x7060302 3685; GFX900-NEXT: v_mov_b32_e32 v2, 0 3686; GFX900-NEXT: v_perm_b32 v1, v0, v0, s4 3687; GFX900-NEXT: v_lshrrev_b32_e32 v0, 16, v0 3688; GFX900-NEXT: global_store_dword v2, v1, s[16:17] 3689; GFX900-NEXT: global_store_short v2, v0, s[16:17] offset:4 3690; GFX900-NEXT: s_waitcnt vmcnt(0) 3691; GFX900-NEXT: s_setpc_b64 s[30:31] 3692; 3693; GFX90A-LABEL: v_shuffle_v3i16_v3i16__4_4_4: 3694; GFX90A: ; %bb.0: 3695; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3696; GFX90A-NEXT: ;;#ASMSTART 3697; GFX90A-NEXT: ; def v[0:1] 3698; GFX90A-NEXT: ;;#ASMEND 3699; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 3700; GFX90A-NEXT: v_mov_b32_e32 v2, 0 3701; GFX90A-NEXT: v_perm_b32 v1, v0, v0, s4 3702; GFX90A-NEXT: v_lshrrev_b32_e32 v0, 16, v0 3703; GFX90A-NEXT: global_store_dword v2, v1, s[16:17] 3704; GFX90A-NEXT: global_store_short v2, v0, s[16:17] offset:4 3705; GFX90A-NEXT: s_waitcnt vmcnt(0) 3706; GFX90A-NEXT: s_setpc_b64 s[30:31] 3707; 3708; GFX940-LABEL: v_shuffle_v3i16_v3i16__4_4_4: 3709; GFX940: ; %bb.0: 3710; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3711; GFX940-NEXT: ;;#ASMSTART 3712; GFX940-NEXT: ; def v[0:1] 3713; GFX940-NEXT: ;;#ASMEND 3714; GFX940-NEXT: s_mov_b32 s2, 0x7060302 3715; GFX940-NEXT: v_mov_b32_e32 v2, 0 3716; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 3717; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 3718; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 3719; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 3720; GFX940-NEXT: s_waitcnt vmcnt(0) 3721; GFX940-NEXT: 
s_setpc_b64 s[30:31] 3722 %vec0 = call <4 x i16> asm "; def $0", "=v"() 3723 %vec1 = call <4 x i16> asm "; def $0", "=v"() 3724 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 3725 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 3726 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 4, i32 4, i32 4> 3727 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 3728 ret void 3729} 3730 3731define void @v_shuffle_v3i16_v3i16__5_4_4(ptr addrspace(1) inreg %ptr) { 3732; GFX900-LABEL: v_shuffle_v3i16_v3i16__5_4_4: 3733; GFX900: ; %bb.0: 3734; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3735; GFX900-NEXT: ;;#ASMSTART 3736; GFX900-NEXT: ; def v[0:1] 3737; GFX900-NEXT: ;;#ASMEND 3738; GFX900-NEXT: s_mov_b32 s4, 0xffff 3739; GFX900-NEXT: v_mov_b32_e32 v2, 0 3740; GFX900-NEXT: v_bfi_b32 v1, s4, v1, v0 3741; GFX900-NEXT: v_lshrrev_b32_e32 v0, 16, v0 3742; GFX900-NEXT: global_store_short v2, v0, s[16:17] offset:4 3743; GFX900-NEXT: global_store_dword v2, v1, s[16:17] 3744; GFX900-NEXT: s_waitcnt vmcnt(0) 3745; GFX900-NEXT: s_setpc_b64 s[30:31] 3746; 3747; GFX90A-LABEL: v_shuffle_v3i16_v3i16__5_4_4: 3748; GFX90A: ; %bb.0: 3749; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3750; GFX90A-NEXT: ;;#ASMSTART 3751; GFX90A-NEXT: ; def v[0:1] 3752; GFX90A-NEXT: ;;#ASMEND 3753; GFX90A-NEXT: s_mov_b32 s4, 0xffff 3754; GFX90A-NEXT: v_mov_b32_e32 v2, 0 3755; GFX90A-NEXT: v_bfi_b32 v1, s4, v1, v0 3756; GFX90A-NEXT: v_lshrrev_b32_e32 v0, 16, v0 3757; GFX90A-NEXT: global_store_short v2, v0, s[16:17] offset:4 3758; GFX90A-NEXT: global_store_dword v2, v1, s[16:17] 3759; GFX90A-NEXT: s_waitcnt vmcnt(0) 3760; GFX90A-NEXT: s_setpc_b64 s[30:31] 3761; 3762; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_4_4: 3763; GFX940: ; %bb.0: 3764; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3765; GFX940-NEXT: ;;#ASMSTART 3766; GFX940-NEXT: ; def v[0:1] 3767; GFX940-NEXT: 
;;#ASMEND 3768; GFX940-NEXT: s_mov_b32 s2, 0xffff 3769; GFX940-NEXT: v_mov_b32_e32 v2, 0 3770; GFX940-NEXT: v_bfi_b32 v1, s2, v1, v0 3771; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 3772; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 3773; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 3774; GFX940-NEXT: s_waitcnt vmcnt(0) 3775; GFX940-NEXT: s_setpc_b64 s[30:31] 3776 %vec0 = call <4 x i16> asm "; def $0", "=v"() 3777 %vec1 = call <4 x i16> asm "; def $0", "=v"() 3778 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 3779 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 3780 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 4, i32 4> 3781 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 3782 ret void 3783} 3784 3785define void @v_shuffle_v3i16_v3i16__5_u_4(ptr addrspace(1) inreg %ptr) { 3786; GFX900-LABEL: v_shuffle_v3i16_v3i16__5_u_4: 3787; GFX900: ; %bb.0: 3788; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3789; GFX900-NEXT: v_mov_b32_e32 v2, 0 3790; GFX900-NEXT: ;;#ASMSTART 3791; GFX900-NEXT: ; def v[0:1] 3792; GFX900-NEXT: ;;#ASMEND 3793; GFX900-NEXT: global_store_short_d16_hi v2, v0, s[16:17] offset:4 3794; GFX900-NEXT: global_store_dword v2, v1, s[16:17] 3795; GFX900-NEXT: s_waitcnt vmcnt(0) 3796; GFX900-NEXT: s_setpc_b64 s[30:31] 3797; 3798; GFX90A-LABEL: v_shuffle_v3i16_v3i16__5_u_4: 3799; GFX90A: ; %bb.0: 3800; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3801; GFX90A-NEXT: v_mov_b32_e32 v2, 0 3802; GFX90A-NEXT: ;;#ASMSTART 3803; GFX90A-NEXT: ; def v[0:1] 3804; GFX90A-NEXT: ;;#ASMEND 3805; GFX90A-NEXT: global_store_short_d16_hi v2, v0, s[16:17] offset:4 3806; GFX90A-NEXT: global_store_dword v2, v1, s[16:17] 3807; GFX90A-NEXT: s_waitcnt vmcnt(0) 3808; GFX90A-NEXT: s_setpc_b64 s[30:31] 3809; 3810; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_u_4: 3811; GFX940: ; %bb.0: 3812; GFX940-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) 3813; GFX940-NEXT: v_mov_b32_e32 v2, 0 3814; GFX940-NEXT: ;;#ASMSTART 3815; GFX940-NEXT: ; def v[0:1] 3816; GFX940-NEXT: ;;#ASMEND 3817; GFX940-NEXT: global_store_short_d16_hi v2, v0, s[0:1] offset:4 sc0 sc1 3818; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 3819; GFX940-NEXT: s_waitcnt vmcnt(0) 3820; GFX940-NEXT: s_setpc_b64 s[30:31] 3821 %vec0 = call <4 x i16> asm "; def $0", "=v"() 3822 %vec1 = call <4 x i16> asm "; def $0", "=v"() 3823 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 3824 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 3825 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 poison, i32 4> 3826 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 3827 ret void 3828} 3829 3830define void @v_shuffle_v3i16_v3i16__5_0_4(ptr addrspace(1) inreg %ptr) { 3831; GFX900-LABEL: v_shuffle_v3i16_v3i16__5_0_4: 3832; GFX900: ; %bb.0: 3833; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3834; GFX900-NEXT: ;;#ASMSTART 3835; GFX900-NEXT: ; def v[0:1] 3836; GFX900-NEXT: ;;#ASMEND 3837; GFX900-NEXT: v_mov_b32_e32 v3, 0 3838; GFX900-NEXT: ;;#ASMSTART 3839; GFX900-NEXT: ; def v[1:2] 3840; GFX900-NEXT: ;;#ASMEND 3841; GFX900-NEXT: s_mov_b32 s4, 0x5040100 3842; GFX900-NEXT: v_perm_b32 v0, v0, v2, s4 3843; GFX900-NEXT: global_store_short_d16_hi v3, v1, s[16:17] offset:4 3844; GFX900-NEXT: global_store_dword v3, v0, s[16:17] 3845; GFX900-NEXT: s_waitcnt vmcnt(0) 3846; GFX900-NEXT: s_setpc_b64 s[30:31] 3847; 3848; GFX90A-LABEL: v_shuffle_v3i16_v3i16__5_0_4: 3849; GFX90A: ; %bb.0: 3850; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3851; GFX90A-NEXT: v_mov_b32_e32 v4, 0 3852; GFX90A-NEXT: ;;#ASMSTART 3853; GFX90A-NEXT: ; def v[0:1] 3854; GFX90A-NEXT: ;;#ASMEND 3855; GFX90A-NEXT: ;;#ASMSTART 3856; GFX90A-NEXT: ; def v[2:3] 3857; GFX90A-NEXT: ;;#ASMEND 3858; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 
3859; GFX90A-NEXT: v_perm_b32 v0, v0, v3, s4 3860; GFX90A-NEXT: global_store_short_d16_hi v4, v2, s[16:17] offset:4 3861; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 3862; GFX90A-NEXT: s_waitcnt vmcnt(0) 3863; GFX90A-NEXT: s_setpc_b64 s[30:31] 3864; 3865; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_0_4: 3866; GFX940: ; %bb.0: 3867; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3868; GFX940-NEXT: v_mov_b32_e32 v4, 0 3869; GFX940-NEXT: ;;#ASMSTART 3870; GFX940-NEXT: ; def v[0:1] 3871; GFX940-NEXT: ;;#ASMEND 3872; GFX940-NEXT: ;;#ASMSTART 3873; GFX940-NEXT: ; def v[2:3] 3874; GFX940-NEXT: ;;#ASMEND 3875; GFX940-NEXT: s_mov_b32 s2, 0x5040100 3876; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 3877; GFX940-NEXT: global_store_short_d16_hi v4, v2, s[0:1] offset:4 sc0 sc1 3878; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 3879; GFX940-NEXT: s_waitcnt vmcnt(0) 3880; GFX940-NEXT: s_setpc_b64 s[30:31] 3881 %vec0 = call <4 x i16> asm "; def $0", "=v"() 3882 %vec1 = call <4 x i16> asm "; def $0", "=v"() 3883 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 3884 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 3885 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 0, i32 4> 3886 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 3887 ret void 3888} 3889 3890define void @v_shuffle_v3i16_v3i16__5_1_4(ptr addrspace(1) inreg %ptr) { 3891; GFX900-LABEL: v_shuffle_v3i16_v3i16__5_1_4: 3892; GFX900: ; %bb.0: 3893; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3894; GFX900-NEXT: ;;#ASMSTART 3895; GFX900-NEXT: ; def v[0:1] 3896; GFX900-NEXT: ;;#ASMEND 3897; GFX900-NEXT: v_mov_b32_e32 v3, 0 3898; GFX900-NEXT: ;;#ASMSTART 3899; GFX900-NEXT: ; def v[1:2] 3900; GFX900-NEXT: ;;#ASMEND 3901; GFX900-NEXT: s_mov_b32 s4, 0xffff 3902; GFX900-NEXT: v_bfi_b32 v0, s4, v2, v0 3903; GFX900-NEXT: global_store_short_d16_hi v3, v1, s[16:17] offset:4 3904; 
GFX900-NEXT: global_store_dword v3, v0, s[16:17] 3905; GFX900-NEXT: s_waitcnt vmcnt(0) 3906; GFX900-NEXT: s_setpc_b64 s[30:31] 3907; 3908; GFX90A-LABEL: v_shuffle_v3i16_v3i16__5_1_4: 3909; GFX90A: ; %bb.0: 3910; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3911; GFX90A-NEXT: v_mov_b32_e32 v4, 0 3912; GFX90A-NEXT: ;;#ASMSTART 3913; GFX90A-NEXT: ; def v[0:1] 3914; GFX90A-NEXT: ;;#ASMEND 3915; GFX90A-NEXT: ;;#ASMSTART 3916; GFX90A-NEXT: ; def v[2:3] 3917; GFX90A-NEXT: ;;#ASMEND 3918; GFX90A-NEXT: s_mov_b32 s4, 0xffff 3919; GFX90A-NEXT: v_bfi_b32 v0, s4, v3, v0 3920; GFX90A-NEXT: global_store_short_d16_hi v4, v2, s[16:17] offset:4 3921; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 3922; GFX90A-NEXT: s_waitcnt vmcnt(0) 3923; GFX90A-NEXT: s_setpc_b64 s[30:31] 3924; 3925; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_1_4: 3926; GFX940: ; %bb.0: 3927; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3928; GFX940-NEXT: v_mov_b32_e32 v4, 0 3929; GFX940-NEXT: ;;#ASMSTART 3930; GFX940-NEXT: ; def v[0:1] 3931; GFX940-NEXT: ;;#ASMEND 3932; GFX940-NEXT: ;;#ASMSTART 3933; GFX940-NEXT: ; def v[2:3] 3934; GFX940-NEXT: ;;#ASMEND 3935; GFX940-NEXT: s_mov_b32 s2, 0xffff 3936; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v0 3937; GFX940-NEXT: global_store_short_d16_hi v4, v2, s[0:1] offset:4 sc0 sc1 3938; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 3939; GFX940-NEXT: s_waitcnt vmcnt(0) 3940; GFX940-NEXT: s_setpc_b64 s[30:31] 3941 %vec0 = call <4 x i16> asm "; def $0", "=v"() 3942 %vec1 = call <4 x i16> asm "; def $0", "=v"() 3943 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 3944 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 3945 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 1, i32 4> 3946 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 3947 ret void 3948} 3949 3950define void @v_shuffle_v3i16_v3i16__5_2_4(ptr addrspace(1) inreg %ptr) { 
3951; GFX900-LABEL: v_shuffle_v3i16_v3i16__5_2_4: 3952; GFX900: ; %bb.0: 3953; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3954; GFX900-NEXT: v_mov_b32_e32 v4, 0 3955; GFX900-NEXT: ;;#ASMSTART 3956; GFX900-NEXT: ; def v[0:1] 3957; GFX900-NEXT: ;;#ASMEND 3958; GFX900-NEXT: ;;#ASMSTART 3959; GFX900-NEXT: ; def v[2:3] 3960; GFX900-NEXT: ;;#ASMEND 3961; GFX900-NEXT: s_mov_b32 s4, 0x5040100 3962; GFX900-NEXT: v_perm_b32 v0, v1, v3, s4 3963; GFX900-NEXT: global_store_short_d16_hi v4, v2, s[16:17] offset:4 3964; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 3965; GFX900-NEXT: s_waitcnt vmcnt(0) 3966; GFX900-NEXT: s_setpc_b64 s[30:31] 3967; 3968; GFX90A-LABEL: v_shuffle_v3i16_v3i16__5_2_4: 3969; GFX90A: ; %bb.0: 3970; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3971; GFX90A-NEXT: v_mov_b32_e32 v4, 0 3972; GFX90A-NEXT: ;;#ASMSTART 3973; GFX90A-NEXT: ; def v[0:1] 3974; GFX90A-NEXT: ;;#ASMEND 3975; GFX90A-NEXT: ;;#ASMSTART 3976; GFX90A-NEXT: ; def v[2:3] 3977; GFX90A-NEXT: ;;#ASMEND 3978; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 3979; GFX90A-NEXT: v_perm_b32 v0, v1, v3, s4 3980; GFX90A-NEXT: global_store_short_d16_hi v4, v2, s[16:17] offset:4 3981; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 3982; GFX90A-NEXT: s_waitcnt vmcnt(0) 3983; GFX90A-NEXT: s_setpc_b64 s[30:31] 3984; 3985; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_2_4: 3986; GFX940: ; %bb.0: 3987; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3988; GFX940-NEXT: v_mov_b32_e32 v4, 0 3989; GFX940-NEXT: ;;#ASMSTART 3990; GFX940-NEXT: ; def v[0:1] 3991; GFX940-NEXT: ;;#ASMEND 3992; GFX940-NEXT: ;;#ASMSTART 3993; GFX940-NEXT: ; def v[2:3] 3994; GFX940-NEXT: ;;#ASMEND 3995; GFX940-NEXT: s_mov_b32 s2, 0x5040100 3996; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 3997; GFX940-NEXT: global_store_short_d16_hi v4, v2, s[0:1] offset:4 sc0 sc1 3998; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 3999; GFX940-NEXT: s_waitcnt vmcnt(0) 4000; GFX940-NEXT: s_setpc_b64 s[30:31] 4001 %vec0 = call <4 x i16> asm "; 
def $0", "=v"() 4002 %vec1 = call <4 x i16> asm "; def $0", "=v"() 4003 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 4004 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 4005 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 2, i32 4> 4006 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 4007 ret void 4008} 4009 4010define void @v_shuffle_v3i16_v3i16__5_3_4(ptr addrspace(1) inreg %ptr) { 4011; GFX900-LABEL: v_shuffle_v3i16_v3i16__5_3_4: 4012; GFX900: ; %bb.0: 4013; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4014; GFX900-NEXT: v_mov_b32_e32 v2, 0 4015; GFX900-NEXT: ;;#ASMSTART 4016; GFX900-NEXT: ; def v[0:1] 4017; GFX900-NEXT: ;;#ASMEND 4018; GFX900-NEXT: s_mov_b32 s4, 0x5040100 4019; GFX900-NEXT: v_perm_b32 v1, v0, v1, s4 4020; GFX900-NEXT: global_store_short_d16_hi v2, v0, s[16:17] offset:4 4021; GFX900-NEXT: global_store_dword v2, v1, s[16:17] 4022; GFX900-NEXT: s_waitcnt vmcnt(0) 4023; GFX900-NEXT: s_setpc_b64 s[30:31] 4024; 4025; GFX90A-LABEL: v_shuffle_v3i16_v3i16__5_3_4: 4026; GFX90A: ; %bb.0: 4027; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4028; GFX90A-NEXT: v_mov_b32_e32 v2, 0 4029; GFX90A-NEXT: ;;#ASMSTART 4030; GFX90A-NEXT: ; def v[0:1] 4031; GFX90A-NEXT: ;;#ASMEND 4032; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 4033; GFX90A-NEXT: v_perm_b32 v1, v0, v1, s4 4034; GFX90A-NEXT: global_store_short_d16_hi v2, v0, s[16:17] offset:4 4035; GFX90A-NEXT: global_store_dword v2, v1, s[16:17] 4036; GFX90A-NEXT: s_waitcnt vmcnt(0) 4037; GFX90A-NEXT: s_setpc_b64 s[30:31] 4038; 4039; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_3_4: 4040; GFX940: ; %bb.0: 4041; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4042; GFX940-NEXT: v_mov_b32_e32 v2, 0 4043; GFX940-NEXT: ;;#ASMSTART 4044; GFX940-NEXT: ; def v[0:1] 4045; GFX940-NEXT: ;;#ASMEND 4046; GFX940-NEXT: s_mov_b32 s2, 0x5040100 4047; GFX940-NEXT: v_perm_b32 v1, v0, v1, s2 
4048; GFX940-NEXT: global_store_short_d16_hi v2, v0, s[0:1] offset:4 sc0 sc1 4049; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 4050; GFX940-NEXT: s_waitcnt vmcnt(0) 4051; GFX940-NEXT: s_setpc_b64 s[30:31] 4052 %vec0 = call <4 x i16> asm "; def $0", "=v"() 4053 %vec1 = call <4 x i16> asm "; def $0", "=v"() 4054 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 4055 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 4056 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 3, i32 4> 4057 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 4058 ret void 4059} 4060 4061define void @v_shuffle_v3i16_v3i16__u_5_5(ptr addrspace(1) inreg %ptr) { 4062; GFX900-LABEL: v_shuffle_v3i16_v3i16__u_5_5: 4063; GFX900: ; %bb.0: 4064; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4065; GFX900-NEXT: v_mov_b32_e32 v2, 0 4066; GFX900-NEXT: ;;#ASMSTART 4067; GFX900-NEXT: ; def v[0:1] 4068; GFX900-NEXT: ;;#ASMEND 4069; GFX900-NEXT: v_lshlrev_b32_e32 v0, 16, v1 4070; GFX900-NEXT: global_store_short v2, v1, s[16:17] offset:4 4071; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 4072; GFX900-NEXT: s_waitcnt vmcnt(0) 4073; GFX900-NEXT: s_setpc_b64 s[30:31] 4074; 4075; GFX90A-LABEL: v_shuffle_v3i16_v3i16__u_5_5: 4076; GFX90A: ; %bb.0: 4077; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4078; GFX90A-NEXT: v_mov_b32_e32 v2, 0 4079; GFX90A-NEXT: ;;#ASMSTART 4080; GFX90A-NEXT: ; def v[0:1] 4081; GFX90A-NEXT: ;;#ASMEND 4082; GFX90A-NEXT: v_lshlrev_b32_e32 v0, 16, v1 4083; GFX90A-NEXT: global_store_short v2, v1, s[16:17] offset:4 4084; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 4085; GFX90A-NEXT: s_waitcnt vmcnt(0) 4086; GFX90A-NEXT: s_setpc_b64 s[30:31] 4087; 4088; GFX940-LABEL: v_shuffle_v3i16_v3i16__u_5_5: 4089; GFX940: ; %bb.0: 4090; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4091; GFX940-NEXT: v_mov_b32_e32 v2, 0 4092; GFX940-NEXT: 
;;#ASMSTART 4093; GFX940-NEXT: ; def v[0:1] 4094; GFX940-NEXT: ;;#ASMEND 4095; GFX940-NEXT: s_nop 0 4096; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v1 4097; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 4098; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 4099; GFX940-NEXT: s_waitcnt vmcnt(0) 4100; GFX940-NEXT: s_setpc_b64 s[30:31] 4101 %vec0 = call <4 x i16> asm "; def $0", "=v"() 4102 %vec1 = call <4 x i16> asm "; def $0", "=v"() 4103 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 4104 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 4105 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 poison, i32 5, i32 5> 4106 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 4107 ret void 4108} 4109 4110define void @v_shuffle_v3i16_v3i16__0_5_5(ptr addrspace(1) inreg %ptr) { 4111; GFX900-LABEL: v_shuffle_v3i16_v3i16__0_5_5: 4112; GFX900: ; %bb.0: 4113; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4114; GFX900-NEXT: ;;#ASMSTART 4115; GFX900-NEXT: ; def v[0:1] 4116; GFX900-NEXT: ;;#ASMEND 4117; GFX900-NEXT: v_mov_b32_e32 v3, 0 4118; GFX900-NEXT: ;;#ASMSTART 4119; GFX900-NEXT: ; def v[1:2] 4120; GFX900-NEXT: ;;#ASMEND 4121; GFX900-NEXT: s_mov_b32 s4, 0x5040100 4122; GFX900-NEXT: v_perm_b32 v0, v2, v0, s4 4123; GFX900-NEXT: global_store_short v3, v2, s[16:17] offset:4 4124; GFX900-NEXT: global_store_dword v3, v0, s[16:17] 4125; GFX900-NEXT: s_waitcnt vmcnt(0) 4126; GFX900-NEXT: s_setpc_b64 s[30:31] 4127; 4128; GFX90A-LABEL: v_shuffle_v3i16_v3i16__0_5_5: 4129; GFX90A: ; %bb.0: 4130; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4131; GFX90A-NEXT: v_mov_b32_e32 v4, 0 4132; GFX90A-NEXT: ;;#ASMSTART 4133; GFX90A-NEXT: ; def v[0:1] 4134; GFX90A-NEXT: ;;#ASMEND 4135; GFX90A-NEXT: ;;#ASMSTART 4136; GFX90A-NEXT: ; def v[2:3] 4137; GFX90A-NEXT: ;;#ASMEND 4138; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 4139; GFX90A-NEXT: v_perm_b32 
v0, v3, v0, s4 4140; GFX90A-NEXT: global_store_short v4, v3, s[16:17] offset:4 4141; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 4142; GFX90A-NEXT: s_waitcnt vmcnt(0) 4143; GFX90A-NEXT: s_setpc_b64 s[30:31] 4144; 4145; GFX940-LABEL: v_shuffle_v3i16_v3i16__0_5_5: 4146; GFX940: ; %bb.0: 4147; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4148; GFX940-NEXT: v_mov_b32_e32 v4, 0 4149; GFX940-NEXT: ;;#ASMSTART 4150; GFX940-NEXT: ; def v[0:1] 4151; GFX940-NEXT: ;;#ASMEND 4152; GFX940-NEXT: ;;#ASMSTART 4153; GFX940-NEXT: ; def v[2:3] 4154; GFX940-NEXT: ;;#ASMEND 4155; GFX940-NEXT: s_mov_b32 s2, 0x5040100 4156; GFX940-NEXT: v_perm_b32 v0, v3, v0, s2 4157; GFX940-NEXT: global_store_short v4, v3, s[0:1] offset:4 sc0 sc1 4158; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 4159; GFX940-NEXT: s_waitcnt vmcnt(0) 4160; GFX940-NEXT: s_setpc_b64 s[30:31] 4161 %vec0 = call <4 x i16> asm "; def $0", "=v"() 4162 %vec1 = call <4 x i16> asm "; def $0", "=v"() 4163 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 4164 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 4165 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 0, i32 5, i32 5> 4166 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 4167 ret void 4168} 4169 4170define void @v_shuffle_v3i16_v3i16__1_5_5(ptr addrspace(1) inreg %ptr) { 4171; GFX900-LABEL: v_shuffle_v3i16_v3i16__1_5_5: 4172; GFX900: ; %bb.0: 4173; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4174; GFX900-NEXT: ;;#ASMSTART 4175; GFX900-NEXT: ; def v[0:1] 4176; GFX900-NEXT: ;;#ASMEND 4177; GFX900-NEXT: v_mov_b32_e32 v3, 0 4178; GFX900-NEXT: ;;#ASMSTART 4179; GFX900-NEXT: ; def v[1:2] 4180; GFX900-NEXT: ;;#ASMEND 4181; GFX900-NEXT: v_alignbit_b32 v0, v2, v0, 16 4182; GFX900-NEXT: global_store_short v3, v2, s[16:17] offset:4 4183; GFX900-NEXT: global_store_dword v3, v0, s[16:17] 4184; GFX900-NEXT: s_waitcnt vmcnt(0) 4185; 
GFX900-NEXT: s_setpc_b64 s[30:31] 4186; 4187; GFX90A-LABEL: v_shuffle_v3i16_v3i16__1_5_5: 4188; GFX90A: ; %bb.0: 4189; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4190; GFX90A-NEXT: v_mov_b32_e32 v4, 0 4191; GFX90A-NEXT: ;;#ASMSTART 4192; GFX90A-NEXT: ; def v[0:1] 4193; GFX90A-NEXT: ;;#ASMEND 4194; GFX90A-NEXT: ;;#ASMSTART 4195; GFX90A-NEXT: ; def v[2:3] 4196; GFX90A-NEXT: ;;#ASMEND 4197; GFX90A-NEXT: v_alignbit_b32 v0, v3, v0, 16 4198; GFX90A-NEXT: global_store_short v4, v3, s[16:17] offset:4 4199; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 4200; GFX90A-NEXT: s_waitcnt vmcnt(0) 4201; GFX90A-NEXT: s_setpc_b64 s[30:31] 4202; 4203; GFX940-LABEL: v_shuffle_v3i16_v3i16__1_5_5: 4204; GFX940: ; %bb.0: 4205; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4206; GFX940-NEXT: v_mov_b32_e32 v4, 0 4207; GFX940-NEXT: ;;#ASMSTART 4208; GFX940-NEXT: ; def v[0:1] 4209; GFX940-NEXT: ;;#ASMEND 4210; GFX940-NEXT: ;;#ASMSTART 4211; GFX940-NEXT: ; def v[2:3] 4212; GFX940-NEXT: ;;#ASMEND 4213; GFX940-NEXT: s_nop 0 4214; GFX940-NEXT: v_alignbit_b32 v0, v3, v0, 16 4215; GFX940-NEXT: global_store_short v4, v3, s[0:1] offset:4 sc0 sc1 4216; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 4217; GFX940-NEXT: s_waitcnt vmcnt(0) 4218; GFX940-NEXT: s_setpc_b64 s[30:31] 4219 %vec0 = call <4 x i16> asm "; def $0", "=v"() 4220 %vec1 = call <4 x i16> asm "; def $0", "=v"() 4221 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 4222 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 4223 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 1, i32 5, i32 5> 4224 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 4225 ret void 4226} 4227 4228define void @v_shuffle_v3i16_v3i16__2_5_5(ptr addrspace(1) inreg %ptr) { 4229; GFX900-LABEL: v_shuffle_v3i16_v3i16__2_5_5: 4230; GFX900: ; %bb.0: 4231; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4232; GFX900-NEXT: 
v_mov_b32_e32 v4, 0 4233; GFX900-NEXT: ;;#ASMSTART 4234; GFX900-NEXT: ; def v[0:1] 4235; GFX900-NEXT: ;;#ASMEND 4236; GFX900-NEXT: ;;#ASMSTART 4237; GFX900-NEXT: ; def v[2:3] 4238; GFX900-NEXT: ;;#ASMEND 4239; GFX900-NEXT: s_mov_b32 s4, 0x5040100 4240; GFX900-NEXT: v_perm_b32 v0, v3, v1, s4 4241; GFX900-NEXT: global_store_short v4, v3, s[16:17] offset:4 4242; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 4243; GFX900-NEXT: s_waitcnt vmcnt(0) 4244; GFX900-NEXT: s_setpc_b64 s[30:31] 4245; 4246; GFX90A-LABEL: v_shuffle_v3i16_v3i16__2_5_5: 4247; GFX90A: ; %bb.0: 4248; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4249; GFX90A-NEXT: v_mov_b32_e32 v4, 0 4250; GFX90A-NEXT: ;;#ASMSTART 4251; GFX90A-NEXT: ; def v[0:1] 4252; GFX90A-NEXT: ;;#ASMEND 4253; GFX90A-NEXT: ;;#ASMSTART 4254; GFX90A-NEXT: ; def v[2:3] 4255; GFX90A-NEXT: ;;#ASMEND 4256; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 4257; GFX90A-NEXT: v_perm_b32 v0, v3, v1, s4 4258; GFX90A-NEXT: global_store_short v4, v3, s[16:17] offset:4 4259; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 4260; GFX90A-NEXT: s_waitcnt vmcnt(0) 4261; GFX90A-NEXT: s_setpc_b64 s[30:31] 4262; 4263; GFX940-LABEL: v_shuffle_v3i16_v3i16__2_5_5: 4264; GFX940: ; %bb.0: 4265; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4266; GFX940-NEXT: v_mov_b32_e32 v4, 0 4267; GFX940-NEXT: ;;#ASMSTART 4268; GFX940-NEXT: ; def v[0:1] 4269; GFX940-NEXT: ;;#ASMEND 4270; GFX940-NEXT: ;;#ASMSTART 4271; GFX940-NEXT: ; def v[2:3] 4272; GFX940-NEXT: ;;#ASMEND 4273; GFX940-NEXT: s_mov_b32 s2, 0x5040100 4274; GFX940-NEXT: v_perm_b32 v0, v3, v1, s2 4275; GFX940-NEXT: global_store_short v4, v3, s[0:1] offset:4 sc0 sc1 4276; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 4277; GFX940-NEXT: s_waitcnt vmcnt(0) 4278; GFX940-NEXT: s_setpc_b64 s[30:31] 4279 %vec0 = call <4 x i16> asm "; def $0", "=v"() 4280 %vec1 = call <4 x i16> asm "; def $0", "=v"() 4281 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 4282 
%extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 4283 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 2, i32 5, i32 5> 4284 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 4285 ret void 4286} 4287 4288define void @v_shuffle_v3i16_v3i16__3_5_5(ptr addrspace(1) inreg %ptr) { 4289; GFX900-LABEL: v_shuffle_v3i16_v3i16__3_5_5: 4290; GFX900: ; %bb.0: 4291; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4292; GFX900-NEXT: v_mov_b32_e32 v2, 0 4293; GFX900-NEXT: ;;#ASMSTART 4294; GFX900-NEXT: ; def v[0:1] 4295; GFX900-NEXT: ;;#ASMEND 4296; GFX900-NEXT: s_mov_b32 s4, 0x5040100 4297; GFX900-NEXT: v_perm_b32 v0, v1, v0, s4 4298; GFX900-NEXT: global_store_short v2, v1, s[16:17] offset:4 4299; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 4300; GFX900-NEXT: s_waitcnt vmcnt(0) 4301; GFX900-NEXT: s_setpc_b64 s[30:31] 4302; 4303; GFX90A-LABEL: v_shuffle_v3i16_v3i16__3_5_5: 4304; GFX90A: ; %bb.0: 4305; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4306; GFX90A-NEXT: v_mov_b32_e32 v2, 0 4307; GFX90A-NEXT: ;;#ASMSTART 4308; GFX90A-NEXT: ; def v[0:1] 4309; GFX90A-NEXT: ;;#ASMEND 4310; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 4311; GFX90A-NEXT: v_perm_b32 v0, v1, v0, s4 4312; GFX90A-NEXT: global_store_short v2, v1, s[16:17] offset:4 4313; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 4314; GFX90A-NEXT: s_waitcnt vmcnt(0) 4315; GFX90A-NEXT: s_setpc_b64 s[30:31] 4316; 4317; GFX940-LABEL: v_shuffle_v3i16_v3i16__3_5_5: 4318; GFX940: ; %bb.0: 4319; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4320; GFX940-NEXT: v_mov_b32_e32 v2, 0 4321; GFX940-NEXT: ;;#ASMSTART 4322; GFX940-NEXT: ; def v[0:1] 4323; GFX940-NEXT: ;;#ASMEND 4324; GFX940-NEXT: s_mov_b32 s2, 0x5040100 4325; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2 4326; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 4327; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 4328; GFX940-NEXT: s_waitcnt vmcnt(0) 4329; 
GFX940-NEXT: s_setpc_b64 s[30:31] 4330 %vec0 = call <4 x i16> asm "; def $0", "=v"() 4331 %vec1 = call <4 x i16> asm "; def $0", "=v"() 4332 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 4333 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 4334 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 3, i32 5, i32 5> 4335 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 4336 ret void 4337} 4338 4339define void @v_shuffle_v3i16_v3i16__4_5_5(ptr addrspace(1) inreg %ptr) { 4340; GFX900-LABEL: v_shuffle_v3i16_v3i16__4_5_5: 4341; GFX900: ; %bb.0: 4342; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4343; GFX900-NEXT: v_mov_b32_e32 v2, 0 4344; GFX900-NEXT: ;;#ASMSTART 4345; GFX900-NEXT: ; def v[0:1] 4346; GFX900-NEXT: ;;#ASMEND 4347; GFX900-NEXT: v_alignbit_b32 v0, v1, v0, 16 4348; GFX900-NEXT: global_store_short v2, v1, s[16:17] offset:4 4349; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 4350; GFX900-NEXT: s_waitcnt vmcnt(0) 4351; GFX900-NEXT: s_setpc_b64 s[30:31] 4352; 4353; GFX90A-LABEL: v_shuffle_v3i16_v3i16__4_5_5: 4354; GFX90A: ; %bb.0: 4355; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4356; GFX90A-NEXT: v_mov_b32_e32 v2, 0 4357; GFX90A-NEXT: ;;#ASMSTART 4358; GFX90A-NEXT: ; def v[0:1] 4359; GFX90A-NEXT: ;;#ASMEND 4360; GFX90A-NEXT: v_alignbit_b32 v0, v1, v0, 16 4361; GFX90A-NEXT: global_store_short v2, v1, s[16:17] offset:4 4362; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 4363; GFX90A-NEXT: s_waitcnt vmcnt(0) 4364; GFX90A-NEXT: s_setpc_b64 s[30:31] 4365; 4366; GFX940-LABEL: v_shuffle_v3i16_v3i16__4_5_5: 4367; GFX940: ; %bb.0: 4368; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4369; GFX940-NEXT: v_mov_b32_e32 v2, 0 4370; GFX940-NEXT: ;;#ASMSTART 4371; GFX940-NEXT: ; def v[0:1] 4372; GFX940-NEXT: ;;#ASMEND 4373; GFX940-NEXT: s_nop 0 4374; GFX940-NEXT: v_alignbit_b32 v0, v1, v0, 16 4375; GFX940-NEXT: 
global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 4376; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 4377; GFX940-NEXT: s_waitcnt vmcnt(0) 4378; GFX940-NEXT: s_setpc_b64 s[30:31] 4379 %vec0 = call <4 x i16> asm "; def $0", "=v"() 4380 %vec1 = call <4 x i16> asm "; def $0", "=v"() 4381 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 4382 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 4383 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 4, i32 5, i32 5> 4384 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 4385 ret void 4386} 4387 4388define void @v_shuffle_v3i16_v3i16__5_u_5(ptr addrspace(1) inreg %ptr) { 4389; GFX900-LABEL: v_shuffle_v3i16_v3i16__5_u_5: 4390; GFX900: ; %bb.0: 4391; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4392; GFX900-NEXT: v_mov_b32_e32 v2, 0 4393; GFX900-NEXT: ;;#ASMSTART 4394; GFX900-NEXT: ; def v[0:1] 4395; GFX900-NEXT: ;;#ASMEND 4396; GFX900-NEXT: global_store_short v2, v1, s[16:17] offset:4 4397; GFX900-NEXT: global_store_dword v2, v1, s[16:17] 4398; GFX900-NEXT: s_waitcnt vmcnt(0) 4399; GFX900-NEXT: s_setpc_b64 s[30:31] 4400; 4401; GFX90A-LABEL: v_shuffle_v3i16_v3i16__5_u_5: 4402; GFX90A: ; %bb.0: 4403; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4404; GFX90A-NEXT: v_mov_b32_e32 v2, 0 4405; GFX90A-NEXT: ;;#ASMSTART 4406; GFX90A-NEXT: ; def v[0:1] 4407; GFX90A-NEXT: ;;#ASMEND 4408; GFX90A-NEXT: global_store_short v2, v1, s[16:17] offset:4 4409; GFX90A-NEXT: global_store_dword v2, v1, s[16:17] 4410; GFX90A-NEXT: s_waitcnt vmcnt(0) 4411; GFX90A-NEXT: s_setpc_b64 s[30:31] 4412; 4413; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_u_5: 4414; GFX940: ; %bb.0: 4415; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4416; GFX940-NEXT: v_mov_b32_e32 v2, 0 4417; GFX940-NEXT: ;;#ASMSTART 4418; GFX940-NEXT: ; def v[0:1] 4419; GFX940-NEXT: ;;#ASMEND 4420; GFX940-NEXT: global_store_short v2, v1, s[0:1] 
offset:4 sc0 sc1 4421; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 4422; GFX940-NEXT: s_waitcnt vmcnt(0) 4423; GFX940-NEXT: s_setpc_b64 s[30:31] 4424 %vec0 = call <4 x i16> asm "; def $0", "=v"() 4425 %vec1 = call <4 x i16> asm "; def $0", "=v"() 4426 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 4427 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 4428 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 poison, i32 5> 4429 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 4430 ret void 4431} 4432 4433define void @v_shuffle_v3i16_v3i16__5_0_5(ptr addrspace(1) inreg %ptr) { 4434; GFX900-LABEL: v_shuffle_v3i16_v3i16__5_0_5: 4435; GFX900: ; %bb.0: 4436; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4437; GFX900-NEXT: ;;#ASMSTART 4438; GFX900-NEXT: ; def v[0:1] 4439; GFX900-NEXT: ;;#ASMEND 4440; GFX900-NEXT: v_mov_b32_e32 v3, 0 4441; GFX900-NEXT: ;;#ASMSTART 4442; GFX900-NEXT: ; def v[1:2] 4443; GFX900-NEXT: ;;#ASMEND 4444; GFX900-NEXT: s_mov_b32 s4, 0x5040100 4445; GFX900-NEXT: v_perm_b32 v0, v0, v2, s4 4446; GFX900-NEXT: global_store_short v3, v2, s[16:17] offset:4 4447; GFX900-NEXT: global_store_dword v3, v0, s[16:17] 4448; GFX900-NEXT: s_waitcnt vmcnt(0) 4449; GFX900-NEXT: s_setpc_b64 s[30:31] 4450; 4451; GFX90A-LABEL: v_shuffle_v3i16_v3i16__5_0_5: 4452; GFX90A: ; %bb.0: 4453; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4454; GFX90A-NEXT: v_mov_b32_e32 v4, 0 4455; GFX90A-NEXT: ;;#ASMSTART 4456; GFX90A-NEXT: ; def v[0:1] 4457; GFX90A-NEXT: ;;#ASMEND 4458; GFX90A-NEXT: ;;#ASMSTART 4459; GFX90A-NEXT: ; def v[2:3] 4460; GFX90A-NEXT: ;;#ASMEND 4461; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 4462; GFX90A-NEXT: v_perm_b32 v0, v0, v3, s4 4463; GFX90A-NEXT: global_store_short v4, v3, s[16:17] offset:4 4464; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 4465; GFX90A-NEXT: s_waitcnt vmcnt(0) 4466; GFX90A-NEXT: s_setpc_b64 
s[30:31] 4467; 4468; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_0_5: 4469; GFX940: ; %bb.0: 4470; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4471; GFX940-NEXT: v_mov_b32_e32 v4, 0 4472; GFX940-NEXT: ;;#ASMSTART 4473; GFX940-NEXT: ; def v[0:1] 4474; GFX940-NEXT: ;;#ASMEND 4475; GFX940-NEXT: ;;#ASMSTART 4476; GFX940-NEXT: ; def v[2:3] 4477; GFX940-NEXT: ;;#ASMEND 4478; GFX940-NEXT: s_mov_b32 s2, 0x5040100 4479; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 4480; GFX940-NEXT: global_store_short v4, v3, s[0:1] offset:4 sc0 sc1 4481; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 4482; GFX940-NEXT: s_waitcnt vmcnt(0) 4483; GFX940-NEXT: s_setpc_b64 s[30:31] 4484 %vec0 = call <4 x i16> asm "; def $0", "=v"() 4485 %vec1 = call <4 x i16> asm "; def $0", "=v"() 4486 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 4487 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 4488 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 0, i32 5> 4489 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 4490 ret void 4491} 4492 4493define void @v_shuffle_v3i16_v3i16__5_1_5(ptr addrspace(1) inreg %ptr) { 4494; GFX900-LABEL: v_shuffle_v3i16_v3i16__5_1_5: 4495; GFX900: ; %bb.0: 4496; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4497; GFX900-NEXT: ;;#ASMSTART 4498; GFX900-NEXT: ; def v[0:1] 4499; GFX900-NEXT: ;;#ASMEND 4500; GFX900-NEXT: v_mov_b32_e32 v3, 0 4501; GFX900-NEXT: ;;#ASMSTART 4502; GFX900-NEXT: ; def v[1:2] 4503; GFX900-NEXT: ;;#ASMEND 4504; GFX900-NEXT: s_mov_b32 s4, 0xffff 4505; GFX900-NEXT: v_bfi_b32 v0, s4, v2, v0 4506; GFX900-NEXT: global_store_short v3, v2, s[16:17] offset:4 4507; GFX900-NEXT: global_store_dword v3, v0, s[16:17] 4508; GFX900-NEXT: s_waitcnt vmcnt(0) 4509; GFX900-NEXT: s_setpc_b64 s[30:31] 4510; 4511; GFX90A-LABEL: v_shuffle_v3i16_v3i16__5_1_5: 4512; GFX90A: ; %bb.0: 4513; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) 4514; GFX90A-NEXT: v_mov_b32_e32 v4, 0 4515; GFX90A-NEXT: ;;#ASMSTART 4516; GFX90A-NEXT: ; def v[0:1] 4517; GFX90A-NEXT: ;;#ASMEND 4518; GFX90A-NEXT: ;;#ASMSTART 4519; GFX90A-NEXT: ; def v[2:3] 4520; GFX90A-NEXT: ;;#ASMEND 4521; GFX90A-NEXT: s_mov_b32 s4, 0xffff 4522; GFX90A-NEXT: v_bfi_b32 v0, s4, v3, v0 4523; GFX90A-NEXT: global_store_short v4, v3, s[16:17] offset:4 4524; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 4525; GFX90A-NEXT: s_waitcnt vmcnt(0) 4526; GFX90A-NEXT: s_setpc_b64 s[30:31] 4527; 4528; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_1_5: 4529; GFX940: ; %bb.0: 4530; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4531; GFX940-NEXT: v_mov_b32_e32 v4, 0 4532; GFX940-NEXT: ;;#ASMSTART 4533; GFX940-NEXT: ; def v[0:1] 4534; GFX940-NEXT: ;;#ASMEND 4535; GFX940-NEXT: ;;#ASMSTART 4536; GFX940-NEXT: ; def v[2:3] 4537; GFX940-NEXT: ;;#ASMEND 4538; GFX940-NEXT: s_mov_b32 s2, 0xffff 4539; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v0 4540; GFX940-NEXT: global_store_short v4, v3, s[0:1] offset:4 sc0 sc1 4541; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 4542; GFX940-NEXT: s_waitcnt vmcnt(0) 4543; GFX940-NEXT: s_setpc_b64 s[30:31] 4544 %vec0 = call <4 x i16> asm "; def $0", "=v"() 4545 %vec1 = call <4 x i16> asm "; def $0", "=v"() 4546 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 4547 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 4548 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 1, i32 5> 4549 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 4550 ret void 4551} 4552 4553define void @v_shuffle_v3i16_v3i16__5_2_5(ptr addrspace(1) inreg %ptr) { 4554; GFX900-LABEL: v_shuffle_v3i16_v3i16__5_2_5: 4555; GFX900: ; %bb.0: 4556; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4557; GFX900-NEXT: v_mov_b32_e32 v4, 0 4558; GFX900-NEXT: ;;#ASMSTART 4559; GFX900-NEXT: ; def v[0:1] 4560; GFX900-NEXT: ;;#ASMEND 4561; 
GFX900-NEXT: ;;#ASMSTART 4562; GFX900-NEXT: ; def v[2:3] 4563; GFX900-NEXT: ;;#ASMEND 4564; GFX900-NEXT: s_mov_b32 s4, 0x5040100 4565; GFX900-NEXT: v_perm_b32 v0, v1, v3, s4 4566; GFX900-NEXT: global_store_short v4, v3, s[16:17] offset:4 4567; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 4568; GFX900-NEXT: s_waitcnt vmcnt(0) 4569; GFX900-NEXT: s_setpc_b64 s[30:31] 4570; 4571; GFX90A-LABEL: v_shuffle_v3i16_v3i16__5_2_5: 4572; GFX90A: ; %bb.0: 4573; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4574; GFX90A-NEXT: v_mov_b32_e32 v4, 0 4575; GFX90A-NEXT: ;;#ASMSTART 4576; GFX90A-NEXT: ; def v[0:1] 4577; GFX90A-NEXT: ;;#ASMEND 4578; GFX90A-NEXT: ;;#ASMSTART 4579; GFX90A-NEXT: ; def v[2:3] 4580; GFX90A-NEXT: ;;#ASMEND 4581; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 4582; GFX90A-NEXT: v_perm_b32 v0, v1, v3, s4 4583; GFX90A-NEXT: global_store_short v4, v3, s[16:17] offset:4 4584; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 4585; GFX90A-NEXT: s_waitcnt vmcnt(0) 4586; GFX90A-NEXT: s_setpc_b64 s[30:31] 4587; 4588; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_2_5: 4589; GFX940: ; %bb.0: 4590; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4591; GFX940-NEXT: v_mov_b32_e32 v4, 0 4592; GFX940-NEXT: ;;#ASMSTART 4593; GFX940-NEXT: ; def v[0:1] 4594; GFX940-NEXT: ;;#ASMEND 4595; GFX940-NEXT: ;;#ASMSTART 4596; GFX940-NEXT: ; def v[2:3] 4597; GFX940-NEXT: ;;#ASMEND 4598; GFX940-NEXT: s_mov_b32 s2, 0x5040100 4599; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 4600; GFX940-NEXT: global_store_short v4, v3, s[0:1] offset:4 sc0 sc1 4601; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 4602; GFX940-NEXT: s_waitcnt vmcnt(0) 4603; GFX940-NEXT: s_setpc_b64 s[30:31] 4604 %vec0 = call <4 x i16> asm "; def $0", "=v"() 4605 %vec1 = call <4 x i16> asm "; def $0", "=v"() 4606 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 4607 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 4608 %shuf = 
shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 2, i32 5> 4609 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 4610 ret void 4611} 4612 4613define void @v_shuffle_v3i16_v3i16__5_3_5(ptr addrspace(1) inreg %ptr) { 4614; GFX900-LABEL: v_shuffle_v3i16_v3i16__5_3_5: 4615; GFX900: ; %bb.0: 4616; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4617; GFX900-NEXT: v_mov_b32_e32 v2, 0 4618; GFX900-NEXT: ;;#ASMSTART 4619; GFX900-NEXT: ; def v[0:1] 4620; GFX900-NEXT: ;;#ASMEND 4621; GFX900-NEXT: s_mov_b32 s4, 0x5040100 4622; GFX900-NEXT: v_perm_b32 v0, v0, v1, s4 4623; GFX900-NEXT: global_store_short v2, v1, s[16:17] offset:4 4624; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 4625; GFX900-NEXT: s_waitcnt vmcnt(0) 4626; GFX900-NEXT: s_setpc_b64 s[30:31] 4627; 4628; GFX90A-LABEL: v_shuffle_v3i16_v3i16__5_3_5: 4629; GFX90A: ; %bb.0: 4630; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4631; GFX90A-NEXT: v_mov_b32_e32 v2, 0 4632; GFX90A-NEXT: ;;#ASMSTART 4633; GFX90A-NEXT: ; def v[0:1] 4634; GFX90A-NEXT: ;;#ASMEND 4635; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 4636; GFX90A-NEXT: v_perm_b32 v0, v0, v1, s4 4637; GFX90A-NEXT: global_store_short v2, v1, s[16:17] offset:4 4638; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 4639; GFX90A-NEXT: s_waitcnt vmcnt(0) 4640; GFX90A-NEXT: s_setpc_b64 s[30:31] 4641; 4642; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_3_5: 4643; GFX940: ; %bb.0: 4644; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4645; GFX940-NEXT: v_mov_b32_e32 v2, 0 4646; GFX940-NEXT: ;;#ASMSTART 4647; GFX940-NEXT: ; def v[0:1] 4648; GFX940-NEXT: ;;#ASMEND 4649; GFX940-NEXT: s_mov_b32 s2, 0x5040100 4650; GFX940-NEXT: v_perm_b32 v0, v0, v1, s2 4651; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 4652; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 4653; GFX940-NEXT: s_waitcnt vmcnt(0) 4654; GFX940-NEXT: s_setpc_b64 s[30:31] 4655 %vec0 = call <4 x i16> asm "; def $0", "=v"() 4656 %vec1 = call <4 x i16> 
asm "; def $0", "=v"() 4657 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 4658 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 4659 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 3, i32 5> 4660 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 4661 ret void 4662} 4663 4664define void @v_shuffle_v3i16_v3i16__5_4_5(ptr addrspace(1) inreg %ptr) { 4665; GFX900-LABEL: v_shuffle_v3i16_v3i16__5_4_5: 4666; GFX900: ; %bb.0: 4667; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4668; GFX900-NEXT: v_mov_b32_e32 v2, 0 4669; GFX900-NEXT: ;;#ASMSTART 4670; GFX900-NEXT: ; def v[0:1] 4671; GFX900-NEXT: ;;#ASMEND 4672; GFX900-NEXT: s_mov_b32 s4, 0xffff 4673; GFX900-NEXT: v_bfi_b32 v0, s4, v1, v0 4674; GFX900-NEXT: global_store_short v2, v1, s[16:17] offset:4 4675; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 4676; GFX900-NEXT: s_waitcnt vmcnt(0) 4677; GFX900-NEXT: s_setpc_b64 s[30:31] 4678; 4679; GFX90A-LABEL: v_shuffle_v3i16_v3i16__5_4_5: 4680; GFX90A: ; %bb.0: 4681; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4682; GFX90A-NEXT: v_mov_b32_e32 v2, 0 4683; GFX90A-NEXT: ;;#ASMSTART 4684; GFX90A-NEXT: ; def v[0:1] 4685; GFX90A-NEXT: ;;#ASMEND 4686; GFX90A-NEXT: s_mov_b32 s4, 0xffff 4687; GFX90A-NEXT: v_bfi_b32 v0, s4, v1, v0 4688; GFX90A-NEXT: global_store_short v2, v1, s[16:17] offset:4 4689; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 4690; GFX90A-NEXT: s_waitcnt vmcnt(0) 4691; GFX90A-NEXT: s_setpc_b64 s[30:31] 4692; 4693; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_4_5: 4694; GFX940: ; %bb.0: 4695; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4696; GFX940-NEXT: v_mov_b32_e32 v2, 0 4697; GFX940-NEXT: ;;#ASMSTART 4698; GFX940-NEXT: ; def v[0:1] 4699; GFX940-NEXT: ;;#ASMEND 4700; GFX940-NEXT: s_mov_b32 s2, 0xffff 4701; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v0 4702; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 
; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
; GFX940-NEXT: s_waitcnt vmcnt(0)
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 4, i32 5>
  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
  ret void
}

; Scalar-register ("=s") input with an all-poison mask: no input lane is
; selected. The widened <4 x i16> value is still passed to the asm use of s[8:9].
define void @s_shuffle_v3i16_v3i16__u_u_u() {
; GFX9-LABEL: s_shuffle_v3i16_v3i16__u_u_u:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:9]
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=s"()
  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> poison
  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
  ret void
}

; Scalar-register input, mask <0, poison, poison>: lane 0 is %vec0[0], the other
; lanes are poison. The result is widened to <4 x i16> and used by asm in s[8:9].
define void @s_shuffle_v3i16_v3i16__0_u_u() {
; GFX900-LABEL: s_shuffle_v3i16_v3i16__0_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v3i16_v3i16__0_u_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v3i16_v3i16__0_u_u:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[8:9]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_nop 0
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; use s[8:9]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=s"()
  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> <i32 0, i32 poison, i32 poison>
  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
  ret void
}

; Scalar-register input, mask <1, poison, poison>: lane 0 is %vec0[1], the other
; lanes are poison. The result is widened to <4 x i16> and used by asm in s[8:9].
define void @s_shuffle_v3i16_v3i16__1_u_u() {
; GFX900-LABEL: s_shuffle_v3i16_v3i16__1_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_lshr_b32 s8, s4, 16
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v3i16_v3i16__1_u_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_lshr_b32 s8, s4, 16
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v3i16_v3i16__1_u_u:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[0:1]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_lshr_b32 s8, s0, 16
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; use s[8:9]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=s"()
  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> <i32 1, i32 poison, i32 poison>
  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
  ret void
}

; Scalar-register input, mask <2, poison, poison>: lane 0 is %vec0[2], the other
; lanes are poison. The result is widened to <4 x i16> and used by asm in s[8:9].
define void @s_shuffle_v3i16_v3i16__2_u_u() {
; GFX900-LABEL: s_shuffle_v3i16_v3i16__2_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v3i16_v3i16__2_u_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v3i16_v3i16__2_u_u:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[0:1]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_mov_b32 s8, s1
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; use s[8:9]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=s"()
4854 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 4855 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> <i32 2, i32 poison, i32 poison> 4856 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 4857 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 4858 ret void 4859} 4860 4861define void @s_shuffle_v3i16_v3i16__3_u_u() { 4862; GFX9-LABEL: s_shuffle_v3i16_v3i16__3_u_u: 4863; GFX9: ; %bb.0: 4864; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4865; GFX9-NEXT: ;;#ASMSTART 4866; GFX9-NEXT: ; use s[8:9] 4867; GFX9-NEXT: ;;#ASMEND 4868; GFX9-NEXT: s_setpc_b64 s[30:31] 4869 %vec0 = call <4 x i16> asm "; def $0", "=s"() 4870 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 4871 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> <i32 3, i32 poison, i32 poison> 4872 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 4873 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 4874 ret void 4875} 4876 4877define void @s_shuffle_v3i16_v3i16__4_u_u() { 4878; GFX900-LABEL: s_shuffle_v3i16_v3i16__4_u_u: 4879; GFX900: ; %bb.0: 4880; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4881; GFX900-NEXT: ;;#ASMSTART 4882; GFX900-NEXT: ; def s[4:5] 4883; GFX900-NEXT: ;;#ASMEND 4884; GFX900-NEXT: s_lshr_b32 s8, s4, 16 4885; GFX900-NEXT: ;;#ASMSTART 4886; GFX900-NEXT: ; use s[8:9] 4887; GFX900-NEXT: ;;#ASMEND 4888; GFX900-NEXT: s_setpc_b64 s[30:31] 4889; 4890; GFX90A-LABEL: s_shuffle_v3i16_v3i16__4_u_u: 4891; GFX90A: ; %bb.0: 4892; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4893; GFX90A-NEXT: ;;#ASMSTART 4894; GFX90A-NEXT: ; def s[4:5] 4895; GFX90A-NEXT: ;;#ASMEND 4896; GFX90A-NEXT: s_lshr_b32 s8, s4, 16 4897; GFX90A-NEXT: ;;#ASMSTART 4898; GFX90A-NEXT: ; use s[8:9] 4899; GFX90A-NEXT: ;;#ASMEND 4900; 
GFX90A-NEXT: s_setpc_b64 s[30:31] 4901; 4902; GFX940-LABEL: s_shuffle_v3i16_v3i16__4_u_u: 4903; GFX940: ; %bb.0: 4904; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4905; GFX940-NEXT: ;;#ASMSTART 4906; GFX940-NEXT: ; def s[0:1] 4907; GFX940-NEXT: ;;#ASMEND 4908; GFX940-NEXT: s_lshr_b32 s8, s0, 16 4909; GFX940-NEXT: ;;#ASMSTART 4910; GFX940-NEXT: ; use s[8:9] 4911; GFX940-NEXT: ;;#ASMEND 4912; GFX940-NEXT: s_setpc_b64 s[30:31] 4913 %vec0 = call <4 x i16> asm "; def $0", "=s"() 4914 %vec1 = call <4 x i16> asm "; def $0", "=s"() 4915 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 4916 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 4917 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 4, i32 poison, i32 poison> 4918 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 4919 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 4920 ret void 4921} 4922 4923define void @s_shuffle_v3i16_v3i16__5_u_u() { 4924; GFX900-LABEL: s_shuffle_v3i16_v3i16__5_u_u: 4925; GFX900: ; %bb.0: 4926; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4927; GFX900-NEXT: ;;#ASMSTART 4928; GFX900-NEXT: ; def s[4:5] 4929; GFX900-NEXT: ;;#ASMEND 4930; GFX900-NEXT: s_mov_b32 s8, s5 4931; GFX900-NEXT: ;;#ASMSTART 4932; GFX900-NEXT: ; use s[8:9] 4933; GFX900-NEXT: ;;#ASMEND 4934; GFX900-NEXT: s_setpc_b64 s[30:31] 4935; 4936; GFX90A-LABEL: s_shuffle_v3i16_v3i16__5_u_u: 4937; GFX90A: ; %bb.0: 4938; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4939; GFX90A-NEXT: ;;#ASMSTART 4940; GFX90A-NEXT: ; def s[4:5] 4941; GFX90A-NEXT: ;;#ASMEND 4942; GFX90A-NEXT: s_mov_b32 s8, s5 4943; GFX90A-NEXT: ;;#ASMSTART 4944; GFX90A-NEXT: ; use s[8:9] 4945; GFX90A-NEXT: ;;#ASMEND 4946; GFX90A-NEXT: s_setpc_b64 s[30:31] 4947; 4948; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_u_u: 4949; GFX940: ; %bb.0: 4950; GFX940-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4951; GFX940-NEXT: ;;#ASMSTART 4952; GFX940-NEXT: ; def s[0:1] 4953; GFX940-NEXT: ;;#ASMEND 4954; GFX940-NEXT: s_mov_b32 s8, s1 4955; GFX940-NEXT: ;;#ASMSTART 4956; GFX940-NEXT: ; use s[8:9] 4957; GFX940-NEXT: ;;#ASMEND 4958; GFX940-NEXT: s_setpc_b64 s[30:31] 4959 %vec0 = call <4 x i16> asm "; def $0", "=s"() 4960 %vec1 = call <4 x i16> asm "; def $0", "=s"() 4961 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 4962 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 4963 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 poison, i32 poison> 4964 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 4965 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 4966 ret void 4967} 4968 4969define void @s_shuffle_v3i16_v3i16__5_0_u() { 4970; GFX900-LABEL: s_shuffle_v3i16_v3i16__5_0_u: 4971; GFX900: ; %bb.0: 4972; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4973; GFX900-NEXT: ;;#ASMSTART 4974; GFX900-NEXT: ; def s[4:5] 4975; GFX900-NEXT: ;;#ASMEND 4976; GFX900-NEXT: ;;#ASMSTART 4977; GFX900-NEXT: ; def s[6:7] 4978; GFX900-NEXT: ;;#ASMEND 4979; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s4 4980; GFX900-NEXT: ;;#ASMSTART 4981; GFX900-NEXT: ; use s[8:9] 4982; GFX900-NEXT: ;;#ASMEND 4983; GFX900-NEXT: s_setpc_b64 s[30:31] 4984; 4985; GFX90A-LABEL: s_shuffle_v3i16_v3i16__5_0_u: 4986; GFX90A: ; %bb.0: 4987; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4988; GFX90A-NEXT: ;;#ASMSTART 4989; GFX90A-NEXT: ; def s[4:5] 4990; GFX90A-NEXT: ;;#ASMEND 4991; GFX90A-NEXT: ;;#ASMSTART 4992; GFX90A-NEXT: ; def s[6:7] 4993; GFX90A-NEXT: ;;#ASMEND 4994; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s4 4995; GFX90A-NEXT: ;;#ASMSTART 4996; GFX90A-NEXT: ; use s[8:9] 4997; GFX90A-NEXT: ;;#ASMEND 4998; GFX90A-NEXT: s_setpc_b64 s[30:31] 4999; 5000; GFX940-LABEL: 
s_shuffle_v3i16_v3i16__5_0_u: 5001; GFX940: ; %bb.0: 5002; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5003; GFX940-NEXT: ;;#ASMSTART 5004; GFX940-NEXT: ; def s[0:1] 5005; GFX940-NEXT: ;;#ASMEND 5006; GFX940-NEXT: ;;#ASMSTART 5007; GFX940-NEXT: ; def s[2:3] 5008; GFX940-NEXT: ;;#ASMEND 5009; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 5010; GFX940-NEXT: ;;#ASMSTART 5011; GFX940-NEXT: ; use s[8:9] 5012; GFX940-NEXT: ;;#ASMEND 5013; GFX940-NEXT: s_setpc_b64 s[30:31] 5014 %vec0 = call <4 x i16> asm "; def $0", "=s"() 5015 %vec1 = call <4 x i16> asm "; def $0", "=s"() 5016 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 5017 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 5018 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 0, i32 poison> 5019 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 5020 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 5021 ret void 5022} 5023 5024define void @s_shuffle_v3i16_v3i16__5_1_u() { 5025; GFX900-LABEL: s_shuffle_v3i16_v3i16__5_1_u: 5026; GFX900: ; %bb.0: 5027; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5028; GFX900-NEXT: ;;#ASMSTART 5029; GFX900-NEXT: ; def s[4:5] 5030; GFX900-NEXT: ;;#ASMEND 5031; GFX900-NEXT: ;;#ASMSTART 5032; GFX900-NEXT: ; def s[6:7] 5033; GFX900-NEXT: ;;#ASMEND 5034; GFX900-NEXT: s_pack_lh_b32_b16 s8, s7, s4 5035; GFX900-NEXT: ;;#ASMSTART 5036; GFX900-NEXT: ; use s[8:9] 5037; GFX900-NEXT: ;;#ASMEND 5038; GFX900-NEXT: s_setpc_b64 s[30:31] 5039; 5040; GFX90A-LABEL: s_shuffle_v3i16_v3i16__5_1_u: 5041; GFX90A: ; %bb.0: 5042; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5043; GFX90A-NEXT: ;;#ASMSTART 5044; GFX90A-NEXT: ; def s[4:5] 5045; GFX90A-NEXT: ;;#ASMEND 5046; GFX90A-NEXT: ;;#ASMSTART 5047; GFX90A-NEXT: ; def s[6:7] 5048; GFX90A-NEXT: ;;#ASMEND 5049; GFX90A-NEXT: s_pack_lh_b32_b16 s8, 
s7, s4 5050; GFX90A-NEXT: ;;#ASMSTART 5051; GFX90A-NEXT: ; use s[8:9] 5052; GFX90A-NEXT: ;;#ASMEND 5053; GFX90A-NEXT: s_setpc_b64 s[30:31] 5054; 5055; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_1_u: 5056; GFX940: ; %bb.0: 5057; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5058; GFX940-NEXT: ;;#ASMSTART 5059; GFX940-NEXT: ; def s[0:1] 5060; GFX940-NEXT: ;;#ASMEND 5061; GFX940-NEXT: ;;#ASMSTART 5062; GFX940-NEXT: ; def s[2:3] 5063; GFX940-NEXT: ;;#ASMEND 5064; GFX940-NEXT: s_pack_lh_b32_b16 s8, s3, s0 5065; GFX940-NEXT: ;;#ASMSTART 5066; GFX940-NEXT: ; use s[8:9] 5067; GFX940-NEXT: ;;#ASMEND 5068; GFX940-NEXT: s_setpc_b64 s[30:31] 5069 %vec0 = call <4 x i16> asm "; def $0", "=s"() 5070 %vec1 = call <4 x i16> asm "; def $0", "=s"() 5071 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 5072 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 5073 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 1, i32 poison> 5074 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 5075 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 5076 ret void 5077} 5078 5079define void @s_shuffle_v3i16_v3i16__5_2_u() { 5080; GFX900-LABEL: s_shuffle_v3i16_v3i16__5_2_u: 5081; GFX900: ; %bb.0: 5082; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5083; GFX900-NEXT: ;;#ASMSTART 5084; GFX900-NEXT: ; def s[4:5] 5085; GFX900-NEXT: ;;#ASMEND 5086; GFX900-NEXT: ;;#ASMSTART 5087; GFX900-NEXT: ; def s[6:7] 5088; GFX900-NEXT: ;;#ASMEND 5089; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s5 5090; GFX900-NEXT: ;;#ASMSTART 5091; GFX900-NEXT: ; use s[8:9] 5092; GFX900-NEXT: ;;#ASMEND 5093; GFX900-NEXT: s_setpc_b64 s[30:31] 5094; 5095; GFX90A-LABEL: s_shuffle_v3i16_v3i16__5_2_u: 5096; GFX90A: ; %bb.0: 5097; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5098; GFX90A-NEXT: ;;#ASMSTART 5099; GFX90A-NEXT: ; def 
s[4:5] 5100; GFX90A-NEXT: ;;#ASMEND 5101; GFX90A-NEXT: ;;#ASMSTART 5102; GFX90A-NEXT: ; def s[6:7] 5103; GFX90A-NEXT: ;;#ASMEND 5104; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s5 5105; GFX90A-NEXT: ;;#ASMSTART 5106; GFX90A-NEXT: ; use s[8:9] 5107; GFX90A-NEXT: ;;#ASMEND 5108; GFX90A-NEXT: s_setpc_b64 s[30:31] 5109; 5110; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_2_u: 5111; GFX940: ; %bb.0: 5112; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5113; GFX940-NEXT: ;;#ASMSTART 5114; GFX940-NEXT: ; def s[0:1] 5115; GFX940-NEXT: ;;#ASMEND 5116; GFX940-NEXT: ;;#ASMSTART 5117; GFX940-NEXT: ; def s[2:3] 5118; GFX940-NEXT: ;;#ASMEND 5119; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s1 5120; GFX940-NEXT: ;;#ASMSTART 5121; GFX940-NEXT: ; use s[8:9] 5122; GFX940-NEXT: ;;#ASMEND 5123; GFX940-NEXT: s_setpc_b64 s[30:31] 5124 %vec0 = call <4 x i16> asm "; def $0", "=s"() 5125 %vec1 = call <4 x i16> asm "; def $0", "=s"() 5126 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 5127 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 5128 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 2, i32 poison> 5129 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 5130 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 5131 ret void 5132} 5133 5134define void @s_shuffle_v3i16_v3i16__5_3_u() { 5135; GFX900-LABEL: s_shuffle_v3i16_v3i16__5_3_u: 5136; GFX900: ; %bb.0: 5137; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5138; GFX900-NEXT: ;;#ASMSTART 5139; GFX900-NEXT: ; def s[4:5] 5140; GFX900-NEXT: ;;#ASMEND 5141; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4 5142; GFX900-NEXT: ;;#ASMSTART 5143; GFX900-NEXT: ; use s[8:9] 5144; GFX900-NEXT: ;;#ASMEND 5145; GFX900-NEXT: s_setpc_b64 s[30:31] 5146; 5147; GFX90A-LABEL: s_shuffle_v3i16_v3i16__5_3_u: 5148; GFX90A: ; %bb.0: 5149; GFX90A-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) 5150; GFX90A-NEXT: ;;#ASMSTART 5151; GFX90A-NEXT: ; def s[4:5] 5152; GFX90A-NEXT: ;;#ASMEND 5153; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4 5154; GFX90A-NEXT: ;;#ASMSTART 5155; GFX90A-NEXT: ; use s[8:9] 5156; GFX90A-NEXT: ;;#ASMEND 5157; GFX90A-NEXT: s_setpc_b64 s[30:31] 5158; 5159; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_3_u: 5160; GFX940: ; %bb.0: 5161; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5162; GFX940-NEXT: ;;#ASMSTART 5163; GFX940-NEXT: ; def s[0:1] 5164; GFX940-NEXT: ;;#ASMEND 5165; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 5166; GFX940-NEXT: ;;#ASMSTART 5167; GFX940-NEXT: ; use s[8:9] 5168; GFX940-NEXT: ;;#ASMEND 5169; GFX940-NEXT: s_setpc_b64 s[30:31] 5170 %vec0 = call <4 x i16> asm "; def $0", "=s"() 5171 %vec1 = call <4 x i16> asm "; def $0", "=s"() 5172 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 5173 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 5174 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 3, i32 poison> 5175 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 5176 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 5177 ret void 5178} 5179 5180define void @s_shuffle_v3i16_v3i16__5_4_u() { 5181; GFX900-LABEL: s_shuffle_v3i16_v3i16__5_4_u: 5182; GFX900: ; %bb.0: 5183; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5184; GFX900-NEXT: ;;#ASMSTART 5185; GFX900-NEXT: ; def s[4:5] 5186; GFX900-NEXT: ;;#ASMEND 5187; GFX900-NEXT: s_pack_lh_b32_b16 s8, s5, s4 5188; GFX900-NEXT: ;;#ASMSTART 5189; GFX900-NEXT: ; use s[8:9] 5190; GFX900-NEXT: ;;#ASMEND 5191; GFX900-NEXT: s_setpc_b64 s[30:31] 5192; 5193; GFX90A-LABEL: s_shuffle_v3i16_v3i16__5_4_u: 5194; GFX90A: ; %bb.0: 5195; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5196; GFX90A-NEXT: ;;#ASMSTART 5197; GFX90A-NEXT: ; def s[4:5] 5198; GFX90A-NEXT: ;;#ASMEND 
; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s5, s4
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_4_u:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[0:1]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_pack_lh_b32_b16 s8, s1, s0
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; use s[8:9]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=s"()
  %vec1 = call <4 x i16> asm "; def $0", "=s"()
  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 4, i32 poison>
  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
  ret void
}

; Two scalar-register inputs, mask <5, 5, poison>: lanes 0 and 1 are both
; %vec1[2], lane 2 is poison. The widened result is used by asm in s[8:9].
define void @s_shuffle_v3i16_v3i16__5_5_u() {
; GFX900-LABEL: s_shuffle_v3i16_v3i16__5_5_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v3i16_v3i16__5_5_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_5_u:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[0:1]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; use s[8:9]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=s"()
  %vec1 = call <4 x i16> asm "; def $0", "=s"()
  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 5, i32 poison>
  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
  ret void
}

; Two scalar-register inputs, mask <5, 5, 0>: lanes 0 and 1 are %vec1[2] and
; lane 2 is %vec0[0]. The widened result is used by asm in s[8:9].
define void @s_shuffle_v3i16_v3i16__5_5_0() {
; GFX900-LABEL: s_shuffle_v3i16_v3i16__5_5_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[6:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s7
; GFX900-NEXT: s_mov_b32 s9, s4
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v3i16_v3i16__5_5_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[6:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s7
; GFX90A-NEXT: s_mov_b32 s9, s4
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_5_0:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[0:1]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[2:3]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3
; GFX940-NEXT: s_mov_b32 s9, s0
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; use s[8:9]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=s"()
  %vec1 = call <4 x i16> asm "; def $0", "=s"()
  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 5, i32 0>
  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
  ret void
}

; Two scalar-register inputs, mask <5, 5, 1>: lanes 0 and 1 are %vec1[2] and
; lane 2 is %vec0[1]. The widened result is used by asm in s[8:9].
define void @s_shuffle_v3i16_v3i16__5_5_1() {
; GFX900-LABEL: s_shuffle_v3i16_v3i16__5_5_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[6:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_lshr_b32 s9, s4, 16
; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v3i16_v3i16__5_5_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[6:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_lshr_b32 s9, s4, 16
; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_5_1:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[0:1]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[2:3]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_lshr_b32 s9, s0, 16
; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; use s[8:9]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=s"()
  %vec1 = call <4 x i16> asm "; def $0", "=s"()
  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 5, i32 1>
  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
  ret void
}

; Two scalar-register inputs, mask <5, 5, 2>: lanes 0 and 1 are %vec1[2] and
; lane 2 is %vec0[2]. The widened result is used by asm in s[8:9].
define void @s_shuffle_v3i16_v3i16__5_5_2() {
; GFX900-LABEL: s_shuffle_v3i16_v3i16__5_5_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v3i16_v3i16__5_5_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_5_2:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[8:9]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[0:1]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; use s[8:9]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=s"()
  %vec1 = call <4 x i16> asm "; def $0", "=s"()
  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 5, i32 2>
  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
  ret void
}

; Two scalar-register inputs, mask <5, 5, 3>: lanes 0 and 1 are %vec1[2] and
; lane 2 is %vec1[0]. The widened result is used by asm in s[8:9].
define void @s_shuffle_v3i16_v3i16__5_5_3() {
; GFX900-LABEL: s_shuffle_v3i16_v3i16__5_5_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s5
; GFX900-NEXT: s_mov_b32 s9, s4
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v3i16_v3i16__5_5_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s5
; GFX90A-NEXT: s_mov_b32 s9, s4
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_5_3:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[0:1]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1
; GFX940-NEXT: s_mov_b32 s9, s0
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; use s[8:9]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=s"()
  %vec1 = call <4 x i16> asm "; def $0", "=s"()
  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 5, i32 3>
  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
  ret void
}

; Two scalar-register inputs, mask <5, 5, 4>: lanes 0 and 1 are %vec1[2] and
; lane 2 is %vec1[1]. The widened result is used by asm in s[8:9].
define void @s_shuffle_v3i16_v3i16__5_5_4() {
; GFX900-LABEL: s_shuffle_v3i16_v3i16__5_5_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_lshr_b32 s9, s4, 16
; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v3i16_v3i16__5_5_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_lshr_b32 s9, s4, 16
; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_5_4:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[0:1]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_lshr_b32 s9, s0, 16
; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; use s[8:9]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=s"()
  %vec1 = call <4 x i16> asm "; def $0", "=s"()
  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 5, i32 4>
  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
  ret void
}

; Two scalar-register inputs, mask <5, 5, 5>: every lane is %vec1[2].
; The widened result is used by asm in s[8:9].
define void @s_shuffle_v3i16_v3i16__5_5_5() {
; GFX9-LABEL: s_shuffle_v3i16_v3i16__5_5_5:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; def s[8:9]
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_pack_ll_b32_b16 s8, s9, s9
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:9]
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=s"()
  %vec1 = call <4 x i16> asm "; def $0", "=s"()
  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 5, i32 5>
  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
  ret void
}

; Single scalar-register input, mask <poison, 0, 0>: lane 0 is poison and lanes
; 1 and 2 are both %vec0[0]. The widened result is used by asm in s[8:9].
define void @s_shuffle_v3i16_v3i16__u_0_0() {
; GFX900-LABEL: s_shuffle_v3i16_v3i16__u_0_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_lshl_b32 s8, s4, 16
; GFX900-NEXT: s_mov_b32 s9, s4
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v3i16_v3i16__u_0_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_lshl_b32 s8, s4, 16
; GFX90A-NEXT: s_mov_b32 s9, s4
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v3i16_v3i16__u_0_0:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[0:1]
5595; GFX940-NEXT: ;;#ASMEND 5596; GFX940-NEXT: s_lshl_b32 s8, s0, 16 5597; GFX940-NEXT: s_mov_b32 s9, s0 5598; GFX940-NEXT: ;;#ASMSTART 5599; GFX940-NEXT: ; use s[8:9] 5600; GFX940-NEXT: ;;#ASMEND 5601; GFX940-NEXT: s_setpc_b64 s[30:31] 5602 %vec0 = call <4 x i16> asm "; def $0", "=s"() 5603 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 5604 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> <i32 poison, i32 0, i32 0> 5605 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 5606 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 5607 ret void 5608} 5609 5610define void @s_shuffle_v3i16_v3i16__0_0_0() { 5611; GFX900-LABEL: s_shuffle_v3i16_v3i16__0_0_0: 5612; GFX900: ; %bb.0: 5613; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5614; GFX900-NEXT: ;;#ASMSTART 5615; GFX900-NEXT: ; def s[4:5] 5616; GFX900-NEXT: ;;#ASMEND 5617; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s4 5618; GFX900-NEXT: s_mov_b32 s9, s4 5619; GFX900-NEXT: ;;#ASMSTART 5620; GFX900-NEXT: ; use s[8:9] 5621; GFX900-NEXT: ;;#ASMEND 5622; GFX900-NEXT: s_setpc_b64 s[30:31] 5623; 5624; GFX90A-LABEL: s_shuffle_v3i16_v3i16__0_0_0: 5625; GFX90A: ; %bb.0: 5626; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5627; GFX90A-NEXT: ;;#ASMSTART 5628; GFX90A-NEXT: ; def s[4:5] 5629; GFX90A-NEXT: ;;#ASMEND 5630; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s4 5631; GFX90A-NEXT: s_mov_b32 s9, s4 5632; GFX90A-NEXT: ;;#ASMSTART 5633; GFX90A-NEXT: ; use s[8:9] 5634; GFX90A-NEXT: ;;#ASMEND 5635; GFX90A-NEXT: s_setpc_b64 s[30:31] 5636; 5637; GFX940-LABEL: s_shuffle_v3i16_v3i16__0_0_0: 5638; GFX940: ; %bb.0: 5639; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5640; GFX940-NEXT: ;;#ASMSTART 5641; GFX940-NEXT: ; def s[0:1] 5642; GFX940-NEXT: ;;#ASMEND 5643; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 5644; GFX940-NEXT: s_mov_b32 s9, s0 5645; GFX940-NEXT: ;;#ASMSTART 5646; GFX940-NEXT: ; 
use s[8:9] 5647; GFX940-NEXT: ;;#ASMEND 5648; GFX940-NEXT: s_setpc_b64 s[30:31] 5649 %vec0 = call <4 x i16> asm "; def $0", "=s"() 5650 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 5651 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> zeroinitializer 5652 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 5653 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 5654 ret void 5655} 5656 5657define void @s_shuffle_v3i16_v3i16__1_0_0() { 5658; GFX900-LABEL: s_shuffle_v3i16_v3i16__1_0_0: 5659; GFX900: ; %bb.0: 5660; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5661; GFX900-NEXT: ;;#ASMSTART 5662; GFX900-NEXT: ; def s[4:5] 5663; GFX900-NEXT: ;;#ASMEND 5664; GFX900-NEXT: s_lshr_b32 s5, s4, 16 5665; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4 5666; GFX900-NEXT: s_mov_b32 s9, s4 5667; GFX900-NEXT: ;;#ASMSTART 5668; GFX900-NEXT: ; use s[8:9] 5669; GFX900-NEXT: ;;#ASMEND 5670; GFX900-NEXT: s_setpc_b64 s[30:31] 5671; 5672; GFX90A-LABEL: s_shuffle_v3i16_v3i16__1_0_0: 5673; GFX90A: ; %bb.0: 5674; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5675; GFX90A-NEXT: ;;#ASMSTART 5676; GFX90A-NEXT: ; def s[4:5] 5677; GFX90A-NEXT: ;;#ASMEND 5678; GFX90A-NEXT: s_lshr_b32 s5, s4, 16 5679; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4 5680; GFX90A-NEXT: s_mov_b32 s9, s4 5681; GFX90A-NEXT: ;;#ASMSTART 5682; GFX90A-NEXT: ; use s[8:9] 5683; GFX90A-NEXT: ;;#ASMEND 5684; GFX90A-NEXT: s_setpc_b64 s[30:31] 5685; 5686; GFX940-LABEL: s_shuffle_v3i16_v3i16__1_0_0: 5687; GFX940: ; %bb.0: 5688; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5689; GFX940-NEXT: ;;#ASMSTART 5690; GFX940-NEXT: ; def s[0:1] 5691; GFX940-NEXT: ;;#ASMEND 5692; GFX940-NEXT: s_lshr_b32 s1, s0, 16 5693; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 5694; GFX940-NEXT: s_mov_b32 s9, s0 5695; GFX940-NEXT: ;;#ASMSTART 5696; GFX940-NEXT: ; use s[8:9] 5697; GFX940-NEXT: ;;#ASMEND 5698; 
GFX940-NEXT: s_setpc_b64 s[30:31] 5699 %vec0 = call <4 x i16> asm "; def $0", "=s"() 5700 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 5701 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> <i32 1, i32 0, i32 0> 5702 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 5703 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 5704 ret void 5705} 5706 5707define void @s_shuffle_v3i16_v3i16__2_0_0() { 5708; GFX900-LABEL: s_shuffle_v3i16_v3i16__2_0_0: 5709; GFX900: ; %bb.0: 5710; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5711; GFX900-NEXT: ;;#ASMSTART 5712; GFX900-NEXT: ; def s[4:5] 5713; GFX900-NEXT: ;;#ASMEND 5714; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4 5715; GFX900-NEXT: s_mov_b32 s9, s4 5716; GFX900-NEXT: ;;#ASMSTART 5717; GFX900-NEXT: ; use s[8:9] 5718; GFX900-NEXT: ;;#ASMEND 5719; GFX900-NEXT: s_setpc_b64 s[30:31] 5720; 5721; GFX90A-LABEL: s_shuffle_v3i16_v3i16__2_0_0: 5722; GFX90A: ; %bb.0: 5723; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5724; GFX90A-NEXT: ;;#ASMSTART 5725; GFX90A-NEXT: ; def s[4:5] 5726; GFX90A-NEXT: ;;#ASMEND 5727; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4 5728; GFX90A-NEXT: s_mov_b32 s9, s4 5729; GFX90A-NEXT: ;;#ASMSTART 5730; GFX90A-NEXT: ; use s[8:9] 5731; GFX90A-NEXT: ;;#ASMEND 5732; GFX90A-NEXT: s_setpc_b64 s[30:31] 5733; 5734; GFX940-LABEL: s_shuffle_v3i16_v3i16__2_0_0: 5735; GFX940: ; %bb.0: 5736; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5737; GFX940-NEXT: ;;#ASMSTART 5738; GFX940-NEXT: ; def s[0:1] 5739; GFX940-NEXT: ;;#ASMEND 5740; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 5741; GFX940-NEXT: s_mov_b32 s9, s0 5742; GFX940-NEXT: ;;#ASMSTART 5743; GFX940-NEXT: ; use s[8:9] 5744; GFX940-NEXT: ;;#ASMEND 5745; GFX940-NEXT: s_setpc_b64 s[30:31] 5746 %vec0 = call <4 x i16> asm "; def $0", "=s"() 5747 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> 
<i32 0, i32 1, i32 2> 5748 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> <i32 2, i32 0, i32 0> 5749 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 5750 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 5751 ret void 5752} 5753 5754define void @s_shuffle_v3i16_v3i16__3_0_0() { 5755; GFX900-LABEL: s_shuffle_v3i16_v3i16__3_0_0: 5756; GFX900: ; %bb.0: 5757; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5758; GFX900-NEXT: ;;#ASMSTART 5759; GFX900-NEXT: ; def s[4:5] 5760; GFX900-NEXT: ;;#ASMEND 5761; GFX900-NEXT: s_lshl_b32 s8, s4, 16 5762; GFX900-NEXT: s_mov_b32 s9, s4 5763; GFX900-NEXT: ;;#ASMSTART 5764; GFX900-NEXT: ; use s[8:9] 5765; GFX900-NEXT: ;;#ASMEND 5766; GFX900-NEXT: s_setpc_b64 s[30:31] 5767; 5768; GFX90A-LABEL: s_shuffle_v3i16_v3i16__3_0_0: 5769; GFX90A: ; %bb.0: 5770; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5771; GFX90A-NEXT: ;;#ASMSTART 5772; GFX90A-NEXT: ; def s[4:5] 5773; GFX90A-NEXT: ;;#ASMEND 5774; GFX90A-NEXT: s_lshl_b32 s8, s4, 16 5775; GFX90A-NEXT: s_mov_b32 s9, s4 5776; GFX90A-NEXT: ;;#ASMSTART 5777; GFX90A-NEXT: ; use s[8:9] 5778; GFX90A-NEXT: ;;#ASMEND 5779; GFX90A-NEXT: s_setpc_b64 s[30:31] 5780; 5781; GFX940-LABEL: s_shuffle_v3i16_v3i16__3_0_0: 5782; GFX940: ; %bb.0: 5783; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5784; GFX940-NEXT: ;;#ASMSTART 5785; GFX940-NEXT: ; def s[0:1] 5786; GFX940-NEXT: ;;#ASMEND 5787; GFX940-NEXT: s_lshl_b32 s8, s0, 16 5788; GFX940-NEXT: s_mov_b32 s9, s0 5789; GFX940-NEXT: ;;#ASMSTART 5790; GFX940-NEXT: ; use s[8:9] 5791; GFX940-NEXT: ;;#ASMEND 5792; GFX940-NEXT: s_setpc_b64 s[30:31] 5793 %vec0 = call <4 x i16> asm "; def $0", "=s"() 5794 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 5795 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> <i32 3, i32 0, i32 0> 5796 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, 
<4 x i32> <i32 0, i32 1, i32 2, i32 poison> 5797 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 5798 ret void 5799} 5800 5801define void @s_shuffle_v3i16_v3i16__4_0_0() { 5802; GFX900-LABEL: s_shuffle_v3i16_v3i16__4_0_0: 5803; GFX900: ; %bb.0: 5804; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5805; GFX900-NEXT: ;;#ASMSTART 5806; GFX900-NEXT: ; def s[4:5] 5807; GFX900-NEXT: ;;#ASMEND 5808; GFX900-NEXT: ;;#ASMSTART 5809; GFX900-NEXT: ; def s[6:7] 5810; GFX900-NEXT: ;;#ASMEND 5811; GFX900-NEXT: s_lshr_b32 s5, s6, 16 5812; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4 5813; GFX900-NEXT: s_mov_b32 s9, s4 5814; GFX900-NEXT: ;;#ASMSTART 5815; GFX900-NEXT: ; use s[8:9] 5816; GFX900-NEXT: ;;#ASMEND 5817; GFX900-NEXT: s_setpc_b64 s[30:31] 5818; 5819; GFX90A-LABEL: s_shuffle_v3i16_v3i16__4_0_0: 5820; GFX90A: ; %bb.0: 5821; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5822; GFX90A-NEXT: ;;#ASMSTART 5823; GFX90A-NEXT: ; def s[4:5] 5824; GFX90A-NEXT: ;;#ASMEND 5825; GFX90A-NEXT: ;;#ASMSTART 5826; GFX90A-NEXT: ; def s[6:7] 5827; GFX90A-NEXT: ;;#ASMEND 5828; GFX90A-NEXT: s_lshr_b32 s5, s6, 16 5829; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4 5830; GFX90A-NEXT: s_mov_b32 s9, s4 5831; GFX90A-NEXT: ;;#ASMSTART 5832; GFX90A-NEXT: ; use s[8:9] 5833; GFX90A-NEXT: ;;#ASMEND 5834; GFX90A-NEXT: s_setpc_b64 s[30:31] 5835; 5836; GFX940-LABEL: s_shuffle_v3i16_v3i16__4_0_0: 5837; GFX940: ; %bb.0: 5838; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5839; GFX940-NEXT: ;;#ASMSTART 5840; GFX940-NEXT: ; def s[0:1] 5841; GFX940-NEXT: ;;#ASMEND 5842; GFX940-NEXT: ;;#ASMSTART 5843; GFX940-NEXT: ; def s[2:3] 5844; GFX940-NEXT: ;;#ASMEND 5845; GFX940-NEXT: s_lshr_b32 s1, s2, 16 5846; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 5847; GFX940-NEXT: s_mov_b32 s9, s0 5848; GFX940-NEXT: ;;#ASMSTART 5849; GFX940-NEXT: ; use s[8:9] 5850; GFX940-NEXT: ;;#ASMEND 5851; GFX940-NEXT: s_setpc_b64 s[30:31] 5852 %vec0 = call <4 x i16> asm "; def $0", "=s"() 5853 %vec1 = call <4 x 
i16> asm "; def $0", "=s"() 5854 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 5855 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 5856 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 4, i32 0, i32 0> 5857 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 5858 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 5859 ret void 5860} 5861 5862define void @s_shuffle_v3i16_v3i16__5_0_0() { 5863; GFX900-LABEL: s_shuffle_v3i16_v3i16__5_0_0: 5864; GFX900: ; %bb.0: 5865; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5866; GFX900-NEXT: ;;#ASMSTART 5867; GFX900-NEXT: ; def s[4:5] 5868; GFX900-NEXT: ;;#ASMEND 5869; GFX900-NEXT: ;;#ASMSTART 5870; GFX900-NEXT: ; def s[6:7] 5871; GFX900-NEXT: ;;#ASMEND 5872; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s4 5873; GFX900-NEXT: s_mov_b32 s9, s4 5874; GFX900-NEXT: ;;#ASMSTART 5875; GFX900-NEXT: ; use s[8:9] 5876; GFX900-NEXT: ;;#ASMEND 5877; GFX900-NEXT: s_setpc_b64 s[30:31] 5878; 5879; GFX90A-LABEL: s_shuffle_v3i16_v3i16__5_0_0: 5880; GFX90A: ; %bb.0: 5881; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5882; GFX90A-NEXT: ;;#ASMSTART 5883; GFX90A-NEXT: ; def s[4:5] 5884; GFX90A-NEXT: ;;#ASMEND 5885; GFX90A-NEXT: ;;#ASMSTART 5886; GFX90A-NEXT: ; def s[6:7] 5887; GFX90A-NEXT: ;;#ASMEND 5888; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s4 5889; GFX90A-NEXT: s_mov_b32 s9, s4 5890; GFX90A-NEXT: ;;#ASMSTART 5891; GFX90A-NEXT: ; use s[8:9] 5892; GFX90A-NEXT: ;;#ASMEND 5893; GFX90A-NEXT: s_setpc_b64 s[30:31] 5894; 5895; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_0_0: 5896; GFX940: ; %bb.0: 5897; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5898; GFX940-NEXT: ;;#ASMSTART 5899; GFX940-NEXT: ; def s[0:1] 5900; GFX940-NEXT: ;;#ASMEND 5901; GFX940-NEXT: ;;#ASMSTART 5902; GFX940-NEXT: ; def s[2:3] 5903; GFX940-NEXT: ;;#ASMEND 5904; GFX940-NEXT: 
s_pack_ll_b32_b16 s8, s3, s0 5905; GFX940-NEXT: s_mov_b32 s9, s0 5906; GFX940-NEXT: ;;#ASMSTART 5907; GFX940-NEXT: ; use s[8:9] 5908; GFX940-NEXT: ;;#ASMEND 5909; GFX940-NEXT: s_setpc_b64 s[30:31] 5910 %vec0 = call <4 x i16> asm "; def $0", "=s"() 5911 %vec1 = call <4 x i16> asm "; def $0", "=s"() 5912 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 5913 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 5914 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 0, i32 0> 5915 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 5916 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 5917 ret void 5918} 5919 5920define void @s_shuffle_v3i16_v3i16__5_u_0() { 5921; GFX900-LABEL: s_shuffle_v3i16_v3i16__5_u_0: 5922; GFX900: ; %bb.0: 5923; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5924; GFX900-NEXT: ;;#ASMSTART 5925; GFX900-NEXT: ; def s[4:5] 5926; GFX900-NEXT: ;;#ASMEND 5927; GFX900-NEXT: ;;#ASMSTART 5928; GFX900-NEXT: ; def s[6:7] 5929; GFX900-NEXT: ;;#ASMEND 5930; GFX900-NEXT: s_mov_b32 s8, s7 5931; GFX900-NEXT: s_mov_b32 s9, s4 5932; GFX900-NEXT: ;;#ASMSTART 5933; GFX900-NEXT: ; use s[8:9] 5934; GFX900-NEXT: ;;#ASMEND 5935; GFX900-NEXT: s_setpc_b64 s[30:31] 5936; 5937; GFX90A-LABEL: s_shuffle_v3i16_v3i16__5_u_0: 5938; GFX90A: ; %bb.0: 5939; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5940; GFX90A-NEXT: ;;#ASMSTART 5941; GFX90A-NEXT: ; def s[4:5] 5942; GFX90A-NEXT: ;;#ASMEND 5943; GFX90A-NEXT: ;;#ASMSTART 5944; GFX90A-NEXT: ; def s[6:7] 5945; GFX90A-NEXT: ;;#ASMEND 5946; GFX90A-NEXT: s_mov_b32 s8, s7 5947; GFX90A-NEXT: s_mov_b32 s9, s4 5948; GFX90A-NEXT: ;;#ASMSTART 5949; GFX90A-NEXT: ; use s[8:9] 5950; GFX90A-NEXT: ;;#ASMEND 5951; GFX90A-NEXT: s_setpc_b64 s[30:31] 5952; 5953; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_u_0: 5954; GFX940: ; %bb.0: 5955; GFX940-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5956; GFX940-NEXT: ;;#ASMSTART 5957; GFX940-NEXT: ; def s[0:1] 5958; GFX940-NEXT: ;;#ASMEND 5959; GFX940-NEXT: ;;#ASMSTART 5960; GFX940-NEXT: ; def s[2:3] 5961; GFX940-NEXT: ;;#ASMEND 5962; GFX940-NEXT: s_mov_b32 s8, s3 5963; GFX940-NEXT: s_mov_b32 s9, s0 5964; GFX940-NEXT: ;;#ASMSTART 5965; GFX940-NEXT: ; use s[8:9] 5966; GFX940-NEXT: ;;#ASMEND 5967; GFX940-NEXT: s_setpc_b64 s[30:31] 5968 %vec0 = call <4 x i16> asm "; def $0", "=s"() 5969 %vec1 = call <4 x i16> asm "; def $0", "=s"() 5970 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 5971 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 5972 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 poison, i32 0> 5973 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 5974 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 5975 ret void 5976} 5977 5978define void @s_shuffle_v3i16_v3i16__5_1_0() { 5979; GFX900-LABEL: s_shuffle_v3i16_v3i16__5_1_0: 5980; GFX900: ; %bb.0: 5981; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5982; GFX900-NEXT: ;;#ASMSTART 5983; GFX900-NEXT: ; def s[4:5] 5984; GFX900-NEXT: ;;#ASMEND 5985; GFX900-NEXT: ;;#ASMSTART 5986; GFX900-NEXT: ; def s[6:7] 5987; GFX900-NEXT: ;;#ASMEND 5988; GFX900-NEXT: s_pack_lh_b32_b16 s8, s7, s4 5989; GFX900-NEXT: s_mov_b32 s9, s4 5990; GFX900-NEXT: ;;#ASMSTART 5991; GFX900-NEXT: ; use s[8:9] 5992; GFX900-NEXT: ;;#ASMEND 5993; GFX900-NEXT: s_setpc_b64 s[30:31] 5994; 5995; GFX90A-LABEL: s_shuffle_v3i16_v3i16__5_1_0: 5996; GFX90A: ; %bb.0: 5997; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5998; GFX90A-NEXT: ;;#ASMSTART 5999; GFX90A-NEXT: ; def s[4:5] 6000; GFX90A-NEXT: ;;#ASMEND 6001; GFX90A-NEXT: ;;#ASMSTART 6002; GFX90A-NEXT: ; def s[6:7] 6003; GFX90A-NEXT: ;;#ASMEND 6004; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s7, s4 6005; 
GFX90A-NEXT: s_mov_b32 s9, s4 6006; GFX90A-NEXT: ;;#ASMSTART 6007; GFX90A-NEXT: ; use s[8:9] 6008; GFX90A-NEXT: ;;#ASMEND 6009; GFX90A-NEXT: s_setpc_b64 s[30:31] 6010; 6011; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_1_0: 6012; GFX940: ; %bb.0: 6013; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6014; GFX940-NEXT: ;;#ASMSTART 6015; GFX940-NEXT: ; def s[0:1] 6016; GFX940-NEXT: ;;#ASMEND 6017; GFX940-NEXT: ;;#ASMSTART 6018; GFX940-NEXT: ; def s[2:3] 6019; GFX940-NEXT: ;;#ASMEND 6020; GFX940-NEXT: s_pack_lh_b32_b16 s8, s3, s0 6021; GFX940-NEXT: s_mov_b32 s9, s0 6022; GFX940-NEXT: ;;#ASMSTART 6023; GFX940-NEXT: ; use s[8:9] 6024; GFX940-NEXT: ;;#ASMEND 6025; GFX940-NEXT: s_setpc_b64 s[30:31] 6026 %vec0 = call <4 x i16> asm "; def $0", "=s"() 6027 %vec1 = call <4 x i16> asm "; def $0", "=s"() 6028 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 6029 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 6030 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 1, i32 0> 6031 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 6032 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 6033 ret void 6034} 6035 6036define void @s_shuffle_v3i16_v3i16__5_2_0() { 6037; GFX900-LABEL: s_shuffle_v3i16_v3i16__5_2_0: 6038; GFX900: ; %bb.0: 6039; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6040; GFX900-NEXT: ;;#ASMSTART 6041; GFX900-NEXT: ; def s[4:5] 6042; GFX900-NEXT: ;;#ASMEND 6043; GFX900-NEXT: ;;#ASMSTART 6044; GFX900-NEXT: ; def s[6:7] 6045; GFX900-NEXT: ;;#ASMEND 6046; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s5 6047; GFX900-NEXT: s_mov_b32 s9, s4 6048; GFX900-NEXT: ;;#ASMSTART 6049; GFX900-NEXT: ; use s[8:9] 6050; GFX900-NEXT: ;;#ASMEND 6051; GFX900-NEXT: s_setpc_b64 s[30:31] 6052; 6053; GFX90A-LABEL: s_shuffle_v3i16_v3i16__5_2_0: 6054; GFX90A: ; %bb.0: 6055; GFX90A-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) 6056; GFX90A-NEXT: ;;#ASMSTART 6057; GFX90A-NEXT: ; def s[4:5] 6058; GFX90A-NEXT: ;;#ASMEND 6059; GFX90A-NEXT: ;;#ASMSTART 6060; GFX90A-NEXT: ; def s[6:7] 6061; GFX90A-NEXT: ;;#ASMEND 6062; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s5 6063; GFX90A-NEXT: s_mov_b32 s9, s4 6064; GFX90A-NEXT: ;;#ASMSTART 6065; GFX90A-NEXT: ; use s[8:9] 6066; GFX90A-NEXT: ;;#ASMEND 6067; GFX90A-NEXT: s_setpc_b64 s[30:31] 6068; 6069; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_2_0: 6070; GFX940: ; %bb.0: 6071; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6072; GFX940-NEXT: ;;#ASMSTART 6073; GFX940-NEXT: ; def s[0:1] 6074; GFX940-NEXT: ;;#ASMEND 6075; GFX940-NEXT: ;;#ASMSTART 6076; GFX940-NEXT: ; def s[2:3] 6077; GFX940-NEXT: ;;#ASMEND 6078; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s1 6079; GFX940-NEXT: s_mov_b32 s9, s0 6080; GFX940-NEXT: ;;#ASMSTART 6081; GFX940-NEXT: ; use s[8:9] 6082; GFX940-NEXT: ;;#ASMEND 6083; GFX940-NEXT: s_setpc_b64 s[30:31] 6084 %vec0 = call <4 x i16> asm "; def $0", "=s"() 6085 %vec1 = call <4 x i16> asm "; def $0", "=s"() 6086 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 6087 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 6088 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 2, i32 0> 6089 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 6090 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 6091 ret void 6092} 6093 6094define void @s_shuffle_v3i16_v3i16__5_3_0() { 6095; GFX900-LABEL: s_shuffle_v3i16_v3i16__5_3_0: 6096; GFX900: ; %bb.0: 6097; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6098; GFX900-NEXT: ;;#ASMSTART 6099; GFX900-NEXT: ; def s[4:5] 6100; GFX900-NEXT: ;;#ASMEND 6101; GFX900-NEXT: ;;#ASMSTART 6102; GFX900-NEXT: ; def s[6:7] 6103; GFX900-NEXT: ;;#ASMEND 6104; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s6 6105; 
GFX900-NEXT: s_mov_b32 s9, s4 6106; GFX900-NEXT: ;;#ASMSTART 6107; GFX900-NEXT: ; use s[8:9] 6108; GFX900-NEXT: ;;#ASMEND 6109; GFX900-NEXT: s_setpc_b64 s[30:31] 6110; 6111; GFX90A-LABEL: s_shuffle_v3i16_v3i16__5_3_0: 6112; GFX90A: ; %bb.0: 6113; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6114; GFX90A-NEXT: ;;#ASMSTART 6115; GFX90A-NEXT: ; def s[4:5] 6116; GFX90A-NEXT: ;;#ASMEND 6117; GFX90A-NEXT: ;;#ASMSTART 6118; GFX90A-NEXT: ; def s[6:7] 6119; GFX90A-NEXT: ;;#ASMEND 6120; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s6 6121; GFX90A-NEXT: s_mov_b32 s9, s4 6122; GFX90A-NEXT: ;;#ASMSTART 6123; GFX90A-NEXT: ; use s[8:9] 6124; GFX90A-NEXT: ;;#ASMEND 6125; GFX90A-NEXT: s_setpc_b64 s[30:31] 6126; 6127; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_3_0: 6128; GFX940: ; %bb.0: 6129; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6130; GFX940-NEXT: ;;#ASMSTART 6131; GFX940-NEXT: ; def s[0:1] 6132; GFX940-NEXT: ;;#ASMEND 6133; GFX940-NEXT: ;;#ASMSTART 6134; GFX940-NEXT: ; def s[2:3] 6135; GFX940-NEXT: ;;#ASMEND 6136; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s2 6137; GFX940-NEXT: s_mov_b32 s9, s0 6138; GFX940-NEXT: ;;#ASMSTART 6139; GFX940-NEXT: ; use s[8:9] 6140; GFX940-NEXT: ;;#ASMEND 6141; GFX940-NEXT: s_setpc_b64 s[30:31] 6142 %vec0 = call <4 x i16> asm "; def $0", "=s"() 6143 %vec1 = call <4 x i16> asm "; def $0", "=s"() 6144 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 6145 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 6146 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 3, i32 0> 6147 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 6148 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 6149 ret void 6150} 6151 6152define void @s_shuffle_v3i16_v3i16__5_4_0() { 6153; GFX900-LABEL: s_shuffle_v3i16_v3i16__5_4_0: 6154; GFX900: ; %bb.0: 6155; GFX900-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) 6156; GFX900-NEXT: ;;#ASMSTART 6157; GFX900-NEXT: ; def s[4:5] 6158; GFX900-NEXT: ;;#ASMEND 6159; GFX900-NEXT: ;;#ASMSTART 6160; GFX900-NEXT: ; def s[6:7] 6161; GFX900-NEXT: ;;#ASMEND 6162; GFX900-NEXT: s_pack_lh_b32_b16 s8, s7, s6 6163; GFX900-NEXT: s_mov_b32 s9, s4 6164; GFX900-NEXT: ;;#ASMSTART 6165; GFX900-NEXT: ; use s[8:9] 6166; GFX900-NEXT: ;;#ASMEND 6167; GFX900-NEXT: s_setpc_b64 s[30:31] 6168; 6169; GFX90A-LABEL: s_shuffle_v3i16_v3i16__5_4_0: 6170; GFX90A: ; %bb.0: 6171; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6172; GFX90A-NEXT: ;;#ASMSTART 6173; GFX90A-NEXT: ; def s[4:5] 6174; GFX90A-NEXT: ;;#ASMEND 6175; GFX90A-NEXT: ;;#ASMSTART 6176; GFX90A-NEXT: ; def s[6:7] 6177; GFX90A-NEXT: ;;#ASMEND 6178; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s7, s6 6179; GFX90A-NEXT: s_mov_b32 s9, s4 6180; GFX90A-NEXT: ;;#ASMSTART 6181; GFX90A-NEXT: ; use s[8:9] 6182; GFX90A-NEXT: ;;#ASMEND 6183; GFX90A-NEXT: s_setpc_b64 s[30:31] 6184; 6185; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_4_0: 6186; GFX940: ; %bb.0: 6187; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6188; GFX940-NEXT: ;;#ASMSTART 6189; GFX940-NEXT: ; def s[0:1] 6190; GFX940-NEXT: ;;#ASMEND 6191; GFX940-NEXT: ;;#ASMSTART 6192; GFX940-NEXT: ; def s[2:3] 6193; GFX940-NEXT: ;;#ASMEND 6194; GFX940-NEXT: s_pack_lh_b32_b16 s8, s3, s2 6195; GFX940-NEXT: s_mov_b32 s9, s0 6196; GFX940-NEXT: ;;#ASMSTART 6197; GFX940-NEXT: ; use s[8:9] 6198; GFX940-NEXT: ;;#ASMEND 6199; GFX940-NEXT: s_setpc_b64 s[30:31] 6200 %vec0 = call <4 x i16> asm "; def $0", "=s"() 6201 %vec1 = call <4 x i16> asm "; def $0", "=s"() 6202 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 6203 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 6204 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 4, i32 0> 6205 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, 
i32 2, i32 poison> 6206 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 6207 ret void 6208} 6209 6210define void @s_shuffle_v3i16_v3i16__u_1_1() { 6211; GFX9-LABEL: s_shuffle_v3i16_v3i16__u_1_1: 6212; GFX9: ; %bb.0: 6213; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6214; GFX9-NEXT: ;;#ASMSTART 6215; GFX9-NEXT: ; def s[8:9] 6216; GFX9-NEXT: ;;#ASMEND 6217; GFX9-NEXT: s_lshr_b32 s9, s8, 16 6218; GFX9-NEXT: ;;#ASMSTART 6219; GFX9-NEXT: ; use s[8:9] 6220; GFX9-NEXT: ;;#ASMEND 6221; GFX9-NEXT: s_setpc_b64 s[30:31] 6222 %vec0 = call <4 x i16> asm "; def $0", "=s"() 6223 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 6224 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> <i32 poison, i32 1, i32 1> 6225 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 6226 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 6227 ret void 6228} 6229 6230define void @s_shuffle_v3i16_v3i16__0_1_1() { 6231; GFX9-LABEL: s_shuffle_v3i16_v3i16__0_1_1: 6232; GFX9: ; %bb.0: 6233; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6234; GFX9-NEXT: ;;#ASMSTART 6235; GFX9-NEXT: ; def s[8:9] 6236; GFX9-NEXT: ;;#ASMEND 6237; GFX9-NEXT: s_lshr_b32 s9, s8, 16 6238; GFX9-NEXT: ;;#ASMSTART 6239; GFX9-NEXT: ; use s[8:9] 6240; GFX9-NEXT: ;;#ASMEND 6241; GFX9-NEXT: s_setpc_b64 s[30:31] 6242 %vec0 = call <4 x i16> asm "; def $0", "=s"() 6243 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 6244 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> <i32 0, i32 1, i32 1> 6245 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 6246 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 6247 ret void 6248} 6249 6250define void @s_shuffle_v3i16_v3i16__1_1_1() { 6251; GFX900-LABEL: s_shuffle_v3i16_v3i16__1_1_1: 6252; GFX900: ; 
%bb.0: 6253; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6254; GFX900-NEXT: ;;#ASMSTART 6255; GFX900-NEXT: ; def s[4:5] 6256; GFX900-NEXT: ;;#ASMEND 6257; GFX900-NEXT: s_lshr_b32 s9, s4, 16 6258; GFX900-NEXT: s_pack_hh_b32_b16 s8, s4, s4 6259; GFX900-NEXT: ;;#ASMSTART 6260; GFX900-NEXT: ; use s[8:9] 6261; GFX900-NEXT: ;;#ASMEND 6262; GFX900-NEXT: s_setpc_b64 s[30:31] 6263; 6264; GFX90A-LABEL: s_shuffle_v3i16_v3i16__1_1_1: 6265; GFX90A: ; %bb.0: 6266; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6267; GFX90A-NEXT: ;;#ASMSTART 6268; GFX90A-NEXT: ; def s[4:5] 6269; GFX90A-NEXT: ;;#ASMEND 6270; GFX90A-NEXT: s_lshr_b32 s9, s4, 16 6271; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s4, s4 6272; GFX90A-NEXT: ;;#ASMSTART 6273; GFX90A-NEXT: ; use s[8:9] 6274; GFX90A-NEXT: ;;#ASMEND 6275; GFX90A-NEXT: s_setpc_b64 s[30:31] 6276; 6277; GFX940-LABEL: s_shuffle_v3i16_v3i16__1_1_1: 6278; GFX940: ; %bb.0: 6279; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6280; GFX940-NEXT: ;;#ASMSTART 6281; GFX940-NEXT: ; def s[0:1] 6282; GFX940-NEXT: ;;#ASMEND 6283; GFX940-NEXT: s_lshr_b32 s9, s0, 16 6284; GFX940-NEXT: s_pack_hh_b32_b16 s8, s0, s0 6285; GFX940-NEXT: ;;#ASMSTART 6286; GFX940-NEXT: ; use s[8:9] 6287; GFX940-NEXT: ;;#ASMEND 6288; GFX940-NEXT: s_setpc_b64 s[30:31] 6289 %vec0 = call <4 x i16> asm "; def $0", "=s"() 6290 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 6291 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> <i32 1, i32 1, i32 1> 6292 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 6293 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 6294 ret void 6295} 6296 6297define void @s_shuffle_v3i16_v3i16__2_1_1() { 6298; GFX900-LABEL: s_shuffle_v3i16_v3i16__2_1_1: 6299; GFX900: ; %bb.0: 6300; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6301; GFX900-NEXT: ;;#ASMSTART 6302; GFX900-NEXT: ; def s[4:5] 6303; GFX900-NEXT: 
;;#ASMEND 6304; GFX900-NEXT: s_pack_lh_b32_b16 s8, s5, s4 6305; GFX900-NEXT: s_lshr_b32 s9, s4, 16 6306; GFX900-NEXT: ;;#ASMSTART 6307; GFX900-NEXT: ; use s[8:9] 6308; GFX900-NEXT: ;;#ASMEND 6309; GFX900-NEXT: s_setpc_b64 s[30:31] 6310; 6311; GFX90A-LABEL: s_shuffle_v3i16_v3i16__2_1_1: 6312; GFX90A: ; %bb.0: 6313; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6314; GFX90A-NEXT: ;;#ASMSTART 6315; GFX90A-NEXT: ; def s[4:5] 6316; GFX90A-NEXT: ;;#ASMEND 6317; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s5, s4 6318; GFX90A-NEXT: s_lshr_b32 s9, s4, 16 6319; GFX90A-NEXT: ;;#ASMSTART 6320; GFX90A-NEXT: ; use s[8:9] 6321; GFX90A-NEXT: ;;#ASMEND 6322; GFX90A-NEXT: s_setpc_b64 s[30:31] 6323; 6324; GFX940-LABEL: s_shuffle_v3i16_v3i16__2_1_1: 6325; GFX940: ; %bb.0: 6326; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6327; GFX940-NEXT: ;;#ASMSTART 6328; GFX940-NEXT: ; def s[0:1] 6329; GFX940-NEXT: ;;#ASMEND 6330; GFX940-NEXT: s_pack_lh_b32_b16 s8, s1, s0 6331; GFX940-NEXT: s_lshr_b32 s9, s0, 16 6332; GFX940-NEXT: ;;#ASMSTART 6333; GFX940-NEXT: ; use s[8:9] 6334; GFX940-NEXT: ;;#ASMEND 6335; GFX940-NEXT: s_setpc_b64 s[30:31] 6336 %vec0 = call <4 x i16> asm "; def $0", "=s"() 6337 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 6338 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> <i32 2, i32 1, i32 1> 6339 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 6340 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 6341 ret void 6342} 6343 6344define void @s_shuffle_v3i16_v3i16__3_1_1() { 6345; GFX9-LABEL: s_shuffle_v3i16_v3i16__3_1_1: 6346; GFX9: ; %bb.0: 6347; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6348; GFX9-NEXT: ;;#ASMSTART 6349; GFX9-NEXT: ; def s[8:9] 6350; GFX9-NEXT: ;;#ASMEND 6351; GFX9-NEXT: s_lshr_b32 s9, s8, 16 6352; GFX9-NEXT: ;;#ASMSTART 6353; GFX9-NEXT: ; use s[8:9] 6354; GFX9-NEXT: ;;#ASMEND 6355; GFX9-NEXT: 
s_setpc_b64 s[30:31] 6356 %vec0 = call <4 x i16> asm "; def $0", "=s"() 6357 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 6358 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> <i32 3, i32 1, i32 1> 6359 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 6360 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 6361 ret void 6362} 6363 6364define void @s_shuffle_v3i16_v3i16__4_1_1() { 6365; GFX900-LABEL: s_shuffle_v3i16_v3i16__4_1_1: 6366; GFX900: ; %bb.0: 6367; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6368; GFX900-NEXT: ;;#ASMSTART 6369; GFX900-NEXT: ; def s[4:5] 6370; GFX900-NEXT: ;;#ASMEND 6371; GFX900-NEXT: ;;#ASMSTART 6372; GFX900-NEXT: ; def s[6:7] 6373; GFX900-NEXT: ;;#ASMEND 6374; GFX900-NEXT: s_pack_hh_b32_b16 s8, s6, s4 6375; GFX900-NEXT: s_lshr_b32 s9, s4, 16 6376; GFX900-NEXT: ;;#ASMSTART 6377; GFX900-NEXT: ; use s[8:9] 6378; GFX900-NEXT: ;;#ASMEND 6379; GFX900-NEXT: s_setpc_b64 s[30:31] 6380; 6381; GFX90A-LABEL: s_shuffle_v3i16_v3i16__4_1_1: 6382; GFX90A: ; %bb.0: 6383; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6384; GFX90A-NEXT: ;;#ASMSTART 6385; GFX90A-NEXT: ; def s[4:5] 6386; GFX90A-NEXT: ;;#ASMEND 6387; GFX90A-NEXT: ;;#ASMSTART 6388; GFX90A-NEXT: ; def s[6:7] 6389; GFX90A-NEXT: ;;#ASMEND 6390; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s6, s4 6391; GFX90A-NEXT: s_lshr_b32 s9, s4, 16 6392; GFX90A-NEXT: ;;#ASMSTART 6393; GFX90A-NEXT: ; use s[8:9] 6394; GFX90A-NEXT: ;;#ASMEND 6395; GFX90A-NEXT: s_setpc_b64 s[30:31] 6396; 6397; GFX940-LABEL: s_shuffle_v3i16_v3i16__4_1_1: 6398; GFX940: ; %bb.0: 6399; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6400; GFX940-NEXT: ;;#ASMSTART 6401; GFX940-NEXT: ; def s[0:1] 6402; GFX940-NEXT: ;;#ASMEND 6403; GFX940-NEXT: ;;#ASMSTART 6404; GFX940-NEXT: ; def s[2:3] 6405; GFX940-NEXT: ;;#ASMEND 6406; GFX940-NEXT: s_pack_hh_b32_b16 s8, s2, s0 6407; GFX940-NEXT: 
s_lshr_b32 s9, s0, 16 6408; GFX940-NEXT: ;;#ASMSTART 6409; GFX940-NEXT: ; use s[8:9] 6410; GFX940-NEXT: ;;#ASMEND 6411; GFX940-NEXT: s_setpc_b64 s[30:31] 6412 %vec0 = call <4 x i16> asm "; def $0", "=s"() 6413 %vec1 = call <4 x i16> asm "; def $0", "=s"() 6414 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 6415 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 6416 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 4, i32 1, i32 1> 6417 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 6418 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 6419 ret void 6420} 6421 6422define void @s_shuffle_v3i16_v3i16__5_1_1() { 6423; GFX900-LABEL: s_shuffle_v3i16_v3i16__5_1_1: 6424; GFX900: ; %bb.0: 6425; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6426; GFX900-NEXT: ;;#ASMSTART 6427; GFX900-NEXT: ; def s[4:5] 6428; GFX900-NEXT: ;;#ASMEND 6429; GFX900-NEXT: ;;#ASMSTART 6430; GFX900-NEXT: ; def s[6:7] 6431; GFX900-NEXT: ;;#ASMEND 6432; GFX900-NEXT: s_pack_lh_b32_b16 s8, s7, s4 6433; GFX900-NEXT: s_lshr_b32 s9, s4, 16 6434; GFX900-NEXT: ;;#ASMSTART 6435; GFX900-NEXT: ; use s[8:9] 6436; GFX900-NEXT: ;;#ASMEND 6437; GFX900-NEXT: s_setpc_b64 s[30:31] 6438; 6439; GFX90A-LABEL: s_shuffle_v3i16_v3i16__5_1_1: 6440; GFX90A: ; %bb.0: 6441; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6442; GFX90A-NEXT: ;;#ASMSTART 6443; GFX90A-NEXT: ; def s[4:5] 6444; GFX90A-NEXT: ;;#ASMEND 6445; GFX90A-NEXT: ;;#ASMSTART 6446; GFX90A-NEXT: ; def s[6:7] 6447; GFX90A-NEXT: ;;#ASMEND 6448; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s7, s4 6449; GFX90A-NEXT: s_lshr_b32 s9, s4, 16 6450; GFX90A-NEXT: ;;#ASMSTART 6451; GFX90A-NEXT: ; use s[8:9] 6452; GFX90A-NEXT: ;;#ASMEND 6453; GFX90A-NEXT: s_setpc_b64 s[30:31] 6454; 6455; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_1_1: 6456; GFX940: ; %bb.0: 6457; GFX940-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) 6458; GFX940-NEXT: ;;#ASMSTART 6459; GFX940-NEXT: ; def s[0:1] 6460; GFX940-NEXT: ;;#ASMEND 6461; GFX940-NEXT: ;;#ASMSTART 6462; GFX940-NEXT: ; def s[2:3] 6463; GFX940-NEXT: ;;#ASMEND 6464; GFX940-NEXT: s_pack_lh_b32_b16 s8, s3, s0 6465; GFX940-NEXT: s_lshr_b32 s9, s0, 16 6466; GFX940-NEXT: ;;#ASMSTART 6467; GFX940-NEXT: ; use s[8:9] 6468; GFX940-NEXT: ;;#ASMEND 6469; GFX940-NEXT: s_setpc_b64 s[30:31] 6470 %vec0 = call <4 x i16> asm "; def $0", "=s"() 6471 %vec1 = call <4 x i16> asm "; def $0", "=s"() 6472 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 6473 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 6474 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 1, i32 1> 6475 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 6476 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 6477 ret void 6478} 6479 6480define void @s_shuffle_v3i16_v3i16__5_u_1() { 6481; GFX900-LABEL: s_shuffle_v3i16_v3i16__5_u_1: 6482; GFX900: ; %bb.0: 6483; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6484; GFX900-NEXT: ;;#ASMSTART 6485; GFX900-NEXT: ; def s[4:5] 6486; GFX900-NEXT: ;;#ASMEND 6487; GFX900-NEXT: ;;#ASMSTART 6488; GFX900-NEXT: ; def s[6:7] 6489; GFX900-NEXT: ;;#ASMEND 6490; GFX900-NEXT: s_lshr_b32 s9, s4, 16 6491; GFX900-NEXT: s_mov_b32 s8, s7 6492; GFX900-NEXT: ;;#ASMSTART 6493; GFX900-NEXT: ; use s[8:9] 6494; GFX900-NEXT: ;;#ASMEND 6495; GFX900-NEXT: s_setpc_b64 s[30:31] 6496; 6497; GFX90A-LABEL: s_shuffle_v3i16_v3i16__5_u_1: 6498; GFX90A: ; %bb.0: 6499; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6500; GFX90A-NEXT: ;;#ASMSTART 6501; GFX90A-NEXT: ; def s[4:5] 6502; GFX90A-NEXT: ;;#ASMEND 6503; GFX90A-NEXT: ;;#ASMSTART 6504; GFX90A-NEXT: ; def s[6:7] 6505; GFX90A-NEXT: ;;#ASMEND 6506; GFX90A-NEXT: s_lshr_b32 s9, s4, 16 6507; GFX90A-NEXT: 
s_mov_b32 s8, s7 6508; GFX90A-NEXT: ;;#ASMSTART 6509; GFX90A-NEXT: ; use s[8:9] 6510; GFX90A-NEXT: ;;#ASMEND 6511; GFX90A-NEXT: s_setpc_b64 s[30:31] 6512; 6513; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_u_1: 6514; GFX940: ; %bb.0: 6515; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6516; GFX940-NEXT: ;;#ASMSTART 6517; GFX940-NEXT: ; def s[0:1] 6518; GFX940-NEXT: ;;#ASMEND 6519; GFX940-NEXT: ;;#ASMSTART 6520; GFX940-NEXT: ; def s[2:3] 6521; GFX940-NEXT: ;;#ASMEND 6522; GFX940-NEXT: s_lshr_b32 s9, s0, 16 6523; GFX940-NEXT: s_mov_b32 s8, s3 6524; GFX940-NEXT: ;;#ASMSTART 6525; GFX940-NEXT: ; use s[8:9] 6526; GFX940-NEXT: ;;#ASMEND 6527; GFX940-NEXT: s_setpc_b64 s[30:31] 6528 %vec0 = call <4 x i16> asm "; def $0", "=s"() 6529 %vec1 = call <4 x i16> asm "; def $0", "=s"() 6530 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 6531 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 6532 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 poison, i32 1> 6533 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 6534 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 6535 ret void 6536} 6537 6538define void @s_shuffle_v3i16_v3i16__5_0_1() { 6539; GFX900-LABEL: s_shuffle_v3i16_v3i16__5_0_1: 6540; GFX900: ; %bb.0: 6541; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6542; GFX900-NEXT: ;;#ASMSTART 6543; GFX900-NEXT: ; def s[4:5] 6544; GFX900-NEXT: ;;#ASMEND 6545; GFX900-NEXT: ;;#ASMSTART 6546; GFX900-NEXT: ; def s[6:7] 6547; GFX900-NEXT: ;;#ASMEND 6548; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s4 6549; GFX900-NEXT: s_lshr_b32 s9, s4, 16 6550; GFX900-NEXT: ;;#ASMSTART 6551; GFX900-NEXT: ; use s[8:9] 6552; GFX900-NEXT: ;;#ASMEND 6553; GFX900-NEXT: s_setpc_b64 s[30:31] 6554; 6555; GFX90A-LABEL: s_shuffle_v3i16_v3i16__5_0_1: 6556; GFX90A: ; %bb.0: 6557; GFX90A-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) 6558; GFX90A-NEXT: ;;#ASMSTART 6559; GFX90A-NEXT: ; def s[4:5] 6560; GFX90A-NEXT: ;;#ASMEND 6561; GFX90A-NEXT: ;;#ASMSTART 6562; GFX90A-NEXT: ; def s[6:7] 6563; GFX90A-NEXT: ;;#ASMEND 6564; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s4 6565; GFX90A-NEXT: s_lshr_b32 s9, s4, 16 6566; GFX90A-NEXT: ;;#ASMSTART 6567; GFX90A-NEXT: ; use s[8:9] 6568; GFX90A-NEXT: ;;#ASMEND 6569; GFX90A-NEXT: s_setpc_b64 s[30:31] 6570; 6571; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_0_1: 6572; GFX940: ; %bb.0: 6573; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6574; GFX940-NEXT: ;;#ASMSTART 6575; GFX940-NEXT: ; def s[0:1] 6576; GFX940-NEXT: ;;#ASMEND 6577; GFX940-NEXT: ;;#ASMSTART 6578; GFX940-NEXT: ; def s[2:3] 6579; GFX940-NEXT: ;;#ASMEND 6580; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 6581; GFX940-NEXT: s_lshr_b32 s9, s0, 16 6582; GFX940-NEXT: ;;#ASMSTART 6583; GFX940-NEXT: ; use s[8:9] 6584; GFX940-NEXT: ;;#ASMEND 6585; GFX940-NEXT: s_setpc_b64 s[30:31] 6586 %vec0 = call <4 x i16> asm "; def $0", "=s"() 6587 %vec1 = call <4 x i16> asm "; def $0", "=s"() 6588 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 6589 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 6590 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 0, i32 1> 6591 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 6592 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 6593 ret void 6594} 6595 6596define void @s_shuffle_v3i16_v3i16__5_2_1() { 6597; GFX900-LABEL: s_shuffle_v3i16_v3i16__5_2_1: 6598; GFX900: ; %bb.0: 6599; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6600; GFX900-NEXT: ;;#ASMSTART 6601; GFX900-NEXT: ; def s[4:5] 6602; GFX900-NEXT: ;;#ASMEND 6603; GFX900-NEXT: ;;#ASMSTART 6604; GFX900-NEXT: ; def s[6:7] 6605; GFX900-NEXT: ;;#ASMEND 6606; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s5 6607; 
GFX900-NEXT: s_lshr_b32 s9, s4, 16 6608; GFX900-NEXT: ;;#ASMSTART 6609; GFX900-NEXT: ; use s[8:9] 6610; GFX900-NEXT: ;;#ASMEND 6611; GFX900-NEXT: s_setpc_b64 s[30:31] 6612; 6613; GFX90A-LABEL: s_shuffle_v3i16_v3i16__5_2_1: 6614; GFX90A: ; %bb.0: 6615; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6616; GFX90A-NEXT: ;;#ASMSTART 6617; GFX90A-NEXT: ; def s[4:5] 6618; GFX90A-NEXT: ;;#ASMEND 6619; GFX90A-NEXT: ;;#ASMSTART 6620; GFX90A-NEXT: ; def s[6:7] 6621; GFX90A-NEXT: ;;#ASMEND 6622; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s5 6623; GFX90A-NEXT: s_lshr_b32 s9, s4, 16 6624; GFX90A-NEXT: ;;#ASMSTART 6625; GFX90A-NEXT: ; use s[8:9] 6626; GFX90A-NEXT: ;;#ASMEND 6627; GFX90A-NEXT: s_setpc_b64 s[30:31] 6628; 6629; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_2_1: 6630; GFX940: ; %bb.0: 6631; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6632; GFX940-NEXT: ;;#ASMSTART 6633; GFX940-NEXT: ; def s[0:1] 6634; GFX940-NEXT: ;;#ASMEND 6635; GFX940-NEXT: ;;#ASMSTART 6636; GFX940-NEXT: ; def s[2:3] 6637; GFX940-NEXT: ;;#ASMEND 6638; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s1 6639; GFX940-NEXT: s_lshr_b32 s9, s0, 16 6640; GFX940-NEXT: ;;#ASMSTART 6641; GFX940-NEXT: ; use s[8:9] 6642; GFX940-NEXT: ;;#ASMEND 6643; GFX940-NEXT: s_setpc_b64 s[30:31] 6644 %vec0 = call <4 x i16> asm "; def $0", "=s"() 6645 %vec1 = call <4 x i16> asm "; def $0", "=s"() 6646 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 6647 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 6648 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 2, i32 1> 6649 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 6650 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 6651 ret void 6652} 6653 6654define void @s_shuffle_v3i16_v3i16__5_3_1() { 6655; GFX900-LABEL: s_shuffle_v3i16_v3i16__5_3_1: 6656; GFX900: ; %bb.0: 6657; 
GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6658; GFX900-NEXT: ;;#ASMSTART 6659; GFX900-NEXT: ; def s[4:5] 6660; GFX900-NEXT: ;;#ASMEND 6661; GFX900-NEXT: ;;#ASMSTART 6662; GFX900-NEXT: ; def s[6:7] 6663; GFX900-NEXT: ;;#ASMEND 6664; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s6 6665; GFX900-NEXT: s_lshr_b32 s9, s4, 16 6666; GFX900-NEXT: ;;#ASMSTART 6667; GFX900-NEXT: ; use s[8:9] 6668; GFX900-NEXT: ;;#ASMEND 6669; GFX900-NEXT: s_setpc_b64 s[30:31] 6670; 6671; GFX90A-LABEL: s_shuffle_v3i16_v3i16__5_3_1: 6672; GFX90A: ; %bb.0: 6673; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6674; GFX90A-NEXT: ;;#ASMSTART 6675; GFX90A-NEXT: ; def s[4:5] 6676; GFX90A-NEXT: ;;#ASMEND 6677; GFX90A-NEXT: ;;#ASMSTART 6678; GFX90A-NEXT: ; def s[6:7] 6679; GFX90A-NEXT: ;;#ASMEND 6680; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s6 6681; GFX90A-NEXT: s_lshr_b32 s9, s4, 16 6682; GFX90A-NEXT: ;;#ASMSTART 6683; GFX90A-NEXT: ; use s[8:9] 6684; GFX90A-NEXT: ;;#ASMEND 6685; GFX90A-NEXT: s_setpc_b64 s[30:31] 6686; 6687; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_3_1: 6688; GFX940: ; %bb.0: 6689; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6690; GFX940-NEXT: ;;#ASMSTART 6691; GFX940-NEXT: ; def s[0:1] 6692; GFX940-NEXT: ;;#ASMEND 6693; GFX940-NEXT: ;;#ASMSTART 6694; GFX940-NEXT: ; def s[2:3] 6695; GFX940-NEXT: ;;#ASMEND 6696; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s2 6697; GFX940-NEXT: s_lshr_b32 s9, s0, 16 6698; GFX940-NEXT: ;;#ASMSTART 6699; GFX940-NEXT: ; use s[8:9] 6700; GFX940-NEXT: ;;#ASMEND 6701; GFX940-NEXT: s_setpc_b64 s[30:31] 6702 %vec0 = call <4 x i16> asm "; def $0", "=s"() 6703 %vec1 = call <4 x i16> asm "; def $0", "=s"() 6704 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 6705 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 6706 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 3, i32 1> 6707 %extend3 = shufflevector <3 x i16> %shuf, <3 x 
i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 6708 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 6709 ret void 6710} 6711 6712define void @s_shuffle_v3i16_v3i16__5_4_1() { 6713; GFX900-LABEL: s_shuffle_v3i16_v3i16__5_4_1: 6714; GFX900: ; %bb.0: 6715; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6716; GFX900-NEXT: ;;#ASMSTART 6717; GFX900-NEXT: ; def s[4:5] 6718; GFX900-NEXT: ;;#ASMEND 6719; GFX900-NEXT: ;;#ASMSTART 6720; GFX900-NEXT: ; def s[6:7] 6721; GFX900-NEXT: ;;#ASMEND 6722; GFX900-NEXT: s_pack_lh_b32_b16 s8, s7, s6 6723; GFX900-NEXT: s_lshr_b32 s9, s4, 16 6724; GFX900-NEXT: ;;#ASMSTART 6725; GFX900-NEXT: ; use s[8:9] 6726; GFX900-NEXT: ;;#ASMEND 6727; GFX900-NEXT: s_setpc_b64 s[30:31] 6728; 6729; GFX90A-LABEL: s_shuffle_v3i16_v3i16__5_4_1: 6730; GFX90A: ; %bb.0: 6731; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6732; GFX90A-NEXT: ;;#ASMSTART 6733; GFX90A-NEXT: ; def s[4:5] 6734; GFX90A-NEXT: ;;#ASMEND 6735; GFX90A-NEXT: ;;#ASMSTART 6736; GFX90A-NEXT: ; def s[6:7] 6737; GFX90A-NEXT: ;;#ASMEND 6738; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s7, s6 6739; GFX90A-NEXT: s_lshr_b32 s9, s4, 16 6740; GFX90A-NEXT: ;;#ASMSTART 6741; GFX90A-NEXT: ; use s[8:9] 6742; GFX90A-NEXT: ;;#ASMEND 6743; GFX90A-NEXT: s_setpc_b64 s[30:31] 6744; 6745; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_4_1: 6746; GFX940: ; %bb.0: 6747; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6748; GFX940-NEXT: ;;#ASMSTART 6749; GFX940-NEXT: ; def s[0:1] 6750; GFX940-NEXT: ;;#ASMEND 6751; GFX940-NEXT: ;;#ASMSTART 6752; GFX940-NEXT: ; def s[2:3] 6753; GFX940-NEXT: ;;#ASMEND 6754; GFX940-NEXT: s_pack_lh_b32_b16 s8, s3, s2 6755; GFX940-NEXT: s_lshr_b32 s9, s0, 16 6756; GFX940-NEXT: ;;#ASMSTART 6757; GFX940-NEXT: ; use s[8:9] 6758; GFX940-NEXT: ;;#ASMEND 6759; GFX940-NEXT: s_setpc_b64 s[30:31] 6760 %vec0 = call <4 x i16> asm "; def $0", "=s"() 6761 %vec1 = call <4 x i16> asm "; def $0", "=s"() 6762 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 
x i32> <i32 0, i32 1, i32 2> 6763 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 6764 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 4, i32 1> 6765 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 6766 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 6767 ret void 6768} 6769 6770define void @s_shuffle_v3i16_v3i16__u_2_2() { 6771; GFX9-LABEL: s_shuffle_v3i16_v3i16__u_2_2: 6772; GFX9: ; %bb.0: 6773; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6774; GFX9-NEXT: ;;#ASMSTART 6775; GFX9-NEXT: ; def s[8:9] 6776; GFX9-NEXT: ;;#ASMEND 6777; GFX9-NEXT: s_lshl_b32 s8, s9, 16 6778; GFX9-NEXT: ;;#ASMSTART 6779; GFX9-NEXT: ; use s[8:9] 6780; GFX9-NEXT: ;;#ASMEND 6781; GFX9-NEXT: s_setpc_b64 s[30:31] 6782 %vec0 = call <4 x i16> asm "; def $0", "=s"() 6783 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 6784 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> <i32 poison, i32 2, i32 2> 6785 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 6786 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 6787 ret void 6788} 6789 6790define void @s_shuffle_v3i16_v3i16__0_2_2() { 6791; GFX9-LABEL: s_shuffle_v3i16_v3i16__0_2_2: 6792; GFX9: ; %bb.0: 6793; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6794; GFX9-NEXT: ;;#ASMSTART 6795; GFX9-NEXT: ; def s[8:9] 6796; GFX9-NEXT: ;;#ASMEND 6797; GFX9-NEXT: s_pack_ll_b32_b16 s8, s8, s9 6798; GFX9-NEXT: ;;#ASMSTART 6799; GFX9-NEXT: ; use s[8:9] 6800; GFX9-NEXT: ;;#ASMEND 6801; GFX9-NEXT: s_setpc_b64 s[30:31] 6802 %vec0 = call <4 x i16> asm "; def $0", "=s"() 6803 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 6804 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> <i32 0, i32 2, i32 2> 
6805 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 6806 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 6807 ret void 6808} 6809 6810define void @s_shuffle_v3i16_v3i16__1_2_2() { 6811; GFX900-LABEL: s_shuffle_v3i16_v3i16__1_2_2: 6812; GFX900: ; %bb.0: 6813; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6814; GFX900-NEXT: ;;#ASMSTART 6815; GFX900-NEXT: ; def s[8:9] 6816; GFX900-NEXT: ;;#ASMEND 6817; GFX900-NEXT: s_lshr_b32 s4, s8, 16 6818; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s9 6819; GFX900-NEXT: ;;#ASMSTART 6820; GFX900-NEXT: ; use s[8:9] 6821; GFX900-NEXT: ;;#ASMEND 6822; GFX900-NEXT: s_setpc_b64 s[30:31] 6823; 6824; GFX90A-LABEL: s_shuffle_v3i16_v3i16__1_2_2: 6825; GFX90A: ; %bb.0: 6826; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6827; GFX90A-NEXT: ;;#ASMSTART 6828; GFX90A-NEXT: ; def s[8:9] 6829; GFX90A-NEXT: ;;#ASMEND 6830; GFX90A-NEXT: s_lshr_b32 s4, s8, 16 6831; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s9 6832; GFX90A-NEXT: ;;#ASMSTART 6833; GFX90A-NEXT: ; use s[8:9] 6834; GFX90A-NEXT: ;;#ASMEND 6835; GFX90A-NEXT: s_setpc_b64 s[30:31] 6836; 6837; GFX940-LABEL: s_shuffle_v3i16_v3i16__1_2_2: 6838; GFX940: ; %bb.0: 6839; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6840; GFX940-NEXT: ;;#ASMSTART 6841; GFX940-NEXT: ; def s[8:9] 6842; GFX940-NEXT: ;;#ASMEND 6843; GFX940-NEXT: s_lshr_b32 s0, s8, 16 6844; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 6845; GFX940-NEXT: ;;#ASMSTART 6846; GFX940-NEXT: ; use s[8:9] 6847; GFX940-NEXT: ;;#ASMEND 6848; GFX940-NEXT: s_setpc_b64 s[30:31] 6849 %vec0 = call <4 x i16> asm "; def $0", "=s"() 6850 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 6851 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> <i32 1, i32 2, i32 2> 6852 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 6853 call void asm sideeffect "; use 
$0", "{s[8:9]}"(<4 x i16> %extend3) 6854 ret void 6855} 6856 6857define void @s_shuffle_v3i16_v3i16__2_2_2() { 6858; GFX9-LABEL: s_shuffle_v3i16_v3i16__2_2_2: 6859; GFX9: ; %bb.0: 6860; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6861; GFX9-NEXT: ;;#ASMSTART 6862; GFX9-NEXT: ; def s[8:9] 6863; GFX9-NEXT: ;;#ASMEND 6864; GFX9-NEXT: s_pack_ll_b32_b16 s8, s9, s9 6865; GFX9-NEXT: ;;#ASMSTART 6866; GFX9-NEXT: ; use s[8:9] 6867; GFX9-NEXT: ;;#ASMEND 6868; GFX9-NEXT: s_setpc_b64 s[30:31] 6869 %vec0 = call <4 x i16> asm "; def $0", "=s"() 6870 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 6871 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> <i32 2, i32 2, i32 2> 6872 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 6873 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 6874 ret void 6875} 6876 6877define void @s_shuffle_v3i16_v3i16__3_2_2() { 6878; GFX9-LABEL: s_shuffle_v3i16_v3i16__3_2_2: 6879; GFX9: ; %bb.0: 6880; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6881; GFX9-NEXT: ;;#ASMSTART 6882; GFX9-NEXT: ; def s[8:9] 6883; GFX9-NEXT: ;;#ASMEND 6884; GFX9-NEXT: s_lshl_b32 s8, s9, 16 6885; GFX9-NEXT: ;;#ASMSTART 6886; GFX9-NEXT: ; use s[8:9] 6887; GFX9-NEXT: ;;#ASMEND 6888; GFX9-NEXT: s_setpc_b64 s[30:31] 6889 %vec0 = call <4 x i16> asm "; def $0", "=s"() 6890 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 6891 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> <i32 3, i32 2, i32 2> 6892 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 6893 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 6894 ret void 6895} 6896 6897define void @s_shuffle_v3i16_v3i16__4_2_2() { 6898; GFX900-LABEL: s_shuffle_v3i16_v3i16__4_2_2: 6899; GFX900: ; %bb.0: 6900; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) 6901; GFX900-NEXT: ;;#ASMSTART 6902; GFX900-NEXT: ; def s[4:5] 6903; GFX900-NEXT: ;;#ASMEND 6904; GFX900-NEXT: ;;#ASMSTART 6905; GFX900-NEXT: ; def s[8:9] 6906; GFX900-NEXT: ;;#ASMEND 6907; GFX900-NEXT: s_lshr_b32 s4, s4, 16 6908; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s9 6909; GFX900-NEXT: ;;#ASMSTART 6910; GFX900-NEXT: ; use s[8:9] 6911; GFX900-NEXT: ;;#ASMEND 6912; GFX900-NEXT: s_setpc_b64 s[30:31] 6913; 6914; GFX90A-LABEL: s_shuffle_v3i16_v3i16__4_2_2: 6915; GFX90A: ; %bb.0: 6916; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6917; GFX90A-NEXT: ;;#ASMSTART 6918; GFX90A-NEXT: ; def s[4:5] 6919; GFX90A-NEXT: ;;#ASMEND 6920; GFX90A-NEXT: ;;#ASMSTART 6921; GFX90A-NEXT: ; def s[8:9] 6922; GFX90A-NEXT: ;;#ASMEND 6923; GFX90A-NEXT: s_lshr_b32 s4, s4, 16 6924; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s9 6925; GFX90A-NEXT: ;;#ASMSTART 6926; GFX90A-NEXT: ; use s[8:9] 6927; GFX90A-NEXT: ;;#ASMEND 6928; GFX90A-NEXT: s_setpc_b64 s[30:31] 6929; 6930; GFX940-LABEL: s_shuffle_v3i16_v3i16__4_2_2: 6931; GFX940: ; %bb.0: 6932; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6933; GFX940-NEXT: ;;#ASMSTART 6934; GFX940-NEXT: ; def s[0:1] 6935; GFX940-NEXT: ;;#ASMEND 6936; GFX940-NEXT: ;;#ASMSTART 6937; GFX940-NEXT: ; def s[8:9] 6938; GFX940-NEXT: ;;#ASMEND 6939; GFX940-NEXT: s_lshr_b32 s0, s0, 16 6940; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 6941; GFX940-NEXT: ;;#ASMSTART 6942; GFX940-NEXT: ; use s[8:9] 6943; GFX940-NEXT: ;;#ASMEND 6944; GFX940-NEXT: s_setpc_b64 s[30:31] 6945 %vec0 = call <4 x i16> asm "; def $0", "=s"() 6946 %vec1 = call <4 x i16> asm "; def $0", "=s"() 6947 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 6948 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 6949 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 4, i32 2, i32 2> 6950 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, 
i32 poison> 6951 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 6952 ret void 6953} 6954 6955define void @s_shuffle_v3i16_v3i16__5_2_2() { 6956; GFX900-LABEL: s_shuffle_v3i16_v3i16__5_2_2: 6957; GFX900: ; %bb.0: 6958; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6959; GFX900-NEXT: ;;#ASMSTART 6960; GFX900-NEXT: ; def s[8:9] 6961; GFX900-NEXT: ;;#ASMEND 6962; GFX900-NEXT: ;;#ASMSTART 6963; GFX900-NEXT: ; def s[4:5] 6964; GFX900-NEXT: ;;#ASMEND 6965; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s9 6966; GFX900-NEXT: ;;#ASMSTART 6967; GFX900-NEXT: ; use s[8:9] 6968; GFX900-NEXT: ;;#ASMEND 6969; GFX900-NEXT: s_setpc_b64 s[30:31] 6970; 6971; GFX90A-LABEL: s_shuffle_v3i16_v3i16__5_2_2: 6972; GFX90A: ; %bb.0: 6973; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6974; GFX90A-NEXT: ;;#ASMSTART 6975; GFX90A-NEXT: ; def s[8:9] 6976; GFX90A-NEXT: ;;#ASMEND 6977; GFX90A-NEXT: ;;#ASMSTART 6978; GFX90A-NEXT: ; def s[4:5] 6979; GFX90A-NEXT: ;;#ASMEND 6980; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s9 6981; GFX90A-NEXT: ;;#ASMSTART 6982; GFX90A-NEXT: ; use s[8:9] 6983; GFX90A-NEXT: ;;#ASMEND 6984; GFX90A-NEXT: s_setpc_b64 s[30:31] 6985; 6986; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_2_2: 6987; GFX940: ; %bb.0: 6988; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6989; GFX940-NEXT: ;;#ASMSTART 6990; GFX940-NEXT: ; def s[8:9] 6991; GFX940-NEXT: ;;#ASMEND 6992; GFX940-NEXT: ;;#ASMSTART 6993; GFX940-NEXT: ; def s[0:1] 6994; GFX940-NEXT: ;;#ASMEND 6995; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s9 6996; GFX940-NEXT: ;;#ASMSTART 6997; GFX940-NEXT: ; use s[8:9] 6998; GFX940-NEXT: ;;#ASMEND 6999; GFX940-NEXT: s_setpc_b64 s[30:31] 7000 %vec0 = call <4 x i16> asm "; def $0", "=s"() 7001 %vec1 = call <4 x i16> asm "; def $0", "=s"() 7002 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 7003 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 7004 %shuf = shufflevector <3 x i16> 
%extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 2, i32 2> 7005 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 7006 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 7007 ret void 7008} 7009 7010define void @s_shuffle_v3i16_v3i16__5_u_2() { 7011; GFX900-LABEL: s_shuffle_v3i16_v3i16__5_u_2: 7012; GFX900: ; %bb.0: 7013; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7014; GFX900-NEXT: ;;#ASMSTART 7015; GFX900-NEXT: ; def s[8:9] 7016; GFX900-NEXT: ;;#ASMEND 7017; GFX900-NEXT: ;;#ASMSTART 7018; GFX900-NEXT: ; def s[4:5] 7019; GFX900-NEXT: ;;#ASMEND 7020; GFX900-NEXT: s_mov_b32 s8, s5 7021; GFX900-NEXT: ;;#ASMSTART 7022; GFX900-NEXT: ; use s[8:9] 7023; GFX900-NEXT: ;;#ASMEND 7024; GFX900-NEXT: s_setpc_b64 s[30:31] 7025; 7026; GFX90A-LABEL: s_shuffle_v3i16_v3i16__5_u_2: 7027; GFX90A: ; %bb.0: 7028; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7029; GFX90A-NEXT: ;;#ASMSTART 7030; GFX90A-NEXT: ; def s[8:9] 7031; GFX90A-NEXT: ;;#ASMEND 7032; GFX90A-NEXT: ;;#ASMSTART 7033; GFX90A-NEXT: ; def s[4:5] 7034; GFX90A-NEXT: ;;#ASMEND 7035; GFX90A-NEXT: s_mov_b32 s8, s5 7036; GFX90A-NEXT: ;;#ASMSTART 7037; GFX90A-NEXT: ; use s[8:9] 7038; GFX90A-NEXT: ;;#ASMEND 7039; GFX90A-NEXT: s_setpc_b64 s[30:31] 7040; 7041; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_u_2: 7042; GFX940: ; %bb.0: 7043; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7044; GFX940-NEXT: ;;#ASMSTART 7045; GFX940-NEXT: ; def s[8:9] 7046; GFX940-NEXT: ;;#ASMEND 7047; GFX940-NEXT: ;;#ASMSTART 7048; GFX940-NEXT: ; def s[0:1] 7049; GFX940-NEXT: ;;#ASMEND 7050; GFX940-NEXT: s_mov_b32 s8, s1 7051; GFX940-NEXT: ;;#ASMSTART 7052; GFX940-NEXT: ; use s[8:9] 7053; GFX940-NEXT: ;;#ASMEND 7054; GFX940-NEXT: s_setpc_b64 s[30:31] 7055 %vec0 = call <4 x i16> asm "; def $0", "=s"() 7056 %vec1 = call <4 x i16> asm "; def $0", "=s"() 7057 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 7058 
%extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 7059 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 poison, i32 2> 7060 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 7061 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 7062 ret void 7063} 7064 7065define void @s_shuffle_v3i16_v3i16__5_0_2() { 7066; GFX900-LABEL: s_shuffle_v3i16_v3i16__5_0_2: 7067; GFX900: ; %bb.0: 7068; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7069; GFX900-NEXT: ;;#ASMSTART 7070; GFX900-NEXT: ; def s[8:9] 7071; GFX900-NEXT: ;;#ASMEND 7072; GFX900-NEXT: ;;#ASMSTART 7073; GFX900-NEXT: ; def s[4:5] 7074; GFX900-NEXT: ;;#ASMEND 7075; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s8 7076; GFX900-NEXT: ;;#ASMSTART 7077; GFX900-NEXT: ; use s[8:9] 7078; GFX900-NEXT: ;;#ASMEND 7079; GFX900-NEXT: s_setpc_b64 s[30:31] 7080; 7081; GFX90A-LABEL: s_shuffle_v3i16_v3i16__5_0_2: 7082; GFX90A: ; %bb.0: 7083; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7084; GFX90A-NEXT: ;;#ASMSTART 7085; GFX90A-NEXT: ; def s[8:9] 7086; GFX90A-NEXT: ;;#ASMEND 7087; GFX90A-NEXT: ;;#ASMSTART 7088; GFX90A-NEXT: ; def s[4:5] 7089; GFX90A-NEXT: ;;#ASMEND 7090; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s8 7091; GFX90A-NEXT: ;;#ASMSTART 7092; GFX90A-NEXT: ; use s[8:9] 7093; GFX90A-NEXT: ;;#ASMEND 7094; GFX90A-NEXT: s_setpc_b64 s[30:31] 7095; 7096; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_0_2: 7097; GFX940: ; %bb.0: 7098; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7099; GFX940-NEXT: ;;#ASMSTART 7100; GFX940-NEXT: ; def s[8:9] 7101; GFX940-NEXT: ;;#ASMEND 7102; GFX940-NEXT: ;;#ASMSTART 7103; GFX940-NEXT: ; def s[0:1] 7104; GFX940-NEXT: ;;#ASMEND 7105; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s8 7106; GFX940-NEXT: ;;#ASMSTART 7107; GFX940-NEXT: ; use s[8:9] 7108; GFX940-NEXT: ;;#ASMEND 7109; GFX940-NEXT: s_setpc_b64 s[30:31] 7110 %vec0 = call <4 x i16> asm "; def 
$0", "=s"() 7111 %vec1 = call <4 x i16> asm "; def $0", "=s"() 7112 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 7113 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 7114 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 0, i32 2> 7115 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 7116 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 7117 ret void 7118} 7119 7120define void @s_shuffle_v3i16_v3i16__5_1_2() { 7121; GFX900-LABEL: s_shuffle_v3i16_v3i16__5_1_2: 7122; GFX900: ; %bb.0: 7123; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7124; GFX900-NEXT: ;;#ASMSTART 7125; GFX900-NEXT: ; def s[8:9] 7126; GFX900-NEXT: ;;#ASMEND 7127; GFX900-NEXT: ;;#ASMSTART 7128; GFX900-NEXT: ; def s[4:5] 7129; GFX900-NEXT: ;;#ASMEND 7130; GFX900-NEXT: s_pack_lh_b32_b16 s8, s5, s8 7131; GFX900-NEXT: ;;#ASMSTART 7132; GFX900-NEXT: ; use s[8:9] 7133; GFX900-NEXT: ;;#ASMEND 7134; GFX900-NEXT: s_setpc_b64 s[30:31] 7135; 7136; GFX90A-LABEL: s_shuffle_v3i16_v3i16__5_1_2: 7137; GFX90A: ; %bb.0: 7138; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7139; GFX90A-NEXT: ;;#ASMSTART 7140; GFX90A-NEXT: ; def s[8:9] 7141; GFX90A-NEXT: ;;#ASMEND 7142; GFX90A-NEXT: ;;#ASMSTART 7143; GFX90A-NEXT: ; def s[4:5] 7144; GFX90A-NEXT: ;;#ASMEND 7145; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s5, s8 7146; GFX90A-NEXT: ;;#ASMSTART 7147; GFX90A-NEXT: ; use s[8:9] 7148; GFX90A-NEXT: ;;#ASMEND 7149; GFX90A-NEXT: s_setpc_b64 s[30:31] 7150; 7151; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_1_2: 7152; GFX940: ; %bb.0: 7153; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7154; GFX940-NEXT: ;;#ASMSTART 7155; GFX940-NEXT: ; def s[8:9] 7156; GFX940-NEXT: ;;#ASMEND 7157; GFX940-NEXT: ;;#ASMSTART 7158; GFX940-NEXT: ; def s[0:1] 7159; GFX940-NEXT: ;;#ASMEND 7160; GFX940-NEXT: s_pack_lh_b32_b16 s8, s1, s8 7161; 
GFX940-NEXT: ;;#ASMSTART 7162; GFX940-NEXT: ; use s[8:9] 7163; GFX940-NEXT: ;;#ASMEND 7164; GFX940-NEXT: s_setpc_b64 s[30:31] 7165 %vec0 = call <4 x i16> asm "; def $0", "=s"() 7166 %vec1 = call <4 x i16> asm "; def $0", "=s"() 7167 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 7168 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 7169 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 1, i32 2> 7170 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 7171 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 7172 ret void 7173} 7174 7175define void @s_shuffle_v3i16_v3i16__5_3_2() { 7176; GFX900-LABEL: s_shuffle_v3i16_v3i16__5_3_2: 7177; GFX900: ; %bb.0: 7178; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7179; GFX900-NEXT: ;;#ASMSTART 7180; GFX900-NEXT: ; def s[8:9] 7181; GFX900-NEXT: ;;#ASMEND 7182; GFX900-NEXT: ;;#ASMSTART 7183; GFX900-NEXT: ; def s[4:5] 7184; GFX900-NEXT: ;;#ASMEND 7185; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4 7186; GFX900-NEXT: ;;#ASMSTART 7187; GFX900-NEXT: ; use s[8:9] 7188; GFX900-NEXT: ;;#ASMEND 7189; GFX900-NEXT: s_setpc_b64 s[30:31] 7190; 7191; GFX90A-LABEL: s_shuffle_v3i16_v3i16__5_3_2: 7192; GFX90A: ; %bb.0: 7193; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7194; GFX90A-NEXT: ;;#ASMSTART 7195; GFX90A-NEXT: ; def s[8:9] 7196; GFX90A-NEXT: ;;#ASMEND 7197; GFX90A-NEXT: ;;#ASMSTART 7198; GFX90A-NEXT: ; def s[4:5] 7199; GFX90A-NEXT: ;;#ASMEND 7200; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4 7201; GFX90A-NEXT: ;;#ASMSTART 7202; GFX90A-NEXT: ; use s[8:9] 7203; GFX90A-NEXT: ;;#ASMEND 7204; GFX90A-NEXT: s_setpc_b64 s[30:31] 7205; 7206; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_3_2: 7207; GFX940: ; %bb.0: 7208; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7209; GFX940-NEXT: ;;#ASMSTART 7210; GFX940-NEXT: ; def s[8:9] 7211; 
GFX940-NEXT: ;;#ASMEND 7212; GFX940-NEXT: ;;#ASMSTART 7213; GFX940-NEXT: ; def s[0:1] 7214; GFX940-NEXT: ;;#ASMEND 7215; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 7216; GFX940-NEXT: ;;#ASMSTART 7217; GFX940-NEXT: ; use s[8:9] 7218; GFX940-NEXT: ;;#ASMEND 7219; GFX940-NEXT: s_setpc_b64 s[30:31] 7220 %vec0 = call <4 x i16> asm "; def $0", "=s"() 7221 %vec1 = call <4 x i16> asm "; def $0", "=s"() 7222 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 7223 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 7224 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 3, i32 2> 7225 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 7226 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 7227 ret void 7228} 7229 7230define void @s_shuffle_v3i16_v3i16__5_4_2() { 7231; GFX900-LABEL: s_shuffle_v3i16_v3i16__5_4_2: 7232; GFX900: ; %bb.0: 7233; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7234; GFX900-NEXT: ;;#ASMSTART 7235; GFX900-NEXT: ; def s[8:9] 7236; GFX900-NEXT: ;;#ASMEND 7237; GFX900-NEXT: ;;#ASMSTART 7238; GFX900-NEXT: ; def s[4:5] 7239; GFX900-NEXT: ;;#ASMEND 7240; GFX900-NEXT: s_pack_lh_b32_b16 s8, s5, s4 7241; GFX900-NEXT: ;;#ASMSTART 7242; GFX900-NEXT: ; use s[8:9] 7243; GFX900-NEXT: ;;#ASMEND 7244; GFX900-NEXT: s_setpc_b64 s[30:31] 7245; 7246; GFX90A-LABEL: s_shuffle_v3i16_v3i16__5_4_2: 7247; GFX90A: ; %bb.0: 7248; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7249; GFX90A-NEXT: ;;#ASMSTART 7250; GFX90A-NEXT: ; def s[8:9] 7251; GFX90A-NEXT: ;;#ASMEND 7252; GFX90A-NEXT: ;;#ASMSTART 7253; GFX90A-NEXT: ; def s[4:5] 7254; GFX90A-NEXT: ;;#ASMEND 7255; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s5, s4 7256; GFX90A-NEXT: ;;#ASMSTART 7257; GFX90A-NEXT: ; use s[8:9] 7258; GFX90A-NEXT: ;;#ASMEND 7259; GFX90A-NEXT: s_setpc_b64 s[30:31] 7260; 7261; GFX940-LABEL: 
s_shuffle_v3i16_v3i16__5_4_2: 7262; GFX940: ; %bb.0: 7263; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7264; GFX940-NEXT: ;;#ASMSTART 7265; GFX940-NEXT: ; def s[8:9] 7266; GFX940-NEXT: ;;#ASMEND 7267; GFX940-NEXT: ;;#ASMSTART 7268; GFX940-NEXT: ; def s[0:1] 7269; GFX940-NEXT: ;;#ASMEND 7270; GFX940-NEXT: s_pack_lh_b32_b16 s8, s1, s0 7271; GFX940-NEXT: ;;#ASMSTART 7272; GFX940-NEXT: ; use s[8:9] 7273; GFX940-NEXT: ;;#ASMEND 7274; GFX940-NEXT: s_setpc_b64 s[30:31] 7275 %vec0 = call <4 x i16> asm "; def $0", "=s"() 7276 %vec1 = call <4 x i16> asm "; def $0", "=s"() 7277 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 7278 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 7279 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 4, i32 2> 7280 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 7281 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 7282 ret void 7283} 7284 7285define void @s_shuffle_v3i16_v3i16__u_3_3() { 7286; GFX9-LABEL: s_shuffle_v3i16_v3i16__u_3_3: 7287; GFX9: ; %bb.0: 7288; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7289; GFX9-NEXT: ;;#ASMSTART 7290; GFX9-NEXT: ; use s[8:9] 7291; GFX9-NEXT: ;;#ASMEND 7292; GFX9-NEXT: s_setpc_b64 s[30:31] 7293 %vec0 = call <4 x i16> asm "; def $0", "=s"() 7294 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 7295 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> <i32 poison, i32 3, i32 3> 7296 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 7297 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 7298 ret void 7299} 7300 7301define void @s_shuffle_v3i16_v3i16__0_3_3() { 7302; GFX900-LABEL: s_shuffle_v3i16_v3i16__0_3_3: 7303; GFX900: ; %bb.0: 7304; GFX900-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) 7305; GFX900-NEXT: ;;#ASMSTART 7306; GFX900-NEXT: ; def s[8:9] 7307; GFX900-NEXT: ;;#ASMEND 7308; GFX900-NEXT: ;;#ASMSTART 7309; GFX900-NEXT: ; use s[8:9] 7310; GFX900-NEXT: ;;#ASMEND 7311; GFX900-NEXT: s_setpc_b64 s[30:31] 7312; 7313; GFX90A-LABEL: s_shuffle_v3i16_v3i16__0_3_3: 7314; GFX90A: ; %bb.0: 7315; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7316; GFX90A-NEXT: ;;#ASMSTART 7317; GFX90A-NEXT: ; def s[8:9] 7318; GFX90A-NEXT: ;;#ASMEND 7319; GFX90A-NEXT: ;;#ASMSTART 7320; GFX90A-NEXT: ; use s[8:9] 7321; GFX90A-NEXT: ;;#ASMEND 7322; GFX90A-NEXT: s_setpc_b64 s[30:31] 7323; 7324; GFX940-LABEL: s_shuffle_v3i16_v3i16__0_3_3: 7325; GFX940: ; %bb.0: 7326; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7327; GFX940-NEXT: ;;#ASMSTART 7328; GFX940-NEXT: ; def s[8:9] 7329; GFX940-NEXT: ;;#ASMEND 7330; GFX940-NEXT: s_nop 0 7331; GFX940-NEXT: ;;#ASMSTART 7332; GFX940-NEXT: ; use s[8:9] 7333; GFX940-NEXT: ;;#ASMEND 7334; GFX940-NEXT: s_setpc_b64 s[30:31] 7335 %vec0 = call <4 x i16> asm "; def $0", "=s"() 7336 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 7337 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> <i32 0, i32 3, i32 3> 7338 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 7339 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 7340 ret void 7341} 7342 7343define void @s_shuffle_v3i16_v3i16__1_3_3() { 7344; GFX900-LABEL: s_shuffle_v3i16_v3i16__1_3_3: 7345; GFX900: ; %bb.0: 7346; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7347; GFX900-NEXT: ;;#ASMSTART 7348; GFX900-NEXT: ; def s[4:5] 7349; GFX900-NEXT: ;;#ASMEND 7350; GFX900-NEXT: s_lshr_b32 s8, s4, 16 7351; GFX900-NEXT: ;;#ASMSTART 7352; GFX900-NEXT: ; use s[8:9] 7353; GFX900-NEXT: ;;#ASMEND 7354; GFX900-NEXT: s_setpc_b64 s[30:31] 7355; 7356; GFX90A-LABEL: s_shuffle_v3i16_v3i16__1_3_3: 7357; GFX90A: ; %bb.0: 7358; 
GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7359; GFX90A-NEXT: ;;#ASMSTART 7360; GFX90A-NEXT: ; def s[4:5] 7361; GFX90A-NEXT: ;;#ASMEND 7362; GFX90A-NEXT: s_lshr_b32 s8, s4, 16 7363; GFX90A-NEXT: ;;#ASMSTART 7364; GFX90A-NEXT: ; use s[8:9] 7365; GFX90A-NEXT: ;;#ASMEND 7366; GFX90A-NEXT: s_setpc_b64 s[30:31] 7367; 7368; GFX940-LABEL: s_shuffle_v3i16_v3i16__1_3_3: 7369; GFX940: ; %bb.0: 7370; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7371; GFX940-NEXT: ;;#ASMSTART 7372; GFX940-NEXT: ; def s[0:1] 7373; GFX940-NEXT: ;;#ASMEND 7374; GFX940-NEXT: s_lshr_b32 s8, s0, 16 7375; GFX940-NEXT: ;;#ASMSTART 7376; GFX940-NEXT: ; use s[8:9] 7377; GFX940-NEXT: ;;#ASMEND 7378; GFX940-NEXT: s_setpc_b64 s[30:31] 7379 %vec0 = call <4 x i16> asm "; def $0", "=s"() 7380 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 7381 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> <i32 1, i32 3, i32 3> 7382 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 7383 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 7384 ret void 7385} 7386 7387define void @s_shuffle_v3i16_v3i16__2_3_3() { 7388; GFX900-LABEL: s_shuffle_v3i16_v3i16__2_3_3: 7389; GFX900: ; %bb.0: 7390; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7391; GFX900-NEXT: ;;#ASMSTART 7392; GFX900-NEXT: ; def s[4:5] 7393; GFX900-NEXT: ;;#ASMEND 7394; GFX900-NEXT: s_mov_b32 s8, s5 7395; GFX900-NEXT: ;;#ASMSTART 7396; GFX900-NEXT: ; use s[8:9] 7397; GFX900-NEXT: ;;#ASMEND 7398; GFX900-NEXT: s_setpc_b64 s[30:31] 7399; 7400; GFX90A-LABEL: s_shuffle_v3i16_v3i16__2_3_3: 7401; GFX90A: ; %bb.0: 7402; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7403; GFX90A-NEXT: ;;#ASMSTART 7404; GFX90A-NEXT: ; def s[4:5] 7405; GFX90A-NEXT: ;;#ASMEND 7406; GFX90A-NEXT: s_mov_b32 s8, s5 7407; GFX90A-NEXT: ;;#ASMSTART 7408; GFX90A-NEXT: ; use s[8:9] 7409; GFX90A-NEXT: ;;#ASMEND 7410; GFX90A-NEXT: 
s_setpc_b64 s[30:31] 7411; 7412; GFX940-LABEL: s_shuffle_v3i16_v3i16__2_3_3: 7413; GFX940: ; %bb.0: 7414; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7415; GFX940-NEXT: ;;#ASMSTART 7416; GFX940-NEXT: ; def s[0:1] 7417; GFX940-NEXT: ;;#ASMEND 7418; GFX940-NEXT: s_mov_b32 s8, s1 7419; GFX940-NEXT: ;;#ASMSTART 7420; GFX940-NEXT: ; use s[8:9] 7421; GFX940-NEXT: ;;#ASMEND 7422; GFX940-NEXT: s_setpc_b64 s[30:31] 7423 %vec0 = call <4 x i16> asm "; def $0", "=s"() 7424 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 7425 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> <i32 2, i32 3, i32 3> 7426 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 7427 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 7428 ret void 7429} 7430 7431define void @s_shuffle_v3i16_v3i16__3_3_3() { 7432; GFX9-LABEL: s_shuffle_v3i16_v3i16__3_3_3: 7433; GFX9: ; %bb.0: 7434; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7435; GFX9-NEXT: ;;#ASMSTART 7436; GFX9-NEXT: ; use s[8:9] 7437; GFX9-NEXT: ;;#ASMEND 7438; GFX9-NEXT: s_setpc_b64 s[30:31] 7439 %vec0 = call <4 x i16> asm "; def $0", "=s"() 7440 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 7441 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> <i32 3, i32 3, i32 3> 7442 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 7443 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 7444 ret void 7445} 7446 7447define void @s_shuffle_v3i16_v3i16__4_3_3() { 7448; GFX900-LABEL: s_shuffle_v3i16_v3i16__4_3_3: 7449; GFX900: ; %bb.0: 7450; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7451; GFX900-NEXT: ;;#ASMSTART 7452; GFX900-NEXT: ; def s[4:5] 7453; GFX900-NEXT: ;;#ASMEND 7454; GFX900-NEXT: s_lshr_b32 s5, s4, 16 7455; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4 7456; 
GFX900-NEXT: s_mov_b32 s9, s4 7457; GFX900-NEXT: ;;#ASMSTART 7458; GFX900-NEXT: ; use s[8:9] 7459; GFX900-NEXT: ;;#ASMEND 7460; GFX900-NEXT: s_setpc_b64 s[30:31] 7461; 7462; GFX90A-LABEL: s_shuffle_v3i16_v3i16__4_3_3: 7463; GFX90A: ; %bb.0: 7464; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7465; GFX90A-NEXT: ;;#ASMSTART 7466; GFX90A-NEXT: ; def s[4:5] 7467; GFX90A-NEXT: ;;#ASMEND 7468; GFX90A-NEXT: s_lshr_b32 s5, s4, 16 7469; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4 7470; GFX90A-NEXT: s_mov_b32 s9, s4 7471; GFX90A-NEXT: ;;#ASMSTART 7472; GFX90A-NEXT: ; use s[8:9] 7473; GFX90A-NEXT: ;;#ASMEND 7474; GFX90A-NEXT: s_setpc_b64 s[30:31] 7475; 7476; GFX940-LABEL: s_shuffle_v3i16_v3i16__4_3_3: 7477; GFX940: ; %bb.0: 7478; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7479; GFX940-NEXT: ;;#ASMSTART 7480; GFX940-NEXT: ; def s[0:1] 7481; GFX940-NEXT: ;;#ASMEND 7482; GFX940-NEXT: s_lshr_b32 s1, s0, 16 7483; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 7484; GFX940-NEXT: s_mov_b32 s9, s0 7485; GFX940-NEXT: ;;#ASMSTART 7486; GFX940-NEXT: ; use s[8:9] 7487; GFX940-NEXT: ;;#ASMEND 7488; GFX940-NEXT: s_setpc_b64 s[30:31] 7489 %vec0 = call <4 x i16> asm "; def $0", "=s"() 7490 %vec1 = call <4 x i16> asm "; def $0", "=s"() 7491 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 7492 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 7493 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 4, i32 3, i32 3> 7494 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 7495 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 7496 ret void 7497} 7498 7499define void @s_shuffle_v3i16_v3i16__5_3_3() { 7500; GFX900-LABEL: s_shuffle_v3i16_v3i16__5_3_3: 7501; GFX900: ; %bb.0: 7502; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7503; GFX900-NEXT: ;;#ASMSTART 7504; GFX900-NEXT: ; def s[4:5] 7505; 
GFX900-NEXT: ;;#ASMEND 7506; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4 7507; GFX900-NEXT: s_mov_b32 s9, s4 7508; GFX900-NEXT: ;;#ASMSTART 7509; GFX900-NEXT: ; use s[8:9] 7510; GFX900-NEXT: ;;#ASMEND 7511; GFX900-NEXT: s_setpc_b64 s[30:31] 7512; 7513; GFX90A-LABEL: s_shuffle_v3i16_v3i16__5_3_3: 7514; GFX90A: ; %bb.0: 7515; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7516; GFX90A-NEXT: ;;#ASMSTART 7517; GFX90A-NEXT: ; def s[4:5] 7518; GFX90A-NEXT: ;;#ASMEND 7519; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4 7520; GFX90A-NEXT: s_mov_b32 s9, s4 7521; GFX90A-NEXT: ;;#ASMSTART 7522; GFX90A-NEXT: ; use s[8:9] 7523; GFX90A-NEXT: ;;#ASMEND 7524; GFX90A-NEXT: s_setpc_b64 s[30:31] 7525; 7526; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_3_3: 7527; GFX940: ; %bb.0: 7528; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7529; GFX940-NEXT: ;;#ASMSTART 7530; GFX940-NEXT: ; def s[0:1] 7531; GFX940-NEXT: ;;#ASMEND 7532; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 7533; GFX940-NEXT: s_mov_b32 s9, s0 7534; GFX940-NEXT: ;;#ASMSTART 7535; GFX940-NEXT: ; use s[8:9] 7536; GFX940-NEXT: ;;#ASMEND 7537; GFX940-NEXT: s_setpc_b64 s[30:31] 7538 %vec0 = call <4 x i16> asm "; def $0", "=s"() 7539 %vec1 = call <4 x i16> asm "; def $0", "=s"() 7540 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 7541 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 7542 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 3, i32 3> 7543 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 7544 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 7545 ret void 7546} 7547 7548define void @s_shuffle_v3i16_v3i16__5_u_3() { 7549; GFX900-LABEL: s_shuffle_v3i16_v3i16__5_u_3: 7550; GFX900: ; %bb.0: 7551; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7552; GFX900-NEXT: ;;#ASMSTART 7553; GFX900-NEXT: ; def s[4:5] 7554; 
GFX900-NEXT: ;;#ASMEND 7555; GFX900-NEXT: s_mov_b32 s8, s5 7556; GFX900-NEXT: s_mov_b32 s9, s4 7557; GFX900-NEXT: ;;#ASMSTART 7558; GFX900-NEXT: ; use s[8:9] 7559; GFX900-NEXT: ;;#ASMEND 7560; GFX900-NEXT: s_setpc_b64 s[30:31] 7561; 7562; GFX90A-LABEL: s_shuffle_v3i16_v3i16__5_u_3: 7563; GFX90A: ; %bb.0: 7564; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7565; GFX90A-NEXT: ;;#ASMSTART 7566; GFX90A-NEXT: ; def s[4:5] 7567; GFX90A-NEXT: ;;#ASMEND 7568; GFX90A-NEXT: s_mov_b32 s8, s5 7569; GFX90A-NEXT: s_mov_b32 s9, s4 7570; GFX90A-NEXT: ;;#ASMSTART 7571; GFX90A-NEXT: ; use s[8:9] 7572; GFX90A-NEXT: ;;#ASMEND 7573; GFX90A-NEXT: s_setpc_b64 s[30:31] 7574; 7575; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_u_3: 7576; GFX940: ; %bb.0: 7577; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7578; GFX940-NEXT: ;;#ASMSTART 7579; GFX940-NEXT: ; def s[0:1] 7580; GFX940-NEXT: ;;#ASMEND 7581; GFX940-NEXT: s_mov_b32 s8, s1 7582; GFX940-NEXT: s_mov_b32 s9, s0 7583; GFX940-NEXT: ;;#ASMSTART 7584; GFX940-NEXT: ; use s[8:9] 7585; GFX940-NEXT: ;;#ASMEND 7586; GFX940-NEXT: s_setpc_b64 s[30:31] 7587 %vec0 = call <4 x i16> asm "; def $0", "=s"() 7588 %vec1 = call <4 x i16> asm "; def $0", "=s"() 7589 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 7590 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 7591 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 poison, i32 3> 7592 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 7593 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 7594 ret void 7595} 7596 7597define void @s_shuffle_v3i16_v3i16__5_0_3() { 7598; GFX900-LABEL: s_shuffle_v3i16_v3i16__5_0_3: 7599; GFX900: ; %bb.0: 7600; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7601; GFX900-NEXT: ;;#ASMSTART 7602; GFX900-NEXT: ; def s[4:5] 7603; GFX900-NEXT: ;;#ASMEND 7604; 
GFX900-NEXT: ;;#ASMSTART 7605; GFX900-NEXT: ; def s[6:7] 7606; GFX900-NEXT: ;;#ASMEND 7607; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s4 7608; GFX900-NEXT: s_mov_b32 s9, s6 7609; GFX900-NEXT: ;;#ASMSTART 7610; GFX900-NEXT: ; use s[8:9] 7611; GFX900-NEXT: ;;#ASMEND 7612; GFX900-NEXT: s_setpc_b64 s[30:31] 7613; 7614; GFX90A-LABEL: s_shuffle_v3i16_v3i16__5_0_3: 7615; GFX90A: ; %bb.0: 7616; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7617; GFX90A-NEXT: ;;#ASMSTART 7618; GFX90A-NEXT: ; def s[4:5] 7619; GFX90A-NEXT: ;;#ASMEND 7620; GFX90A-NEXT: ;;#ASMSTART 7621; GFX90A-NEXT: ; def s[6:7] 7622; GFX90A-NEXT: ;;#ASMEND 7623; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s4 7624; GFX90A-NEXT: s_mov_b32 s9, s6 7625; GFX90A-NEXT: ;;#ASMSTART 7626; GFX90A-NEXT: ; use s[8:9] 7627; GFX90A-NEXT: ;;#ASMEND 7628; GFX90A-NEXT: s_setpc_b64 s[30:31] 7629; 7630; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_0_3: 7631; GFX940: ; %bb.0: 7632; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7633; GFX940-NEXT: ;;#ASMSTART 7634; GFX940-NEXT: ; def s[0:1] 7635; GFX940-NEXT: ;;#ASMEND 7636; GFX940-NEXT: ;;#ASMSTART 7637; GFX940-NEXT: ; def s[2:3] 7638; GFX940-NEXT: ;;#ASMEND 7639; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 7640; GFX940-NEXT: s_mov_b32 s9, s2 7641; GFX940-NEXT: ;;#ASMSTART 7642; GFX940-NEXT: ; use s[8:9] 7643; GFX940-NEXT: ;;#ASMEND 7644; GFX940-NEXT: s_setpc_b64 s[30:31] 7645 %vec0 = call <4 x i16> asm "; def $0", "=s"() 7646 %vec1 = call <4 x i16> asm "; def $0", "=s"() 7647 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 7648 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 7649 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 0, i32 3> 7650 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 7651 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 7652 ret void 7653} 7654 7655define 
void @s_shuffle_v3i16_v3i16__5_1_3() { 7656; GFX900-LABEL: s_shuffle_v3i16_v3i16__5_1_3: 7657; GFX900: ; %bb.0: 7658; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7659; GFX900-NEXT: ;;#ASMSTART 7660; GFX900-NEXT: ; def s[4:5] 7661; GFX900-NEXT: ;;#ASMEND 7662; GFX900-NEXT: ;;#ASMSTART 7663; GFX900-NEXT: ; def s[6:7] 7664; GFX900-NEXT: ;;#ASMEND 7665; GFX900-NEXT: s_pack_lh_b32_b16 s8, s7, s4 7666; GFX900-NEXT: s_mov_b32 s9, s6 7667; GFX900-NEXT: ;;#ASMSTART 7668; GFX900-NEXT: ; use s[8:9] 7669; GFX900-NEXT: ;;#ASMEND 7670; GFX900-NEXT: s_setpc_b64 s[30:31] 7671; 7672; GFX90A-LABEL: s_shuffle_v3i16_v3i16__5_1_3: 7673; GFX90A: ; %bb.0: 7674; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7675; GFX90A-NEXT: ;;#ASMSTART 7676; GFX90A-NEXT: ; def s[4:5] 7677; GFX90A-NEXT: ;;#ASMEND 7678; GFX90A-NEXT: ;;#ASMSTART 7679; GFX90A-NEXT: ; def s[6:7] 7680; GFX90A-NEXT: ;;#ASMEND 7681; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s7, s4 7682; GFX90A-NEXT: s_mov_b32 s9, s6 7683; GFX90A-NEXT: ;;#ASMSTART 7684; GFX90A-NEXT: ; use s[8:9] 7685; GFX90A-NEXT: ;;#ASMEND 7686; GFX90A-NEXT: s_setpc_b64 s[30:31] 7687; 7688; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_1_3: 7689; GFX940: ; %bb.0: 7690; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7691; GFX940-NEXT: ;;#ASMSTART 7692; GFX940-NEXT: ; def s[0:1] 7693; GFX940-NEXT: ;;#ASMEND 7694; GFX940-NEXT: ;;#ASMSTART 7695; GFX940-NEXT: ; def s[2:3] 7696; GFX940-NEXT: ;;#ASMEND 7697; GFX940-NEXT: s_pack_lh_b32_b16 s8, s3, s0 7698; GFX940-NEXT: s_mov_b32 s9, s2 7699; GFX940-NEXT: ;;#ASMSTART 7700; GFX940-NEXT: ; use s[8:9] 7701; GFX940-NEXT: ;;#ASMEND 7702; GFX940-NEXT: s_setpc_b64 s[30:31] 7703 %vec0 = call <4 x i16> asm "; def $0", "=s"() 7704 %vec1 = call <4 x i16> asm "; def $0", "=s"() 7705 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 7706 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 7707 %shuf = shufflevector <3 x i16> %extract3, <3 x 
i16> %extract31, <3 x i32> <i32 5, i32 1, i32 3> 7708 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 7709 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 7710 ret void 7711} 7712 7713define void @s_shuffle_v3i16_v3i16__5_2_3() { 7714; GFX900-LABEL: s_shuffle_v3i16_v3i16__5_2_3: 7715; GFX900: ; %bb.0: 7716; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7717; GFX900-NEXT: ;;#ASMSTART 7718; GFX900-NEXT: ; def s[4:5] 7719; GFX900-NEXT: ;;#ASMEND 7720; GFX900-NEXT: ;;#ASMSTART 7721; GFX900-NEXT: ; def s[6:7] 7722; GFX900-NEXT: ;;#ASMEND 7723; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s5 7724; GFX900-NEXT: s_mov_b32 s9, s6 7725; GFX900-NEXT: ;;#ASMSTART 7726; GFX900-NEXT: ; use s[8:9] 7727; GFX900-NEXT: ;;#ASMEND 7728; GFX900-NEXT: s_setpc_b64 s[30:31] 7729; 7730; GFX90A-LABEL: s_shuffle_v3i16_v3i16__5_2_3: 7731; GFX90A: ; %bb.0: 7732; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7733; GFX90A-NEXT: ;;#ASMSTART 7734; GFX90A-NEXT: ; def s[4:5] 7735; GFX90A-NEXT: ;;#ASMEND 7736; GFX90A-NEXT: ;;#ASMSTART 7737; GFX90A-NEXT: ; def s[6:7] 7738; GFX90A-NEXT: ;;#ASMEND 7739; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s5 7740; GFX90A-NEXT: s_mov_b32 s9, s6 7741; GFX90A-NEXT: ;;#ASMSTART 7742; GFX90A-NEXT: ; use s[8:9] 7743; GFX90A-NEXT: ;;#ASMEND 7744; GFX90A-NEXT: s_setpc_b64 s[30:31] 7745; 7746; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_2_3: 7747; GFX940: ; %bb.0: 7748; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7749; GFX940-NEXT: ;;#ASMSTART 7750; GFX940-NEXT: ; def s[0:1] 7751; GFX940-NEXT: ;;#ASMEND 7752; GFX940-NEXT: ;;#ASMSTART 7753; GFX940-NEXT: ; def s[2:3] 7754; GFX940-NEXT: ;;#ASMEND 7755; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s1 7756; GFX940-NEXT: s_mov_b32 s9, s2 7757; GFX940-NEXT: ;;#ASMSTART 7758; GFX940-NEXT: ; use s[8:9] 7759; GFX940-NEXT: ;;#ASMEND 7760; GFX940-NEXT: s_setpc_b64 s[30:31] 7761 %vec0 = call <4 x i16> asm "; def $0", "=s"() 7762 %vec1 = call <4 x i16> asm "; 
def $0", "=s"() 7763 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 7764 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 7765 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 2, i32 3> 7766 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 7767 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 7768 ret void 7769} 7770 7771define void @s_shuffle_v3i16_v3i16__5_4_3() { 7772; GFX900-LABEL: s_shuffle_v3i16_v3i16__5_4_3: 7773; GFX900: ; %bb.0: 7774; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7775; GFX900-NEXT: ;;#ASMSTART 7776; GFX900-NEXT: ; def s[4:5] 7777; GFX900-NEXT: ;;#ASMEND 7778; GFX900-NEXT: s_pack_lh_b32_b16 s8, s5, s4 7779; GFX900-NEXT: s_mov_b32 s9, s4 7780; GFX900-NEXT: ;;#ASMSTART 7781; GFX900-NEXT: ; use s[8:9] 7782; GFX900-NEXT: ;;#ASMEND 7783; GFX900-NEXT: s_setpc_b64 s[30:31] 7784; 7785; GFX90A-LABEL: s_shuffle_v3i16_v3i16__5_4_3: 7786; GFX90A: ; %bb.0: 7787; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7788; GFX90A-NEXT: ;;#ASMSTART 7789; GFX90A-NEXT: ; def s[4:5] 7790; GFX90A-NEXT: ;;#ASMEND 7791; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s5, s4 7792; GFX90A-NEXT: s_mov_b32 s9, s4 7793; GFX90A-NEXT: ;;#ASMSTART 7794; GFX90A-NEXT: ; use s[8:9] 7795; GFX90A-NEXT: ;;#ASMEND 7796; GFX90A-NEXT: s_setpc_b64 s[30:31] 7797; 7798; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_4_3: 7799; GFX940: ; %bb.0: 7800; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7801; GFX940-NEXT: ;;#ASMSTART 7802; GFX940-NEXT: ; def s[0:1] 7803; GFX940-NEXT: ;;#ASMEND 7804; GFX940-NEXT: s_pack_lh_b32_b16 s8, s1, s0 7805; GFX940-NEXT: s_mov_b32 s9, s0 7806; GFX940-NEXT: ;;#ASMSTART 7807; GFX940-NEXT: ; use s[8:9] 7808; GFX940-NEXT: ;;#ASMEND 7809; GFX940-NEXT: s_setpc_b64 s[30:31] 7810 %vec0 = call <4 x i16> asm "; def $0", "=s"() 7811 %vec1 = call <4 x i16> asm "; def $0", 
"=s"() 7812 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 7813 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 7814 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 4, i32 3> 7815 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 7816 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 7817 ret void 7818} 7819 7820define void @s_shuffle_v3i16_v3i16__u_4_4() { 7821; GFX9-LABEL: s_shuffle_v3i16_v3i16__u_4_4: 7822; GFX9: ; %bb.0: 7823; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7824; GFX9-NEXT: ;;#ASMSTART 7825; GFX9-NEXT: ; def s[8:9] 7826; GFX9-NEXT: ;;#ASMEND 7827; GFX9-NEXT: s_lshr_b32 s9, s8, 16 7828; GFX9-NEXT: ;;#ASMSTART 7829; GFX9-NEXT: ; use s[8:9] 7830; GFX9-NEXT: ;;#ASMEND 7831; GFX9-NEXT: s_setpc_b64 s[30:31] 7832 %vec0 = call <4 x i16> asm "; def $0", "=s"() 7833 %vec1 = call <4 x i16> asm "; def $0", "=s"() 7834 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 7835 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 7836 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 poison, i32 4, i32 4> 7837 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 7838 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 7839 ret void 7840} 7841 7842define void @s_shuffle_v3i16_v3i16__0_4_4() { 7843; GFX900-LABEL: s_shuffle_v3i16_v3i16__0_4_4: 7844; GFX900: ; %bb.0: 7845; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7846; GFX900-NEXT: ;;#ASMSTART 7847; GFX900-NEXT: ; def s[4:5] 7848; GFX900-NEXT: ;;#ASMEND 7849; GFX900-NEXT: ;;#ASMSTART 7850; GFX900-NEXT: ; def s[6:7] 7851; GFX900-NEXT: ;;#ASMEND 7852; GFX900-NEXT: s_pack_lh_b32_b16 s8, s4, s6 7853; GFX900-NEXT: s_lshr_b32 s9, 
s6, 16 7854; GFX900-NEXT: ;;#ASMSTART 7855; GFX900-NEXT: ; use s[8:9] 7856; GFX900-NEXT: ;;#ASMEND 7857; GFX900-NEXT: s_setpc_b64 s[30:31] 7858; 7859; GFX90A-LABEL: s_shuffle_v3i16_v3i16__0_4_4: 7860; GFX90A: ; %bb.0: 7861; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7862; GFX90A-NEXT: ;;#ASMSTART 7863; GFX90A-NEXT: ; def s[4:5] 7864; GFX90A-NEXT: ;;#ASMEND 7865; GFX90A-NEXT: ;;#ASMSTART 7866; GFX90A-NEXT: ; def s[6:7] 7867; GFX90A-NEXT: ;;#ASMEND 7868; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s4, s6 7869; GFX90A-NEXT: s_lshr_b32 s9, s6, 16 7870; GFX90A-NEXT: ;;#ASMSTART 7871; GFX90A-NEXT: ; use s[8:9] 7872; GFX90A-NEXT: ;;#ASMEND 7873; GFX90A-NEXT: s_setpc_b64 s[30:31] 7874; 7875; GFX940-LABEL: s_shuffle_v3i16_v3i16__0_4_4: 7876; GFX940: ; %bb.0: 7877; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7878; GFX940-NEXT: ;;#ASMSTART 7879; GFX940-NEXT: ; def s[0:1] 7880; GFX940-NEXT: ;;#ASMEND 7881; GFX940-NEXT: ;;#ASMSTART 7882; GFX940-NEXT: ; def s[2:3] 7883; GFX940-NEXT: ;;#ASMEND 7884; GFX940-NEXT: s_pack_lh_b32_b16 s8, s0, s2 7885; GFX940-NEXT: s_lshr_b32 s9, s2, 16 7886; GFX940-NEXT: ;;#ASMSTART 7887; GFX940-NEXT: ; use s[8:9] 7888; GFX940-NEXT: ;;#ASMEND 7889; GFX940-NEXT: s_setpc_b64 s[30:31] 7890 %vec0 = call <4 x i16> asm "; def $0", "=s"() 7891 %vec1 = call <4 x i16> asm "; def $0", "=s"() 7892 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 7893 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 7894 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 0, i32 4, i32 4> 7895 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 7896 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 7897 ret void 7898} 7899 7900define void @s_shuffle_v3i16_v3i16__1_4_4() { 7901; GFX900-LABEL: s_shuffle_v3i16_v3i16__1_4_4: 7902; GFX900: ; %bb.0: 7903; GFX900-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) 7904; GFX900-NEXT: ;;#ASMSTART 7905; GFX900-NEXT: ; def s[4:5] 7906; GFX900-NEXT: ;;#ASMEND 7907; GFX900-NEXT: ;;#ASMSTART 7908; GFX900-NEXT: ; def s[6:7] 7909; GFX900-NEXT: ;;#ASMEND 7910; GFX900-NEXT: s_pack_hh_b32_b16 s8, s4, s6 7911; GFX900-NEXT: s_lshr_b32 s9, s6, 16 7912; GFX900-NEXT: ;;#ASMSTART 7913; GFX900-NEXT: ; use s[8:9] 7914; GFX900-NEXT: ;;#ASMEND 7915; GFX900-NEXT: s_setpc_b64 s[30:31] 7916; 7917; GFX90A-LABEL: s_shuffle_v3i16_v3i16__1_4_4: 7918; GFX90A: ; %bb.0: 7919; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7920; GFX90A-NEXT: ;;#ASMSTART 7921; GFX90A-NEXT: ; def s[4:5] 7922; GFX90A-NEXT: ;;#ASMEND 7923; GFX90A-NEXT: ;;#ASMSTART 7924; GFX90A-NEXT: ; def s[6:7] 7925; GFX90A-NEXT: ;;#ASMEND 7926; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s4, s6 7927; GFX90A-NEXT: s_lshr_b32 s9, s6, 16 7928; GFX90A-NEXT: ;;#ASMSTART 7929; GFX90A-NEXT: ; use s[8:9] 7930; GFX90A-NEXT: ;;#ASMEND 7931; GFX90A-NEXT: s_setpc_b64 s[30:31] 7932; 7933; GFX940-LABEL: s_shuffle_v3i16_v3i16__1_4_4: 7934; GFX940: ; %bb.0: 7935; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7936; GFX940-NEXT: ;;#ASMSTART 7937; GFX940-NEXT: ; def s[0:1] 7938; GFX940-NEXT: ;;#ASMEND 7939; GFX940-NEXT: ;;#ASMSTART 7940; GFX940-NEXT: ; def s[2:3] 7941; GFX940-NEXT: ;;#ASMEND 7942; GFX940-NEXT: s_pack_hh_b32_b16 s8, s0, s2 7943; GFX940-NEXT: s_lshr_b32 s9, s2, 16 7944; GFX940-NEXT: ;;#ASMSTART 7945; GFX940-NEXT: ; use s[8:9] 7946; GFX940-NEXT: ;;#ASMEND 7947; GFX940-NEXT: s_setpc_b64 s[30:31] 7948 %vec0 = call <4 x i16> asm "; def $0", "=s"() 7949 %vec1 = call <4 x i16> asm "; def $0", "=s"() 7950 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 7951 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 7952 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 1, i32 4, i32 4> 7953 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 
1, i32 2, i32 poison> 7954 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 7955 ret void 7956} 7957 7958define void @s_shuffle_v3i16_v3i16__2_4_4() { 7959; GFX900-LABEL: s_shuffle_v3i16_v3i16__2_4_4: 7960; GFX900: ; %bb.0: 7961; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7962; GFX900-NEXT: ;;#ASMSTART 7963; GFX900-NEXT: ; def s[4:5] 7964; GFX900-NEXT: ;;#ASMEND 7965; GFX900-NEXT: ;;#ASMSTART 7966; GFX900-NEXT: ; def s[6:7] 7967; GFX900-NEXT: ;;#ASMEND 7968; GFX900-NEXT: s_pack_lh_b32_b16 s8, s5, s6 7969; GFX900-NEXT: s_lshr_b32 s9, s6, 16 7970; GFX900-NEXT: ;;#ASMSTART 7971; GFX900-NEXT: ; use s[8:9] 7972; GFX900-NEXT: ;;#ASMEND 7973; GFX900-NEXT: s_setpc_b64 s[30:31] 7974; 7975; GFX90A-LABEL: s_shuffle_v3i16_v3i16__2_4_4: 7976; GFX90A: ; %bb.0: 7977; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7978; GFX90A-NEXT: ;;#ASMSTART 7979; GFX90A-NEXT: ; def s[4:5] 7980; GFX90A-NEXT: ;;#ASMEND 7981; GFX90A-NEXT: ;;#ASMSTART 7982; GFX90A-NEXT: ; def s[6:7] 7983; GFX90A-NEXT: ;;#ASMEND 7984; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s5, s6 7985; GFX90A-NEXT: s_lshr_b32 s9, s6, 16 7986; GFX90A-NEXT: ;;#ASMSTART 7987; GFX90A-NEXT: ; use s[8:9] 7988; GFX90A-NEXT: ;;#ASMEND 7989; GFX90A-NEXT: s_setpc_b64 s[30:31] 7990; 7991; GFX940-LABEL: s_shuffle_v3i16_v3i16__2_4_4: 7992; GFX940: ; %bb.0: 7993; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7994; GFX940-NEXT: ;;#ASMSTART 7995; GFX940-NEXT: ; def s[0:1] 7996; GFX940-NEXT: ;;#ASMEND 7997; GFX940-NEXT: ;;#ASMSTART 7998; GFX940-NEXT: ; def s[2:3] 7999; GFX940-NEXT: ;;#ASMEND 8000; GFX940-NEXT: s_pack_lh_b32_b16 s8, s1, s2 8001; GFX940-NEXT: s_lshr_b32 s9, s2, 16 8002; GFX940-NEXT: ;;#ASMSTART 8003; GFX940-NEXT: ; use s[8:9] 8004; GFX940-NEXT: ;;#ASMEND 8005; GFX940-NEXT: s_setpc_b64 s[30:31] 8006 %vec0 = call <4 x i16> asm "; def $0", "=s"() 8007 %vec1 = call <4 x i16> asm "; def $0", "=s"() 8008 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 8009 
%extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 8010 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 2, i32 4, i32 4> 8011 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 8012 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 8013 ret void 8014} 8015 8016define void @s_shuffle_v3i16_v3i16__3_4_4() { 8017; GFX9-LABEL: s_shuffle_v3i16_v3i16__3_4_4: 8018; GFX9: ; %bb.0: 8019; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8020; GFX9-NEXT: ;;#ASMSTART 8021; GFX9-NEXT: ; def s[8:9] 8022; GFX9-NEXT: ;;#ASMEND 8023; GFX9-NEXT: s_lshr_b32 s9, s8, 16 8024; GFX9-NEXT: ;;#ASMSTART 8025; GFX9-NEXT: ; use s[8:9] 8026; GFX9-NEXT: ;;#ASMEND 8027; GFX9-NEXT: s_setpc_b64 s[30:31] 8028 %vec0 = call <4 x i16> asm "; def $0", "=s"() 8029 %vec1 = call <4 x i16> asm "; def $0", "=s"() 8030 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 8031 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 8032 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 3, i32 4, i32 4> 8033 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 8034 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 8035 ret void 8036} 8037 8038define void @s_shuffle_v3i16_v3i16__4_4_4() { 8039; GFX900-LABEL: s_shuffle_v3i16_v3i16__4_4_4: 8040; GFX900: ; %bb.0: 8041; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8042; GFX900-NEXT: ;;#ASMSTART 8043; GFX900-NEXT: ; def s[4:5] 8044; GFX900-NEXT: ;;#ASMEND 8045; GFX900-NEXT: s_lshr_b32 s9, s4, 16 8046; GFX900-NEXT: s_pack_hh_b32_b16 s8, s4, s4 8047; GFX900-NEXT: ;;#ASMSTART 8048; GFX900-NEXT: ; use s[8:9] 8049; GFX900-NEXT: ;;#ASMEND 8050; GFX900-NEXT: s_setpc_b64 s[30:31] 8051; 8052; GFX90A-LABEL: s_shuffle_v3i16_v3i16__4_4_4: 8053; 
GFX90A: ; %bb.0: 8054; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8055; GFX90A-NEXT: ;;#ASMSTART 8056; GFX90A-NEXT: ; def s[4:5] 8057; GFX90A-NEXT: ;;#ASMEND 8058; GFX90A-NEXT: s_lshr_b32 s9, s4, 16 8059; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s4, s4 8060; GFX90A-NEXT: ;;#ASMSTART 8061; GFX90A-NEXT: ; use s[8:9] 8062; GFX90A-NEXT: ;;#ASMEND 8063; GFX90A-NEXT: s_setpc_b64 s[30:31] 8064; 8065; GFX940-LABEL: s_shuffle_v3i16_v3i16__4_4_4: 8066; GFX940: ; %bb.0: 8067; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8068; GFX940-NEXT: ;;#ASMSTART 8069; GFX940-NEXT: ; def s[0:1] 8070; GFX940-NEXT: ;;#ASMEND 8071; GFX940-NEXT: s_lshr_b32 s9, s0, 16 8072; GFX940-NEXT: s_pack_hh_b32_b16 s8, s0, s0 8073; GFX940-NEXT: ;;#ASMSTART 8074; GFX940-NEXT: ; use s[8:9] 8075; GFX940-NEXT: ;;#ASMEND 8076; GFX940-NEXT: s_setpc_b64 s[30:31] 8077 %vec0 = call <4 x i16> asm "; def $0", "=s"() 8078 %vec1 = call <4 x i16> asm "; def $0", "=s"() 8079 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 8080 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 8081 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 4, i32 4, i32 4> 8082 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 8083 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 8084 ret void 8085} 8086 8087define void @s_shuffle_v3i16_v3i16__5_4_4() { 8088; GFX900-LABEL: s_shuffle_v3i16_v3i16__5_4_4: 8089; GFX900: ; %bb.0: 8090; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8091; GFX900-NEXT: ;;#ASMSTART 8092; GFX900-NEXT: ; def s[4:5] 8093; GFX900-NEXT: ;;#ASMEND 8094; GFX900-NEXT: s_pack_lh_b32_b16 s8, s5, s4 8095; GFX900-NEXT: s_lshr_b32 s9, s4, 16 8096; GFX900-NEXT: ;;#ASMSTART 8097; GFX900-NEXT: ; use s[8:9] 8098; GFX900-NEXT: ;;#ASMEND 8099; GFX900-NEXT: s_setpc_b64 s[30:31] 8100; 8101; GFX90A-LABEL: s_shuffle_v3i16_v3i16__5_4_4: 
8102; GFX90A: ; %bb.0: 8103; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8104; GFX90A-NEXT: ;;#ASMSTART 8105; GFX90A-NEXT: ; def s[4:5] 8106; GFX90A-NEXT: ;;#ASMEND 8107; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s5, s4 8108; GFX90A-NEXT: s_lshr_b32 s9, s4, 16 8109; GFX90A-NEXT: ;;#ASMSTART 8110; GFX90A-NEXT: ; use s[8:9] 8111; GFX90A-NEXT: ;;#ASMEND 8112; GFX90A-NEXT: s_setpc_b64 s[30:31] 8113; 8114; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_4_4: 8115; GFX940: ; %bb.0: 8116; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8117; GFX940-NEXT: ;;#ASMSTART 8118; GFX940-NEXT: ; def s[0:1] 8119; GFX940-NEXT: ;;#ASMEND 8120; GFX940-NEXT: s_pack_lh_b32_b16 s8, s1, s0 8121; GFX940-NEXT: s_lshr_b32 s9, s0, 16 8122; GFX940-NEXT: ;;#ASMSTART 8123; GFX940-NEXT: ; use s[8:9] 8124; GFX940-NEXT: ;;#ASMEND 8125; GFX940-NEXT: s_setpc_b64 s[30:31] 8126 %vec0 = call <4 x i16> asm "; def $0", "=s"() 8127 %vec1 = call <4 x i16> asm "; def $0", "=s"() 8128 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 8129 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 8130 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 4, i32 4> 8131 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 8132 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 8133 ret void 8134} 8135 8136define void @s_shuffle_v3i16_v3i16__5_u_4() { 8137; GFX900-LABEL: s_shuffle_v3i16_v3i16__5_u_4: 8138; GFX900: ; %bb.0: 8139; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8140; GFX900-NEXT: ;;#ASMSTART 8141; GFX900-NEXT: ; def s[4:5] 8142; GFX900-NEXT: ;;#ASMEND 8143; GFX900-NEXT: s_lshr_b32 s9, s4, 16 8144; GFX900-NEXT: s_mov_b32 s8, s5 8145; GFX900-NEXT: ;;#ASMSTART 8146; GFX900-NEXT: ; use s[8:9] 8147; GFX900-NEXT: ;;#ASMEND 8148; GFX900-NEXT: s_setpc_b64 s[30:31] 8149; 8150; GFX90A-LABEL: s_shuffle_v3i16_v3i16__5_u_4: 8151; 
GFX90A: ; %bb.0: 8152; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8153; GFX90A-NEXT: ;;#ASMSTART 8154; GFX90A-NEXT: ; def s[4:5] 8155; GFX90A-NEXT: ;;#ASMEND 8156; GFX90A-NEXT: s_lshr_b32 s9, s4, 16 8157; GFX90A-NEXT: s_mov_b32 s8, s5 8158; GFX90A-NEXT: ;;#ASMSTART 8159; GFX90A-NEXT: ; use s[8:9] 8160; GFX90A-NEXT: ;;#ASMEND 8161; GFX90A-NEXT: s_setpc_b64 s[30:31] 8162; 8163; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_u_4: 8164; GFX940: ; %bb.0: 8165; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8166; GFX940-NEXT: ;;#ASMSTART 8167; GFX940-NEXT: ; def s[0:1] 8168; GFX940-NEXT: ;;#ASMEND 8169; GFX940-NEXT: s_lshr_b32 s9, s0, 16 8170; GFX940-NEXT: s_mov_b32 s8, s1 8171; GFX940-NEXT: ;;#ASMSTART 8172; GFX940-NEXT: ; use s[8:9] 8173; GFX940-NEXT: ;;#ASMEND 8174; GFX940-NEXT: s_setpc_b64 s[30:31] 8175 %vec0 = call <4 x i16> asm "; def $0", "=s"() 8176 %vec1 = call <4 x i16> asm "; def $0", "=s"() 8177 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 8178 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 8179 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 poison, i32 4> 8180 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 8181 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 8182 ret void 8183} 8184 8185define void @s_shuffle_v3i16_v3i16__5_0_4() { 8186; GFX900-LABEL: s_shuffle_v3i16_v3i16__5_0_4: 8187; GFX900: ; %bb.0: 8188; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8189; GFX900-NEXT: ;;#ASMSTART 8190; GFX900-NEXT: ; def s[4:5] 8191; GFX900-NEXT: ;;#ASMEND 8192; GFX900-NEXT: ;;#ASMSTART 8193; GFX900-NEXT: ; def s[6:7] 8194; GFX900-NEXT: ;;#ASMEND 8195; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s4 8196; GFX900-NEXT: s_lshr_b32 s9, s6, 16 8197; GFX900-NEXT: ;;#ASMSTART 8198; GFX900-NEXT: ; use s[8:9] 8199; GFX900-NEXT: ;;#ASMEND 8200; GFX900-NEXT: 
s_setpc_b64 s[30:31] 8201; 8202; GFX90A-LABEL: s_shuffle_v3i16_v3i16__5_0_4: 8203; GFX90A: ; %bb.0: 8204; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8205; GFX90A-NEXT: ;;#ASMSTART 8206; GFX90A-NEXT: ; def s[4:5] 8207; GFX90A-NEXT: ;;#ASMEND 8208; GFX90A-NEXT: ;;#ASMSTART 8209; GFX90A-NEXT: ; def s[6:7] 8210; GFX90A-NEXT: ;;#ASMEND 8211; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s4 8212; GFX90A-NEXT: s_lshr_b32 s9, s6, 16 8213; GFX90A-NEXT: ;;#ASMSTART 8214; GFX90A-NEXT: ; use s[8:9] 8215; GFX90A-NEXT: ;;#ASMEND 8216; GFX90A-NEXT: s_setpc_b64 s[30:31] 8217; 8218; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_0_4: 8219; GFX940: ; %bb.0: 8220; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8221; GFX940-NEXT: ;;#ASMSTART 8222; GFX940-NEXT: ; def s[0:1] 8223; GFX940-NEXT: ;;#ASMEND 8224; GFX940-NEXT: ;;#ASMSTART 8225; GFX940-NEXT: ; def s[2:3] 8226; GFX940-NEXT: ;;#ASMEND 8227; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 8228; GFX940-NEXT: s_lshr_b32 s9, s2, 16 8229; GFX940-NEXT: ;;#ASMSTART 8230; GFX940-NEXT: ; use s[8:9] 8231; GFX940-NEXT: ;;#ASMEND 8232; GFX940-NEXT: s_setpc_b64 s[30:31] 8233 %vec0 = call <4 x i16> asm "; def $0", "=s"() 8234 %vec1 = call <4 x i16> asm "; def $0", "=s"() 8235 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 8236 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 8237 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 0, i32 4> 8238 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 8239 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 8240 ret void 8241} 8242 8243define void @s_shuffle_v3i16_v3i16__5_1_4() { 8244; GFX900-LABEL: s_shuffle_v3i16_v3i16__5_1_4: 8245; GFX900: ; %bb.0: 8246; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8247; GFX900-NEXT: ;;#ASMSTART 8248; GFX900-NEXT: ; def s[4:5] 8249; GFX900-NEXT: ;;#ASMEND 8250; 
GFX900-NEXT: ;;#ASMSTART 8251; GFX900-NEXT: ; def s[6:7] 8252; GFX900-NEXT: ;;#ASMEND 8253; GFX900-NEXT: s_pack_lh_b32_b16 s8, s7, s4 8254; GFX900-NEXT: s_lshr_b32 s9, s6, 16 8255; GFX900-NEXT: ;;#ASMSTART 8256; GFX900-NEXT: ; use s[8:9] 8257; GFX900-NEXT: ;;#ASMEND 8258; GFX900-NEXT: s_setpc_b64 s[30:31] 8259; 8260; GFX90A-LABEL: s_shuffle_v3i16_v3i16__5_1_4: 8261; GFX90A: ; %bb.0: 8262; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8263; GFX90A-NEXT: ;;#ASMSTART 8264; GFX90A-NEXT: ; def s[4:5] 8265; GFX90A-NEXT: ;;#ASMEND 8266; GFX90A-NEXT: ;;#ASMSTART 8267; GFX90A-NEXT: ; def s[6:7] 8268; GFX90A-NEXT: ;;#ASMEND 8269; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s7, s4 8270; GFX90A-NEXT: s_lshr_b32 s9, s6, 16 8271; GFX90A-NEXT: ;;#ASMSTART 8272; GFX90A-NEXT: ; use s[8:9] 8273; GFX90A-NEXT: ;;#ASMEND 8274; GFX90A-NEXT: s_setpc_b64 s[30:31] 8275; 8276; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_1_4: 8277; GFX940: ; %bb.0: 8278; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8279; GFX940-NEXT: ;;#ASMSTART 8280; GFX940-NEXT: ; def s[0:1] 8281; GFX940-NEXT: ;;#ASMEND 8282; GFX940-NEXT: ;;#ASMSTART 8283; GFX940-NEXT: ; def s[2:3] 8284; GFX940-NEXT: ;;#ASMEND 8285; GFX940-NEXT: s_pack_lh_b32_b16 s8, s3, s0 8286; GFX940-NEXT: s_lshr_b32 s9, s2, 16 8287; GFX940-NEXT: ;;#ASMSTART 8288; GFX940-NEXT: ; use s[8:9] 8289; GFX940-NEXT: ;;#ASMEND 8290; GFX940-NEXT: s_setpc_b64 s[30:31] 8291 %vec0 = call <4 x i16> asm "; def $0", "=s"() 8292 %vec1 = call <4 x i16> asm "; def $0", "=s"() 8293 %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 8294 %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2> 8295 %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 1, i32 4> 8296 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 8297 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 8298 ret void 8299} 

; Mask <5, 2, 4>: lanes 0 and 2 read elements 2 and 1 of the second source,
; lane 1 reads element 2 of the first source, so two asm defs appear below.
define void @s_shuffle_v3i16_v3i16__5_2_4() {
; GFX900-LABEL: s_shuffle_v3i16_v3i16__5_2_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[6:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s5
; GFX900-NEXT: s_lshr_b32 s9, s6, 16
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v3i16_v3i16__5_2_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[6:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s5
; GFX90A-NEXT: s_lshr_b32 s9, s6, 16
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_2_4:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[0:1]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[2:3]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s1
; GFX940-NEXT: s_lshr_b32 s9, s2, 16
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; use s[8:9]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %wide.a = call <4 x i16> asm "; def $0", "=s"()
  %wide.b = call <4 x i16> asm "; def $0", "=s"()
  %a = shufflevector <4 x i16> %wide.a, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %b = shufflevector <4 x i16> %wide.b, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %sel = shufflevector <3 x i16> %a, <3 x i16> %b, <3 x i32> <i32 5, i32 2, i32 4>
  %sel.wide = shufflevector <3 x i16> %sel, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %sel.wide)
  ret void
}

; Mask <5, 3, 4>: every lane reads the second source (elements 2, 0, 1);
; the expected output contains a single asm def.
define void @s_shuffle_v3i16_v3i16__5_3_4() {
; GFX900-LABEL: s_shuffle_v3i16_v3i16__5_3_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4
; GFX900-NEXT: s_lshr_b32 s9, s4, 16
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v3i16_v3i16__5_3_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4
; GFX90A-NEXT: s_lshr_b32 s9, s4, 16
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_3_4:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[0:1]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0
; GFX940-NEXT: s_lshr_b32 s9, s0, 16
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; use s[8:9]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %wide.a = call <4 x i16> asm "; def $0", "=s"()
  %wide.b = call <4 x i16> asm "; def $0", "=s"()
  %a = shufflevector <4 x i16> %wide.a, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %b = shufflevector <4 x i16> %wide.b, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %sel = shufflevector <3 x i16> %a, <3 x i16> %b, <3 x i32> <i32 5, i32 3, i32 4>
  %sel.wide = shufflevector <3 x i16> %sel, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %sel.wide)
  ret void
}

; Mask <poison, 5, 5>: lane 0 is unconstrained, lanes 1 and 2 both read
; element 2 of the second source; all three targets share one check prefix.
define void @s_shuffle_v3i16_v3i16__u_5_5() {
; GFX9-LABEL: s_shuffle_v3i16_v3i16__u_5_5:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; def s[8:9]
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_lshl_b32 s8, s9, 16
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:9]
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
  %wide.a = call <4 x i16> asm "; def $0", "=s"()
  %wide.b = call <4 x i16> asm "; def $0", "=s"()
  %a = shufflevector <4 x i16> %wide.a, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %b = shufflevector <4 x i16> %wide.b, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %sel = shufflevector <3 x i16> %a, <3 x i16> %b, <3 x i32> <i32 poison, i32 5, i32 5>
  %sel.wide = shufflevector <3 x i16> %sel, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %sel.wide)
  ret void
}

; Mask <0, 5, 5>: lane 0 reads element 0 of the first source, lanes 1 and 2
; read element 2 of the second source.
define void @s_shuffle_v3i16_v3i16__0_5_5() {
; GFX900-LABEL: s_shuffle_v3i16_v3i16__0_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s9
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v3i16_v3i16__0_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s9
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v3i16_v3i16__0_5_5:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[8:9]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[0:1]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; use s[8:9]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %wide.a = call <4 x i16> asm "; def $0", "=s"()
  %wide.b = call <4 x i16> asm "; def $0", "=s"()
  %a = shufflevector <4 x i16> %wide.a, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %b = shufflevector <4 x i16> %wide.b, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %sel = shufflevector <3 x i16> %a, <3 x i16> %b, <3 x i32> <i32 0, i32 5, i32 5>
  %sel.wide = shufflevector <3 x i16> %sel, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %sel.wide)
  ret void
}

; Mask <1, 5, 5>: lane 0 reads element 1 of the first source, lanes 1 and 2
; read element 2 of the second source.
define void @s_shuffle_v3i16_v3i16__1_5_5() {
; GFX900-LABEL: s_shuffle_v3i16_v3i16__1_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_lshr_b32 s4, s4, 16
; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s9
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v3i16_v3i16__1_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_lshr_b32 s4, s4, 16
; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s9
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v3i16_v3i16__1_5_5:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[0:1]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[8:9]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_lshr_b32 s0, s0, 16
; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; use s[8:9]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %wide.a = call <4 x i16> asm "; def $0", "=s"()
  %wide.b = call <4 x i16> asm "; def $0", "=s"()
  %a = shufflevector <4 x i16> %wide.a, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %b = shufflevector <4 x i16> %wide.b, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %sel = shufflevector <3 x i16> %a, <3 x i16> %b, <3 x i32> <i32 1, i32 5, i32 5>
  %sel.wide = shufflevector <3 x i16> %sel, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %sel.wide)
  ret void
}

; Mask <2, 5, 5>: lane 0 reads element 2 of the first source, lanes 1 and 2
; read element 2 of the second source.
define void @s_shuffle_v3i16_v3i16__2_5_5() {
; GFX900-LABEL: s_shuffle_v3i16_v3i16__2_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s9
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v3i16_v3i16__2_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s9
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v3i16_v3i16__2_5_5:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[8:9]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[0:1]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s9
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; use s[8:9]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %wide.a = call <4 x i16> asm "; def $0", "=s"()
  %wide.b = call <4 x i16> asm "; def $0", "=s"()
  %a = shufflevector <4 x i16> %wide.a, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %b = shufflevector <4 x i16> %wide.b, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %sel = shufflevector <3 x i16> %a, <3 x i16> %b, <3 x i32> <i32 2, i32 5, i32 5>
  %sel.wide = shufflevector <3 x i16> %sel, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %sel.wide)
  ret void
}

; Mask <3, 5, 5>: every lane reads the second source (elements 0, 2, 2);
; the expected output contains a single asm def.
define void @s_shuffle_v3i16_v3i16__3_5_5() {
; GFX9-LABEL: s_shuffle_v3i16_v3i16__3_5_5:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; def s[8:9]
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_pack_ll_b32_b16 s8, s8, s9
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:9]
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
  %wide.a = call <4 x i16> asm "; def $0", "=s"()
  %wide.b = call <4 x i16> asm "; def $0", "=s"()
  %a = shufflevector <4 x i16> %wide.a, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %b = shufflevector <4 x i16> %wide.b, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %sel = shufflevector <3 x i16> %a, <3 x i16> %b, <3 x i32> <i32 3, i32 5, i32 5>
  %sel.wide = shufflevector <3 x i16> %sel, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %sel.wide)
  ret void
}

; Mask <4, 5, 5>: every lane reads the second source (elements 1, 2, 2);
; the expected output contains a single asm def.
define void @s_shuffle_v3i16_v3i16__4_5_5() {
; GFX900-LABEL: s_shuffle_v3i16_v3i16__4_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_lshr_b32 s4, s8, 16
; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s9
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v3i16_v3i16__4_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_lshr_b32 s4, s8, 16
; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s9
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v3i16_v3i16__4_5_5:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[8:9]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_lshr_b32 s0, s8, 16
; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; use s[8:9]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %wide.a = call <4 x i16> asm "; def $0", "=s"()
  %wide.b = call <4 x i16> asm "; def $0", "=s"()
  %a = shufflevector <4 x i16> %wide.a, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %b = shufflevector <4 x i16> %wide.b, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %sel = shufflevector <3 x i16> %a, <3 x i16> %b, <3 x i32> <i32 4, i32 5, i32 5>
  %sel.wide = shufflevector <3 x i16> %sel, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %sel.wide)
  ret void
}

; Mask <5, poison, 5>: lanes 0 and 2 read element 2 of the second source,
; lane 1 is unconstrained; all three targets share one check prefix.
define void @s_shuffle_v3i16_v3i16__5_u_5() {
; GFX9-LABEL: s_shuffle_v3i16_v3i16__5_u_5:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; def s[8:9]
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_mov_b32 s8, s9
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:9]
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
  %wide.a = call <4 x i16> asm "; def $0", "=s"()
  %wide.b = call <4 x i16> asm "; def $0", "=s"()
  %a = shufflevector <4 x i16> %wide.a, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %b = shufflevector <4 x i16> %wide.b, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %sel = shufflevector <3 x i16> %a, <3 x i16> %b, <3 x i32> <i32 5, i32 poison, i32 5>
  %sel.wide = shufflevector <3 x i16> %sel, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %sel.wide)
  ret void
}

; Mask <5, 0, 5>: lanes 0 and 2 read element 2 of the second source,
; lane 1 reads element 0 of the first source.
define void @s_shuffle_v3i16_v3i16__5_0_5() {
; GFX900-LABEL: s_shuffle_v3i16_v3i16__5_0_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_pack_ll_b32_b16 s8, s9, s4
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v3i16_v3i16__5_0_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s9, s4
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_0_5:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[8:9]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[0:1]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_pack_ll_b32_b16 s8, s9, s0
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; use s[8:9]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %wide.a = call <4 x i16> asm "; def $0", "=s"()
  %wide.b = call <4 x i16> asm "; def $0", "=s"()
  %a = shufflevector <4 x i16> %wide.a, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %b = shufflevector <4 x i16> %wide.b, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %sel = shufflevector <3 x i16> %a, <3 x i16> %b, <3 x i32> <i32 5, i32 0, i32 5>
  %sel.wide = shufflevector <3 x i16> %sel, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %sel.wide)
  ret void
}

; Mask <5, 1, 5>: lanes 0 and 2 read element 2 of the second source,
; lane 1 reads element 1 of the first source.
define void @s_shuffle_v3i16_v3i16__5_1_5() {
; GFX900-LABEL: s_shuffle_v3i16_v3i16__5_1_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_pack_lh_b32_b16 s8, s9, s4
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v3i16_v3i16__5_1_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s9, s4
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_1_5:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[8:9]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[0:1]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_pack_lh_b32_b16 s8, s9, s0
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; use s[8:9]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %wide.a = call <4 x i16> asm "; def $0", "=s"()
  %wide.b = call <4 x i16> asm "; def $0", "=s"()
  %a = shufflevector <4 x i16> %wide.a, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %b = shufflevector <4 x i16> %wide.b, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %sel = shufflevector <3 x i16> %a, <3 x i16> %b, <3 x i32> <i32 5, i32 1, i32 5>
  %sel.wide = shufflevector <3 x i16> %sel, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %sel.wide)
  ret void
}

; Mask <5, 2, 5>: lanes 0 and 2 read element 2 of the second source,
; lane 1 reads element 2 of the first source.
define void @s_shuffle_v3i16_v3i16__5_2_5() {
; GFX900-LABEL: s_shuffle_v3i16_v3i16__5_2_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_pack_ll_b32_b16 s8, s9, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v3i16_v3i16__5_2_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s9, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_2_5:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[8:9]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[0:1]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_pack_ll_b32_b16 s8, s9, s1
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; use s[8:9]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %wide.a = call <4 x i16> asm "; def $0", "=s"()
  %wide.b = call <4 x i16> asm "; def $0", "=s"()
  %a = shufflevector <4 x i16> %wide.a, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %b = shufflevector <4 x i16> %wide.b, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %sel = shufflevector <3 x i16> %a, <3 x i16> %b, <3 x i32> <i32 5, i32 2, i32 5>
  %sel.wide = shufflevector <3 x i16> %sel, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %sel.wide)
  ret void
}

; Mask <5, 3, 5>: every lane reads the second source (elements 2, 0, 2);
; the expected output contains a single asm def.
define void @s_shuffle_v3i16_v3i16__5_3_5() {
; GFX9-LABEL: s_shuffle_v3i16_v3i16__5_3_5:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; def s[8:9]
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_pack_ll_b32_b16 s8, s9, s8
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:9]
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
  %wide.a = call <4 x i16> asm "; def $0", "=s"()
  %wide.b = call <4 x i16> asm "; def $0", "=s"()
  %a = shufflevector <4 x i16> %wide.a, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %b = shufflevector <4 x i16> %wide.b, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %sel = shufflevector <3 x i16> %a, <3 x i16> %b, <3 x i32> <i32 5, i32 3, i32 5>
  %sel.wide = shufflevector <3 x i16> %sel, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %sel.wide)
  ret void
}

; Mask <5, 4, 5>: every lane reads the second source (elements 2, 1, 2);
; the expected output contains a single asm def.
define void @s_shuffle_v3i16_v3i16__5_4_5() {
; GFX9-LABEL: s_shuffle_v3i16_v3i16__5_4_5:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; def s[8:9]
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_pack_lh_b32_b16 s8, s9, s8
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:9]
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
  %wide.a = call <4 x i16> asm "; def $0", "=s"()
  %wide.b = call <4 x i16> asm "; def $0", "=s"()
  %a = shufflevector <4 x i16> %wide.a, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %b = shufflevector <4 x i16> %wide.b, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %sel = shufflevector <3 x i16> %a, <3 x i16> %b, <3 x i32> <i32 5, i32 4, i32 5>
  %sel.wide = shufflevector <3 x i16> %sel, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %sel.wide)
  ret void
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GFX90APLUS: {{.*}}