1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 2; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s 3; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX90A %s 4; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX940 %s 5 6 7define void @v_shuffle_v3i16_v4i16__u_u_u(ptr addrspace(1) inreg %ptr) { 8; GFX9-LABEL: v_shuffle_v3i16_v4i16__u_u_u: 9; GFX9: ; %bb.0: 10; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11; GFX9-NEXT: s_setpc_b64 s[30:31] 12 %vec0 = call <4 x i16> asm "; def $0", "=v"() 13 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> poison 14 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 15 ret void 16} 17 18define void @v_shuffle_v3i16_v4i16__0_u_u(ptr addrspace(1) inreg %ptr) { 19; GFX900-LABEL: v_shuffle_v3i16_v4i16__0_u_u: 20; GFX900: ; %bb.0: 21; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22; GFX900-NEXT: v_mov_b32_e32 v2, 0 23; GFX900-NEXT: ;;#ASMSTART 24; GFX900-NEXT: ; def v[0:1] 25; GFX900-NEXT: ;;#ASMEND 26; GFX900-NEXT: global_store_short v2, v1, s[16:17] offset:4 27; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 28; GFX900-NEXT: s_waitcnt vmcnt(0) 29; GFX900-NEXT: s_setpc_b64 s[30:31] 30; 31; GFX90A-LABEL: v_shuffle_v3i16_v4i16__0_u_u: 32; GFX90A: ; %bb.0: 33; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 34; GFX90A-NEXT: v_mov_b32_e32 v2, 0 35; GFX90A-NEXT: ;;#ASMSTART 36; GFX90A-NEXT: ; def v[0:1] 37; GFX90A-NEXT: ;;#ASMEND 38; GFX90A-NEXT: global_store_short v2, v1, s[16:17] offset:4 39; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 40; GFX90A-NEXT: s_waitcnt vmcnt(0) 41; GFX90A-NEXT: s_setpc_b64 s[30:31] 42; 43; GFX940-LABEL: v_shuffle_v3i16_v4i16__0_u_u: 44; GFX940: ; %bb.0: 45; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 46; GFX940-NEXT: v_mov_b32_e32 v2, 0 47; GFX940-NEXT: 
;;#ASMSTART 48; GFX940-NEXT: ; def v[0:1] 49; GFX940-NEXT: ;;#ASMEND 50; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 51; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 52; GFX940-NEXT: s_waitcnt vmcnt(0) 53; GFX940-NEXT: s_setpc_b64 s[30:31] 54 %vec0 = call <4 x i16> asm "; def $0", "=v"() 55 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 poison, i32 poison> 56 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 57 ret void 58} 59 60define void @v_shuffle_v3i16_v4i16__1_u_u(ptr addrspace(1) inreg %ptr) { 61; GFX900-LABEL: v_shuffle_v3i16_v4i16__1_u_u: 62; GFX900: ; %bb.0: 63; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 64; GFX900-NEXT: ;;#ASMSTART 65; GFX900-NEXT: ; def v[0:1] 66; GFX900-NEXT: ;;#ASMEND 67; GFX900-NEXT: v_mov_b32_e32 v2, 0 68; GFX900-NEXT: v_alignbit_b32 v0, s4, v0, 16 69; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 70; GFX900-NEXT: s_waitcnt vmcnt(0) 71; GFX900-NEXT: s_setpc_b64 s[30:31] 72; 73; GFX90A-LABEL: v_shuffle_v3i16_v4i16__1_u_u: 74; GFX90A: ; %bb.0: 75; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 76; GFX90A-NEXT: ;;#ASMSTART 77; GFX90A-NEXT: ; def v[0:1] 78; GFX90A-NEXT: ;;#ASMEND 79; GFX90A-NEXT: v_mov_b32_e32 v2, 0 80; GFX90A-NEXT: v_alignbit_b32 v0, s4, v0, 16 81; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 82; GFX90A-NEXT: s_waitcnt vmcnt(0) 83; GFX90A-NEXT: s_setpc_b64 s[30:31] 84; 85; GFX940-LABEL: v_shuffle_v3i16_v4i16__1_u_u: 86; GFX940: ; %bb.0: 87; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 88; GFX940-NEXT: ;;#ASMSTART 89; GFX940-NEXT: ; def v[0:1] 90; GFX940-NEXT: ;;#ASMEND 91; GFX940-NEXT: v_mov_b32_e32 v2, 0 92; GFX940-NEXT: v_alignbit_b32 v0, s0, v0, 16 93; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 94; GFX940-NEXT: s_waitcnt vmcnt(0) 95; GFX940-NEXT: s_setpc_b64 s[30:31] 96 %vec0 = call <4 x i16> asm "; def $0", "=v"() 97 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 1, i32 poison, i32 
poison> 98 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 99 ret void 100} 101 102define void @v_shuffle_v3i16_v4i16__2_u_u(ptr addrspace(1) inreg %ptr) { 103; GFX900-LABEL: v_shuffle_v3i16_v4i16__2_u_u: 104; GFX900: ; %bb.0: 105; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 106; GFX900-NEXT: v_mov_b32_e32 v2, 0 107; GFX900-NEXT: ;;#ASMSTART 108; GFX900-NEXT: ; def v[0:1] 109; GFX900-NEXT: ;;#ASMEND 110; GFX900-NEXT: global_store_dword v2, v1, s[16:17] 111; GFX900-NEXT: s_waitcnt vmcnt(0) 112; GFX900-NEXT: s_setpc_b64 s[30:31] 113; 114; GFX90A-LABEL: v_shuffle_v3i16_v4i16__2_u_u: 115; GFX90A: ; %bb.0: 116; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 117; GFX90A-NEXT: v_mov_b32_e32 v2, 0 118; GFX90A-NEXT: ;;#ASMSTART 119; GFX90A-NEXT: ; def v[0:1] 120; GFX90A-NEXT: ;;#ASMEND 121; GFX90A-NEXT: global_store_dword v2, v1, s[16:17] 122; GFX90A-NEXT: s_waitcnt vmcnt(0) 123; GFX90A-NEXT: s_setpc_b64 s[30:31] 124; 125; GFX940-LABEL: v_shuffle_v3i16_v4i16__2_u_u: 126; GFX940: ; %bb.0: 127; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 128; GFX940-NEXT: v_mov_b32_e32 v2, 0 129; GFX940-NEXT: ;;#ASMSTART 130; GFX940-NEXT: ; def v[0:1] 131; GFX940-NEXT: ;;#ASMEND 132; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 133; GFX940-NEXT: s_waitcnt vmcnt(0) 134; GFX940-NEXT: s_setpc_b64 s[30:31] 135 %vec0 = call <4 x i16> asm "; def $0", "=v"() 136 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 2, i32 poison, i32 poison> 137 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 138 ret void 139} 140 141define void @v_shuffle_v3i16_v4i16__3_u_u(ptr addrspace(1) inreg %ptr) { 142; GFX900-LABEL: v_shuffle_v3i16_v4i16__3_u_u: 143; GFX900: ; %bb.0: 144; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 145; GFX900-NEXT: ;;#ASMSTART 146; GFX900-NEXT: ; def v[0:1] 147; GFX900-NEXT: ;;#ASMEND 148; GFX900-NEXT: v_mov_b32_e32 v2, 0 149; GFX900-NEXT: v_alignbit_b32 v0, s4, v1, 16 150; GFX900-NEXT: global_store_dword v2, v0, 
s[16:17] 151; GFX900-NEXT: s_waitcnt vmcnt(0) 152; GFX900-NEXT: s_setpc_b64 s[30:31] 153; 154; GFX90A-LABEL: v_shuffle_v3i16_v4i16__3_u_u: 155; GFX90A: ; %bb.0: 156; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 157; GFX90A-NEXT: ;;#ASMSTART 158; GFX90A-NEXT: ; def v[0:1] 159; GFX90A-NEXT: ;;#ASMEND 160; GFX90A-NEXT: v_mov_b32_e32 v2, 0 161; GFX90A-NEXT: v_alignbit_b32 v0, s4, v1, 16 162; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 163; GFX90A-NEXT: s_waitcnt vmcnt(0) 164; GFX90A-NEXT: s_setpc_b64 s[30:31] 165; 166; GFX940-LABEL: v_shuffle_v3i16_v4i16__3_u_u: 167; GFX940: ; %bb.0: 168; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 169; GFX940-NEXT: ;;#ASMSTART 170; GFX940-NEXT: ; def v[0:1] 171; GFX940-NEXT: ;;#ASMEND 172; GFX940-NEXT: v_mov_b32_e32 v2, 0 173; GFX940-NEXT: v_alignbit_b32 v0, s0, v1, 16 174; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 175; GFX940-NEXT: s_waitcnt vmcnt(0) 176; GFX940-NEXT: s_setpc_b64 s[30:31] 177 %vec0 = call <4 x i16> asm "; def $0", "=v"() 178 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 3, i32 poison, i32 poison> 179 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 180 ret void 181} 182 183define void @v_shuffle_v3i16_v4i16__4_u_u(ptr addrspace(1) inreg %ptr) { 184; GFX9-LABEL: v_shuffle_v3i16_v4i16__4_u_u: 185; GFX9: ; %bb.0: 186; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 187; GFX9-NEXT: s_setpc_b64 s[30:31] 188 %vec0 = call <4 x i16> asm "; def $0", "=v"() 189 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 4, i32 poison, i32 poison> 190 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 191 ret void 192} 193 194define void @v_shuffle_v3i16_v4i16__5_u_u(ptr addrspace(1) inreg %ptr) { 195; GFX900-LABEL: v_shuffle_v3i16_v4i16__5_u_u: 196; GFX900: ; %bb.0: 197; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 198; GFX900-NEXT: ;;#ASMSTART 199; GFX900-NEXT: ; def v[0:1] 200; GFX900-NEXT: ;;#ASMEND 201; GFX900-NEXT: v_mov_b32_e32 
v2, 0 202; GFX900-NEXT: v_alignbit_b32 v0, s4, v0, 16 203; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 204; GFX900-NEXT: s_waitcnt vmcnt(0) 205; GFX900-NEXT: s_setpc_b64 s[30:31] 206; 207; GFX90A-LABEL: v_shuffle_v3i16_v4i16__5_u_u: 208; GFX90A: ; %bb.0: 209; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 210; GFX90A-NEXT: ;;#ASMSTART 211; GFX90A-NEXT: ; def v[0:1] 212; GFX90A-NEXT: ;;#ASMEND 213; GFX90A-NEXT: v_mov_b32_e32 v2, 0 214; GFX90A-NEXT: v_alignbit_b32 v0, s4, v0, 16 215; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 216; GFX90A-NEXT: s_waitcnt vmcnt(0) 217; GFX90A-NEXT: s_setpc_b64 s[30:31] 218; 219; GFX940-LABEL: v_shuffle_v3i16_v4i16__5_u_u: 220; GFX940: ; %bb.0: 221; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 222; GFX940-NEXT: ;;#ASMSTART 223; GFX940-NEXT: ; def v[0:1] 224; GFX940-NEXT: ;;#ASMEND 225; GFX940-NEXT: v_mov_b32_e32 v2, 0 226; GFX940-NEXT: v_alignbit_b32 v0, s0, v0, 16 227; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 228; GFX940-NEXT: s_waitcnt vmcnt(0) 229; GFX940-NEXT: s_setpc_b64 s[30:31] 230 %vec0 = call <4 x i16> asm "; def $0", "=v"() 231 %vec1 = call <4 x i16> asm "; def $0", "=v"() 232 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 5, i32 poison, i32 poison> 233 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 234 ret void 235} 236 237define void @v_shuffle_v3i16_v4i16__6_u_u(ptr addrspace(1) inreg %ptr) { 238; GFX900-LABEL: v_shuffle_v3i16_v4i16__6_u_u: 239; GFX900: ; %bb.0: 240; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 241; GFX900-NEXT: v_mov_b32_e32 v2, 0 242; GFX900-NEXT: ;;#ASMSTART 243; GFX900-NEXT: ; def v[0:1] 244; GFX900-NEXT: ;;#ASMEND 245; GFX900-NEXT: global_store_dword v2, v1, s[16:17] 246; GFX900-NEXT: s_waitcnt vmcnt(0) 247; GFX900-NEXT: s_setpc_b64 s[30:31] 248; 249; GFX90A-LABEL: v_shuffle_v3i16_v4i16__6_u_u: 250; GFX90A: ; %bb.0: 251; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 252; GFX90A-NEXT: v_mov_b32_e32 v2, 0 253; 
GFX90A-NEXT: ;;#ASMSTART 254; GFX90A-NEXT: ; def v[0:1] 255; GFX90A-NEXT: ;;#ASMEND 256; GFX90A-NEXT: global_store_dword v2, v1, s[16:17] 257; GFX90A-NEXT: s_waitcnt vmcnt(0) 258; GFX90A-NEXT: s_setpc_b64 s[30:31] 259; 260; GFX940-LABEL: v_shuffle_v3i16_v4i16__6_u_u: 261; GFX940: ; %bb.0: 262; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 263; GFX940-NEXT: v_mov_b32_e32 v2, 0 264; GFX940-NEXT: ;;#ASMSTART 265; GFX940-NEXT: ; def v[0:1] 266; GFX940-NEXT: ;;#ASMEND 267; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 268; GFX940-NEXT: s_waitcnt vmcnt(0) 269; GFX940-NEXT: s_setpc_b64 s[30:31] 270 %vec0 = call <4 x i16> asm "; def $0", "=v"() 271 %vec1 = call <4 x i16> asm "; def $0", "=v"() 272 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 6, i32 poison, i32 poison> 273 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 274 ret void 275} 276 277define void @v_shuffle_v3i16_v4i16__7_u_u(ptr addrspace(1) inreg %ptr) { 278; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_u_u: 279; GFX900: ; %bb.0: 280; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 281; GFX900-NEXT: ;;#ASMSTART 282; GFX900-NEXT: ; def v[0:1] 283; GFX900-NEXT: ;;#ASMEND 284; GFX900-NEXT: v_mov_b32_e32 v2, 0 285; GFX900-NEXT: v_alignbit_b32 v0, s4, v1, 16 286; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 287; GFX900-NEXT: s_waitcnt vmcnt(0) 288; GFX900-NEXT: s_setpc_b64 s[30:31] 289; 290; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_u_u: 291; GFX90A: ; %bb.0: 292; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 293; GFX90A-NEXT: ;;#ASMSTART 294; GFX90A-NEXT: ; def v[0:1] 295; GFX90A-NEXT: ;;#ASMEND 296; GFX90A-NEXT: v_mov_b32_e32 v2, 0 297; GFX90A-NEXT: v_alignbit_b32 v0, s4, v1, 16 298; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 299; GFX90A-NEXT: s_waitcnt vmcnt(0) 300; GFX90A-NEXT: s_setpc_b64 s[30:31] 301; 302; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_u_u: 303; GFX940: ; %bb.0: 304; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 305; GFX940-NEXT: 
;;#ASMSTART 306; GFX940-NEXT: ; def v[0:1] 307; GFX940-NEXT: ;;#ASMEND 308; GFX940-NEXT: v_mov_b32_e32 v2, 0 309; GFX940-NEXT: v_alignbit_b32 v0, s0, v1, 16 310; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 311; GFX940-NEXT: s_waitcnt vmcnt(0) 312; GFX940-NEXT: s_setpc_b64 s[30:31] 313 %vec0 = call <4 x i16> asm "; def $0", "=v"() 314 %vec1 = call <4 x i16> asm "; def $0", "=v"() 315 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 poison, i32 poison> 316 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 317 ret void 318} 319 320define void @v_shuffle_v3i16_v4i16__7_0_u(ptr addrspace(1) inreg %ptr) { 321; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_0_u: 322; GFX900: ; %bb.0: 323; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 324; GFX900-NEXT: ;;#ASMSTART 325; GFX900-NEXT: ; def v[0:1] 326; GFX900-NEXT: ;;#ASMEND 327; GFX900-NEXT: v_mov_b32_e32 v3, 0 328; GFX900-NEXT: ;;#ASMSTART 329; GFX900-NEXT: ; def v[1:2] 330; GFX900-NEXT: ;;#ASMEND 331; GFX900-NEXT: v_alignbit_b32 v0, v0, v2, 16 332; GFX900-NEXT: global_store_dword v3, v0, s[16:17] 333; GFX900-NEXT: s_waitcnt vmcnt(0) 334; GFX900-NEXT: s_setpc_b64 s[30:31] 335; 336; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_0_u: 337; GFX90A: ; %bb.0: 338; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 339; GFX90A-NEXT: ;;#ASMSTART 340; GFX90A-NEXT: ; def v[0:1] 341; GFX90A-NEXT: ;;#ASMEND 342; GFX90A-NEXT: v_mov_b32_e32 v4, 0 343; GFX90A-NEXT: ;;#ASMSTART 344; GFX90A-NEXT: ; def v[2:3] 345; GFX90A-NEXT: ;;#ASMEND 346; GFX90A-NEXT: v_alignbit_b32 v0, v0, v3, 16 347; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 348; GFX90A-NEXT: s_waitcnt vmcnt(0) 349; GFX90A-NEXT: s_setpc_b64 s[30:31] 350; 351; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_0_u: 352; GFX940: ; %bb.0: 353; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 354; GFX940-NEXT: ;;#ASMSTART 355; GFX940-NEXT: ; def v[0:1] 356; GFX940-NEXT: ;;#ASMEND 357; GFX940-NEXT: v_mov_b32_e32 v4, 0 358; GFX940-NEXT: ;;#ASMSTART 359; 
GFX940-NEXT: ; def v[2:3] 360; GFX940-NEXT: ;;#ASMEND 361; GFX940-NEXT: s_nop 0 362; GFX940-NEXT: v_alignbit_b32 v0, v0, v3, 16 363; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 364; GFX940-NEXT: s_waitcnt vmcnt(0) 365; GFX940-NEXT: s_setpc_b64 s[30:31] 366 %vec0 = call <4 x i16> asm "; def $0", "=v"() 367 %vec1 = call <4 x i16> asm "; def $0", "=v"() 368 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 0, i32 poison> 369 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 370 ret void 371} 372 373define void @v_shuffle_v3i16_v4i16__7_1_u(ptr addrspace(1) inreg %ptr) { 374; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_1_u: 375; GFX900: ; %bb.0: 376; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 377; GFX900-NEXT: ;;#ASMSTART 378; GFX900-NEXT: ; def v[0:1] 379; GFX900-NEXT: ;;#ASMEND 380; GFX900-NEXT: s_mov_b32 s4, 0x7060302 381; GFX900-NEXT: v_mov_b32_e32 v3, 0 382; GFX900-NEXT: ;;#ASMSTART 383; GFX900-NEXT: ; def v[1:2] 384; GFX900-NEXT: ;;#ASMEND 385; GFX900-NEXT: v_perm_b32 v0, v0, v2, s4 386; GFX900-NEXT: global_store_dword v3, v0, s[16:17] 387; GFX900-NEXT: s_waitcnt vmcnt(0) 388; GFX900-NEXT: s_setpc_b64 s[30:31] 389; 390; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_1_u: 391; GFX90A: ; %bb.0: 392; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 393; GFX90A-NEXT: ;;#ASMSTART 394; GFX90A-NEXT: ; def v[0:1] 395; GFX90A-NEXT: ;;#ASMEND 396; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 397; GFX90A-NEXT: v_mov_b32_e32 v4, 0 398; GFX90A-NEXT: ;;#ASMSTART 399; GFX90A-NEXT: ; def v[2:3] 400; GFX90A-NEXT: ;;#ASMEND 401; GFX90A-NEXT: v_perm_b32 v0, v0, v3, s4 402; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 403; GFX90A-NEXT: s_waitcnt vmcnt(0) 404; GFX90A-NEXT: s_setpc_b64 s[30:31] 405; 406; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_1_u: 407; GFX940: ; %bb.0: 408; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 409; GFX940-NEXT: ;;#ASMSTART 410; GFX940-NEXT: ; def v[0:1] 411; GFX940-NEXT: ;;#ASMEND 412; GFX940-NEXT: s_mov_b32 s2, 
0x7060302 413; GFX940-NEXT: v_mov_b32_e32 v4, 0 414; GFX940-NEXT: ;;#ASMSTART 415; GFX940-NEXT: ; def v[2:3] 416; GFX940-NEXT: ;;#ASMEND 417; GFX940-NEXT: s_nop 0 418; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 419; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 420; GFX940-NEXT: s_waitcnt vmcnt(0) 421; GFX940-NEXT: s_setpc_b64 s[30:31] 422 %vec0 = call <4 x i16> asm "; def $0", "=v"() 423 %vec1 = call <4 x i16> asm "; def $0", "=v"() 424 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 1, i32 poison> 425 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 426 ret void 427} 428 429define void @v_shuffle_v3i16_v4i16__7_2_u(ptr addrspace(1) inreg %ptr) { 430; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_2_u: 431; GFX900: ; %bb.0: 432; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 433; GFX900-NEXT: ;;#ASMSTART 434; GFX900-NEXT: ; def v[0:1] 435; GFX900-NEXT: ;;#ASMEND 436; GFX900-NEXT: v_mov_b32_e32 v4, 0 437; GFX900-NEXT: ;;#ASMSTART 438; GFX900-NEXT: ; def v[2:3] 439; GFX900-NEXT: ;;#ASMEND 440; GFX900-NEXT: v_alignbit_b32 v0, v1, v3, 16 441; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 442; GFX900-NEXT: s_waitcnt vmcnt(0) 443; GFX900-NEXT: s_setpc_b64 s[30:31] 444; 445; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_2_u: 446; GFX90A: ; %bb.0: 447; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 448; GFX90A-NEXT: ;;#ASMSTART 449; GFX90A-NEXT: ; def v[0:1] 450; GFX90A-NEXT: ;;#ASMEND 451; GFX90A-NEXT: v_mov_b32_e32 v4, 0 452; GFX90A-NEXT: ;;#ASMSTART 453; GFX90A-NEXT: ; def v[2:3] 454; GFX90A-NEXT: ;;#ASMEND 455; GFX90A-NEXT: v_alignbit_b32 v0, v1, v3, 16 456; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 457; GFX90A-NEXT: s_waitcnt vmcnt(0) 458; GFX90A-NEXT: s_setpc_b64 s[30:31] 459; 460; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_2_u: 461; GFX940: ; %bb.0: 462; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 463; GFX940-NEXT: ;;#ASMSTART 464; GFX940-NEXT: ; def v[0:1] 465; GFX940-NEXT: ;;#ASMEND 466; GFX940-NEXT: 
v_mov_b32_e32 v4, 0 467; GFX940-NEXT: ;;#ASMSTART 468; GFX940-NEXT: ; def v[2:3] 469; GFX940-NEXT: ;;#ASMEND 470; GFX940-NEXT: s_nop 0 471; GFX940-NEXT: v_alignbit_b32 v0, v1, v3, 16 472; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 473; GFX940-NEXT: s_waitcnt vmcnt(0) 474; GFX940-NEXT: s_setpc_b64 s[30:31] 475 %vec0 = call <4 x i16> asm "; def $0", "=v"() 476 %vec1 = call <4 x i16> asm "; def $0", "=v"() 477 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 2, i32 poison> 478 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 479 ret void 480} 481 482define void @v_shuffle_v3i16_v4i16__7_3_u(ptr addrspace(1) inreg %ptr) { 483; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_3_u: 484; GFX900: ; %bb.0: 485; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 486; GFX900-NEXT: ;;#ASMSTART 487; GFX900-NEXT: ; def v[0:1] 488; GFX900-NEXT: ;;#ASMEND 489; GFX900-NEXT: s_mov_b32 s4, 0x7060302 490; GFX900-NEXT: v_mov_b32_e32 v4, 0 491; GFX900-NEXT: ;;#ASMSTART 492; GFX900-NEXT: ; def v[2:3] 493; GFX900-NEXT: ;;#ASMEND 494; GFX900-NEXT: v_perm_b32 v0, v1, v3, s4 495; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 496; GFX900-NEXT: s_waitcnt vmcnt(0) 497; GFX900-NEXT: s_setpc_b64 s[30:31] 498; 499; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_3_u: 500; GFX90A: ; %bb.0: 501; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 502; GFX90A-NEXT: ;;#ASMSTART 503; GFX90A-NEXT: ; def v[0:1] 504; GFX90A-NEXT: ;;#ASMEND 505; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 506; GFX90A-NEXT: v_mov_b32_e32 v4, 0 507; GFX90A-NEXT: ;;#ASMSTART 508; GFX90A-NEXT: ; def v[2:3] 509; GFX90A-NEXT: ;;#ASMEND 510; GFX90A-NEXT: v_perm_b32 v0, v1, v3, s4 511; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 512; GFX90A-NEXT: s_waitcnt vmcnt(0) 513; GFX90A-NEXT: s_setpc_b64 s[30:31] 514; 515; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_3_u: 516; GFX940: ; %bb.0: 517; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 518; GFX940-NEXT: ;;#ASMSTART 519; GFX940-NEXT: ; def v[0:1] 520; 
GFX940-NEXT: ;;#ASMEND 521; GFX940-NEXT: s_mov_b32 s2, 0x7060302 522; GFX940-NEXT: v_mov_b32_e32 v4, 0 523; GFX940-NEXT: ;;#ASMSTART 524; GFX940-NEXT: ; def v[2:3] 525; GFX940-NEXT: ;;#ASMEND 526; GFX940-NEXT: s_nop 0 527; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 528; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 529; GFX940-NEXT: s_waitcnt vmcnt(0) 530; GFX940-NEXT: s_setpc_b64 s[30:31] 531 %vec0 = call <4 x i16> asm "; def $0", "=v"() 532 %vec1 = call <4 x i16> asm "; def $0", "=v"() 533 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 3, i32 poison> 534 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 535 ret void 536} 537 538define void @v_shuffle_v3i16_v4i16__7_4_u(ptr addrspace(1) inreg %ptr) { 539; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_4_u: 540; GFX900: ; %bb.0: 541; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 542; GFX900-NEXT: ;;#ASMSTART 543; GFX900-NEXT: ; def v[0:1] 544; GFX900-NEXT: ;;#ASMEND 545; GFX900-NEXT: v_mov_b32_e32 v2, 0 546; GFX900-NEXT: v_alignbit_b32 v0, v0, v1, 16 547; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 548; GFX900-NEXT: s_waitcnt vmcnt(0) 549; GFX900-NEXT: s_setpc_b64 s[30:31] 550; 551; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_4_u: 552; GFX90A: ; %bb.0: 553; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 554; GFX90A-NEXT: ;;#ASMSTART 555; GFX90A-NEXT: ; def v[0:1] 556; GFX90A-NEXT: ;;#ASMEND 557; GFX90A-NEXT: v_mov_b32_e32 v2, 0 558; GFX90A-NEXT: v_alignbit_b32 v0, v0, v1, 16 559; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 560; GFX90A-NEXT: s_waitcnt vmcnt(0) 561; GFX90A-NEXT: s_setpc_b64 s[30:31] 562; 563; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_4_u: 564; GFX940: ; %bb.0: 565; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 566; GFX940-NEXT: ;;#ASMSTART 567; GFX940-NEXT: ; def v[0:1] 568; GFX940-NEXT: ;;#ASMEND 569; GFX940-NEXT: v_mov_b32_e32 v2, 0 570; GFX940-NEXT: v_alignbit_b32 v0, v0, v1, 16 571; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 572; 
GFX940-NEXT: s_waitcnt vmcnt(0) 573; GFX940-NEXT: s_setpc_b64 s[30:31] 574 %vec0 = call <4 x i16> asm "; def $0", "=v"() 575 %vec1 = call <4 x i16> asm "; def $0", "=v"() 576 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 4, i32 poison> 577 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 578 ret void 579} 580 581define void @v_shuffle_v3i16_v4i16__7_5_u(ptr addrspace(1) inreg %ptr) { 582; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_5_u: 583; GFX900: ; %bb.0: 584; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 585; GFX900-NEXT: ;;#ASMSTART 586; GFX900-NEXT: ; def v[0:1] 587; GFX900-NEXT: ;;#ASMEND 588; GFX900-NEXT: s_mov_b32 s4, 0x7060302 589; GFX900-NEXT: v_mov_b32_e32 v2, 0 590; GFX900-NEXT: v_perm_b32 v0, v0, v1, s4 591; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 592; GFX900-NEXT: s_waitcnt vmcnt(0) 593; GFX900-NEXT: s_setpc_b64 s[30:31] 594; 595; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_5_u: 596; GFX90A: ; %bb.0: 597; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 598; GFX90A-NEXT: ;;#ASMSTART 599; GFX90A-NEXT: ; def v[0:1] 600; GFX90A-NEXT: ;;#ASMEND 601; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 602; GFX90A-NEXT: v_mov_b32_e32 v2, 0 603; GFX90A-NEXT: v_perm_b32 v0, v0, v1, s4 604; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 605; GFX90A-NEXT: s_waitcnt vmcnt(0) 606; GFX90A-NEXT: s_setpc_b64 s[30:31] 607; 608; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_5_u: 609; GFX940: ; %bb.0: 610; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 611; GFX940-NEXT: ;;#ASMSTART 612; GFX940-NEXT: ; def v[0:1] 613; GFX940-NEXT: ;;#ASMEND 614; GFX940-NEXT: s_mov_b32 s2, 0x7060302 615; GFX940-NEXT: v_mov_b32_e32 v2, 0 616; GFX940-NEXT: v_perm_b32 v0, v0, v1, s2 617; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 618; GFX940-NEXT: s_waitcnt vmcnt(0) 619; GFX940-NEXT: s_setpc_b64 s[30:31] 620 %vec0 = call <4 x i16> asm "; def $0", "=v"() 621 %vec1 = call <4 x i16> asm "; def $0", "=v"() 622 %shuf = shufflevector <4 x i16> %vec0, 
<4 x i16> %vec1, <3 x i32> <i32 7, i32 5, i32 poison> 623 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 624 ret void 625} 626 627define void @v_shuffle_v3i16_v4i16__7_6_u(ptr addrspace(1) inreg %ptr) { 628; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_6_u: 629; GFX900: ; %bb.0: 630; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 631; GFX900-NEXT: ;;#ASMSTART 632; GFX900-NEXT: ; def v[0:1] 633; GFX900-NEXT: ;;#ASMEND 634; GFX900-NEXT: v_mov_b32_e32 v2, 0 635; GFX900-NEXT: v_alignbit_b32 v0, v1, v1, 16 636; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 637; GFX900-NEXT: s_waitcnt vmcnt(0) 638; GFX900-NEXT: s_setpc_b64 s[30:31] 639; 640; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_6_u: 641; GFX90A: ; %bb.0: 642; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 643; GFX90A-NEXT: ;;#ASMSTART 644; GFX90A-NEXT: ; def v[0:1] 645; GFX90A-NEXT: ;;#ASMEND 646; GFX90A-NEXT: v_mov_b32_e32 v2, 0 647; GFX90A-NEXT: v_alignbit_b32 v0, v1, v1, 16 648; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 649; GFX90A-NEXT: s_waitcnt vmcnt(0) 650; GFX90A-NEXT: s_setpc_b64 s[30:31] 651; 652; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_6_u: 653; GFX940: ; %bb.0: 654; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 655; GFX940-NEXT: ;;#ASMSTART 656; GFX940-NEXT: ; def v[0:1] 657; GFX940-NEXT: ;;#ASMEND 658; GFX940-NEXT: v_mov_b32_e32 v2, 0 659; GFX940-NEXT: v_alignbit_b32 v0, v1, v1, 16 660; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 661; GFX940-NEXT: s_waitcnt vmcnt(0) 662; GFX940-NEXT: s_setpc_b64 s[30:31] 663 %vec0 = call <4 x i16> asm "; def $0", "=v"() 664 %vec1 = call <4 x i16> asm "; def $0", "=v"() 665 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 6, i32 poison> 666 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 667 ret void 668} 669 670define void @v_shuffle_v3i16_v4i16__7_7_u(ptr addrspace(1) inreg %ptr) { 671; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_7_u: 672; GFX900: ; %bb.0: 673; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) 674; GFX900-NEXT: ;;#ASMSTART 675; GFX900-NEXT: ; def v[0:1] 676; GFX900-NEXT: ;;#ASMEND 677; GFX900-NEXT: s_mov_b32 s4, 0x7060302 678; GFX900-NEXT: v_mov_b32_e32 v2, 0 679; GFX900-NEXT: v_perm_b32 v0, v1, v1, s4 680; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 681; GFX900-NEXT: s_waitcnt vmcnt(0) 682; GFX900-NEXT: s_setpc_b64 s[30:31] 683; 684; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_7_u: 685; GFX90A: ; %bb.0: 686; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 687; GFX90A-NEXT: ;;#ASMSTART 688; GFX90A-NEXT: ; def v[0:1] 689; GFX90A-NEXT: ;;#ASMEND 690; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 691; GFX90A-NEXT: v_mov_b32_e32 v2, 0 692; GFX90A-NEXT: v_perm_b32 v0, v1, v1, s4 693; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 694; GFX90A-NEXT: s_waitcnt vmcnt(0) 695; GFX90A-NEXT: s_setpc_b64 s[30:31] 696; 697; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_7_u: 698; GFX940: ; %bb.0: 699; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 700; GFX940-NEXT: ;;#ASMSTART 701; GFX940-NEXT: ; def v[0:1] 702; GFX940-NEXT: ;;#ASMEND 703; GFX940-NEXT: s_mov_b32 s2, 0x7060302 704; GFX940-NEXT: v_mov_b32_e32 v2, 0 705; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 706; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 707; GFX940-NEXT: s_waitcnt vmcnt(0) 708; GFX940-NEXT: s_setpc_b64 s[30:31] 709 %vec0 = call <4 x i16> asm "; def $0", "=v"() 710 %vec1 = call <4 x i16> asm "; def $0", "=v"() 711 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 7, i32 poison> 712 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 713 ret void 714} 715 716define void @v_shuffle_v3i16_v4i16__7_7_0(ptr addrspace(1) inreg %ptr) { 717; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_7_0: 718; GFX900: ; %bb.0: 719; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 720; GFX900-NEXT: ;;#ASMSTART 721; GFX900-NEXT: ; def v[0:1] 722; GFX900-NEXT: ;;#ASMEND 723; GFX900-NEXT: v_mov_b32_e32 v3, 0 724; GFX900-NEXT: ;;#ASMSTART 725; GFX900-NEXT: ; def v[1:2] 726; 
GFX900-NEXT: ;;#ASMEND 727; GFX900-NEXT: s_mov_b32 s4, 0x7060302 728; GFX900-NEXT: v_perm_b32 v1, v2, v2, s4 729; GFX900-NEXT: global_store_short v3, v0, s[16:17] offset:4 730; GFX900-NEXT: global_store_dword v3, v1, s[16:17] 731; GFX900-NEXT: s_waitcnt vmcnt(0) 732; GFX900-NEXT: s_setpc_b64 s[30:31] 733; 734; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_7_0: 735; GFX90A: ; %bb.0: 736; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 737; GFX90A-NEXT: v_mov_b32_e32 v4, 0 738; GFX90A-NEXT: ;;#ASMSTART 739; GFX90A-NEXT: ; def v[0:1] 740; GFX90A-NEXT: ;;#ASMEND 741; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 742; GFX90A-NEXT: ;;#ASMSTART 743; GFX90A-NEXT: ; def v[2:3] 744; GFX90A-NEXT: ;;#ASMEND 745; GFX90A-NEXT: v_perm_b32 v1, v3, v3, s4 746; GFX90A-NEXT: global_store_short v4, v0, s[16:17] offset:4 747; GFX90A-NEXT: global_store_dword v4, v1, s[16:17] 748; GFX90A-NEXT: s_waitcnt vmcnt(0) 749; GFX90A-NEXT: s_setpc_b64 s[30:31] 750; 751; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_7_0: 752; GFX940: ; %bb.0: 753; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 754; GFX940-NEXT: v_mov_b32_e32 v4, 0 755; GFX940-NEXT: ;;#ASMSTART 756; GFX940-NEXT: ; def v[0:1] 757; GFX940-NEXT: ;;#ASMEND 758; GFX940-NEXT: s_mov_b32 s2, 0x7060302 759; GFX940-NEXT: ;;#ASMSTART 760; GFX940-NEXT: ; def v[2:3] 761; GFX940-NEXT: ;;#ASMEND 762; GFX940-NEXT: s_nop 0 763; GFX940-NEXT: v_perm_b32 v1, v3, v3, s2 764; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 765; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 766; GFX940-NEXT: s_waitcnt vmcnt(0) 767; GFX940-NEXT: s_setpc_b64 s[30:31] 768 %vec0 = call <4 x i16> asm "; def $0", "=v"() 769 %vec1 = call <4 x i16> asm "; def $0", "=v"() 770 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 7, i32 0> 771 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 772 ret void 773} 774 775define void @v_shuffle_v3i16_v4i16__7_7_1(ptr addrspace(1) inreg %ptr) { 776; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_7_1: 777; 
GFX900: ; %bb.0: 778; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 779; GFX900-NEXT: ;;#ASMSTART 780; GFX900-NEXT: ; def v[0:1] 781; GFX900-NEXT: ;;#ASMEND 782; GFX900-NEXT: v_mov_b32_e32 v3, 0 783; GFX900-NEXT: ;;#ASMSTART 784; GFX900-NEXT: ; def v[1:2] 785; GFX900-NEXT: ;;#ASMEND 786; GFX900-NEXT: s_mov_b32 s4, 0x7060302 787; GFX900-NEXT: v_perm_b32 v1, v2, v2, s4 788; GFX900-NEXT: global_store_short_d16_hi v3, v0, s[16:17] offset:4 789; GFX900-NEXT: global_store_dword v3, v1, s[16:17] 790; GFX900-NEXT: s_waitcnt vmcnt(0) 791; GFX900-NEXT: s_setpc_b64 s[30:31] 792; 793; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_7_1: 794; GFX90A: ; %bb.0: 795; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 796; GFX90A-NEXT: v_mov_b32_e32 v4, 0 797; GFX90A-NEXT: ;;#ASMSTART 798; GFX90A-NEXT: ; def v[0:1] 799; GFX90A-NEXT: ;;#ASMEND 800; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 801; GFX90A-NEXT: ;;#ASMSTART 802; GFX90A-NEXT: ; def v[2:3] 803; GFX90A-NEXT: ;;#ASMEND 804; GFX90A-NEXT: v_perm_b32 v1, v3, v3, s4 805; GFX90A-NEXT: global_store_short_d16_hi v4, v0, s[16:17] offset:4 806; GFX90A-NEXT: global_store_dword v4, v1, s[16:17] 807; GFX90A-NEXT: s_waitcnt vmcnt(0) 808; GFX90A-NEXT: s_setpc_b64 s[30:31] 809; 810; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_7_1: 811; GFX940: ; %bb.0: 812; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 813; GFX940-NEXT: v_mov_b32_e32 v4, 0 814; GFX940-NEXT: ;;#ASMSTART 815; GFX940-NEXT: ; def v[0:1] 816; GFX940-NEXT: ;;#ASMEND 817; GFX940-NEXT: s_mov_b32 s2, 0x7060302 818; GFX940-NEXT: ;;#ASMSTART 819; GFX940-NEXT: ; def v[2:3] 820; GFX940-NEXT: ;;#ASMEND 821; GFX940-NEXT: s_nop 0 822; GFX940-NEXT: v_perm_b32 v1, v3, v3, s2 823; GFX940-NEXT: global_store_short_d16_hi v4, v0, s[0:1] offset:4 sc0 sc1 824; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 825; GFX940-NEXT: s_waitcnt vmcnt(0) 826; GFX940-NEXT: s_setpc_b64 s[30:31] 827 %vec0 = call <4 x i16> asm "; def $0", "=v"() 828 %vec1 = call <4 x i16> asm "; def $0", "=v"() 829 %shuf = 
shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 7, i32 1> 830 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 831 ret void 832} 833 834define void @v_shuffle_v3i16_v4i16__7_7_2(ptr addrspace(1) inreg %ptr) { 835; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_7_2: 836; GFX900: ; %bb.0: 837; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 838; GFX900-NEXT: v_mov_b32_e32 v4, 0 839; GFX900-NEXT: ;;#ASMSTART 840; GFX900-NEXT: ; def v[0:1] 841; GFX900-NEXT: ;;#ASMEND 842; GFX900-NEXT: s_mov_b32 s4, 0x7060302 843; GFX900-NEXT: ;;#ASMSTART 844; GFX900-NEXT: ; def v[2:3] 845; GFX900-NEXT: ;;#ASMEND 846; GFX900-NEXT: v_perm_b32 v0, v3, v3, s4 847; GFX900-NEXT: global_store_short v4, v1, s[16:17] offset:4 848; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 849; GFX900-NEXT: s_waitcnt vmcnt(0) 850; GFX900-NEXT: s_setpc_b64 s[30:31] 851; 852; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_7_2: 853; GFX90A: ; %bb.0: 854; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 855; GFX90A-NEXT: v_mov_b32_e32 v4, 0 856; GFX90A-NEXT: ;;#ASMSTART 857; GFX90A-NEXT: ; def v[0:1] 858; GFX90A-NEXT: ;;#ASMEND 859; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 860; GFX90A-NEXT: ;;#ASMSTART 861; GFX90A-NEXT: ; def v[2:3] 862; GFX90A-NEXT: ;;#ASMEND 863; GFX90A-NEXT: v_perm_b32 v0, v3, v3, s4 864; GFX90A-NEXT: global_store_short v4, v1, s[16:17] offset:4 865; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 866; GFX90A-NEXT: s_waitcnt vmcnt(0) 867; GFX90A-NEXT: s_setpc_b64 s[30:31] 868; 869; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_7_2: 870; GFX940: ; %bb.0: 871; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 872; GFX940-NEXT: v_mov_b32_e32 v4, 0 873; GFX940-NEXT: ;;#ASMSTART 874; GFX940-NEXT: ; def v[0:1] 875; GFX940-NEXT: ;;#ASMEND 876; GFX940-NEXT: s_mov_b32 s2, 0x7060302 877; GFX940-NEXT: ;;#ASMSTART 878; GFX940-NEXT: ; def v[2:3] 879; GFX940-NEXT: ;;#ASMEND 880; GFX940-NEXT: s_nop 0 881; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 882; GFX940-NEXT: global_store_short v4, v1, 
s[0:1] offset:4 sc0 sc1 883; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 884; GFX940-NEXT: s_waitcnt vmcnt(0) 885; GFX940-NEXT: s_setpc_b64 s[30:31] 886 %vec0 = call <4 x i16> asm "; def $0", "=v"() 887 %vec1 = call <4 x i16> asm "; def $0", "=v"() 888 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 7, i32 2> 889 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 890 ret void 891} 892 893define void @v_shuffle_v3i16_v4i16__7_7_3(ptr addrspace(1) inreg %ptr) { 894; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_7_3: 895; GFX900: ; %bb.0: 896; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 897; GFX900-NEXT: v_mov_b32_e32 v4, 0 898; GFX900-NEXT: ;;#ASMSTART 899; GFX900-NEXT: ; def v[0:1] 900; GFX900-NEXT: ;;#ASMEND 901; GFX900-NEXT: s_mov_b32 s4, 0x7060302 902; GFX900-NEXT: ;;#ASMSTART 903; GFX900-NEXT: ; def v[2:3] 904; GFX900-NEXT: ;;#ASMEND 905; GFX900-NEXT: v_perm_b32 v0, v3, v3, s4 906; GFX900-NEXT: global_store_short_d16_hi v4, v1, s[16:17] offset:4 907; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 908; GFX900-NEXT: s_waitcnt vmcnt(0) 909; GFX900-NEXT: s_setpc_b64 s[30:31] 910; 911; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_7_3: 912; GFX90A: ; %bb.0: 913; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 914; GFX90A-NEXT: v_mov_b32_e32 v4, 0 915; GFX90A-NEXT: ;;#ASMSTART 916; GFX90A-NEXT: ; def v[0:1] 917; GFX90A-NEXT: ;;#ASMEND 918; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 919; GFX90A-NEXT: ;;#ASMSTART 920; GFX90A-NEXT: ; def v[2:3] 921; GFX90A-NEXT: ;;#ASMEND 922; GFX90A-NEXT: v_perm_b32 v0, v3, v3, s4 923; GFX90A-NEXT: global_store_short_d16_hi v4, v1, s[16:17] offset:4 924; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 925; GFX90A-NEXT: s_waitcnt vmcnt(0) 926; GFX90A-NEXT: s_setpc_b64 s[30:31] 927; 928; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_7_3: 929; GFX940: ; %bb.0: 930; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 931; GFX940-NEXT: v_mov_b32_e32 v4, 0 932; GFX940-NEXT: ;;#ASMSTART 933; GFX940-NEXT: ; 
def v[0:1] 934; GFX940-NEXT: ;;#ASMEND 935; GFX940-NEXT: s_mov_b32 s2, 0x7060302 936; GFX940-NEXT: ;;#ASMSTART 937; GFX940-NEXT: ; def v[2:3] 938; GFX940-NEXT: ;;#ASMEND 939; GFX940-NEXT: s_nop 0 940; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 941; GFX940-NEXT: global_store_short_d16_hi v4, v1, s[0:1] offset:4 sc0 sc1 942; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 943; GFX940-NEXT: s_waitcnt vmcnt(0) 944; GFX940-NEXT: s_setpc_b64 s[30:31] 945 %vec0 = call <4 x i16> asm "; def $0", "=v"() 946 %vec1 = call <4 x i16> asm "; def $0", "=v"() 947 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 7, i32 3> 948 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 949 ret void 950} 951 952define void @v_shuffle_v3i16_v4i16__7_7_4(ptr addrspace(1) inreg %ptr) { 953; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_7_4: 954; GFX900: ; %bb.0: 955; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 956; GFX900-NEXT: v_mov_b32_e32 v2, 0 957; GFX900-NEXT: ;;#ASMSTART 958; GFX900-NEXT: ; def v[0:1] 959; GFX900-NEXT: ;;#ASMEND 960; GFX900-NEXT: s_mov_b32 s4, 0x7060302 961; GFX900-NEXT: v_perm_b32 v1, v1, v1, s4 962; GFX900-NEXT: global_store_short v2, v0, s[16:17] offset:4 963; GFX900-NEXT: global_store_dword v2, v1, s[16:17] 964; GFX900-NEXT: s_waitcnt vmcnt(0) 965; GFX900-NEXT: s_setpc_b64 s[30:31] 966; 967; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_7_4: 968; GFX90A: ; %bb.0: 969; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 970; GFX90A-NEXT: v_mov_b32_e32 v2, 0 971; GFX90A-NEXT: ;;#ASMSTART 972; GFX90A-NEXT: ; def v[0:1] 973; GFX90A-NEXT: ;;#ASMEND 974; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 975; GFX90A-NEXT: v_perm_b32 v1, v1, v1, s4 976; GFX90A-NEXT: global_store_short v2, v0, s[16:17] offset:4 977; GFX90A-NEXT: global_store_dword v2, v1, s[16:17] 978; GFX90A-NEXT: s_waitcnt vmcnt(0) 979; GFX90A-NEXT: s_setpc_b64 s[30:31] 980; 981; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_7_4: 982; GFX940: ; %bb.0: 983; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) 984; GFX940-NEXT: v_mov_b32_e32 v2, 0 985; GFX940-NEXT: ;;#ASMSTART 986; GFX940-NEXT: ; def v[0:1] 987; GFX940-NEXT: ;;#ASMEND 988; GFX940-NEXT: s_mov_b32 s2, 0x7060302 989; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 990; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 991; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 992; GFX940-NEXT: s_waitcnt vmcnt(0) 993; GFX940-NEXT: s_setpc_b64 s[30:31] 994 %vec0 = call <4 x i16> asm "; def $0", "=v"() 995 %vec1 = call <4 x i16> asm "; def $0", "=v"() 996 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 7, i32 4> 997 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 998 ret void 999} 1000 1001define void @v_shuffle_v3i16_v4i16__7_7_5(ptr addrspace(1) inreg %ptr) { 1002; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_7_5: 1003; GFX900: ; %bb.0: 1004; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1005; GFX900-NEXT: v_mov_b32_e32 v2, 0 1006; GFX900-NEXT: ;;#ASMSTART 1007; GFX900-NEXT: ; def v[0:1] 1008; GFX900-NEXT: ;;#ASMEND 1009; GFX900-NEXT: s_mov_b32 s4, 0x7060302 1010; GFX900-NEXT: v_perm_b32 v1, v1, v1, s4 1011; GFX900-NEXT: global_store_short_d16_hi v2, v0, s[16:17] offset:4 1012; GFX900-NEXT: global_store_dword v2, v1, s[16:17] 1013; GFX900-NEXT: s_waitcnt vmcnt(0) 1014; GFX900-NEXT: s_setpc_b64 s[30:31] 1015; 1016; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_7_5: 1017; GFX90A: ; %bb.0: 1018; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1019; GFX90A-NEXT: v_mov_b32_e32 v2, 0 1020; GFX90A-NEXT: ;;#ASMSTART 1021; GFX90A-NEXT: ; def v[0:1] 1022; GFX90A-NEXT: ;;#ASMEND 1023; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 1024; GFX90A-NEXT: v_perm_b32 v1, v1, v1, s4 1025; GFX90A-NEXT: global_store_short_d16_hi v2, v0, s[16:17] offset:4 1026; GFX90A-NEXT: global_store_dword v2, v1, s[16:17] 1027; GFX90A-NEXT: s_waitcnt vmcnt(0) 1028; GFX90A-NEXT: s_setpc_b64 s[30:31] 1029; 1030; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_7_5: 1031; GFX940: ; %bb.0: 1032; GFX940-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1033; GFX940-NEXT: v_mov_b32_e32 v2, 0 1034; GFX940-NEXT: ;;#ASMSTART 1035; GFX940-NEXT: ; def v[0:1] 1036; GFX940-NEXT: ;;#ASMEND 1037; GFX940-NEXT: s_mov_b32 s2, 0x7060302 1038; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 1039; GFX940-NEXT: global_store_short_d16_hi v2, v0, s[0:1] offset:4 sc0 sc1 1040; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 1041; GFX940-NEXT: s_waitcnt vmcnt(0) 1042; GFX940-NEXT: s_setpc_b64 s[30:31] 1043 %vec0 = call <4 x i16> asm "; def $0", "=v"() 1044 %vec1 = call <4 x i16> asm "; def $0", "=v"() 1045 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 7, i32 5> 1046 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 1047 ret void 1048} 1049 1050define void @v_shuffle_v3i16_v4i16__7_7_6(ptr addrspace(1) inreg %ptr) { 1051; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_7_6: 1052; GFX900: ; %bb.0: 1053; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1054; GFX900-NEXT: v_mov_b32_e32 v2, 0 1055; GFX900-NEXT: ;;#ASMSTART 1056; GFX900-NEXT: ; def v[0:1] 1057; GFX900-NEXT: ;;#ASMEND 1058; GFX900-NEXT: s_mov_b32 s4, 0x7060302 1059; GFX900-NEXT: v_perm_b32 v0, v1, v1, s4 1060; GFX900-NEXT: global_store_short v2, v1, s[16:17] offset:4 1061; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 1062; GFX900-NEXT: s_waitcnt vmcnt(0) 1063; GFX900-NEXT: s_setpc_b64 s[30:31] 1064; 1065; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_7_6: 1066; GFX90A: ; %bb.0: 1067; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1068; GFX90A-NEXT: v_mov_b32_e32 v2, 0 1069; GFX90A-NEXT: ;;#ASMSTART 1070; GFX90A-NEXT: ; def v[0:1] 1071; GFX90A-NEXT: ;;#ASMEND 1072; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 1073; GFX90A-NEXT: v_perm_b32 v0, v1, v1, s4 1074; GFX90A-NEXT: global_store_short v2, v1, s[16:17] offset:4 1075; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 1076; GFX90A-NEXT: s_waitcnt vmcnt(0) 1077; GFX90A-NEXT: s_setpc_b64 s[30:31] 1078; 1079; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_7_6: 1080; 
GFX940: ; %bb.0: 1081; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1082; GFX940-NEXT: v_mov_b32_e32 v2, 0 1083; GFX940-NEXT: ;;#ASMSTART 1084; GFX940-NEXT: ; def v[0:1] 1085; GFX940-NEXT: ;;#ASMEND 1086; GFX940-NEXT: s_mov_b32 s2, 0x7060302 1087; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 1088; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 1089; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 1090; GFX940-NEXT: s_waitcnt vmcnt(0) 1091; GFX940-NEXT: s_setpc_b64 s[30:31] 1092 %vec0 = call <4 x i16> asm "; def $0", "=v"() 1093 %vec1 = call <4 x i16> asm "; def $0", "=v"() 1094 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 7, i32 6> 1095 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 1096 ret void 1097} 1098 1099define void @v_shuffle_v3i16_v4i16__7_7_7(ptr addrspace(1) inreg %ptr) { 1100; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_7_7: 1101; GFX900: ; %bb.0: 1102; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1103; GFX900-NEXT: ;;#ASMSTART 1104; GFX900-NEXT: ; def v[0:1] 1105; GFX900-NEXT: ;;#ASMEND 1106; GFX900-NEXT: s_mov_b32 s4, 0x7060302 1107; GFX900-NEXT: v_mov_b32_e32 v2, 0 1108; GFX900-NEXT: v_lshrrev_b32_e32 v0, 16, v1 1109; GFX900-NEXT: v_perm_b32 v1, v1, v1, s4 1110; GFX900-NEXT: global_store_dword v2, v1, s[16:17] 1111; GFX900-NEXT: global_store_short v2, v0, s[16:17] offset:4 1112; GFX900-NEXT: s_waitcnt vmcnt(0) 1113; GFX900-NEXT: s_setpc_b64 s[30:31] 1114; 1115; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_7_7: 1116; GFX90A: ; %bb.0: 1117; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1118; GFX90A-NEXT: ;;#ASMSTART 1119; GFX90A-NEXT: ; def v[0:1] 1120; GFX90A-NEXT: ;;#ASMEND 1121; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 1122; GFX90A-NEXT: v_mov_b32_e32 v2, 0 1123; GFX90A-NEXT: v_lshrrev_b32_e32 v0, 16, v1 1124; GFX90A-NEXT: v_perm_b32 v1, v1, v1, s4 1125; GFX90A-NEXT: global_store_dword v2, v1, s[16:17] 1126; GFX90A-NEXT: global_store_short v2, v0, s[16:17] offset:4 1127; GFX90A-NEXT: 
s_waitcnt vmcnt(0) 1128; GFX90A-NEXT: s_setpc_b64 s[30:31] 1129; 1130; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_7_7: 1131; GFX940: ; %bb.0: 1132; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1133; GFX940-NEXT: ;;#ASMSTART 1134; GFX940-NEXT: ; def v[0:1] 1135; GFX940-NEXT: ;;#ASMEND 1136; GFX940-NEXT: s_mov_b32 s2, 0x7060302 1137; GFX940-NEXT: v_mov_b32_e32 v2, 0 1138; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v1 1139; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 1140; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 1141; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 1142; GFX940-NEXT: s_waitcnt vmcnt(0) 1143; GFX940-NEXT: s_setpc_b64 s[30:31] 1144 %vec0 = call <4 x i16> asm "; def $0", "=v"() 1145 %vec1 = call <4 x i16> asm "; def $0", "=v"() 1146 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 7, i32 7> 1147 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 1148 ret void 1149} 1150 1151define void @v_shuffle_v3i16_v4i16__u_0_0(ptr addrspace(1) inreg %ptr) { 1152; GFX900-LABEL: v_shuffle_v3i16_v4i16__u_0_0: 1153; GFX900: ; %bb.0: 1154; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1155; GFX900-NEXT: v_mov_b32_e32 v2, 0 1156; GFX900-NEXT: ;;#ASMSTART 1157; GFX900-NEXT: ; def v[0:1] 1158; GFX900-NEXT: ;;#ASMEND 1159; GFX900-NEXT: v_lshlrev_b32_e32 v1, 16, v0 1160; GFX900-NEXT: global_store_short v2, v0, s[16:17] offset:4 1161; GFX900-NEXT: global_store_dword v2, v1, s[16:17] 1162; GFX900-NEXT: s_waitcnt vmcnt(0) 1163; GFX900-NEXT: s_setpc_b64 s[30:31] 1164; 1165; GFX90A-LABEL: v_shuffle_v3i16_v4i16__u_0_0: 1166; GFX90A: ; %bb.0: 1167; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1168; GFX90A-NEXT: v_mov_b32_e32 v2, 0 1169; GFX90A-NEXT: ;;#ASMSTART 1170; GFX90A-NEXT: ; def v[0:1] 1171; GFX90A-NEXT: ;;#ASMEND 1172; GFX90A-NEXT: v_lshlrev_b32_e32 v1, 16, v0 1173; GFX90A-NEXT: global_store_short v2, v0, s[16:17] offset:4 1174; GFX90A-NEXT: global_store_dword v2, v1, s[16:17] 1175; GFX90A-NEXT: 
s_waitcnt vmcnt(0) 1176; GFX90A-NEXT: s_setpc_b64 s[30:31] 1177; 1178; GFX940-LABEL: v_shuffle_v3i16_v4i16__u_0_0: 1179; GFX940: ; %bb.0: 1180; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1181; GFX940-NEXT: v_mov_b32_e32 v2, 0 1182; GFX940-NEXT: ;;#ASMSTART 1183; GFX940-NEXT: ; def v[0:1] 1184; GFX940-NEXT: ;;#ASMEND 1185; GFX940-NEXT: s_nop 0 1186; GFX940-NEXT: v_lshlrev_b32_e32 v1, 16, v0 1187; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 1188; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 1189; GFX940-NEXT: s_waitcnt vmcnt(0) 1190; GFX940-NEXT: s_setpc_b64 s[30:31] 1191 %vec0 = call <4 x i16> asm "; def $0", "=v"() 1192 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 poison, i32 0, i32 0> 1193 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 1194 ret void 1195} 1196 1197define void @v_shuffle_v3i16_v4i16__0_0_0(ptr addrspace(1) inreg %ptr) { 1198; GFX900-LABEL: v_shuffle_v3i16_v4i16__0_0_0: 1199; GFX900: ; %bb.0: 1200; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1201; GFX900-NEXT: v_mov_b32_e32 v2, 0 1202; GFX900-NEXT: ;;#ASMSTART 1203; GFX900-NEXT: ; def v[0:1] 1204; GFX900-NEXT: ;;#ASMEND 1205; GFX900-NEXT: s_mov_b32 s4, 0x5040100 1206; GFX900-NEXT: v_perm_b32 v1, v0, v0, s4 1207; GFX900-NEXT: global_store_short v2, v0, s[16:17] offset:4 1208; GFX900-NEXT: global_store_dword v2, v1, s[16:17] 1209; GFX900-NEXT: s_waitcnt vmcnt(0) 1210; GFX900-NEXT: s_setpc_b64 s[30:31] 1211; 1212; GFX90A-LABEL: v_shuffle_v3i16_v4i16__0_0_0: 1213; GFX90A: ; %bb.0: 1214; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1215; GFX90A-NEXT: v_mov_b32_e32 v2, 0 1216; GFX90A-NEXT: ;;#ASMSTART 1217; GFX90A-NEXT: ; def v[0:1] 1218; GFX90A-NEXT: ;;#ASMEND 1219; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 1220; GFX90A-NEXT: v_perm_b32 v1, v0, v0, s4 1221; GFX90A-NEXT: global_store_short v2, v0, s[16:17] offset:4 1222; GFX90A-NEXT: global_store_dword v2, v1, s[16:17] 1223; GFX90A-NEXT: s_waitcnt vmcnt(0) 1224; 
GFX90A-NEXT: s_setpc_b64 s[30:31] 1225; 1226; GFX940-LABEL: v_shuffle_v3i16_v4i16__0_0_0: 1227; GFX940: ; %bb.0: 1228; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1229; GFX940-NEXT: v_mov_b32_e32 v2, 0 1230; GFX940-NEXT: ;;#ASMSTART 1231; GFX940-NEXT: ; def v[0:1] 1232; GFX940-NEXT: ;;#ASMEND 1233; GFX940-NEXT: s_mov_b32 s2, 0x5040100 1234; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 1235; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 1236; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 1237; GFX940-NEXT: s_waitcnt vmcnt(0) 1238; GFX940-NEXT: s_setpc_b64 s[30:31] 1239 %vec0 = call <4 x i16> asm "; def $0", "=v"() 1240 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> zeroinitializer 1241 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 1242 ret void 1243} 1244 1245define void @v_shuffle_v3i16_v4i16__1_0_0(ptr addrspace(1) inreg %ptr) { 1246; GFX900-LABEL: v_shuffle_v3i16_v4i16__1_0_0: 1247; GFX900: ; %bb.0: 1248; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1249; GFX900-NEXT: v_mov_b32_e32 v2, 0 1250; GFX900-NEXT: ;;#ASMSTART 1251; GFX900-NEXT: ; def v[0:1] 1252; GFX900-NEXT: ;;#ASMEND 1253; GFX900-NEXT: v_alignbit_b32 v1, v0, v0, 16 1254; GFX900-NEXT: global_store_short v2, v0, s[16:17] offset:4 1255; GFX900-NEXT: global_store_dword v2, v1, s[16:17] 1256; GFX900-NEXT: s_waitcnt vmcnt(0) 1257; GFX900-NEXT: s_setpc_b64 s[30:31] 1258; 1259; GFX90A-LABEL: v_shuffle_v3i16_v4i16__1_0_0: 1260; GFX90A: ; %bb.0: 1261; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1262; GFX90A-NEXT: v_mov_b32_e32 v2, 0 1263; GFX90A-NEXT: ;;#ASMSTART 1264; GFX90A-NEXT: ; def v[0:1] 1265; GFX90A-NEXT: ;;#ASMEND 1266; GFX90A-NEXT: v_alignbit_b32 v1, v0, v0, 16 1267; GFX90A-NEXT: global_store_short v2, v0, s[16:17] offset:4 1268; GFX90A-NEXT: global_store_dword v2, v1, s[16:17] 1269; GFX90A-NEXT: s_waitcnt vmcnt(0) 1270; GFX90A-NEXT: s_setpc_b64 s[30:31] 1271; 1272; GFX940-LABEL: v_shuffle_v3i16_v4i16__1_0_0: 1273; GFX940: ; 
%bb.0: 1274; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1275; GFX940-NEXT: v_mov_b32_e32 v2, 0 1276; GFX940-NEXT: ;;#ASMSTART 1277; GFX940-NEXT: ; def v[0:1] 1278; GFX940-NEXT: ;;#ASMEND 1279; GFX940-NEXT: s_nop 0 1280; GFX940-NEXT: v_alignbit_b32 v1, v0, v0, 16 1281; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 1282; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 1283; GFX940-NEXT: s_waitcnt vmcnt(0) 1284; GFX940-NEXT: s_setpc_b64 s[30:31] 1285 %vec0 = call <4 x i16> asm "; def $0", "=v"() 1286 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 1, i32 0, i32 0> 1287 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 1288 ret void 1289} 1290 1291define void @v_shuffle_v3i16_v4i16__2_0_0(ptr addrspace(1) inreg %ptr) { 1292; GFX900-LABEL: v_shuffle_v3i16_v4i16__2_0_0: 1293; GFX900: ; %bb.0: 1294; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1295; GFX900-NEXT: v_mov_b32_e32 v2, 0 1296; GFX900-NEXT: ;;#ASMSTART 1297; GFX900-NEXT: ; def v[0:1] 1298; GFX900-NEXT: ;;#ASMEND 1299; GFX900-NEXT: s_mov_b32 s4, 0x5040100 1300; GFX900-NEXT: v_perm_b32 v1, v0, v1, s4 1301; GFX900-NEXT: global_store_short v2, v0, s[16:17] offset:4 1302; GFX900-NEXT: global_store_dword v2, v1, s[16:17] 1303; GFX900-NEXT: s_waitcnt vmcnt(0) 1304; GFX900-NEXT: s_setpc_b64 s[30:31] 1305; 1306; GFX90A-LABEL: v_shuffle_v3i16_v4i16__2_0_0: 1307; GFX90A: ; %bb.0: 1308; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1309; GFX90A-NEXT: v_mov_b32_e32 v2, 0 1310; GFX90A-NEXT: ;;#ASMSTART 1311; GFX90A-NEXT: ; def v[0:1] 1312; GFX90A-NEXT: ;;#ASMEND 1313; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 1314; GFX90A-NEXT: v_perm_b32 v1, v0, v1, s4 1315; GFX90A-NEXT: global_store_short v2, v0, s[16:17] offset:4 1316; GFX90A-NEXT: global_store_dword v2, v1, s[16:17] 1317; GFX90A-NEXT: s_waitcnt vmcnt(0) 1318; GFX90A-NEXT: s_setpc_b64 s[30:31] 1319; 1320; GFX940-LABEL: v_shuffle_v3i16_v4i16__2_0_0: 1321; GFX940: ; %bb.0: 1322; GFX940-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) 1323; GFX940-NEXT: v_mov_b32_e32 v2, 0 1324; GFX940-NEXT: ;;#ASMSTART 1325; GFX940-NEXT: ; def v[0:1] 1326; GFX940-NEXT: ;;#ASMEND 1327; GFX940-NEXT: s_mov_b32 s2, 0x5040100 1328; GFX940-NEXT: v_perm_b32 v1, v0, v1, s2 1329; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 1330; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 1331; GFX940-NEXT: s_waitcnt vmcnt(0) 1332; GFX940-NEXT: s_setpc_b64 s[30:31] 1333 %vec0 = call <4 x i16> asm "; def $0", "=v"() 1334 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 2, i32 0, i32 0> 1335 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 1336 ret void 1337} 1338 1339define void @v_shuffle_v3i16_v4i16__3_0_0(ptr addrspace(1) inreg %ptr) { 1340; GFX900-LABEL: v_shuffle_v3i16_v4i16__3_0_0: 1341; GFX900: ; %bb.0: 1342; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1343; GFX900-NEXT: v_mov_b32_e32 v2, 0 1344; GFX900-NEXT: ;;#ASMSTART 1345; GFX900-NEXT: ; def v[0:1] 1346; GFX900-NEXT: ;;#ASMEND 1347; GFX900-NEXT: v_alignbit_b32 v1, v0, v1, 16 1348; GFX900-NEXT: global_store_short v2, v0, s[16:17] offset:4 1349; GFX900-NEXT: global_store_dword v2, v1, s[16:17] 1350; GFX900-NEXT: s_waitcnt vmcnt(0) 1351; GFX900-NEXT: s_setpc_b64 s[30:31] 1352; 1353; GFX90A-LABEL: v_shuffle_v3i16_v4i16__3_0_0: 1354; GFX90A: ; %bb.0: 1355; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1356; GFX90A-NEXT: v_mov_b32_e32 v2, 0 1357; GFX90A-NEXT: ;;#ASMSTART 1358; GFX90A-NEXT: ; def v[0:1] 1359; GFX90A-NEXT: ;;#ASMEND 1360; GFX90A-NEXT: v_alignbit_b32 v1, v0, v1, 16 1361; GFX90A-NEXT: global_store_short v2, v0, s[16:17] offset:4 1362; GFX90A-NEXT: global_store_dword v2, v1, s[16:17] 1363; GFX90A-NEXT: s_waitcnt vmcnt(0) 1364; GFX90A-NEXT: s_setpc_b64 s[30:31] 1365; 1366; GFX940-LABEL: v_shuffle_v3i16_v4i16__3_0_0: 1367; GFX940: ; %bb.0: 1368; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1369; GFX940-NEXT: v_mov_b32_e32 v2, 0 1370; GFX940-NEXT: ;;#ASMSTART 
1371; GFX940-NEXT: ; def v[0:1] 1372; GFX940-NEXT: ;;#ASMEND 1373; GFX940-NEXT: s_nop 0 1374; GFX940-NEXT: v_alignbit_b32 v1, v0, v1, 16 1375; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 1376; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 1377; GFX940-NEXT: s_waitcnt vmcnt(0) 1378; GFX940-NEXT: s_setpc_b64 s[30:31] 1379 %vec0 = call <4 x i16> asm "; def $0", "=v"() 1380 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 3, i32 0, i32 0> 1381 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 1382 ret void 1383} 1384 1385define void @v_shuffle_v3i16_v4i16__4_0_0(ptr addrspace(1) inreg %ptr) { 1386; Shuffle mask <4, 0, 0> applied to %vec0 and a poison second operand, then stored as <3 x i16>. Mask index 4 addresses element 0 of the second operand, so lane 0 comes from poison and lanes 1 and 2 both read %vec0[0]. The expected code for all three targets is the same as for the __u_0_0 case (v0 shifted left by 16 for the dword store, v0 stored as the trailing short at offset 4). NOTE(review): unlike the __5_0_0, __6_0_0 and __7_0_0 cases, no %vec1 is defined here, so the name and the tested mask disagree - confirm whether a defined second input was intended before regenerating the assertions. ; GFX900-LABEL: v_shuffle_v3i16_v4i16__4_0_0: 1387; GFX900: ; %bb.0: 1388; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1389; GFX900-NEXT: v_mov_b32_e32 v2, 0 1390; GFX900-NEXT: ;;#ASMSTART 1391; GFX900-NEXT: ; def v[0:1] 1392; GFX900-NEXT: ;;#ASMEND 1393; GFX900-NEXT: v_lshlrev_b32_e32 v1, 16, v0 1394; GFX900-NEXT: global_store_short v2, v0, s[16:17] offset:4 1395; GFX900-NEXT: global_store_dword v2, v1, s[16:17] 1396; GFX900-NEXT: s_waitcnt vmcnt(0) 1397; GFX900-NEXT: s_setpc_b64 s[30:31] 1398; 1399; GFX90A-LABEL: v_shuffle_v3i16_v4i16__4_0_0: 1400; GFX90A: ; %bb.0: 1401; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1402; GFX90A-NEXT: v_mov_b32_e32 v2, 0 1403; GFX90A-NEXT: ;;#ASMSTART 1404; GFX90A-NEXT: ; def v[0:1] 1405; GFX90A-NEXT: ;;#ASMEND 1406; GFX90A-NEXT: v_lshlrev_b32_e32 v1, 16, v0 1407; GFX90A-NEXT: global_store_short v2, v0, s[16:17] offset:4 1408; GFX90A-NEXT: global_store_dword v2, v1, s[16:17] 1409; GFX90A-NEXT: s_waitcnt vmcnt(0) 1410; GFX90A-NEXT: s_setpc_b64 s[30:31] 1411; 1412; GFX940-LABEL: v_shuffle_v3i16_v4i16__4_0_0: 1413; GFX940: ; %bb.0: 1414; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1415; GFX940-NEXT: v_mov_b32_e32 v2, 0 1416; GFX940-NEXT: ;;#ASMSTART 1417; GFX940-NEXT: ; def v[0:1] 1418; GFX940-NEXT: ;;#ASMEND 1419; GFX940-NEXT: s_nop 0 1420; GFX940-NEXT: 
v_lshlrev_b32_e32 v1, 16, v0 1421; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 1422; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 1423; GFX940-NEXT: s_waitcnt vmcnt(0) 1424; GFX940-NEXT: s_setpc_b64 s[30:31] 1425 %vec0 = call <4 x i16> asm "; def $0", "=v"() 1426 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 4, i32 0, i32 0> 1427 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 1428 ret void 1429} 1430 1431define void @v_shuffle_v3i16_v4i16__5_0_0(ptr addrspace(1) inreg %ptr) { 1432; GFX900-LABEL: v_shuffle_v3i16_v4i16__5_0_0: 1433; GFX900: ; %bb.0: 1434; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1435; GFX900-NEXT: ;;#ASMSTART 1436; GFX900-NEXT: ; def v[0:1] 1437; GFX900-NEXT: ;;#ASMEND 1438; GFX900-NEXT: v_mov_b32_e32 v3, 0 1439; GFX900-NEXT: ;;#ASMSTART 1440; GFX900-NEXT: ; def v[1:2] 1441; GFX900-NEXT: ;;#ASMEND 1442; GFX900-NEXT: v_alignbit_b32 v1, v0, v1, 16 1443; GFX900-NEXT: global_store_short v3, v0, s[16:17] offset:4 1444; GFX900-NEXT: global_store_dword v3, v1, s[16:17] 1445; GFX900-NEXT: s_waitcnt vmcnt(0) 1446; GFX900-NEXT: s_setpc_b64 s[30:31] 1447; 1448; GFX90A-LABEL: v_shuffle_v3i16_v4i16__5_0_0: 1449; GFX90A: ; %bb.0: 1450; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1451; GFX90A-NEXT: v_mov_b32_e32 v4, 0 1452; GFX90A-NEXT: ;;#ASMSTART 1453; GFX90A-NEXT: ; def v[0:1] 1454; GFX90A-NEXT: ;;#ASMEND 1455; GFX90A-NEXT: ;;#ASMSTART 1456; GFX90A-NEXT: ; def v[2:3] 1457; GFX90A-NEXT: ;;#ASMEND 1458; GFX90A-NEXT: v_alignbit_b32 v1, v0, v2, 16 1459; GFX90A-NEXT: global_store_short v4, v0, s[16:17] offset:4 1460; GFX90A-NEXT: global_store_dword v4, v1, s[16:17] 1461; GFX90A-NEXT: s_waitcnt vmcnt(0) 1462; GFX90A-NEXT: s_setpc_b64 s[30:31] 1463; 1464; GFX940-LABEL: v_shuffle_v3i16_v4i16__5_0_0: 1465; GFX940: ; %bb.0: 1466; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1467; GFX940-NEXT: v_mov_b32_e32 v4, 0 1468; GFX940-NEXT: ;;#ASMSTART 1469; GFX940-NEXT: ; def v[0:1] 1470; 
GFX940-NEXT: ;;#ASMEND 1471; GFX940-NEXT: ;;#ASMSTART 1472; GFX940-NEXT: ; def v[2:3] 1473; GFX940-NEXT: ;;#ASMEND 1474; GFX940-NEXT: s_nop 0 1475; GFX940-NEXT: v_alignbit_b32 v1, v0, v2, 16 1476; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 1477; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 1478; GFX940-NEXT: s_waitcnt vmcnt(0) 1479; GFX940-NEXT: s_setpc_b64 s[30:31] 1480 %vec0 = call <4 x i16> asm "; def $0", "=v"() 1481 %vec1 = call <4 x i16> asm "; def $0", "=v"() 1482 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 5, i32 0, i32 0> 1483 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 1484 ret void 1485} 1486 1487define void @v_shuffle_v3i16_v4i16__6_0_0(ptr addrspace(1) inreg %ptr) { 1488; GFX900-LABEL: v_shuffle_v3i16_v4i16__6_0_0: 1489; GFX900: ; %bb.0: 1490; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1491; GFX900-NEXT: ;;#ASMSTART 1492; GFX900-NEXT: ; def v[0:1] 1493; GFX900-NEXT: ;;#ASMEND 1494; GFX900-NEXT: v_mov_b32_e32 v3, 0 1495; GFX900-NEXT: ;;#ASMSTART 1496; GFX900-NEXT: ; def v[1:2] 1497; GFX900-NEXT: ;;#ASMEND 1498; GFX900-NEXT: s_mov_b32 s4, 0x5040100 1499; GFX900-NEXT: v_perm_b32 v1, v0, v2, s4 1500; GFX900-NEXT: global_store_short v3, v0, s[16:17] offset:4 1501; GFX900-NEXT: global_store_dword v3, v1, s[16:17] 1502; GFX900-NEXT: s_waitcnt vmcnt(0) 1503; GFX900-NEXT: s_setpc_b64 s[30:31] 1504; 1505; GFX90A-LABEL: v_shuffle_v3i16_v4i16__6_0_0: 1506; GFX90A: ; %bb.0: 1507; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1508; GFX90A-NEXT: v_mov_b32_e32 v4, 0 1509; GFX90A-NEXT: ;;#ASMSTART 1510; GFX90A-NEXT: ; def v[0:1] 1511; GFX90A-NEXT: ;;#ASMEND 1512; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 1513; GFX90A-NEXT: ;;#ASMSTART 1514; GFX90A-NEXT: ; def v[2:3] 1515; GFX90A-NEXT: ;;#ASMEND 1516; GFX90A-NEXT: v_perm_b32 v1, v0, v3, s4 1517; GFX90A-NEXT: global_store_short v4, v0, s[16:17] offset:4 1518; GFX90A-NEXT: global_store_dword v4, v1, s[16:17] 1519; GFX90A-NEXT: s_waitcnt 
vmcnt(0) 1520; GFX90A-NEXT: s_setpc_b64 s[30:31] 1521; 1522; GFX940-LABEL: v_shuffle_v3i16_v4i16__6_0_0: 1523; GFX940: ; %bb.0: 1524; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1525; GFX940-NEXT: v_mov_b32_e32 v4, 0 1526; GFX940-NEXT: ;;#ASMSTART 1527; GFX940-NEXT: ; def v[0:1] 1528; GFX940-NEXT: ;;#ASMEND 1529; GFX940-NEXT: s_mov_b32 s2, 0x5040100 1530; GFX940-NEXT: ;;#ASMSTART 1531; GFX940-NEXT: ; def v[2:3] 1532; GFX940-NEXT: ;;#ASMEND 1533; GFX940-NEXT: s_nop 0 1534; GFX940-NEXT: v_perm_b32 v1, v0, v3, s2 1535; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 1536; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 1537; GFX940-NEXT: s_waitcnt vmcnt(0) 1538; GFX940-NEXT: s_setpc_b64 s[30:31] 1539 %vec0 = call <4 x i16> asm "; def $0", "=v"() 1540 %vec1 = call <4 x i16> asm "; def $0", "=v"() 1541 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 6, i32 0, i32 0> 1542 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 1543 ret void 1544} 1545 1546define void @v_shuffle_v3i16_v4i16__7_0_0(ptr addrspace(1) inreg %ptr) { 1547; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_0_0: 1548; GFX900: ; %bb.0: 1549; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1550; GFX900-NEXT: ;;#ASMSTART 1551; GFX900-NEXT: ; def v[0:1] 1552; GFX900-NEXT: ;;#ASMEND 1553; GFX900-NEXT: v_mov_b32_e32 v3, 0 1554; GFX900-NEXT: ;;#ASMSTART 1555; GFX900-NEXT: ; def v[1:2] 1556; GFX900-NEXT: ;;#ASMEND 1557; GFX900-NEXT: v_alignbit_b32 v1, v0, v2, 16 1558; GFX900-NEXT: global_store_short v3, v0, s[16:17] offset:4 1559; GFX900-NEXT: global_store_dword v3, v1, s[16:17] 1560; GFX900-NEXT: s_waitcnt vmcnt(0) 1561; GFX900-NEXT: s_setpc_b64 s[30:31] 1562; 1563; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_0_0: 1564; GFX90A: ; %bb.0: 1565; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1566; GFX90A-NEXT: v_mov_b32_e32 v4, 0 1567; GFX90A-NEXT: ;;#ASMSTART 1568; GFX90A-NEXT: ; def v[0:1] 1569; GFX90A-NEXT: ;;#ASMEND 1570; GFX90A-NEXT: ;;#ASMSTART 1571; 
GFX90A-NEXT: ; def v[2:3] 1572; GFX90A-NEXT: ;;#ASMEND 1573; GFX90A-NEXT: v_alignbit_b32 v1, v0, v3, 16 1574; GFX90A-NEXT: global_store_short v4, v0, s[16:17] offset:4 1575; GFX90A-NEXT: global_store_dword v4, v1, s[16:17] 1576; GFX90A-NEXT: s_waitcnt vmcnt(0) 1577; GFX90A-NEXT: s_setpc_b64 s[30:31] 1578; 1579; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_0_0: 1580; GFX940: ; %bb.0: 1581; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1582; GFX940-NEXT: v_mov_b32_e32 v4, 0 1583; GFX940-NEXT: ;;#ASMSTART 1584; GFX940-NEXT: ; def v[0:1] 1585; GFX940-NEXT: ;;#ASMEND 1586; GFX940-NEXT: ;;#ASMSTART 1587; GFX940-NEXT: ; def v[2:3] 1588; GFX940-NEXT: ;;#ASMEND 1589; GFX940-NEXT: s_nop 0 1590; GFX940-NEXT: v_alignbit_b32 v1, v0, v3, 16 1591; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 1592; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 1593; GFX940-NEXT: s_waitcnt vmcnt(0) 1594; GFX940-NEXT: s_setpc_b64 s[30:31] 1595 %vec0 = call <4 x i16> asm "; def $0", "=v"() 1596 %vec1 = call <4 x i16> asm "; def $0", "=v"() 1597 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 0, i32 0> 1598 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 1599 ret void 1600} 1601 1602define void @v_shuffle_v3i16_v4i16__7_u_0(ptr addrspace(1) inreg %ptr) { 1603; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_u_0: 1604; GFX900: ; %bb.0: 1605; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1606; GFX900-NEXT: ;;#ASMSTART 1607; GFX900-NEXT: ; def v[0:1] 1608; GFX900-NEXT: ;;#ASMEND 1609; GFX900-NEXT: v_mov_b32_e32 v3, 0 1610; GFX900-NEXT: ;;#ASMSTART 1611; GFX900-NEXT: ; def v[1:2] 1612; GFX900-NEXT: ;;#ASMEND 1613; GFX900-NEXT: v_alignbit_b32 v1, s4, v2, 16 1614; GFX900-NEXT: global_store_short v3, v0, s[16:17] offset:4 1615; GFX900-NEXT: global_store_dword v3, v1, s[16:17] 1616; GFX900-NEXT: s_waitcnt vmcnt(0) 1617; GFX900-NEXT: s_setpc_b64 s[30:31] 1618; 1619; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_u_0: 1620; GFX90A: ; %bb.0: 1621; 
GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1622; GFX90A-NEXT: v_mov_b32_e32 v4, 0 1623; GFX90A-NEXT: ;;#ASMSTART 1624; GFX90A-NEXT: ; def v[0:1] 1625; GFX90A-NEXT: ;;#ASMEND 1626; GFX90A-NEXT: ;;#ASMSTART 1627; GFX90A-NEXT: ; def v[2:3] 1628; GFX90A-NEXT: ;;#ASMEND 1629; GFX90A-NEXT: v_alignbit_b32 v1, s4, v3, 16 1630; GFX90A-NEXT: global_store_short v4, v0, s[16:17] offset:4 1631; GFX90A-NEXT: global_store_dword v4, v1, s[16:17] 1632; GFX90A-NEXT: s_waitcnt vmcnt(0) 1633; GFX90A-NEXT: s_setpc_b64 s[30:31] 1634; 1635; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_u_0: 1636; GFX940: ; %bb.0: 1637; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1638; GFX940-NEXT: v_mov_b32_e32 v4, 0 1639; GFX940-NEXT: ;;#ASMSTART 1640; GFX940-NEXT: ; def v[0:1] 1641; GFX940-NEXT: ;;#ASMEND 1642; GFX940-NEXT: ;;#ASMSTART 1643; GFX940-NEXT: ; def v[2:3] 1644; GFX940-NEXT: ;;#ASMEND 1645; GFX940-NEXT: s_nop 0 1646; GFX940-NEXT: v_alignbit_b32 v1, s0, v3, 16 1647; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 1648; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 1649; GFX940-NEXT: s_waitcnt vmcnt(0) 1650; GFX940-NEXT: s_setpc_b64 s[30:31] 1651 %vec0 = call <4 x i16> asm "; def $0", "=v"() 1652 %vec1 = call <4 x i16> asm "; def $0", "=v"() 1653 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 poison, i32 0> 1654 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 1655 ret void 1656} 1657 1658define void @v_shuffle_v3i16_v4i16__7_1_0(ptr addrspace(1) inreg %ptr) { 1659; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_1_0: 1660; GFX900: ; %bb.0: 1661; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1662; GFX900-NEXT: ;;#ASMSTART 1663; GFX900-NEXT: ; def v[0:1] 1664; GFX900-NEXT: ;;#ASMEND 1665; GFX900-NEXT: v_mov_b32_e32 v3, 0 1666; GFX900-NEXT: ;;#ASMSTART 1667; GFX900-NEXT: ; def v[1:2] 1668; GFX900-NEXT: ;;#ASMEND 1669; GFX900-NEXT: s_mov_b32 s4, 0x7060302 1670; GFX900-NEXT: v_perm_b32 v1, v0, v2, s4 1671; GFX900-NEXT: 
global_store_short v3, v0, s[16:17] offset:4 1672; GFX900-NEXT: global_store_dword v3, v1, s[16:17] 1673; GFX900-NEXT: s_waitcnt vmcnt(0) 1674; GFX900-NEXT: s_setpc_b64 s[30:31] 1675; 1676; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_1_0: 1677; GFX90A: ; %bb.0: 1678; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1679; GFX90A-NEXT: v_mov_b32_e32 v4, 0 1680; GFX90A-NEXT: ;;#ASMSTART 1681; GFX90A-NEXT: ; def v[0:1] 1682; GFX90A-NEXT: ;;#ASMEND 1683; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 1684; GFX90A-NEXT: ;;#ASMSTART 1685; GFX90A-NEXT: ; def v[2:3] 1686; GFX90A-NEXT: ;;#ASMEND 1687; GFX90A-NEXT: v_perm_b32 v1, v0, v3, s4 1688; GFX90A-NEXT: global_store_short v4, v0, s[16:17] offset:4 1689; GFX90A-NEXT: global_store_dword v4, v1, s[16:17] 1690; GFX90A-NEXT: s_waitcnt vmcnt(0) 1691; GFX90A-NEXT: s_setpc_b64 s[30:31] 1692; 1693; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_1_0: 1694; GFX940: ; %bb.0: 1695; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1696; GFX940-NEXT: v_mov_b32_e32 v4, 0 1697; GFX940-NEXT: ;;#ASMSTART 1698; GFX940-NEXT: ; def v[0:1] 1699; GFX940-NEXT: ;;#ASMEND 1700; GFX940-NEXT: s_mov_b32 s2, 0x7060302 1701; GFX940-NEXT: ;;#ASMSTART 1702; GFX940-NEXT: ; def v[2:3] 1703; GFX940-NEXT: ;;#ASMEND 1704; GFX940-NEXT: s_nop 0 1705; GFX940-NEXT: v_perm_b32 v1, v0, v3, s2 1706; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 1707; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 1708; GFX940-NEXT: s_waitcnt vmcnt(0) 1709; GFX940-NEXT: s_setpc_b64 s[30:31] 1710 %vec0 = call <4 x i16> asm "; def $0", "=v"() 1711 %vec1 = call <4 x i16> asm "; def $0", "=v"() 1712 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 1, i32 0> 1713 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 1714 ret void 1715} 1716 1717define void @v_shuffle_v3i16_v4i16__7_2_0(ptr addrspace(1) inreg %ptr) { 1718; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_2_0: 1719; GFX900: ; %bb.0: 1720; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 
1721; GFX900-NEXT: v_mov_b32_e32 v4, 0 1722; GFX900-NEXT: ;;#ASMSTART 1723; GFX900-NEXT: ; def v[0:1] 1724; GFX900-NEXT: ;;#ASMEND 1725; GFX900-NEXT: ;;#ASMSTART 1726; GFX900-NEXT: ; def v[2:3] 1727; GFX900-NEXT: ;;#ASMEND 1728; GFX900-NEXT: v_alignbit_b32 v1, v1, v3, 16 1729; GFX900-NEXT: global_store_short v4, v0, s[16:17] offset:4 1730; GFX900-NEXT: global_store_dword v4, v1, s[16:17] 1731; GFX900-NEXT: s_waitcnt vmcnt(0) 1732; GFX900-NEXT: s_setpc_b64 s[30:31] 1733; 1734; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_2_0: 1735; GFX90A: ; %bb.0: 1736; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1737; GFX90A-NEXT: v_mov_b32_e32 v4, 0 1738; GFX90A-NEXT: ;;#ASMSTART 1739; GFX90A-NEXT: ; def v[0:1] 1740; GFX90A-NEXT: ;;#ASMEND 1741; GFX90A-NEXT: ;;#ASMSTART 1742; GFX90A-NEXT: ; def v[2:3] 1743; GFX90A-NEXT: ;;#ASMEND 1744; GFX90A-NEXT: v_alignbit_b32 v1, v1, v3, 16 1745; GFX90A-NEXT: global_store_short v4, v0, s[16:17] offset:4 1746; GFX90A-NEXT: global_store_dword v4, v1, s[16:17] 1747; GFX90A-NEXT: s_waitcnt vmcnt(0) 1748; GFX90A-NEXT: s_setpc_b64 s[30:31] 1749; 1750; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_2_0: 1751; GFX940: ; %bb.0: 1752; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1753; GFX940-NEXT: v_mov_b32_e32 v4, 0 1754; GFX940-NEXT: ;;#ASMSTART 1755; GFX940-NEXT: ; def v[0:1] 1756; GFX940-NEXT: ;;#ASMEND 1757; GFX940-NEXT: ;;#ASMSTART 1758; GFX940-NEXT: ; def v[2:3] 1759; GFX940-NEXT: ;;#ASMEND 1760; GFX940-NEXT: s_nop 0 1761; GFX940-NEXT: v_alignbit_b32 v1, v1, v3, 16 1762; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 1763; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 1764; GFX940-NEXT: s_waitcnt vmcnt(0) 1765; GFX940-NEXT: s_setpc_b64 s[30:31] 1766 %vec0 = call <4 x i16> asm "; def $0", "=v"() 1767 %vec1 = call <4 x i16> asm "; def $0", "=v"() 1768 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 2, i32 0> 1769 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 1770 ret void 1771} 
; Shuffle tests from @v_shuffle_v3i16_v4i16__7_3_0 onward. Each function obtains
; its <4 x i16> input(s) from inline asm ("; def $0" with an "=v" output
; constraint), applies one shufflevector to produce a <3 x i16>, and stores the
; result to %ptr. The name suffix spells the shuffle mask: __7_3_0 is <7, 3, 0>,
; and `u` marks a poison lane (as in __7_u_1 and __u_1_1). Per the LLVM LangRef,
; mask values 0-3 pick lanes of the first operand and 4-7 pick lanes of the
; second operand. The per-target assertion comments are autogenerated by
; utils/update_llc_test_checks.py (see the NOTE on the first line of this file),
; so regenerate them with that script rather than editing them by hand.
1772 1773define void @v_shuffle_v3i16_v4i16__7_3_0(ptr addrspace(1) inreg %ptr) { 1774; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_3_0: 1775; GFX900: ; %bb.0: 1776; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1777; GFX900-NEXT: v_mov_b32_e32 v4, 0 1778; GFX900-NEXT: ;;#ASMSTART 1779; GFX900-NEXT: ; def v[0:1] 1780; GFX900-NEXT: ;;#ASMEND 1781; GFX900-NEXT: s_mov_b32 s4, 0x7060302 1782; GFX900-NEXT: ;;#ASMSTART 1783; GFX900-NEXT: ; def v[2:3] 1784; GFX900-NEXT: ;;#ASMEND 1785; GFX900-NEXT: v_perm_b32 v1, v1, v3, s4 1786; GFX900-NEXT: global_store_short v4, v0, s[16:17] offset:4 1787; GFX900-NEXT: global_store_dword v4, v1, s[16:17] 1788; GFX900-NEXT: s_waitcnt vmcnt(0) 1789; GFX900-NEXT: s_setpc_b64 s[30:31] 1790; 1791; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_3_0: 1792; GFX90A: ; %bb.0: 1793; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1794; GFX90A-NEXT: v_mov_b32_e32 v4, 0 1795; GFX90A-NEXT: ;;#ASMSTART 1796; GFX90A-NEXT: ; def v[0:1] 1797; GFX90A-NEXT: ;;#ASMEND 1798; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 1799; GFX90A-NEXT: ;;#ASMSTART 1800; GFX90A-NEXT: ; def v[2:3] 1801; GFX90A-NEXT: ;;#ASMEND 1802; GFX90A-NEXT: v_perm_b32 v1, v1, v3, s4 1803; GFX90A-NEXT: global_store_short v4, v0, s[16:17] offset:4 1804; GFX90A-NEXT: global_store_dword v4, v1, s[16:17] 1805; GFX90A-NEXT: s_waitcnt vmcnt(0) 1806; GFX90A-NEXT: s_setpc_b64 s[30:31] 1807; 1808; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_3_0: 1809; GFX940: ; %bb.0: 1810; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1811; GFX940-NEXT: v_mov_b32_e32 v4, 0 1812; GFX940-NEXT: ;;#ASMSTART 1813; GFX940-NEXT: ; def v[0:1] 1814; GFX940-NEXT: ;;#ASMEND 1815; GFX940-NEXT: s_mov_b32 s2, 0x7060302 1816; GFX940-NEXT: ;;#ASMSTART 1817; GFX940-NEXT: ; def v[2:3] 1818; GFX940-NEXT: ;;#ASMEND 1819; GFX940-NEXT: s_nop 0 1820; GFX940-NEXT: v_perm_b32 v1, v1, v3, s2 1821; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 1822; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 1823; GFX940-NEXT: 
s_waitcnt vmcnt(0) 1824; GFX940-NEXT: s_setpc_b64 s[30:31] 1825 %vec0 = call <4 x i16> asm "; def $0", "=v"() 1826 %vec1 = call <4 x i16> asm "; def $0", "=v"() 1827 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 3, i32 0> 1828 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 1829 ret void 1830} 1831 1832define void @v_shuffle_v3i16_v4i16__7_4_0(ptr addrspace(1) inreg %ptr) { 1833; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_4_0: 1834; GFX900: ; %bb.0: 1835; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1836; GFX900-NEXT: ;;#ASMSTART 1837; GFX900-NEXT: ; def v[0:1] 1838; GFX900-NEXT: ;;#ASMEND 1839; GFX900-NEXT: v_mov_b32_e32 v3, 0 1840; GFX900-NEXT: ;;#ASMSTART 1841; GFX900-NEXT: ; def v[1:2] 1842; GFX900-NEXT: ;;#ASMEND 1843; GFX900-NEXT: v_alignbit_b32 v1, v1, v2, 16 1844; GFX900-NEXT: global_store_short v3, v0, s[16:17] offset:4 1845; GFX900-NEXT: global_store_dword v3, v1, s[16:17] 1846; GFX900-NEXT: s_waitcnt vmcnt(0) 1847; GFX900-NEXT: s_setpc_b64 s[30:31] 1848; 1849; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_4_0: 1850; GFX90A: ; %bb.0: 1851; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1852; GFX90A-NEXT: v_mov_b32_e32 v4, 0 1853; GFX90A-NEXT: ;;#ASMSTART 1854; GFX90A-NEXT: ; def v[0:1] 1855; GFX90A-NEXT: ;;#ASMEND 1856; GFX90A-NEXT: ;;#ASMSTART 1857; GFX90A-NEXT: ; def v[2:3] 1858; GFX90A-NEXT: ;;#ASMEND 1859; GFX90A-NEXT: v_alignbit_b32 v1, v2, v3, 16 1860; GFX90A-NEXT: global_store_short v4, v0, s[16:17] offset:4 1861; GFX90A-NEXT: global_store_dword v4, v1, s[16:17] 1862; GFX90A-NEXT: s_waitcnt vmcnt(0) 1863; GFX90A-NEXT: s_setpc_b64 s[30:31] 1864; 1865; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_4_0: 1866; GFX940: ; %bb.0: 1867; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1868; GFX940-NEXT: v_mov_b32_e32 v4, 0 1869; GFX940-NEXT: ;;#ASMSTART 1870; GFX940-NEXT: ; def v[0:1] 1871; GFX940-NEXT: ;;#ASMEND 1872; GFX940-NEXT: ;;#ASMSTART 1873; GFX940-NEXT: ; def v[2:3] 1874; GFX940-NEXT: ;;#ASMEND 1875; 
GFX940-NEXT: s_nop 0 1876; GFX940-NEXT: v_alignbit_b32 v1, v2, v3, 16 1877; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 1878; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 1879; GFX940-NEXT: s_waitcnt vmcnt(0) 1880; GFX940-NEXT: s_setpc_b64 s[30:31] 1881 %vec0 = call <4 x i16> asm "; def $0", "=v"() 1882 %vec1 = call <4 x i16> asm "; def $0", "=v"() 1883 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 4, i32 0> 1884 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 1885 ret void 1886} 1887 1888define void @v_shuffle_v3i16_v4i16__7_5_0(ptr addrspace(1) inreg %ptr) { 1889; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_5_0: 1890; GFX900: ; %bb.0: 1891; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1892; GFX900-NEXT: ;;#ASMSTART 1893; GFX900-NEXT: ; def v[0:1] 1894; GFX900-NEXT: ;;#ASMEND 1895; GFX900-NEXT: v_mov_b32_e32 v3, 0 1896; GFX900-NEXT: ;;#ASMSTART 1897; GFX900-NEXT: ; def v[1:2] 1898; GFX900-NEXT: ;;#ASMEND 1899; GFX900-NEXT: s_mov_b32 s4, 0x7060302 1900; GFX900-NEXT: v_perm_b32 v1, v1, v2, s4 1901; GFX900-NEXT: global_store_short v3, v0, s[16:17] offset:4 1902; GFX900-NEXT: global_store_dword v3, v1, s[16:17] 1903; GFX900-NEXT: s_waitcnt vmcnt(0) 1904; GFX900-NEXT: s_setpc_b64 s[30:31] 1905; 1906; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_5_0: 1907; GFX90A: ; %bb.0: 1908; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1909; GFX90A-NEXT: v_mov_b32_e32 v4, 0 1910; GFX90A-NEXT: ;;#ASMSTART 1911; GFX90A-NEXT: ; def v[0:1] 1912; GFX90A-NEXT: ;;#ASMEND 1913; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 1914; GFX90A-NEXT: ;;#ASMSTART 1915; GFX90A-NEXT: ; def v[2:3] 1916; GFX90A-NEXT: ;;#ASMEND 1917; GFX90A-NEXT: v_perm_b32 v1, v2, v3, s4 1918; GFX90A-NEXT: global_store_short v4, v0, s[16:17] offset:4 1919; GFX90A-NEXT: global_store_dword v4, v1, s[16:17] 1920; GFX90A-NEXT: s_waitcnt vmcnt(0) 1921; GFX90A-NEXT: s_setpc_b64 s[30:31] 1922; 1923; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_5_0: 1924; GFX940: ; %bb.0: 
1925; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1926; GFX940-NEXT: v_mov_b32_e32 v4, 0 1927; GFX940-NEXT: ;;#ASMSTART 1928; GFX940-NEXT: ; def v[0:1] 1929; GFX940-NEXT: ;;#ASMEND 1930; GFX940-NEXT: s_mov_b32 s2, 0x7060302 1931; GFX940-NEXT: ;;#ASMSTART 1932; GFX940-NEXT: ; def v[2:3] 1933; GFX940-NEXT: ;;#ASMEND 1934; GFX940-NEXT: s_nop 0 1935; GFX940-NEXT: v_perm_b32 v1, v2, v3, s2 1936; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 1937; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 1938; GFX940-NEXT: s_waitcnt vmcnt(0) 1939; GFX940-NEXT: s_setpc_b64 s[30:31] 1940 %vec0 = call <4 x i16> asm "; def $0", "=v"() 1941 %vec1 = call <4 x i16> asm "; def $0", "=v"() 1942 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 5, i32 0> 1943 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 1944 ret void 1945} 1946 1947define void @v_shuffle_v3i16_v4i16__7_6_0(ptr addrspace(1) inreg %ptr) { 1948; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_6_0: 1949; GFX900: ; %bb.0: 1950; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1951; GFX900-NEXT: ;;#ASMSTART 1952; GFX900-NEXT: ; def v[0:1] 1953; GFX900-NEXT: ;;#ASMEND 1954; GFX900-NEXT: v_mov_b32_e32 v3, 0 1955; GFX900-NEXT: ;;#ASMSTART 1956; GFX900-NEXT: ; def v[1:2] 1957; GFX900-NEXT: ;;#ASMEND 1958; GFX900-NEXT: v_alignbit_b32 v1, v2, v2, 16 1959; GFX900-NEXT: global_store_short v3, v0, s[16:17] offset:4 1960; GFX900-NEXT: global_store_dword v3, v1, s[16:17] 1961; GFX900-NEXT: s_waitcnt vmcnt(0) 1962; GFX900-NEXT: s_setpc_b64 s[30:31] 1963; 1964; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_6_0: 1965; GFX90A: ; %bb.0: 1966; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1967; GFX90A-NEXT: v_mov_b32_e32 v4, 0 1968; GFX90A-NEXT: ;;#ASMSTART 1969; GFX90A-NEXT: ; def v[0:1] 1970; GFX90A-NEXT: ;;#ASMEND 1971; GFX90A-NEXT: ;;#ASMSTART 1972; GFX90A-NEXT: ; def v[2:3] 1973; GFX90A-NEXT: ;;#ASMEND 1974; GFX90A-NEXT: v_alignbit_b32 v1, v3, v3, 16 1975; GFX90A-NEXT: 
global_store_short v4, v0, s[16:17] offset:4 1976; GFX90A-NEXT: global_store_dword v4, v1, s[16:17] 1977; GFX90A-NEXT: s_waitcnt vmcnt(0) 1978; GFX90A-NEXT: s_setpc_b64 s[30:31] 1979; 1980; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_6_0: 1981; GFX940: ; %bb.0: 1982; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1983; GFX940-NEXT: v_mov_b32_e32 v4, 0 1984; GFX940-NEXT: ;;#ASMSTART 1985; GFX940-NEXT: ; def v[0:1] 1986; GFX940-NEXT: ;;#ASMEND 1987; GFX940-NEXT: ;;#ASMSTART 1988; GFX940-NEXT: ; def v[2:3] 1989; GFX940-NEXT: ;;#ASMEND 1990; GFX940-NEXT: s_nop 0 1991; GFX940-NEXT: v_alignbit_b32 v1, v3, v3, 16 1992; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 1993; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 1994; GFX940-NEXT: s_waitcnt vmcnt(0) 1995; GFX940-NEXT: s_setpc_b64 s[30:31] 1996 %vec0 = call <4 x i16> asm "; def $0", "=v"() 1997 %vec1 = call <4 x i16> asm "; def $0", "=v"() 1998 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 6, i32 0> 1999 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 2000 ret void 2001} 2002 2003define void @v_shuffle_v3i16_v4i16__u_1_1(ptr addrspace(1) inreg %ptr) { 2004; GFX900-LABEL: v_shuffle_v3i16_v4i16__u_1_1: 2005; GFX900: ; %bb.0: 2006; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2007; GFX900-NEXT: v_mov_b32_e32 v2, 0 2008; GFX900-NEXT: ;;#ASMSTART 2009; GFX900-NEXT: ; def v[0:1] 2010; GFX900-NEXT: ;;#ASMEND 2011; GFX900-NEXT: global_store_short_d16_hi v2, v0, s[16:17] offset:4 2012; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 2013; GFX900-NEXT: s_waitcnt vmcnt(0) 2014; GFX900-NEXT: s_setpc_b64 s[30:31] 2015; 2016; GFX90A-LABEL: v_shuffle_v3i16_v4i16__u_1_1: 2017; GFX90A: ; %bb.0: 2018; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2019; GFX90A-NEXT: v_mov_b32_e32 v2, 0 2020; GFX90A-NEXT: ;;#ASMSTART 2021; GFX90A-NEXT: ; def v[0:1] 2022; GFX90A-NEXT: ;;#ASMEND 2023; GFX90A-NEXT: global_store_short_d16_hi v2, v0, s[16:17] offset:4 2024; 
GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 2025; GFX90A-NEXT: s_waitcnt vmcnt(0) 2026; GFX90A-NEXT: s_setpc_b64 s[30:31] 2027; 2028; GFX940-LABEL: v_shuffle_v3i16_v4i16__u_1_1: 2029; GFX940: ; %bb.0: 2030; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2031; GFX940-NEXT: v_mov_b32_e32 v2, 0 2032; GFX940-NEXT: ;;#ASMSTART 2033; GFX940-NEXT: ; def v[0:1] 2034; GFX940-NEXT: ;;#ASMEND 2035; GFX940-NEXT: global_store_short_d16_hi v2, v0, s[0:1] offset:4 sc0 sc1 2036; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 2037; GFX940-NEXT: s_waitcnt vmcnt(0) 2038; GFX940-NEXT: s_setpc_b64 s[30:31] 2039 %vec0 = call <4 x i16> asm "; def $0", "=v"() 2040 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 poison, i32 1, i32 1> 2041 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 2042 ret void 2043} 2044 2045define void @v_shuffle_v3i16_v4i16__0_1_1(ptr addrspace(1) inreg %ptr) { 2046; GFX900-LABEL: v_shuffle_v3i16_v4i16__0_1_1: 2047; GFX900: ; %bb.0: 2048; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2049; GFX900-NEXT: v_mov_b32_e32 v2, 0 2050; GFX900-NEXT: ;;#ASMSTART 2051; GFX900-NEXT: ; def v[0:1] 2052; GFX900-NEXT: ;;#ASMEND 2053; GFX900-NEXT: global_store_short_d16_hi v2, v0, s[16:17] offset:4 2054; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 2055; GFX900-NEXT: s_waitcnt vmcnt(0) 2056; GFX900-NEXT: s_setpc_b64 s[30:31] 2057; 2058; GFX90A-LABEL: v_shuffle_v3i16_v4i16__0_1_1: 2059; GFX90A: ; %bb.0: 2060; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2061; GFX90A-NEXT: v_mov_b32_e32 v2, 0 2062; GFX90A-NEXT: ;;#ASMSTART 2063; GFX90A-NEXT: ; def v[0:1] 2064; GFX90A-NEXT: ;;#ASMEND 2065; GFX90A-NEXT: global_store_short_d16_hi v2, v0, s[16:17] offset:4 2066; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 2067; GFX90A-NEXT: s_waitcnt vmcnt(0) 2068; GFX90A-NEXT: s_setpc_b64 s[30:31] 2069; 2070; GFX940-LABEL: v_shuffle_v3i16_v4i16__0_1_1: 2071; GFX940: ; %bb.0: 2072; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) 2073; GFX940-NEXT: v_mov_b32_e32 v2, 0 2074; GFX940-NEXT: ;;#ASMSTART 2075; GFX940-NEXT: ; def v[0:1] 2076; GFX940-NEXT: ;;#ASMEND 2077; GFX940-NEXT: global_store_short_d16_hi v2, v0, s[0:1] offset:4 sc0 sc1 2078; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 2079; GFX940-NEXT: s_waitcnt vmcnt(0) 2080; GFX940-NEXT: s_setpc_b64 s[30:31] 2081 %vec0 = call <4 x i16> asm "; def $0", "=v"() 2082 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 1> 2083 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 2084 ret void 2085} 2086 2087define void @v_shuffle_v3i16_v4i16__1_1_1(ptr addrspace(1) inreg %ptr) { 2088; GFX900-LABEL: v_shuffle_v3i16_v4i16__1_1_1: 2089; GFX900: ; %bb.0: 2090; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2091; GFX900-NEXT: ;;#ASMSTART 2092; GFX900-NEXT: ; def v[0:1] 2093; GFX900-NEXT: ;;#ASMEND 2094; GFX900-NEXT: s_mov_b32 s4, 0x7060302 2095; GFX900-NEXT: v_mov_b32_e32 v2, 0 2096; GFX900-NEXT: v_perm_b32 v1, v0, v0, s4 2097; GFX900-NEXT: v_lshrrev_b32_e32 v0, 16, v0 2098; GFX900-NEXT: global_store_dword v2, v1, s[16:17] 2099; GFX900-NEXT: global_store_short v2, v0, s[16:17] offset:4 2100; GFX900-NEXT: s_waitcnt vmcnt(0) 2101; GFX900-NEXT: s_setpc_b64 s[30:31] 2102; 2103; GFX90A-LABEL: v_shuffle_v3i16_v4i16__1_1_1: 2104; GFX90A: ; %bb.0: 2105; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2106; GFX90A-NEXT: ;;#ASMSTART 2107; GFX90A-NEXT: ; def v[0:1] 2108; GFX90A-NEXT: ;;#ASMEND 2109; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 2110; GFX90A-NEXT: v_mov_b32_e32 v2, 0 2111; GFX90A-NEXT: v_perm_b32 v1, v0, v0, s4 2112; GFX90A-NEXT: v_lshrrev_b32_e32 v0, 16, v0 2113; GFX90A-NEXT: global_store_dword v2, v1, s[16:17] 2114; GFX90A-NEXT: global_store_short v2, v0, s[16:17] offset:4 2115; GFX90A-NEXT: s_waitcnt vmcnt(0) 2116; GFX90A-NEXT: s_setpc_b64 s[30:31] 2117; 2118; GFX940-LABEL: v_shuffle_v3i16_v4i16__1_1_1: 2119; GFX940: ; %bb.0: 2120; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 
2121; GFX940-NEXT: ;;#ASMSTART 2122; GFX940-NEXT: ; def v[0:1] 2123; GFX940-NEXT: ;;#ASMEND 2124; GFX940-NEXT: s_mov_b32 s2, 0x7060302 2125; GFX940-NEXT: v_mov_b32_e32 v2, 0 2126; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 2127; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 2128; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 2129; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 2130; GFX940-NEXT: s_waitcnt vmcnt(0) 2131; GFX940-NEXT: s_setpc_b64 s[30:31] 2132 %vec0 = call <4 x i16> asm "; def $0", "=v"() 2133 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 1, i32 1, i32 1> 2134 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 2135 ret void 2136} 2137 2138define void @v_shuffle_v3i16_v4i16__2_1_1(ptr addrspace(1) inreg %ptr) { 2139; GFX900-LABEL: v_shuffle_v3i16_v4i16__2_1_1: 2140; GFX900: ; %bb.0: 2141; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2142; GFX900-NEXT: ;;#ASMSTART 2143; GFX900-NEXT: ; def v[0:1] 2144; GFX900-NEXT: ;;#ASMEND 2145; GFX900-NEXT: s_mov_b32 s4, 0xffff 2146; GFX900-NEXT: v_mov_b32_e32 v2, 0 2147; GFX900-NEXT: v_bfi_b32 v1, s4, v1, v0 2148; GFX900-NEXT: v_lshrrev_b32_e32 v0, 16, v0 2149; GFX900-NEXT: global_store_short v2, v0, s[16:17] offset:4 2150; GFX900-NEXT: global_store_dword v2, v1, s[16:17] 2151; GFX900-NEXT: s_waitcnt vmcnt(0) 2152; GFX900-NEXT: s_setpc_b64 s[30:31] 2153; 2154; GFX90A-LABEL: v_shuffle_v3i16_v4i16__2_1_1: 2155; GFX90A: ; %bb.0: 2156; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2157; GFX90A-NEXT: ;;#ASMSTART 2158; GFX90A-NEXT: ; def v[0:1] 2159; GFX90A-NEXT: ;;#ASMEND 2160; GFX90A-NEXT: s_mov_b32 s4, 0xffff 2161; GFX90A-NEXT: v_mov_b32_e32 v2, 0 2162; GFX90A-NEXT: v_bfi_b32 v1, s4, v1, v0 2163; GFX90A-NEXT: v_lshrrev_b32_e32 v0, 16, v0 2164; GFX90A-NEXT: global_store_short v2, v0, s[16:17] offset:4 2165; GFX90A-NEXT: global_store_dword v2, v1, s[16:17] 2166; GFX90A-NEXT: s_waitcnt vmcnt(0) 2167; GFX90A-NEXT: s_setpc_b64 s[30:31] 2168; 2169; GFX940-LABEL: 
v_shuffle_v3i16_v4i16__2_1_1: 2170; GFX940: ; %bb.0: 2171; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2172; GFX940-NEXT: ;;#ASMSTART 2173; GFX940-NEXT: ; def v[0:1] 2174; GFX940-NEXT: ;;#ASMEND 2175; GFX940-NEXT: s_mov_b32 s2, 0xffff 2176; GFX940-NEXT: v_mov_b32_e32 v2, 0 2177; GFX940-NEXT: v_bfi_b32 v1, s2, v1, v0 2178; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 2179; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 2180; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 2181; GFX940-NEXT: s_waitcnt vmcnt(0) 2182; GFX940-NEXT: s_setpc_b64 s[30:31] 2183 %vec0 = call <4 x i16> asm "; def $0", "=v"() 2184 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 2, i32 1, i32 1> 2185 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 2186 ret void 2187} 2188 2189define void @v_shuffle_v3i16_v4i16__3_1_1(ptr addrspace(1) inreg %ptr) { 2190; GFX900-LABEL: v_shuffle_v3i16_v4i16__3_1_1: 2191; GFX900: ; %bb.0: 2192; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2193; GFX900-NEXT: ;;#ASMSTART 2194; GFX900-NEXT: ; def v[0:1] 2195; GFX900-NEXT: ;;#ASMEND 2196; GFX900-NEXT: s_mov_b32 s4, 0x7060302 2197; GFX900-NEXT: v_mov_b32_e32 v2, 0 2198; GFX900-NEXT: v_perm_b32 v1, v0, v1, s4 2199; GFX900-NEXT: v_lshrrev_b32_e32 v0, 16, v0 2200; GFX900-NEXT: global_store_dword v2, v1, s[16:17] 2201; GFX900-NEXT: global_store_short v2, v0, s[16:17] offset:4 2202; GFX900-NEXT: s_waitcnt vmcnt(0) 2203; GFX900-NEXT: s_setpc_b64 s[30:31] 2204; 2205; GFX90A-LABEL: v_shuffle_v3i16_v4i16__3_1_1: 2206; GFX90A: ; %bb.0: 2207; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2208; GFX90A-NEXT: ;;#ASMSTART 2209; GFX90A-NEXT: ; def v[0:1] 2210; GFX90A-NEXT: ;;#ASMEND 2211; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 2212; GFX90A-NEXT: v_mov_b32_e32 v2, 0 2213; GFX90A-NEXT: v_perm_b32 v1, v0, v1, s4 2214; GFX90A-NEXT: v_lshrrev_b32_e32 v0, 16, v0 2215; GFX90A-NEXT: global_store_dword v2, v1, s[16:17] 2216; GFX90A-NEXT: global_store_short v2, v0, 
s[16:17] offset:4 2217; GFX90A-NEXT: s_waitcnt vmcnt(0) 2218; GFX90A-NEXT: s_setpc_b64 s[30:31] 2219; 2220; GFX940-LABEL: v_shuffle_v3i16_v4i16__3_1_1: 2221; GFX940: ; %bb.0: 2222; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2223; GFX940-NEXT: ;;#ASMSTART 2224; GFX940-NEXT: ; def v[0:1] 2225; GFX940-NEXT: ;;#ASMEND 2226; GFX940-NEXT: s_mov_b32 s2, 0x7060302 2227; GFX940-NEXT: v_mov_b32_e32 v2, 0 2228; GFX940-NEXT: v_perm_b32 v1, v0, v1, s2 2229; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 2230; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 2231; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 2232; GFX940-NEXT: s_waitcnt vmcnt(0) 2233; GFX940-NEXT: s_setpc_b64 s[30:31] 2234 %vec0 = call <4 x i16> asm "; def $0", "=v"() 2235 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 3, i32 1, i32 1> 2236 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 2237 ret void 2238} 2239 2240define void @v_shuffle_v3i16_v4i16__4_1_1(ptr addrspace(1) inreg %ptr) { 2241; GFX900-LABEL: v_shuffle_v3i16_v4i16__4_1_1: 2242; GFX900: ; %bb.0: 2243; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2244; GFX900-NEXT: v_mov_b32_e32 v2, 0 2245; GFX900-NEXT: ;;#ASMSTART 2246; GFX900-NEXT: ; def v[0:1] 2247; GFX900-NEXT: ;;#ASMEND 2248; GFX900-NEXT: global_store_short_d16_hi v2, v0, s[16:17] offset:4 2249; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 2250; GFX900-NEXT: s_waitcnt vmcnt(0) 2251; GFX900-NEXT: s_setpc_b64 s[30:31] 2252; 2253; GFX90A-LABEL: v_shuffle_v3i16_v4i16__4_1_1: 2254; GFX90A: ; %bb.0: 2255; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2256; GFX90A-NEXT: v_mov_b32_e32 v2, 0 2257; GFX90A-NEXT: ;;#ASMSTART 2258; GFX90A-NEXT: ; def v[0:1] 2259; GFX90A-NEXT: ;;#ASMEND 2260; GFX90A-NEXT: global_store_short_d16_hi v2, v0, s[16:17] offset:4 2261; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 2262; GFX90A-NEXT: s_waitcnt vmcnt(0) 2263; GFX90A-NEXT: s_setpc_b64 s[30:31] 2264; 2265; GFX940-LABEL: 
v_shuffle_v3i16_v4i16__4_1_1: 2266; GFX940: ; %bb.0: 2267; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2268; GFX940-NEXT: v_mov_b32_e32 v2, 0 2269; GFX940-NEXT: ;;#ASMSTART 2270; GFX940-NEXT: ; def v[0:1] 2271; GFX940-NEXT: ;;#ASMEND 2272; GFX940-NEXT: global_store_short_d16_hi v2, v0, s[0:1] offset:4 sc0 sc1 2273; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 2274; GFX940-NEXT: s_waitcnt vmcnt(0) 2275; GFX940-NEXT: s_setpc_b64 s[30:31] 2276 %vec0 = call <4 x i16> asm "; def $0", "=v"() 2277 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 4, i32 1, i32 1> 2278 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 2279 ret void 2280} 2281 2282define void @v_shuffle_v3i16_v4i16__5_1_1(ptr addrspace(1) inreg %ptr) { 2283; GFX900-LABEL: v_shuffle_v3i16_v4i16__5_1_1: 2284; GFX900: ; %bb.0: 2285; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2286; GFX900-NEXT: ;;#ASMSTART 2287; GFX900-NEXT: ; def v[0:1] 2288; GFX900-NEXT: ;;#ASMEND 2289; GFX900-NEXT: ;;#ASMSTART 2290; GFX900-NEXT: ; def v[1:2] 2291; GFX900-NEXT: ;;#ASMEND 2292; GFX900-NEXT: s_mov_b32 s4, 0x7060302 2293; GFX900-NEXT: v_mov_b32_e32 v3, 0 2294; GFX900-NEXT: v_perm_b32 v1, v0, v1, s4 2295; GFX900-NEXT: v_lshrrev_b32_e32 v0, 16, v0 2296; GFX900-NEXT: global_store_dword v3, v1, s[16:17] 2297; GFX900-NEXT: global_store_short v3, v0, s[16:17] offset:4 2298; GFX900-NEXT: s_waitcnt vmcnt(0) 2299; GFX900-NEXT: s_setpc_b64 s[30:31] 2300; 2301; GFX90A-LABEL: v_shuffle_v3i16_v4i16__5_1_1: 2302; GFX90A: ; %bb.0: 2303; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2304; GFX90A-NEXT: ;;#ASMSTART 2305; GFX90A-NEXT: ; def v[0:1] 2306; GFX90A-NEXT: ;;#ASMEND 2307; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 2308; GFX90A-NEXT: v_mov_b32_e32 v4, 0 2309; GFX90A-NEXT: ;;#ASMSTART 2310; GFX90A-NEXT: ; def v[2:3] 2311; GFX90A-NEXT: ;;#ASMEND 2312; GFX90A-NEXT: v_perm_b32 v1, v0, v2, s4 2313; GFX90A-NEXT: v_lshrrev_b32_e32 v0, 16, v0 2314; GFX90A-NEXT: global_store_dword v4, v1, 
s[16:17] 2315; GFX90A-NEXT: global_store_short v4, v0, s[16:17] offset:4 2316; GFX90A-NEXT: s_waitcnt vmcnt(0) 2317; GFX90A-NEXT: s_setpc_b64 s[30:31] 2318; 2319; GFX940-LABEL: v_shuffle_v3i16_v4i16__5_1_1: 2320; GFX940: ; %bb.0: 2321; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2322; GFX940-NEXT: ;;#ASMSTART 2323; GFX940-NEXT: ; def v[0:1] 2324; GFX940-NEXT: ;;#ASMEND 2325; GFX940-NEXT: s_mov_b32 s2, 0x7060302 2326; GFX940-NEXT: v_mov_b32_e32 v4, 0 2327; GFX940-NEXT: ;;#ASMSTART 2328; GFX940-NEXT: ; def v[2:3] 2329; GFX940-NEXT: ;;#ASMEND 2330; GFX940-NEXT: s_nop 0 2331; GFX940-NEXT: v_perm_b32 v1, v0, v2, s2 2332; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 2333; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 2334; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 2335; GFX940-NEXT: s_waitcnt vmcnt(0) 2336; GFX940-NEXT: s_setpc_b64 s[30:31] 2337 %vec0 = call <4 x i16> asm "; def $0", "=v"() 2338 %vec1 = call <4 x i16> asm "; def $0", "=v"() 2339 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 5, i32 1, i32 1> 2340 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 2341 ret void 2342} 2343 2344define void @v_shuffle_v3i16_v4i16__6_1_1(ptr addrspace(1) inreg %ptr) { 2345; GFX900-LABEL: v_shuffle_v3i16_v4i16__6_1_1: 2346; GFX900: ; %bb.0: 2347; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2348; GFX900-NEXT: ;;#ASMSTART 2349; GFX900-NEXT: ; def v[0:1] 2350; GFX900-NEXT: ;;#ASMEND 2351; GFX900-NEXT: ;;#ASMSTART 2352; GFX900-NEXT: ; def v[1:2] 2353; GFX900-NEXT: ;;#ASMEND 2354; GFX900-NEXT: s_mov_b32 s4, 0xffff 2355; GFX900-NEXT: v_mov_b32_e32 v3, 0 2356; GFX900-NEXT: v_bfi_b32 v1, s4, v2, v0 2357; GFX900-NEXT: v_lshrrev_b32_e32 v0, 16, v0 2358; GFX900-NEXT: global_store_short v3, v0, s[16:17] offset:4 2359; GFX900-NEXT: global_store_dword v3, v1, s[16:17] 2360; GFX900-NEXT: s_waitcnt vmcnt(0) 2361; GFX900-NEXT: s_setpc_b64 s[30:31] 2362; 2363; GFX90A-LABEL: v_shuffle_v3i16_v4i16__6_1_1: 2364; GFX90A: ; 
%bb.0: 2365; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2366; GFX90A-NEXT: ;;#ASMSTART 2367; GFX90A-NEXT: ; def v[0:1] 2368; GFX90A-NEXT: ;;#ASMEND 2369; GFX90A-NEXT: s_mov_b32 s4, 0xffff 2370; GFX90A-NEXT: v_mov_b32_e32 v4, 0 2371; GFX90A-NEXT: ;;#ASMSTART 2372; GFX90A-NEXT: ; def v[2:3] 2373; GFX90A-NEXT: ;;#ASMEND 2374; GFX90A-NEXT: v_bfi_b32 v1, s4, v3, v0 2375; GFX90A-NEXT: v_lshrrev_b32_e32 v0, 16, v0 2376; GFX90A-NEXT: global_store_short v4, v0, s[16:17] offset:4 2377; GFX90A-NEXT: global_store_dword v4, v1, s[16:17] 2378; GFX90A-NEXT: s_waitcnt vmcnt(0) 2379; GFX90A-NEXT: s_setpc_b64 s[30:31] 2380; 2381; GFX940-LABEL: v_shuffle_v3i16_v4i16__6_1_1: 2382; GFX940: ; %bb.0: 2383; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2384; GFX940-NEXT: ;;#ASMSTART 2385; GFX940-NEXT: ; def v[0:1] 2386; GFX940-NEXT: ;;#ASMEND 2387; GFX940-NEXT: s_mov_b32 s2, 0xffff 2388; GFX940-NEXT: v_mov_b32_e32 v4, 0 2389; GFX940-NEXT: ;;#ASMSTART 2390; GFX940-NEXT: ; def v[2:3] 2391; GFX940-NEXT: ;;#ASMEND 2392; GFX940-NEXT: s_nop 0 2393; GFX940-NEXT: v_bfi_b32 v1, s2, v3, v0 2394; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 2395; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 2396; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 2397; GFX940-NEXT: s_waitcnt vmcnt(0) 2398; GFX940-NEXT: s_setpc_b64 s[30:31] 2399 %vec0 = call <4 x i16> asm "; def $0", "=v"() 2400 %vec1 = call <4 x i16> asm "; def $0", "=v"() 2401 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 6, i32 1, i32 1> 2402 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 2403 ret void 2404} 2405 2406define void @v_shuffle_v3i16_v4i16__7_1_1(ptr addrspace(1) inreg %ptr) { 2407; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_1_1: 2408; GFX900: ; %bb.0: 2409; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2410; GFX900-NEXT: ;;#ASMSTART 2411; GFX900-NEXT: ; def v[0:1] 2412; GFX900-NEXT: ;;#ASMEND 2413; GFX900-NEXT: ;;#ASMSTART 2414; GFX900-NEXT: ; def v[1:2] 2415; 
GFX900-NEXT: ;;#ASMEND 2416; GFX900-NEXT: s_mov_b32 s4, 0x7060302 2417; GFX900-NEXT: v_mov_b32_e32 v3, 0 2418; GFX900-NEXT: v_perm_b32 v1, v0, v2, s4 2419; GFX900-NEXT: v_lshrrev_b32_e32 v0, 16, v0 2420; GFX900-NEXT: global_store_dword v3, v1, s[16:17] 2421; GFX900-NEXT: global_store_short v3, v0, s[16:17] offset:4 2422; GFX900-NEXT: s_waitcnt vmcnt(0) 2423; GFX900-NEXT: s_setpc_b64 s[30:31] 2424; 2425; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_1_1: 2426; GFX90A: ; %bb.0: 2427; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2428; GFX90A-NEXT: ;;#ASMSTART 2429; GFX90A-NEXT: ; def v[0:1] 2430; GFX90A-NEXT: ;;#ASMEND 2431; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 2432; GFX90A-NEXT: v_mov_b32_e32 v4, 0 2433; GFX90A-NEXT: ;;#ASMSTART 2434; GFX90A-NEXT: ; def v[2:3] 2435; GFX90A-NEXT: ;;#ASMEND 2436; GFX90A-NEXT: v_perm_b32 v1, v0, v3, s4 2437; GFX90A-NEXT: v_lshrrev_b32_e32 v0, 16, v0 2438; GFX90A-NEXT: global_store_dword v4, v1, s[16:17] 2439; GFX90A-NEXT: global_store_short v4, v0, s[16:17] offset:4 2440; GFX90A-NEXT: s_waitcnt vmcnt(0) 2441; GFX90A-NEXT: s_setpc_b64 s[30:31] 2442; 2443; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_1_1: 2444; GFX940: ; %bb.0: 2445; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2446; GFX940-NEXT: ;;#ASMSTART 2447; GFX940-NEXT: ; def v[0:1] 2448; GFX940-NEXT: ;;#ASMEND 2449; GFX940-NEXT: s_mov_b32 s2, 0x7060302 2450; GFX940-NEXT: v_mov_b32_e32 v4, 0 2451; GFX940-NEXT: ;;#ASMSTART 2452; GFX940-NEXT: ; def v[2:3] 2453; GFX940-NEXT: ;;#ASMEND 2454; GFX940-NEXT: s_nop 0 2455; GFX940-NEXT: v_perm_b32 v1, v0, v3, s2 2456; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 2457; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 2458; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 2459; GFX940-NEXT: s_waitcnt vmcnt(0) 2460; GFX940-NEXT: s_setpc_b64 s[30:31] 2461 %vec0 = call <4 x i16> asm "; def $0", "=v"() 2462 %vec1 = call <4 x i16> asm "; def $0", "=v"() 2463 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 
7, i32 1, i32 1> 2464 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 2465 ret void 2466} 2467 2468define void @v_shuffle_v3i16_v4i16__7_u_1(ptr addrspace(1) inreg %ptr) { 2469; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_u_1: 2470; GFX900: ; %bb.0: 2471; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2472; GFX900-NEXT: ;;#ASMSTART 2473; GFX900-NEXT: ; def v[0:1] 2474; GFX900-NEXT: ;;#ASMEND 2475; GFX900-NEXT: v_mov_b32_e32 v3, 0 2476; GFX900-NEXT: ;;#ASMSTART 2477; GFX900-NEXT: ; def v[1:2] 2478; GFX900-NEXT: ;;#ASMEND 2479; GFX900-NEXT: v_alignbit_b32 v1, s4, v2, 16 2480; GFX900-NEXT: global_store_short_d16_hi v3, v0, s[16:17] offset:4 2481; GFX900-NEXT: global_store_dword v3, v1, s[16:17] 2482; GFX900-NEXT: s_waitcnt vmcnt(0) 2483; GFX900-NEXT: s_setpc_b64 s[30:31] 2484; 2485; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_u_1: 2486; GFX90A: ; %bb.0: 2487; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2488; GFX90A-NEXT: v_mov_b32_e32 v4, 0 2489; GFX90A-NEXT: ;;#ASMSTART 2490; GFX90A-NEXT: ; def v[0:1] 2491; GFX90A-NEXT: ;;#ASMEND 2492; GFX90A-NEXT: ;;#ASMSTART 2493; GFX90A-NEXT: ; def v[2:3] 2494; GFX90A-NEXT: ;;#ASMEND 2495; GFX90A-NEXT: v_alignbit_b32 v1, s4, v3, 16 2496; GFX90A-NEXT: global_store_short_d16_hi v4, v0, s[16:17] offset:4 2497; GFX90A-NEXT: global_store_dword v4, v1, s[16:17] 2498; GFX90A-NEXT: s_waitcnt vmcnt(0) 2499; GFX90A-NEXT: s_setpc_b64 s[30:31] 2500; 2501; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_u_1: 2502; GFX940: ; %bb.0: 2503; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2504; GFX940-NEXT: v_mov_b32_e32 v4, 0 2505; GFX940-NEXT: ;;#ASMSTART 2506; GFX940-NEXT: ; def v[0:1] 2507; GFX940-NEXT: ;;#ASMEND 2508; GFX940-NEXT: ;;#ASMSTART 2509; GFX940-NEXT: ; def v[2:3] 2510; GFX940-NEXT: ;;#ASMEND 2511; GFX940-NEXT: s_nop 0 2512; GFX940-NEXT: v_alignbit_b32 v1, s0, v3, 16 2513; GFX940-NEXT: global_store_short_d16_hi v4, v0, s[0:1] offset:4 sc0 sc1 2514; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 2515; GFX940-NEXT: 
s_waitcnt vmcnt(0) 2516; GFX940-NEXT: s_setpc_b64 s[30:31] 2517 %vec0 = call <4 x i16> asm "; def $0", "=v"() 2518 %vec1 = call <4 x i16> asm "; def $0", "=v"() 2519 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 poison, i32 1> 2520 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 2521 ret void 2522} 2523 2524define void @v_shuffle_v3i16_v4i16__7_0_1(ptr addrspace(1) inreg %ptr) { 2525; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_0_1: 2526; GFX900: ; %bb.0: 2527; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2528; GFX900-NEXT: ;;#ASMSTART 2529; GFX900-NEXT: ; def v[0:1] 2530; GFX900-NEXT: ;;#ASMEND 2531; GFX900-NEXT: v_mov_b32_e32 v3, 0 2532; GFX900-NEXT: ;;#ASMSTART 2533; GFX900-NEXT: ; def v[1:2] 2534; GFX900-NEXT: ;;#ASMEND 2535; GFX900-NEXT: v_alignbit_b32 v1, v0, v2, 16 2536; GFX900-NEXT: global_store_short_d16_hi v3, v0, s[16:17] offset:4 2537; GFX900-NEXT: global_store_dword v3, v1, s[16:17] 2538; GFX900-NEXT: s_waitcnt vmcnt(0) 2539; GFX900-NEXT: s_setpc_b64 s[30:31] 2540; 2541; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_0_1: 2542; GFX90A: ; %bb.0: 2543; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2544; GFX90A-NEXT: v_mov_b32_e32 v4, 0 2545; GFX90A-NEXT: ;;#ASMSTART 2546; GFX90A-NEXT: ; def v[0:1] 2547; GFX90A-NEXT: ;;#ASMEND 2548; GFX90A-NEXT: ;;#ASMSTART 2549; GFX90A-NEXT: ; def v[2:3] 2550; GFX90A-NEXT: ;;#ASMEND 2551; GFX90A-NEXT: v_alignbit_b32 v1, v0, v3, 16 2552; GFX90A-NEXT: global_store_short_d16_hi v4, v0, s[16:17] offset:4 2553; GFX90A-NEXT: global_store_dword v4, v1, s[16:17] 2554; GFX90A-NEXT: s_waitcnt vmcnt(0) 2555; GFX90A-NEXT: s_setpc_b64 s[30:31] 2556; 2557; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_0_1: 2558; GFX940: ; %bb.0: 2559; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2560; GFX940-NEXT: v_mov_b32_e32 v4, 0 2561; GFX940-NEXT: ;;#ASMSTART 2562; GFX940-NEXT: ; def v[0:1] 2563; GFX940-NEXT: ;;#ASMEND 2564; GFX940-NEXT: ;;#ASMSTART 2565; GFX940-NEXT: ; def v[2:3] 2566; GFX940-NEXT: 
;;#ASMEND 2567; GFX940-NEXT: s_nop 0 2568; GFX940-NEXT: v_alignbit_b32 v1, v0, v3, 16 2569; GFX940-NEXT: global_store_short_d16_hi v4, v0, s[0:1] offset:4 sc0 sc1 2570; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 2571; GFX940-NEXT: s_waitcnt vmcnt(0) 2572; GFX940-NEXT: s_setpc_b64 s[30:31] 2573 %vec0 = call <4 x i16> asm "; def $0", "=v"() 2574 %vec1 = call <4 x i16> asm "; def $0", "=v"() 2575 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 0, i32 1> 2576 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 2577 ret void 2578} 2579 2580define void @v_shuffle_v3i16_v4i16__7_2_1(ptr addrspace(1) inreg %ptr) { 2581; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_2_1: 2582; GFX900: ; %bb.0: 2583; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2584; GFX900-NEXT: v_mov_b32_e32 v4, 0 2585; GFX900-NEXT: ;;#ASMSTART 2586; GFX900-NEXT: ; def v[0:1] 2587; GFX900-NEXT: ;;#ASMEND 2588; GFX900-NEXT: ;;#ASMSTART 2589; GFX900-NEXT: ; def v[2:3] 2590; GFX900-NEXT: ;;#ASMEND 2591; GFX900-NEXT: v_alignbit_b32 v1, v1, v3, 16 2592; GFX900-NEXT: global_store_short_d16_hi v4, v0, s[16:17] offset:4 2593; GFX900-NEXT: global_store_dword v4, v1, s[16:17] 2594; GFX900-NEXT: s_waitcnt vmcnt(0) 2595; GFX900-NEXT: s_setpc_b64 s[30:31] 2596; 2597; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_2_1: 2598; GFX90A: ; %bb.0: 2599; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2600; GFX90A-NEXT: v_mov_b32_e32 v4, 0 2601; GFX90A-NEXT: ;;#ASMSTART 2602; GFX90A-NEXT: ; def v[0:1] 2603; GFX90A-NEXT: ;;#ASMEND 2604; GFX90A-NEXT: ;;#ASMSTART 2605; GFX90A-NEXT: ; def v[2:3] 2606; GFX90A-NEXT: ;;#ASMEND 2607; GFX90A-NEXT: v_alignbit_b32 v1, v1, v3, 16 2608; GFX90A-NEXT: global_store_short_d16_hi v4, v0, s[16:17] offset:4 2609; GFX90A-NEXT: global_store_dword v4, v1, s[16:17] 2610; GFX90A-NEXT: s_waitcnt vmcnt(0) 2611; GFX90A-NEXT: s_setpc_b64 s[30:31] 2612; 2613; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_2_1: 2614; GFX940: ; %bb.0: 2615; GFX940-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) 2616; GFX940-NEXT: v_mov_b32_e32 v4, 0 2617; GFX940-NEXT: ;;#ASMSTART 2618; GFX940-NEXT: ; def v[0:1] 2619; GFX940-NEXT: ;;#ASMEND 2620; GFX940-NEXT: ;;#ASMSTART 2621; GFX940-NEXT: ; def v[2:3] 2622; GFX940-NEXT: ;;#ASMEND 2623; GFX940-NEXT: s_nop 0 2624; GFX940-NEXT: v_alignbit_b32 v1, v1, v3, 16 2625; GFX940-NEXT: global_store_short_d16_hi v4, v0, s[0:1] offset:4 sc0 sc1 2626; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 2627; GFX940-NEXT: s_waitcnt vmcnt(0) 2628; GFX940-NEXT: s_setpc_b64 s[30:31] 2629 %vec0 = call <4 x i16> asm "; def $0", "=v"() 2630 %vec1 = call <4 x i16> asm "; def $0", "=v"() 2631 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 2, i32 1> 2632 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 2633 ret void 2634} 2635 2636define void @v_shuffle_v3i16_v4i16__7_3_1(ptr addrspace(1) inreg %ptr) { 2637; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_3_1: 2638; GFX900: ; %bb.0: 2639; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2640; GFX900-NEXT: v_mov_b32_e32 v4, 0 2641; GFX900-NEXT: ;;#ASMSTART 2642; GFX900-NEXT: ; def v[0:1] 2643; GFX900-NEXT: ;;#ASMEND 2644; GFX900-NEXT: s_mov_b32 s4, 0x7060302 2645; GFX900-NEXT: ;;#ASMSTART 2646; GFX900-NEXT: ; def v[2:3] 2647; GFX900-NEXT: ;;#ASMEND 2648; GFX900-NEXT: v_perm_b32 v1, v1, v3, s4 2649; GFX900-NEXT: global_store_short_d16_hi v4, v0, s[16:17] offset:4 2650; GFX900-NEXT: global_store_dword v4, v1, s[16:17] 2651; GFX900-NEXT: s_waitcnt vmcnt(0) 2652; GFX900-NEXT: s_setpc_b64 s[30:31] 2653; 2654; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_3_1: 2655; GFX90A: ; %bb.0: 2656; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2657; GFX90A-NEXT: v_mov_b32_e32 v4, 0 2658; GFX90A-NEXT: ;;#ASMSTART 2659; GFX90A-NEXT: ; def v[0:1] 2660; GFX90A-NEXT: ;;#ASMEND 2661; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 2662; GFX90A-NEXT: ;;#ASMSTART 2663; GFX90A-NEXT: ; def v[2:3] 2664; GFX90A-NEXT: ;;#ASMEND 2665; GFX90A-NEXT: v_perm_b32 v1, v1, v3, s4 2666; 
GFX90A-NEXT: global_store_short_d16_hi v4, v0, s[16:17] offset:4 2667; GFX90A-NEXT: global_store_dword v4, v1, s[16:17] 2668; GFX90A-NEXT: s_waitcnt vmcnt(0) 2669; GFX90A-NEXT: s_setpc_b64 s[30:31] 2670; 2671; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_3_1: 2672; GFX940: ; %bb.0: 2673; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2674; GFX940-NEXT: v_mov_b32_e32 v4, 0 2675; GFX940-NEXT: ;;#ASMSTART 2676; GFX940-NEXT: ; def v[0:1] 2677; GFX940-NEXT: ;;#ASMEND 2678; GFX940-NEXT: s_mov_b32 s2, 0x7060302 2679; GFX940-NEXT: ;;#ASMSTART 2680; GFX940-NEXT: ; def v[2:3] 2681; GFX940-NEXT: ;;#ASMEND 2682; GFX940-NEXT: s_nop 0 2683; GFX940-NEXT: v_perm_b32 v1, v1, v3, s2 2684; GFX940-NEXT: global_store_short_d16_hi v4, v0, s[0:1] offset:4 sc0 sc1 2685; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 2686; GFX940-NEXT: s_waitcnt vmcnt(0) 2687; GFX940-NEXT: s_setpc_b64 s[30:31] 2688 %vec0 = call <4 x i16> asm "; def $0", "=v"() 2689 %vec1 = call <4 x i16> asm "; def $0", "=v"() 2690 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 3, i32 1> 2691 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 2692 ret void 2693} 2694 2695define void @v_shuffle_v3i16_v4i16__7_4_1(ptr addrspace(1) inreg %ptr) { 2696; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_4_1: 2697; GFX900: ; %bb.0: 2698; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2699; GFX900-NEXT: ;;#ASMSTART 2700; GFX900-NEXT: ; def v[0:1] 2701; GFX900-NEXT: ;;#ASMEND 2702; GFX900-NEXT: v_mov_b32_e32 v3, 0 2703; GFX900-NEXT: ;;#ASMSTART 2704; GFX900-NEXT: ; def v[1:2] 2705; GFX900-NEXT: ;;#ASMEND 2706; GFX900-NEXT: v_alignbit_b32 v1, v1, v2, 16 2707; GFX900-NEXT: global_store_short_d16_hi v3, v0, s[16:17] offset:4 2708; GFX900-NEXT: global_store_dword v3, v1, s[16:17] 2709; GFX900-NEXT: s_waitcnt vmcnt(0) 2710; GFX900-NEXT: s_setpc_b64 s[30:31] 2711; 2712; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_4_1: 2713; GFX90A: ; %bb.0: 2714; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2715; 
GFX90A-NEXT: v_mov_b32_e32 v4, 0 2716; GFX90A-NEXT: ;;#ASMSTART 2717; GFX90A-NEXT: ; def v[0:1] 2718; GFX90A-NEXT: ;;#ASMEND 2719; GFX90A-NEXT: ;;#ASMSTART 2720; GFX90A-NEXT: ; def v[2:3] 2721; GFX90A-NEXT: ;;#ASMEND 2722; GFX90A-NEXT: v_alignbit_b32 v1, v2, v3, 16 2723; GFX90A-NEXT: global_store_short_d16_hi v4, v0, s[16:17] offset:4 2724; GFX90A-NEXT: global_store_dword v4, v1, s[16:17] 2725; GFX90A-NEXT: s_waitcnt vmcnt(0) 2726; GFX90A-NEXT: s_setpc_b64 s[30:31] 2727; 2728; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_4_1: 2729; GFX940: ; %bb.0: 2730; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2731; GFX940-NEXT: v_mov_b32_e32 v4, 0 2732; GFX940-NEXT: ;;#ASMSTART 2733; GFX940-NEXT: ; def v[0:1] 2734; GFX940-NEXT: ;;#ASMEND 2735; GFX940-NEXT: ;;#ASMSTART 2736; GFX940-NEXT: ; def v[2:3] 2737; GFX940-NEXT: ;;#ASMEND 2738; GFX940-NEXT: s_nop 0 2739; GFX940-NEXT: v_alignbit_b32 v1, v2, v3, 16 2740; GFX940-NEXT: global_store_short_d16_hi v4, v0, s[0:1] offset:4 sc0 sc1 2741; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 2742; GFX940-NEXT: s_waitcnt vmcnt(0) 2743; GFX940-NEXT: s_setpc_b64 s[30:31] 2744 %vec0 = call <4 x i16> asm "; def $0", "=v"() 2745 %vec1 = call <4 x i16> asm "; def $0", "=v"() 2746 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 4, i32 1> 2747 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 2748 ret void 2749} 2750 2751define void @v_shuffle_v3i16_v4i16__7_5_1(ptr addrspace(1) inreg %ptr) { 2752; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_5_1: 2753; GFX900: ; %bb.0: 2754; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2755; GFX900-NEXT: ;;#ASMSTART 2756; GFX900-NEXT: ; def v[0:1] 2757; GFX900-NEXT: ;;#ASMEND 2758; GFX900-NEXT: v_mov_b32_e32 v3, 0 2759; GFX900-NEXT: ;;#ASMSTART 2760; GFX900-NEXT: ; def v[1:2] 2761; GFX900-NEXT: ;;#ASMEND 2762; GFX900-NEXT: s_mov_b32 s4, 0x7060302 2763; GFX900-NEXT: v_perm_b32 v1, v1, v2, s4 2764; GFX900-NEXT: global_store_short_d16_hi v3, v0, s[16:17] offset:4 2765; 
GFX900-NEXT: global_store_dword v3, v1, s[16:17] 2766; GFX900-NEXT: s_waitcnt vmcnt(0) 2767; GFX900-NEXT: s_setpc_b64 s[30:31] 2768; 2769; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_5_1: 2770; GFX90A: ; %bb.0: 2771; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2772; GFX90A-NEXT: v_mov_b32_e32 v4, 0 2773; GFX90A-NEXT: ;;#ASMSTART 2774; GFX90A-NEXT: ; def v[0:1] 2775; GFX90A-NEXT: ;;#ASMEND 2776; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 2777; GFX90A-NEXT: ;;#ASMSTART 2778; GFX90A-NEXT: ; def v[2:3] 2779; GFX90A-NEXT: ;;#ASMEND 2780; GFX90A-NEXT: v_perm_b32 v1, v2, v3, s4 2781; GFX90A-NEXT: global_store_short_d16_hi v4, v0, s[16:17] offset:4 2782; GFX90A-NEXT: global_store_dword v4, v1, s[16:17] 2783; GFX90A-NEXT: s_waitcnt vmcnt(0) 2784; GFX90A-NEXT: s_setpc_b64 s[30:31] 2785; 2786; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_5_1: 2787; GFX940: ; %bb.0: 2788; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2789; GFX940-NEXT: v_mov_b32_e32 v4, 0 2790; GFX940-NEXT: ;;#ASMSTART 2791; GFX940-NEXT: ; def v[0:1] 2792; GFX940-NEXT: ;;#ASMEND 2793; GFX940-NEXT: s_mov_b32 s2, 0x7060302 2794; GFX940-NEXT: ;;#ASMSTART 2795; GFX940-NEXT: ; def v[2:3] 2796; GFX940-NEXT: ;;#ASMEND 2797; GFX940-NEXT: s_nop 0 2798; GFX940-NEXT: v_perm_b32 v1, v2, v3, s2 2799; GFX940-NEXT: global_store_short_d16_hi v4, v0, s[0:1] offset:4 sc0 sc1 2800; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 2801; GFX940-NEXT: s_waitcnt vmcnt(0) 2802; GFX940-NEXT: s_setpc_b64 s[30:31] 2803 %vec0 = call <4 x i16> asm "; def $0", "=v"() 2804 %vec1 = call <4 x i16> asm "; def $0", "=v"() 2805 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 5, i32 1> 2806 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 2807 ret void 2808} 2809 2810define void @v_shuffle_v3i16_v4i16__7_6_1(ptr addrspace(1) inreg %ptr) { 2811; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_6_1: 2812; GFX900: ; %bb.0: 2813; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2814; GFX900-NEXT: ;;#ASMSTART 2815; 
GFX900-NEXT: ; def v[0:1] 2816; GFX900-NEXT: ;;#ASMEND 2817; GFX900-NEXT: v_mov_b32_e32 v3, 0 2818; GFX900-NEXT: ;;#ASMSTART 2819; GFX900-NEXT: ; def v[1:2] 2820; GFX900-NEXT: ;;#ASMEND 2821; GFX900-NEXT: v_alignbit_b32 v1, v2, v2, 16 2822; GFX900-NEXT: global_store_short_d16_hi v3, v0, s[16:17] offset:4 2823; GFX900-NEXT: global_store_dword v3, v1, s[16:17] 2824; GFX900-NEXT: s_waitcnt vmcnt(0) 2825; GFX900-NEXT: s_setpc_b64 s[30:31] 2826; 2827; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_6_1: 2828; GFX90A: ; %bb.0: 2829; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2830; GFX90A-NEXT: v_mov_b32_e32 v4, 0 2831; GFX90A-NEXT: ;;#ASMSTART 2832; GFX90A-NEXT: ; def v[0:1] 2833; GFX90A-NEXT: ;;#ASMEND 2834; GFX90A-NEXT: ;;#ASMSTART 2835; GFX90A-NEXT: ; def v[2:3] 2836; GFX90A-NEXT: ;;#ASMEND 2837; GFX90A-NEXT: v_alignbit_b32 v1, v3, v3, 16 2838; GFX90A-NEXT: global_store_short_d16_hi v4, v0, s[16:17] offset:4 2839; GFX90A-NEXT: global_store_dword v4, v1, s[16:17] 2840; GFX90A-NEXT: s_waitcnt vmcnt(0) 2841; GFX90A-NEXT: s_setpc_b64 s[30:31] 2842; 2843; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_6_1: 2844; GFX940: ; %bb.0: 2845; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2846; GFX940-NEXT: v_mov_b32_e32 v4, 0 2847; GFX940-NEXT: ;;#ASMSTART 2848; GFX940-NEXT: ; def v[0:1] 2849; GFX940-NEXT: ;;#ASMEND 2850; GFX940-NEXT: ;;#ASMSTART 2851; GFX940-NEXT: ; def v[2:3] 2852; GFX940-NEXT: ;;#ASMEND 2853; GFX940-NEXT: s_nop 0 2854; GFX940-NEXT: v_alignbit_b32 v1, v3, v3, 16 2855; GFX940-NEXT: global_store_short_d16_hi v4, v0, s[0:1] offset:4 sc0 sc1 2856; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 2857; GFX940-NEXT: s_waitcnt vmcnt(0) 2858; GFX940-NEXT: s_setpc_b64 s[30:31] 2859 %vec0 = call <4 x i16> asm "; def $0", "=v"() 2860 %vec1 = call <4 x i16> asm "; def $0", "=v"() 2861 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 6, i32 1> 2862 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 2863 ret void 2864} 2865 2866define 
void @v_shuffle_v3i16_v4i16__u_2_2(ptr addrspace(1) inreg %ptr) { 2867; GFX900-LABEL: v_shuffle_v3i16_v4i16__u_2_2: 2868; GFX900: ; %bb.0: 2869; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2870; GFX900-NEXT: v_mov_b32_e32 v2, 0 2871; GFX900-NEXT: ;;#ASMSTART 2872; GFX900-NEXT: ; def v[0:1] 2873; GFX900-NEXT: ;;#ASMEND 2874; GFX900-NEXT: v_lshlrev_b32_e32 v0, 16, v1 2875; GFX900-NEXT: global_store_short v2, v1, s[16:17] offset:4 2876; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 2877; GFX900-NEXT: s_waitcnt vmcnt(0) 2878; GFX900-NEXT: s_setpc_b64 s[30:31] 2879; 2880; GFX90A-LABEL: v_shuffle_v3i16_v4i16__u_2_2: 2881; GFX90A: ; %bb.0: 2882; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2883; GFX90A-NEXT: v_mov_b32_e32 v2, 0 2884; GFX90A-NEXT: ;;#ASMSTART 2885; GFX90A-NEXT: ; def v[0:1] 2886; GFX90A-NEXT: ;;#ASMEND 2887; GFX90A-NEXT: v_lshlrev_b32_e32 v0, 16, v1 2888; GFX90A-NEXT: global_store_short v2, v1, s[16:17] offset:4 2889; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 2890; GFX90A-NEXT: s_waitcnt vmcnt(0) 2891; GFX90A-NEXT: s_setpc_b64 s[30:31] 2892; 2893; GFX940-LABEL: v_shuffle_v3i16_v4i16__u_2_2: 2894; GFX940: ; %bb.0: 2895; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2896; GFX940-NEXT: v_mov_b32_e32 v2, 0 2897; GFX940-NEXT: ;;#ASMSTART 2898; GFX940-NEXT: ; def v[0:1] 2899; GFX940-NEXT: ;;#ASMEND 2900; GFX940-NEXT: s_nop 0 2901; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v1 2902; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 2903; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 2904; GFX940-NEXT: s_waitcnt vmcnt(0) 2905; GFX940-NEXT: s_setpc_b64 s[30:31] 2906 %vec0 = call <4 x i16> asm "; def $0", "=v"() 2907 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 poison, i32 2, i32 2> 2908 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 2909 ret void 2910} 2911 2912define void @v_shuffle_v3i16_v4i16__0_2_2(ptr addrspace(1) inreg %ptr) { 2913; GFX900-LABEL: 
v_shuffle_v3i16_v4i16__0_2_2: 2914; GFX900: ; %bb.0: 2915; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2916; GFX900-NEXT: v_mov_b32_e32 v2, 0 2917; GFX900-NEXT: ;;#ASMSTART 2918; GFX900-NEXT: ; def v[0:1] 2919; GFX900-NEXT: ;;#ASMEND 2920; GFX900-NEXT: s_mov_b32 s4, 0x5040100 2921; GFX900-NEXT: v_perm_b32 v0, v1, v0, s4 2922; GFX900-NEXT: global_store_short v2, v1, s[16:17] offset:4 2923; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 2924; GFX900-NEXT: s_waitcnt vmcnt(0) 2925; GFX900-NEXT: s_setpc_b64 s[30:31] 2926; 2927; GFX90A-LABEL: v_shuffle_v3i16_v4i16__0_2_2: 2928; GFX90A: ; %bb.0: 2929; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2930; GFX90A-NEXT: v_mov_b32_e32 v2, 0 2931; GFX90A-NEXT: ;;#ASMSTART 2932; GFX90A-NEXT: ; def v[0:1] 2933; GFX90A-NEXT: ;;#ASMEND 2934; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 2935; GFX90A-NEXT: v_perm_b32 v0, v1, v0, s4 2936; GFX90A-NEXT: global_store_short v2, v1, s[16:17] offset:4 2937; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 2938; GFX90A-NEXT: s_waitcnt vmcnt(0) 2939; GFX90A-NEXT: s_setpc_b64 s[30:31] 2940; 2941; GFX940-LABEL: v_shuffle_v3i16_v4i16__0_2_2: 2942; GFX940: ; %bb.0: 2943; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2944; GFX940-NEXT: v_mov_b32_e32 v2, 0 2945; GFX940-NEXT: ;;#ASMSTART 2946; GFX940-NEXT: ; def v[0:1] 2947; GFX940-NEXT: ;;#ASMEND 2948; GFX940-NEXT: s_mov_b32 s2, 0x5040100 2949; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2 2950; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 2951; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 2952; GFX940-NEXT: s_waitcnt vmcnt(0) 2953; GFX940-NEXT: s_setpc_b64 s[30:31] 2954 %vec0 = call <4 x i16> asm "; def $0", "=v"() 2955 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 2, i32 2> 2956 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 2957 ret void 2958} 2959 2960define void @v_shuffle_v3i16_v4i16__1_2_2(ptr addrspace(1) inreg %ptr) { 2961; GFX900-LABEL: 
v_shuffle_v3i16_v4i16__1_2_2: 2962; GFX900: ; %bb.0: 2963; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2964; GFX900-NEXT: v_mov_b32_e32 v2, 0 2965; GFX900-NEXT: ;;#ASMSTART 2966; GFX900-NEXT: ; def v[0:1] 2967; GFX900-NEXT: ;;#ASMEND 2968; GFX900-NEXT: v_alignbit_b32 v0, v1, v0, 16 2969; GFX900-NEXT: global_store_short v2, v1, s[16:17] offset:4 2970; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 2971; GFX900-NEXT: s_waitcnt vmcnt(0) 2972; GFX900-NEXT: s_setpc_b64 s[30:31] 2973; 2974; GFX90A-LABEL: v_shuffle_v3i16_v4i16__1_2_2: 2975; GFX90A: ; %bb.0: 2976; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2977; GFX90A-NEXT: v_mov_b32_e32 v2, 0 2978; GFX90A-NEXT: ;;#ASMSTART 2979; GFX90A-NEXT: ; def v[0:1] 2980; GFX90A-NEXT: ;;#ASMEND 2981; GFX90A-NEXT: v_alignbit_b32 v0, v1, v0, 16 2982; GFX90A-NEXT: global_store_short v2, v1, s[16:17] offset:4 2983; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 2984; GFX90A-NEXT: s_waitcnt vmcnt(0) 2985; GFX90A-NEXT: s_setpc_b64 s[30:31] 2986; 2987; GFX940-LABEL: v_shuffle_v3i16_v4i16__1_2_2: 2988; GFX940: ; %bb.0: 2989; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2990; GFX940-NEXT: v_mov_b32_e32 v2, 0 2991; GFX940-NEXT: ;;#ASMSTART 2992; GFX940-NEXT: ; def v[0:1] 2993; GFX940-NEXT: ;;#ASMEND 2994; GFX940-NEXT: s_nop 0 2995; GFX940-NEXT: v_alignbit_b32 v0, v1, v0, 16 2996; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 2997; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 2998; GFX940-NEXT: s_waitcnt vmcnt(0) 2999; GFX940-NEXT: s_setpc_b64 s[30:31] 3000 %vec0 = call <4 x i16> asm "; def $0", "=v"() 3001 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 1, i32 2, i32 2> 3002 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 3003 ret void 3004} 3005 3006define void @v_shuffle_v3i16_v4i16__2_2_2(ptr addrspace(1) inreg %ptr) { 3007; GFX900-LABEL: v_shuffle_v3i16_v4i16__2_2_2: 3008; GFX900: ; %bb.0: 3009; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 
3010; GFX900-NEXT: v_mov_b32_e32 v2, 0 3011; GFX900-NEXT: ;;#ASMSTART 3012; GFX900-NEXT: ; def v[0:1] 3013; GFX900-NEXT: ;;#ASMEND 3014; GFX900-NEXT: s_mov_b32 s4, 0x5040100 3015; GFX900-NEXT: v_perm_b32 v0, v1, v1, s4 3016; GFX900-NEXT: global_store_short v2, v1, s[16:17] offset:4 3017; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 3018; GFX900-NEXT: s_waitcnt vmcnt(0) 3019; GFX900-NEXT: s_setpc_b64 s[30:31] 3020; 3021; GFX90A-LABEL: v_shuffle_v3i16_v4i16__2_2_2: 3022; GFX90A: ; %bb.0: 3023; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3024; GFX90A-NEXT: v_mov_b32_e32 v2, 0 3025; GFX90A-NEXT: ;;#ASMSTART 3026; GFX90A-NEXT: ; def v[0:1] 3027; GFX90A-NEXT: ;;#ASMEND 3028; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 3029; GFX90A-NEXT: v_perm_b32 v0, v1, v1, s4 3030; GFX90A-NEXT: global_store_short v2, v1, s[16:17] offset:4 3031; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 3032; GFX90A-NEXT: s_waitcnt vmcnt(0) 3033; GFX90A-NEXT: s_setpc_b64 s[30:31] 3034; 3035; GFX940-LABEL: v_shuffle_v3i16_v4i16__2_2_2: 3036; GFX940: ; %bb.0: 3037; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3038; GFX940-NEXT: v_mov_b32_e32 v2, 0 3039; GFX940-NEXT: ;;#ASMSTART 3040; GFX940-NEXT: ; def v[0:1] 3041; GFX940-NEXT: ;;#ASMEND 3042; GFX940-NEXT: s_mov_b32 s2, 0x5040100 3043; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 3044; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 3045; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 3046; GFX940-NEXT: s_waitcnt vmcnt(0) 3047; GFX940-NEXT: s_setpc_b64 s[30:31] 3048 %vec0 = call <4 x i16> asm "; def $0", "=v"() 3049 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 2, i32 2, i32 2> 3050 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 3051 ret void 3052} 3053 3054define void @v_shuffle_v3i16_v4i16__3_2_2(ptr addrspace(1) inreg %ptr) { 3055; GFX900-LABEL: v_shuffle_v3i16_v4i16__3_2_2: 3056; GFX900: ; %bb.0: 3057; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3058; GFX900-NEXT: 
v_mov_b32_e32 v2, 0 3059; GFX900-NEXT: ;;#ASMSTART 3060; GFX900-NEXT: ; def v[0:1] 3061; GFX900-NEXT: ;;#ASMEND 3062; GFX900-NEXT: v_alignbit_b32 v0, v1, v1, 16 3063; GFX900-NEXT: global_store_short v2, v1, s[16:17] offset:4 3064; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 3065; GFX900-NEXT: s_waitcnt vmcnt(0) 3066; GFX900-NEXT: s_setpc_b64 s[30:31] 3067; 3068; GFX90A-LABEL: v_shuffle_v3i16_v4i16__3_2_2: 3069; GFX90A: ; %bb.0: 3070; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3071; GFX90A-NEXT: v_mov_b32_e32 v2, 0 3072; GFX90A-NEXT: ;;#ASMSTART 3073; GFX90A-NEXT: ; def v[0:1] 3074; GFX90A-NEXT: ;;#ASMEND 3075; GFX90A-NEXT: v_alignbit_b32 v0, v1, v1, 16 3076; GFX90A-NEXT: global_store_short v2, v1, s[16:17] offset:4 3077; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 3078; GFX90A-NEXT: s_waitcnt vmcnt(0) 3079; GFX90A-NEXT: s_setpc_b64 s[30:31] 3080; 3081; GFX940-LABEL: v_shuffle_v3i16_v4i16__3_2_2: 3082; GFX940: ; %bb.0: 3083; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3084; GFX940-NEXT: v_mov_b32_e32 v2, 0 3085; GFX940-NEXT: ;;#ASMSTART 3086; GFX940-NEXT: ; def v[0:1] 3087; GFX940-NEXT: ;;#ASMEND 3088; GFX940-NEXT: s_nop 0 3089; GFX940-NEXT: v_alignbit_b32 v0, v1, v1, 16 3090; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 3091; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 3092; GFX940-NEXT: s_waitcnt vmcnt(0) 3093; GFX940-NEXT: s_setpc_b64 s[30:31] 3094 %vec0 = call <4 x i16> asm "; def $0", "=v"() 3095 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 3, i32 2, i32 2> 3096 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 3097 ret void 3098} 3099 3100define void @v_shuffle_v3i16_v4i16__4_2_2(ptr addrspace(1) inreg %ptr) { 3101; GFX900-LABEL: v_shuffle_v3i16_v4i16__4_2_2: 3102; GFX900: ; %bb.0: 3103; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3104; GFX900-NEXT: v_mov_b32_e32 v2, 0 3105; GFX900-NEXT: ;;#ASMSTART 3106; GFX900-NEXT: ; def v[0:1] 3107; GFX900-NEXT: ;;#ASMEND 
3108; GFX900-NEXT: v_lshlrev_b32_e32 v0, 16, v1 3109; GFX900-NEXT: global_store_short v2, v1, s[16:17] offset:4 3110; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 3111; GFX900-NEXT: s_waitcnt vmcnt(0) 3112; GFX900-NEXT: s_setpc_b64 s[30:31] 3113; 3114; GFX90A-LABEL: v_shuffle_v3i16_v4i16__4_2_2: 3115; GFX90A: ; %bb.0: 3116; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3117; GFX90A-NEXT: v_mov_b32_e32 v2, 0 3118; GFX90A-NEXT: ;;#ASMSTART 3119; GFX90A-NEXT: ; def v[0:1] 3120; GFX90A-NEXT: ;;#ASMEND 3121; GFX90A-NEXT: v_lshlrev_b32_e32 v0, 16, v1 3122; GFX90A-NEXT: global_store_short v2, v1, s[16:17] offset:4 3123; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 3124; GFX90A-NEXT: s_waitcnt vmcnt(0) 3125; GFX90A-NEXT: s_setpc_b64 s[30:31] 3126; 3127; GFX940-LABEL: v_shuffle_v3i16_v4i16__4_2_2: 3128; GFX940: ; %bb.0: 3129; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3130; GFX940-NEXT: v_mov_b32_e32 v2, 0 3131; GFX940-NEXT: ;;#ASMSTART 3132; GFX940-NEXT: ; def v[0:1] 3133; GFX940-NEXT: ;;#ASMEND 3134; GFX940-NEXT: s_nop 0 3135; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v1 3136; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 3137; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 3138; GFX940-NEXT: s_waitcnt vmcnt(0) 3139; GFX940-NEXT: s_setpc_b64 s[30:31] 3140 %vec0 = call <4 x i16> asm "; def $0", "=v"() 3141 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 4, i32 2, i32 2> 3142 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 3143 ret void 3144} 3145 3146define void @v_shuffle_v3i16_v4i16__5_2_2(ptr addrspace(1) inreg %ptr) { 3147; GFX900-LABEL: v_shuffle_v3i16_v4i16__5_2_2: 3148; GFX900: ; %bb.0: 3149; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3150; GFX900-NEXT: v_mov_b32_e32 v4, 0 3151; GFX900-NEXT: ;;#ASMSTART 3152; GFX900-NEXT: ; def v[0:1] 3153; GFX900-NEXT: ;;#ASMEND 3154; GFX900-NEXT: ;;#ASMSTART 3155; GFX900-NEXT: ; def v[2:3] 3156; GFX900-NEXT: ;;#ASMEND 3157; GFX900-NEXT: 
v_alignbit_b32 v0, v1, v2, 16 3158; GFX900-NEXT: global_store_short v4, v1, s[16:17] offset:4 3159; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 3160; GFX900-NEXT: s_waitcnt vmcnt(0) 3161; GFX900-NEXT: s_setpc_b64 s[30:31] 3162; 3163; GFX90A-LABEL: v_shuffle_v3i16_v4i16__5_2_2: 3164; GFX90A: ; %bb.0: 3165; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3166; GFX90A-NEXT: v_mov_b32_e32 v4, 0 3167; GFX90A-NEXT: ;;#ASMSTART 3168; GFX90A-NEXT: ; def v[0:1] 3169; GFX90A-NEXT: ;;#ASMEND 3170; GFX90A-NEXT: ;;#ASMSTART 3171; GFX90A-NEXT: ; def v[2:3] 3172; GFX90A-NEXT: ;;#ASMEND 3173; GFX90A-NEXT: v_alignbit_b32 v0, v1, v2, 16 3174; GFX90A-NEXT: global_store_short v4, v1, s[16:17] offset:4 3175; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 3176; GFX90A-NEXT: s_waitcnt vmcnt(0) 3177; GFX90A-NEXT: s_setpc_b64 s[30:31] 3178; 3179; GFX940-LABEL: v_shuffle_v3i16_v4i16__5_2_2: 3180; GFX940: ; %bb.0: 3181; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3182; GFX940-NEXT: v_mov_b32_e32 v4, 0 3183; GFX940-NEXT: ;;#ASMSTART 3184; GFX940-NEXT: ; def v[0:1] 3185; GFX940-NEXT: ;;#ASMEND 3186; GFX940-NEXT: ;;#ASMSTART 3187; GFX940-NEXT: ; def v[2:3] 3188; GFX940-NEXT: ;;#ASMEND 3189; GFX940-NEXT: s_nop 0 3190; GFX940-NEXT: v_alignbit_b32 v0, v1, v2, 16 3191; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 3192; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 3193; GFX940-NEXT: s_waitcnt vmcnt(0) 3194; GFX940-NEXT: s_setpc_b64 s[30:31] 3195 %vec0 = call <4 x i16> asm "; def $0", "=v"() 3196 %vec1 = call <4 x i16> asm "; def $0", "=v"() 3197 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 5, i32 2, i32 2> 3198 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 3199 ret void 3200} 3201 3202define void @v_shuffle_v3i16_v4i16__6_2_2(ptr addrspace(1) inreg %ptr) { 3203; GFX900-LABEL: v_shuffle_v3i16_v4i16__6_2_2: 3204; GFX900: ; %bb.0: 3205; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3206; GFX900-NEXT: 
v_mov_b32_e32 v4, 0 3207; GFX900-NEXT: ;;#ASMSTART 3208; GFX900-NEXT: ; def v[0:1] 3209; GFX900-NEXT: ;;#ASMEND 3210; GFX900-NEXT: s_mov_b32 s4, 0x5040100 3211; GFX900-NEXT: ;;#ASMSTART 3212; GFX900-NEXT: ; def v[2:3] 3213; GFX900-NEXT: ;;#ASMEND 3214; GFX900-NEXT: v_perm_b32 v0, v1, v3, s4 3215; GFX900-NEXT: global_store_short v4, v1, s[16:17] offset:4 3216; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 3217; GFX900-NEXT: s_waitcnt vmcnt(0) 3218; GFX900-NEXT: s_setpc_b64 s[30:31] 3219; 3220; GFX90A-LABEL: v_shuffle_v3i16_v4i16__6_2_2: 3221; GFX90A: ; %bb.0: 3222; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3223; GFX90A-NEXT: v_mov_b32_e32 v4, 0 3224; GFX90A-NEXT: ;;#ASMSTART 3225; GFX90A-NEXT: ; def v[0:1] 3226; GFX90A-NEXT: ;;#ASMEND 3227; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 3228; GFX90A-NEXT: ;;#ASMSTART 3229; GFX90A-NEXT: ; def v[2:3] 3230; GFX90A-NEXT: ;;#ASMEND 3231; GFX90A-NEXT: v_perm_b32 v0, v1, v3, s4 3232; GFX90A-NEXT: global_store_short v4, v1, s[16:17] offset:4 3233; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 3234; GFX90A-NEXT: s_waitcnt vmcnt(0) 3235; GFX90A-NEXT: s_setpc_b64 s[30:31] 3236; 3237; GFX940-LABEL: v_shuffle_v3i16_v4i16__6_2_2: 3238; GFX940: ; %bb.0: 3239; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3240; GFX940-NEXT: v_mov_b32_e32 v4, 0 3241; GFX940-NEXT: ;;#ASMSTART 3242; GFX940-NEXT: ; def v[0:1] 3243; GFX940-NEXT: ;;#ASMEND 3244; GFX940-NEXT: s_mov_b32 s2, 0x5040100 3245; GFX940-NEXT: ;;#ASMSTART 3246; GFX940-NEXT: ; def v[2:3] 3247; GFX940-NEXT: ;;#ASMEND 3248; GFX940-NEXT: s_nop 0 3249; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 3250; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 3251; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 3252; GFX940-NEXT: s_waitcnt vmcnt(0) 3253; GFX940-NEXT: s_setpc_b64 s[30:31] 3254 %vec0 = call <4 x i16> asm "; def $0", "=v"() 3255 %vec1 = call <4 x i16> asm "; def $0", "=v"() 3256 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 
6, i32 2, i32 2> 3257 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 3258 ret void 3259} 3260 3261define void @v_shuffle_v3i16_v4i16__7_2_2(ptr addrspace(1) inreg %ptr) { 3262; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_2_2: 3263; GFX900: ; %bb.0: 3264; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3265; GFX900-NEXT: v_mov_b32_e32 v4, 0 3266; GFX900-NEXT: ;;#ASMSTART 3267; GFX900-NEXT: ; def v[0:1] 3268; GFX900-NEXT: ;;#ASMEND 3269; GFX900-NEXT: ;;#ASMSTART 3270; GFX900-NEXT: ; def v[2:3] 3271; GFX900-NEXT: ;;#ASMEND 3272; GFX900-NEXT: v_alignbit_b32 v0, v1, v3, 16 3273; GFX900-NEXT: global_store_short v4, v1, s[16:17] offset:4 3274; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 3275; GFX900-NEXT: s_waitcnt vmcnt(0) 3276; GFX900-NEXT: s_setpc_b64 s[30:31] 3277; 3278; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_2_2: 3279; GFX90A: ; %bb.0: 3280; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3281; GFX90A-NEXT: v_mov_b32_e32 v4, 0 3282; GFX90A-NEXT: ;;#ASMSTART 3283; GFX90A-NEXT: ; def v[0:1] 3284; GFX90A-NEXT: ;;#ASMEND 3285; GFX90A-NEXT: ;;#ASMSTART 3286; GFX90A-NEXT: ; def v[2:3] 3287; GFX90A-NEXT: ;;#ASMEND 3288; GFX90A-NEXT: v_alignbit_b32 v0, v1, v3, 16 3289; GFX90A-NEXT: global_store_short v4, v1, s[16:17] offset:4 3290; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 3291; GFX90A-NEXT: s_waitcnt vmcnt(0) 3292; GFX90A-NEXT: s_setpc_b64 s[30:31] 3293; 3294; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_2_2: 3295; GFX940: ; %bb.0: 3296; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3297; GFX940-NEXT: v_mov_b32_e32 v4, 0 3298; GFX940-NEXT: ;;#ASMSTART 3299; GFX940-NEXT: ; def v[0:1] 3300; GFX940-NEXT: ;;#ASMEND 3301; GFX940-NEXT: ;;#ASMSTART 3302; GFX940-NEXT: ; def v[2:3] 3303; GFX940-NEXT: ;;#ASMEND 3304; GFX940-NEXT: s_nop 0 3305; GFX940-NEXT: v_alignbit_b32 v0, v1, v3, 16 3306; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 3307; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 3308; GFX940-NEXT: s_waitcnt vmcnt(0) 3309; 
GFX940-NEXT: s_setpc_b64 s[30:31] 3310 %vec0 = call <4 x i16> asm "; def $0", "=v"() 3311 %vec1 = call <4 x i16> asm "; def $0", "=v"() 3312 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 2, i32 2> 3313 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 3314 ret void 3315} 3316 3317define void @v_shuffle_v3i16_v4i16__7_u_2(ptr addrspace(1) inreg %ptr) { 3318; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_u_2: 3319; GFX900: ; %bb.0: 3320; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3321; GFX900-NEXT: v_mov_b32_e32 v4, 0 3322; GFX900-NEXT: ;;#ASMSTART 3323; GFX900-NEXT: ; def v[0:1] 3324; GFX900-NEXT: ;;#ASMEND 3325; GFX900-NEXT: ;;#ASMSTART 3326; GFX900-NEXT: ; def v[2:3] 3327; GFX900-NEXT: ;;#ASMEND 3328; GFX900-NEXT: v_alignbit_b32 v0, s4, v3, 16 3329; GFX900-NEXT: global_store_short v4, v1, s[16:17] offset:4 3330; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 3331; GFX900-NEXT: s_waitcnt vmcnt(0) 3332; GFX900-NEXT: s_setpc_b64 s[30:31] 3333; 3334; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_u_2: 3335; GFX90A: ; %bb.0: 3336; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3337; GFX90A-NEXT: v_mov_b32_e32 v4, 0 3338; GFX90A-NEXT: ;;#ASMSTART 3339; GFX90A-NEXT: ; def v[0:1] 3340; GFX90A-NEXT: ;;#ASMEND 3341; GFX90A-NEXT: ;;#ASMSTART 3342; GFX90A-NEXT: ; def v[2:3] 3343; GFX90A-NEXT: ;;#ASMEND 3344; GFX90A-NEXT: v_alignbit_b32 v0, s4, v3, 16 3345; GFX90A-NEXT: global_store_short v4, v1, s[16:17] offset:4 3346; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 3347; GFX90A-NEXT: s_waitcnt vmcnt(0) 3348; GFX90A-NEXT: s_setpc_b64 s[30:31] 3349; 3350; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_u_2: 3351; GFX940: ; %bb.0: 3352; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3353; GFX940-NEXT: v_mov_b32_e32 v4, 0 3354; GFX940-NEXT: ;;#ASMSTART 3355; GFX940-NEXT: ; def v[0:1] 3356; GFX940-NEXT: ;;#ASMEND 3357; GFX940-NEXT: ;;#ASMSTART 3358; GFX940-NEXT: ; def v[2:3] 3359; GFX940-NEXT: ;;#ASMEND 3360; GFX940-NEXT: s_nop 0 3361; 
GFX940-NEXT: v_alignbit_b32 v0, s0, v3, 16 3362; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 3363; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 3364; GFX940-NEXT: s_waitcnt vmcnt(0) 3365; GFX940-NEXT: s_setpc_b64 s[30:31] 3366 %vec0 = call <4 x i16> asm "; def $0", "=v"() 3367 %vec1 = call <4 x i16> asm "; def $0", "=v"() 3368 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 poison, i32 2> 3369 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 3370 ret void 3371} 3372 3373define void @v_shuffle_v3i16_v4i16__7_0_2(ptr addrspace(1) inreg %ptr) { 3374; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_0_2: 3375; GFX900: ; %bb.0: 3376; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3377; GFX900-NEXT: v_mov_b32_e32 v4, 0 3378; GFX900-NEXT: ;;#ASMSTART 3379; GFX900-NEXT: ; def v[0:1] 3380; GFX900-NEXT: ;;#ASMEND 3381; GFX900-NEXT: ;;#ASMSTART 3382; GFX900-NEXT: ; def v[2:3] 3383; GFX900-NEXT: ;;#ASMEND 3384; GFX900-NEXT: v_alignbit_b32 v0, v0, v3, 16 3385; GFX900-NEXT: global_store_short v4, v1, s[16:17] offset:4 3386; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 3387; GFX900-NEXT: s_waitcnt vmcnt(0) 3388; GFX900-NEXT: s_setpc_b64 s[30:31] 3389; 3390; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_0_2: 3391; GFX90A: ; %bb.0: 3392; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3393; GFX90A-NEXT: v_mov_b32_e32 v4, 0 3394; GFX90A-NEXT: ;;#ASMSTART 3395; GFX90A-NEXT: ; def v[0:1] 3396; GFX90A-NEXT: ;;#ASMEND 3397; GFX90A-NEXT: ;;#ASMSTART 3398; GFX90A-NEXT: ; def v[2:3] 3399; GFX90A-NEXT: ;;#ASMEND 3400; GFX90A-NEXT: v_alignbit_b32 v0, v0, v3, 16 3401; GFX90A-NEXT: global_store_short v4, v1, s[16:17] offset:4 3402; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 3403; GFX90A-NEXT: s_waitcnt vmcnt(0) 3404; GFX90A-NEXT: s_setpc_b64 s[30:31] 3405; 3406; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_0_2: 3407; GFX940: ; %bb.0: 3408; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3409; GFX940-NEXT: v_mov_b32_e32 v4, 0 
3410; GFX940-NEXT: ;;#ASMSTART 3411; GFX940-NEXT: ; def v[0:1] 3412; GFX940-NEXT: ;;#ASMEND 3413; GFX940-NEXT: ;;#ASMSTART 3414; GFX940-NEXT: ; def v[2:3] 3415; GFX940-NEXT: ;;#ASMEND 3416; GFX940-NEXT: s_nop 0 3417; GFX940-NEXT: v_alignbit_b32 v0, v0, v3, 16 3418; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 3419; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 3420; GFX940-NEXT: s_waitcnt vmcnt(0) 3421; GFX940-NEXT: s_setpc_b64 s[30:31] 3422 %vec0 = call <4 x i16> asm "; def $0", "=v"() 3423 %vec1 = call <4 x i16> asm "; def $0", "=v"() 3424 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 0, i32 2> 3425 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 3426 ret void 3427} 3428 3429define void @v_shuffle_v3i16_v4i16__7_1_2(ptr addrspace(1) inreg %ptr) { 3430; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_1_2: 3431; GFX900: ; %bb.0: 3432; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3433; GFX900-NEXT: v_mov_b32_e32 v4, 0 3434; GFX900-NEXT: ;;#ASMSTART 3435; GFX900-NEXT: ; def v[0:1] 3436; GFX900-NEXT: ;;#ASMEND 3437; GFX900-NEXT: s_mov_b32 s4, 0x7060302 3438; GFX900-NEXT: ;;#ASMSTART 3439; GFX900-NEXT: ; def v[2:3] 3440; GFX900-NEXT: ;;#ASMEND 3441; GFX900-NEXT: v_perm_b32 v0, v0, v3, s4 3442; GFX900-NEXT: global_store_short v4, v1, s[16:17] offset:4 3443; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 3444; GFX900-NEXT: s_waitcnt vmcnt(0) 3445; GFX900-NEXT: s_setpc_b64 s[30:31] 3446; 3447; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_1_2: 3448; GFX90A: ; %bb.0: 3449; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3450; GFX90A-NEXT: v_mov_b32_e32 v4, 0 3451; GFX90A-NEXT: ;;#ASMSTART 3452; GFX90A-NEXT: ; def v[0:1] 3453; GFX90A-NEXT: ;;#ASMEND 3454; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 3455; GFX90A-NEXT: ;;#ASMSTART 3456; GFX90A-NEXT: ; def v[2:3] 3457; GFX90A-NEXT: ;;#ASMEND 3458; GFX90A-NEXT: v_perm_b32 v0, v0, v3, s4 3459; GFX90A-NEXT: global_store_short v4, v1, s[16:17] offset:4 3460; GFX90A-NEXT: 
global_store_dword v4, v0, s[16:17] 3461; GFX90A-NEXT: s_waitcnt vmcnt(0) 3462; GFX90A-NEXT: s_setpc_b64 s[30:31] 3463; 3464; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_1_2: 3465; GFX940: ; %bb.0: 3466; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3467; GFX940-NEXT: v_mov_b32_e32 v4, 0 3468; GFX940-NEXT: ;;#ASMSTART 3469; GFX940-NEXT: ; def v[0:1] 3470; GFX940-NEXT: ;;#ASMEND 3471; GFX940-NEXT: s_mov_b32 s2, 0x7060302 3472; GFX940-NEXT: ;;#ASMSTART 3473; GFX940-NEXT: ; def v[2:3] 3474; GFX940-NEXT: ;;#ASMEND 3475; GFX940-NEXT: s_nop 0 3476; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 3477; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 3478; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 3479; GFX940-NEXT: s_waitcnt vmcnt(0) 3480; GFX940-NEXT: s_setpc_b64 s[30:31] 3481 %vec0 = call <4 x i16> asm "; def $0", "=v"() 3482 %vec1 = call <4 x i16> asm "; def $0", "=v"() 3483 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 1, i32 2> 3484 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 3485 ret void 3486} 3487 3488define void @v_shuffle_v3i16_v4i16__7_3_2(ptr addrspace(1) inreg %ptr) { 3489; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_3_2: 3490; GFX900: ; %bb.0: 3491; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3492; GFX900-NEXT: v_mov_b32_e32 v4, 0 3493; GFX900-NEXT: ;;#ASMSTART 3494; GFX900-NEXT: ; def v[0:1] 3495; GFX900-NEXT: ;;#ASMEND 3496; GFX900-NEXT: s_mov_b32 s4, 0x7060302 3497; GFX900-NEXT: ;;#ASMSTART 3498; GFX900-NEXT: ; def v[2:3] 3499; GFX900-NEXT: ;;#ASMEND 3500; GFX900-NEXT: v_perm_b32 v0, v1, v3, s4 3501; GFX900-NEXT: global_store_short v4, v1, s[16:17] offset:4 3502; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 3503; GFX900-NEXT: s_waitcnt vmcnt(0) 3504; GFX900-NEXT: s_setpc_b64 s[30:31] 3505; 3506; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_3_2: 3507; GFX90A: ; %bb.0: 3508; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3509; GFX90A-NEXT: v_mov_b32_e32 v4, 0 3510; GFX90A-NEXT: 
;;#ASMSTART 3511; GFX90A-NEXT: ; def v[0:1] 3512; GFX90A-NEXT: ;;#ASMEND 3513; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 3514; GFX90A-NEXT: ;;#ASMSTART 3515; GFX90A-NEXT: ; def v[2:3] 3516; GFX90A-NEXT: ;;#ASMEND 3517; GFX90A-NEXT: v_perm_b32 v0, v1, v3, s4 3518; GFX90A-NEXT: global_store_short v4, v1, s[16:17] offset:4 3519; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 3520; GFX90A-NEXT: s_waitcnt vmcnt(0) 3521; GFX90A-NEXT: s_setpc_b64 s[30:31] 3522; 3523; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_3_2: 3524; GFX940: ; %bb.0: 3525; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3526; GFX940-NEXT: v_mov_b32_e32 v4, 0 3527; GFX940-NEXT: ;;#ASMSTART 3528; GFX940-NEXT: ; def v[0:1] 3529; GFX940-NEXT: ;;#ASMEND 3530; GFX940-NEXT: s_mov_b32 s2, 0x7060302 3531; GFX940-NEXT: ;;#ASMSTART 3532; GFX940-NEXT: ; def v[2:3] 3533; GFX940-NEXT: ;;#ASMEND 3534; GFX940-NEXT: s_nop 0 3535; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 3536; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 3537; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 3538; GFX940-NEXT: s_waitcnt vmcnt(0) 3539; GFX940-NEXT: s_setpc_b64 s[30:31] 3540 %vec0 = call <4 x i16> asm "; def $0", "=v"() 3541 %vec1 = call <4 x i16> asm "; def $0", "=v"() 3542 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 3, i32 2> 3543 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 3544 ret void 3545} 3546 3547define void @v_shuffle_v3i16_v4i16__7_4_2(ptr addrspace(1) inreg %ptr) { 3548; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_4_2: 3549; GFX900: ; %bb.0: 3550; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3551; GFX900-NEXT: v_mov_b32_e32 v4, 0 3552; GFX900-NEXT: ;;#ASMSTART 3553; GFX900-NEXT: ; def v[0:1] 3554; GFX900-NEXT: ;;#ASMEND 3555; GFX900-NEXT: ;;#ASMSTART 3556; GFX900-NEXT: ; def v[2:3] 3557; GFX900-NEXT: ;;#ASMEND 3558; GFX900-NEXT: v_alignbit_b32 v0, v2, v3, 16 3559; GFX900-NEXT: global_store_short v4, v1, s[16:17] offset:4 3560; GFX900-NEXT: global_store_dword 
v4, v0, s[16:17] 3561; GFX900-NEXT: s_waitcnt vmcnt(0) 3562; GFX900-NEXT: s_setpc_b64 s[30:31] 3563; 3564; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_4_2: 3565; GFX90A: ; %bb.0: 3566; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3567; GFX90A-NEXT: v_mov_b32_e32 v4, 0 3568; GFX90A-NEXT: ;;#ASMSTART 3569; GFX90A-NEXT: ; def v[0:1] 3570; GFX90A-NEXT: ;;#ASMEND 3571; GFX90A-NEXT: ;;#ASMSTART 3572; GFX90A-NEXT: ; def v[2:3] 3573; GFX90A-NEXT: ;;#ASMEND 3574; GFX90A-NEXT: v_alignbit_b32 v0, v2, v3, 16 3575; GFX90A-NEXT: global_store_short v4, v1, s[16:17] offset:4 3576; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 3577; GFX90A-NEXT: s_waitcnt vmcnt(0) 3578; GFX90A-NEXT: s_setpc_b64 s[30:31] 3579; 3580; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_4_2: 3581; GFX940: ; %bb.0: 3582; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3583; GFX940-NEXT: v_mov_b32_e32 v4, 0 3584; GFX940-NEXT: ;;#ASMSTART 3585; GFX940-NEXT: ; def v[0:1] 3586; GFX940-NEXT: ;;#ASMEND 3587; GFX940-NEXT: ;;#ASMSTART 3588; GFX940-NEXT: ; def v[2:3] 3589; GFX940-NEXT: ;;#ASMEND 3590; GFX940-NEXT: s_nop 0 3591; GFX940-NEXT: v_alignbit_b32 v0, v2, v3, 16 3592; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 3593; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 3594; GFX940-NEXT: s_waitcnt vmcnt(0) 3595; GFX940-NEXT: s_setpc_b64 s[30:31] 3596 %vec0 = call <4 x i16> asm "; def $0", "=v"() 3597 %vec1 = call <4 x i16> asm "; def $0", "=v"() 3598 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 4, i32 2> 3599 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 3600 ret void 3601} 3602 3603define void @v_shuffle_v3i16_v4i16__7_5_2(ptr addrspace(1) inreg %ptr) { 3604; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_5_2: 3605; GFX900: ; %bb.0: 3606; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3607; GFX900-NEXT: v_mov_b32_e32 v4, 0 3608; GFX900-NEXT: ;;#ASMSTART 3609; GFX900-NEXT: ; def v[0:1] 3610; GFX900-NEXT: ;;#ASMEND 3611; GFX900-NEXT: s_mov_b32 s4, 
0x7060302 3612; GFX900-NEXT: ;;#ASMSTART 3613; GFX900-NEXT: ; def v[2:3] 3614; GFX900-NEXT: ;;#ASMEND 3615; GFX900-NEXT: v_perm_b32 v0, v2, v3, s4 3616; GFX900-NEXT: global_store_short v4, v1, s[16:17] offset:4 3617; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 3618; GFX900-NEXT: s_waitcnt vmcnt(0) 3619; GFX900-NEXT: s_setpc_b64 s[30:31] 3620; 3621; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_5_2: 3622; GFX90A: ; %bb.0: 3623; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3624; GFX90A-NEXT: v_mov_b32_e32 v4, 0 3625; GFX90A-NEXT: ;;#ASMSTART 3626; GFX90A-NEXT: ; def v[0:1] 3627; GFX90A-NEXT: ;;#ASMEND 3628; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 3629; GFX90A-NEXT: ;;#ASMSTART 3630; GFX90A-NEXT: ; def v[2:3] 3631; GFX90A-NEXT: ;;#ASMEND 3632; GFX90A-NEXT: v_perm_b32 v0, v2, v3, s4 3633; GFX90A-NEXT: global_store_short v4, v1, s[16:17] offset:4 3634; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 3635; GFX90A-NEXT: s_waitcnt vmcnt(0) 3636; GFX90A-NEXT: s_setpc_b64 s[30:31] 3637; 3638; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_5_2: 3639; GFX940: ; %bb.0: 3640; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3641; GFX940-NEXT: v_mov_b32_e32 v4, 0 3642; GFX940-NEXT: ;;#ASMSTART 3643; GFX940-NEXT: ; def v[0:1] 3644; GFX940-NEXT: ;;#ASMEND 3645; GFX940-NEXT: s_mov_b32 s2, 0x7060302 3646; GFX940-NEXT: ;;#ASMSTART 3647; GFX940-NEXT: ; def v[2:3] 3648; GFX940-NEXT: ;;#ASMEND 3649; GFX940-NEXT: s_nop 0 3650; GFX940-NEXT: v_perm_b32 v0, v2, v3, s2 3651; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 3652; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 3653; GFX940-NEXT: s_waitcnt vmcnt(0) 3654; GFX940-NEXT: s_setpc_b64 s[30:31] 3655 %vec0 = call <4 x i16> asm "; def $0", "=v"() 3656 %vec1 = call <4 x i16> asm "; def $0", "=v"() 3657 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 5, i32 2> 3658 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 3659 ret void 3660} 3661 3662define void 
@v_shuffle_v3i16_v4i16__7_6_2(ptr addrspace(1) inreg %ptr) { 3663; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_6_2: 3664; GFX900: ; %bb.0: 3665; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3666; GFX900-NEXT: v_mov_b32_e32 v4, 0 3667; GFX900-NEXT: ;;#ASMSTART 3668; GFX900-NEXT: ; def v[0:1] 3669; GFX900-NEXT: ;;#ASMEND 3670; GFX900-NEXT: ;;#ASMSTART 3671; GFX900-NEXT: ; def v[2:3] 3672; GFX900-NEXT: ;;#ASMEND 3673; GFX900-NEXT: v_alignbit_b32 v0, v3, v3, 16 3674; GFX900-NEXT: global_store_short v4, v1, s[16:17] offset:4 3675; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 3676; GFX900-NEXT: s_waitcnt vmcnt(0) 3677; GFX900-NEXT: s_setpc_b64 s[30:31] 3678; 3679; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_6_2: 3680; GFX90A: ; %bb.0: 3681; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3682; GFX90A-NEXT: v_mov_b32_e32 v4, 0 3683; GFX90A-NEXT: ;;#ASMSTART 3684; GFX90A-NEXT: ; def v[0:1] 3685; GFX90A-NEXT: ;;#ASMEND 3686; GFX90A-NEXT: ;;#ASMSTART 3687; GFX90A-NEXT: ; def v[2:3] 3688; GFX90A-NEXT: ;;#ASMEND 3689; GFX90A-NEXT: v_alignbit_b32 v0, v3, v3, 16 3690; GFX90A-NEXT: global_store_short v4, v1, s[16:17] offset:4 3691; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 3692; GFX90A-NEXT: s_waitcnt vmcnt(0) 3693; GFX90A-NEXT: s_setpc_b64 s[30:31] 3694; 3695; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_6_2: 3696; GFX940: ; %bb.0: 3697; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3698; GFX940-NEXT: v_mov_b32_e32 v4, 0 3699; GFX940-NEXT: ;;#ASMSTART 3700; GFX940-NEXT: ; def v[0:1] 3701; GFX940-NEXT: ;;#ASMEND 3702; GFX940-NEXT: ;;#ASMSTART 3703; GFX940-NEXT: ; def v[2:3] 3704; GFX940-NEXT: ;;#ASMEND 3705; GFX940-NEXT: s_nop 0 3706; GFX940-NEXT: v_alignbit_b32 v0, v3, v3, 16 3707; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 3708; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 3709; GFX940-NEXT: s_waitcnt vmcnt(0) 3710; GFX940-NEXT: s_setpc_b64 s[30:31] 3711 %vec0 = call <4 x i16> asm "; def $0", "=v"() 3712 %vec1 = call <4 x i16> asm 
"; def $0", "=v"() 3713 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 6, i32 2> 3714 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 3715 ret void 3716} 3717 3718define void @v_shuffle_v3i16_v4i16__u_3_3(ptr addrspace(1) inreg %ptr) { 3719; GFX900-LABEL: v_shuffle_v3i16_v4i16__u_3_3: 3720; GFX900: ; %bb.0: 3721; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3722; GFX900-NEXT: v_mov_b32_e32 v2, 0 3723; GFX900-NEXT: ;;#ASMSTART 3724; GFX900-NEXT: ; def v[0:1] 3725; GFX900-NEXT: ;;#ASMEND 3726; GFX900-NEXT: global_store_short_d16_hi v2, v1, s[16:17] offset:4 3727; GFX900-NEXT: global_store_dword v2, v1, s[16:17] 3728; GFX900-NEXT: s_waitcnt vmcnt(0) 3729; GFX900-NEXT: s_setpc_b64 s[30:31] 3730; 3731; GFX90A-LABEL: v_shuffle_v3i16_v4i16__u_3_3: 3732; GFX90A: ; %bb.0: 3733; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3734; GFX90A-NEXT: v_mov_b32_e32 v2, 0 3735; GFX90A-NEXT: ;;#ASMSTART 3736; GFX90A-NEXT: ; def v[0:1] 3737; GFX90A-NEXT: ;;#ASMEND 3738; GFX90A-NEXT: global_store_short_d16_hi v2, v1, s[16:17] offset:4 3739; GFX90A-NEXT: global_store_dword v2, v1, s[16:17] 3740; GFX90A-NEXT: s_waitcnt vmcnt(0) 3741; GFX90A-NEXT: s_setpc_b64 s[30:31] 3742; 3743; GFX940-LABEL: v_shuffle_v3i16_v4i16__u_3_3: 3744; GFX940: ; %bb.0: 3745; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3746; GFX940-NEXT: v_mov_b32_e32 v2, 0 3747; GFX940-NEXT: ;;#ASMSTART 3748; GFX940-NEXT: ; def v[0:1] 3749; GFX940-NEXT: ;;#ASMEND 3750; GFX940-NEXT: global_store_short_d16_hi v2, v1, s[0:1] offset:4 sc0 sc1 3751; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 3752; GFX940-NEXT: s_waitcnt vmcnt(0) 3753; GFX940-NEXT: s_setpc_b64 s[30:31] 3754 %vec0 = call <4 x i16> asm "; def $0", "=v"() 3755 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 poison, i32 3, i32 3> 3756 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 3757 ret void 3758} 3759 3760define void @v_shuffle_v3i16_v4i16__0_3_3(ptr addrspace(1) 
inreg %ptr) { 3761; GFX900-LABEL: v_shuffle_v3i16_v4i16__0_3_3: 3762; GFX900: ; %bb.0: 3763; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3764; GFX900-NEXT: ;;#ASMSTART 3765; GFX900-NEXT: ; def v[0:1] 3766; GFX900-NEXT: ;;#ASMEND 3767; GFX900-NEXT: s_mov_b32 s4, 0xffff 3768; GFX900-NEXT: v_mov_b32_e32 v2, 0 3769; GFX900-NEXT: v_bfi_b32 v0, s4, v0, v1 3770; GFX900-NEXT: v_lshrrev_b32_e32 v1, 16, v1 3771; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 3772; GFX900-NEXT: global_store_short v2, v1, s[16:17] offset:4 3773; GFX900-NEXT: s_waitcnt vmcnt(0) 3774; GFX900-NEXT: s_setpc_b64 s[30:31] 3775; 3776; GFX90A-LABEL: v_shuffle_v3i16_v4i16__0_3_3: 3777; GFX90A: ; %bb.0: 3778; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3779; GFX90A-NEXT: ;;#ASMSTART 3780; GFX90A-NEXT: ; def v[0:1] 3781; GFX90A-NEXT: ;;#ASMEND 3782; GFX90A-NEXT: s_mov_b32 s4, 0xffff 3783; GFX90A-NEXT: v_mov_b32_e32 v2, 0 3784; GFX90A-NEXT: v_bfi_b32 v0, s4, v0, v1 3785; GFX90A-NEXT: v_lshrrev_b32_e32 v1, 16, v1 3786; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 3787; GFX90A-NEXT: global_store_short v2, v1, s[16:17] offset:4 3788; GFX90A-NEXT: s_waitcnt vmcnt(0) 3789; GFX90A-NEXT: s_setpc_b64 s[30:31] 3790; 3791; GFX940-LABEL: v_shuffle_v3i16_v4i16__0_3_3: 3792; GFX940: ; %bb.0: 3793; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3794; GFX940-NEXT: ;;#ASMSTART 3795; GFX940-NEXT: ; def v[0:1] 3796; GFX940-NEXT: ;;#ASMEND 3797; GFX940-NEXT: s_mov_b32 s2, 0xffff 3798; GFX940-NEXT: v_mov_b32_e32 v2, 0 3799; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v1 3800; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1 3801; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 3802; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 3803; GFX940-NEXT: s_waitcnt vmcnt(0) 3804; GFX940-NEXT: s_setpc_b64 s[30:31] 3805 %vec0 = call <4 x i16> asm "; def $0", "=v"() 3806 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 3, i32 3> 3807 store <3 x i16> %shuf, ptr 
addrspace(1) %ptr, align 8 3808 ret void 3809} 3810 3811define void @v_shuffle_v3i16_v4i16__1_3_3(ptr addrspace(1) inreg %ptr) { 3812; GFX900-LABEL: v_shuffle_v3i16_v4i16__1_3_3: 3813; GFX900: ; %bb.0: 3814; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3815; GFX900-NEXT: ;;#ASMSTART 3816; GFX900-NEXT: ; def v[0:1] 3817; GFX900-NEXT: ;;#ASMEND 3818; GFX900-NEXT: s_mov_b32 s4, 0x7060302 3819; GFX900-NEXT: v_mov_b32_e32 v2, 0 3820; GFX900-NEXT: v_perm_b32 v0, v1, v0, s4 3821; GFX900-NEXT: v_lshrrev_b32_e32 v1, 16, v1 3822; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 3823; GFX900-NEXT: global_store_short v2, v1, s[16:17] offset:4 3824; GFX900-NEXT: s_waitcnt vmcnt(0) 3825; GFX900-NEXT: s_setpc_b64 s[30:31] 3826; 3827; GFX90A-LABEL: v_shuffle_v3i16_v4i16__1_3_3: 3828; GFX90A: ; %bb.0: 3829; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3830; GFX90A-NEXT: ;;#ASMSTART 3831; GFX90A-NEXT: ; def v[0:1] 3832; GFX90A-NEXT: ;;#ASMEND 3833; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 3834; GFX90A-NEXT: v_mov_b32_e32 v2, 0 3835; GFX90A-NEXT: v_perm_b32 v0, v1, v0, s4 3836; GFX90A-NEXT: v_lshrrev_b32_e32 v1, 16, v1 3837; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 3838; GFX90A-NEXT: global_store_short v2, v1, s[16:17] offset:4 3839; GFX90A-NEXT: s_waitcnt vmcnt(0) 3840; GFX90A-NEXT: s_setpc_b64 s[30:31] 3841; 3842; GFX940-LABEL: v_shuffle_v3i16_v4i16__1_3_3: 3843; GFX940: ; %bb.0: 3844; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3845; GFX940-NEXT: ;;#ASMSTART 3846; GFX940-NEXT: ; def v[0:1] 3847; GFX940-NEXT: ;;#ASMEND 3848; GFX940-NEXT: s_mov_b32 s2, 0x7060302 3849; GFX940-NEXT: v_mov_b32_e32 v2, 0 3850; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2 3851; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1 3852; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 3853; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 3854; GFX940-NEXT: s_waitcnt vmcnt(0) 3855; GFX940-NEXT: s_setpc_b64 s[30:31] 3856 %vec0 = call <4 x i16> asm "; def $0", "=v"() 3857 
%shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 1, i32 3, i32 3> 3858 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 3859 ret void 3860} 3861 3862define void @v_shuffle_v3i16_v4i16__2_3_3(ptr addrspace(1) inreg %ptr) { 3863; GFX900-LABEL: v_shuffle_v3i16_v4i16__2_3_3: 3864; GFX900: ; %bb.0: 3865; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3866; GFX900-NEXT: v_mov_b32_e32 v2, 0 3867; GFX900-NEXT: ;;#ASMSTART 3868; GFX900-NEXT: ; def v[0:1] 3869; GFX900-NEXT: ;;#ASMEND 3870; GFX900-NEXT: global_store_short_d16_hi v2, v1, s[16:17] offset:4 3871; GFX900-NEXT: global_store_dword v2, v1, s[16:17] 3872; GFX900-NEXT: s_waitcnt vmcnt(0) 3873; GFX900-NEXT: s_setpc_b64 s[30:31] 3874; 3875; GFX90A-LABEL: v_shuffle_v3i16_v4i16__2_3_3: 3876; GFX90A: ; %bb.0: 3877; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3878; GFX90A-NEXT: v_mov_b32_e32 v2, 0 3879; GFX90A-NEXT: ;;#ASMSTART 3880; GFX90A-NEXT: ; def v[0:1] 3881; GFX90A-NEXT: ;;#ASMEND 3882; GFX90A-NEXT: global_store_short_d16_hi v2, v1, s[16:17] offset:4 3883; GFX90A-NEXT: global_store_dword v2, v1, s[16:17] 3884; GFX90A-NEXT: s_waitcnt vmcnt(0) 3885; GFX90A-NEXT: s_setpc_b64 s[30:31] 3886; 3887; GFX940-LABEL: v_shuffle_v3i16_v4i16__2_3_3: 3888; GFX940: ; %bb.0: 3889; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3890; GFX940-NEXT: v_mov_b32_e32 v2, 0 3891; GFX940-NEXT: ;;#ASMSTART 3892; GFX940-NEXT: ; def v[0:1] 3893; GFX940-NEXT: ;;#ASMEND 3894; GFX940-NEXT: global_store_short_d16_hi v2, v1, s[0:1] offset:4 sc0 sc1 3895; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 3896; GFX940-NEXT: s_waitcnt vmcnt(0) 3897; GFX940-NEXT: s_setpc_b64 s[30:31] 3898 %vec0 = call <4 x i16> asm "; def $0", "=v"() 3899 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 2, i32 3, i32 3> 3900 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 3901 ret void 3902} 3903 3904define void @v_shuffle_v3i16_v4i16__3_3_3(ptr addrspace(1) inreg %ptr) { 3905; 
GFX900-LABEL: v_shuffle_v3i16_v4i16__3_3_3: 3906; GFX900: ; %bb.0: 3907; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3908; GFX900-NEXT: ;;#ASMSTART 3909; GFX900-NEXT: ; def v[0:1] 3910; GFX900-NEXT: ;;#ASMEND 3911; GFX900-NEXT: s_mov_b32 s4, 0x7060302 3912; GFX900-NEXT: v_mov_b32_e32 v2, 0 3913; GFX900-NEXT: v_lshrrev_b32_e32 v0, 16, v1 3914; GFX900-NEXT: v_perm_b32 v1, v1, v1, s4 3915; GFX900-NEXT: global_store_dword v2, v1, s[16:17] 3916; GFX900-NEXT: global_store_short v2, v0, s[16:17] offset:4 3917; GFX900-NEXT: s_waitcnt vmcnt(0) 3918; GFX900-NEXT: s_setpc_b64 s[30:31] 3919; 3920; GFX90A-LABEL: v_shuffle_v3i16_v4i16__3_3_3: 3921; GFX90A: ; %bb.0: 3922; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3923; GFX90A-NEXT: ;;#ASMSTART 3924; GFX90A-NEXT: ; def v[0:1] 3925; GFX90A-NEXT: ;;#ASMEND 3926; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 3927; GFX90A-NEXT: v_mov_b32_e32 v2, 0 3928; GFX90A-NEXT: v_lshrrev_b32_e32 v0, 16, v1 3929; GFX90A-NEXT: v_perm_b32 v1, v1, v1, s4 3930; GFX90A-NEXT: global_store_dword v2, v1, s[16:17] 3931; GFX90A-NEXT: global_store_short v2, v0, s[16:17] offset:4 3932; GFX90A-NEXT: s_waitcnt vmcnt(0) 3933; GFX90A-NEXT: s_setpc_b64 s[30:31] 3934; 3935; GFX940-LABEL: v_shuffle_v3i16_v4i16__3_3_3: 3936; GFX940: ; %bb.0: 3937; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3938; GFX940-NEXT: ;;#ASMSTART 3939; GFX940-NEXT: ; def v[0:1] 3940; GFX940-NEXT: ;;#ASMEND 3941; GFX940-NEXT: s_mov_b32 s2, 0x7060302 3942; GFX940-NEXT: v_mov_b32_e32 v2, 0 3943; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v1 3944; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 3945; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 3946; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 3947; GFX940-NEXT: s_waitcnt vmcnt(0) 3948; GFX940-NEXT: s_setpc_b64 s[30:31] 3949 %vec0 = call <4 x i16> asm "; def $0", "=v"() 3950 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 3, i32 3, i32 3> 3951 store <3 x i16> %shuf, ptr addrspace(1) 
%ptr, align 8 3952 ret void 3953} 3954 3955define void @v_shuffle_v3i16_v4i16__4_3_3(ptr addrspace(1) inreg %ptr) { 3956; GFX900-LABEL: v_shuffle_v3i16_v4i16__4_3_3: 3957; GFX900: ; %bb.0: 3958; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3959; GFX900-NEXT: v_mov_b32_e32 v2, 0 3960; GFX900-NEXT: ;;#ASMSTART 3961; GFX900-NEXT: ; def v[0:1] 3962; GFX900-NEXT: ;;#ASMEND 3963; GFX900-NEXT: global_store_short_d16_hi v2, v1, s[16:17] offset:4 3964; GFX900-NEXT: global_store_dword v2, v1, s[16:17] 3965; GFX900-NEXT: s_waitcnt vmcnt(0) 3966; GFX900-NEXT: s_setpc_b64 s[30:31] 3967; 3968; GFX90A-LABEL: v_shuffle_v3i16_v4i16__4_3_3: 3969; GFX90A: ; %bb.0: 3970; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3971; GFX90A-NEXT: v_mov_b32_e32 v2, 0 3972; GFX90A-NEXT: ;;#ASMSTART 3973; GFX90A-NEXT: ; def v[0:1] 3974; GFX90A-NEXT: ;;#ASMEND 3975; GFX90A-NEXT: global_store_short_d16_hi v2, v1, s[16:17] offset:4 3976; GFX90A-NEXT: global_store_dword v2, v1, s[16:17] 3977; GFX90A-NEXT: s_waitcnt vmcnt(0) 3978; GFX90A-NEXT: s_setpc_b64 s[30:31] 3979; 3980; GFX940-LABEL: v_shuffle_v3i16_v4i16__4_3_3: 3981; GFX940: ; %bb.0: 3982; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3983; GFX940-NEXT: v_mov_b32_e32 v2, 0 3984; GFX940-NEXT: ;;#ASMSTART 3985; GFX940-NEXT: ; def v[0:1] 3986; GFX940-NEXT: ;;#ASMEND 3987; GFX940-NEXT: global_store_short_d16_hi v2, v1, s[0:1] offset:4 sc0 sc1 3988; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 3989; GFX940-NEXT: s_waitcnt vmcnt(0) 3990; GFX940-NEXT: s_setpc_b64 s[30:31] 3991 %vec0 = call <4 x i16> asm "; def $0", "=v"() 3992 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 4, i32 3, i32 3> 3993 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 3994 ret void 3995} 3996 3997define void @v_shuffle_v3i16_v4i16__5_3_3(ptr addrspace(1) inreg %ptr) { 3998; GFX900-LABEL: v_shuffle_v3i16_v4i16__5_3_3: 3999; GFX900: ; %bb.0: 4000; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4001; GFX900-NEXT: 
;;#ASMSTART 4002; GFX900-NEXT: ; def v[0:1] 4003; GFX900-NEXT: ;;#ASMEND 4004; GFX900-NEXT: s_mov_b32 s4, 0x7060302 4005; GFX900-NEXT: v_mov_b32_e32 v4, 0 4006; GFX900-NEXT: ;;#ASMSTART 4007; GFX900-NEXT: ; def v[2:3] 4008; GFX900-NEXT: ;;#ASMEND 4009; GFX900-NEXT: v_perm_b32 v0, v1, v2, s4 4010; GFX900-NEXT: v_lshrrev_b32_e32 v1, 16, v1 4011; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 4012; GFX900-NEXT: global_store_short v4, v1, s[16:17] offset:4 4013; GFX900-NEXT: s_waitcnt vmcnt(0) 4014; GFX900-NEXT: s_setpc_b64 s[30:31] 4015; 4016; GFX90A-LABEL: v_shuffle_v3i16_v4i16__5_3_3: 4017; GFX90A: ; %bb.0: 4018; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4019; GFX90A-NEXT: ;;#ASMSTART 4020; GFX90A-NEXT: ; def v[0:1] 4021; GFX90A-NEXT: ;;#ASMEND 4022; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 4023; GFX90A-NEXT: v_mov_b32_e32 v4, 0 4024; GFX90A-NEXT: ;;#ASMSTART 4025; GFX90A-NEXT: ; def v[2:3] 4026; GFX90A-NEXT: ;;#ASMEND 4027; GFX90A-NEXT: v_perm_b32 v0, v1, v2, s4 4028; GFX90A-NEXT: v_lshrrev_b32_e32 v1, 16, v1 4029; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 4030; GFX90A-NEXT: global_store_short v4, v1, s[16:17] offset:4 4031; GFX90A-NEXT: s_waitcnt vmcnt(0) 4032; GFX90A-NEXT: s_setpc_b64 s[30:31] 4033; 4034; GFX940-LABEL: v_shuffle_v3i16_v4i16__5_3_3: 4035; GFX940: ; %bb.0: 4036; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4037; GFX940-NEXT: ;;#ASMSTART 4038; GFX940-NEXT: ; def v[0:1] 4039; GFX940-NEXT: ;;#ASMEND 4040; GFX940-NEXT: s_mov_b32 s2, 0x7060302 4041; GFX940-NEXT: v_mov_b32_e32 v4, 0 4042; GFX940-NEXT: ;;#ASMSTART 4043; GFX940-NEXT: ; def v[2:3] 4044; GFX940-NEXT: ;;#ASMEND 4045; GFX940-NEXT: s_nop 0 4046; GFX940-NEXT: v_perm_b32 v0, v1, v2, s2 4047; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1 4048; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 4049; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 4050; GFX940-NEXT: s_waitcnt vmcnt(0) 4051; GFX940-NEXT: s_setpc_b64 s[30:31] 4052 %vec0 = call <4 x i16> asm "; 
def $0", "=v"() 4053 %vec1 = call <4 x i16> asm "; def $0", "=v"() 4054 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 5, i32 3, i32 3> 4055 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 4056 ret void 4057} 4058 4059define void @v_shuffle_v3i16_v4i16__6_3_3(ptr addrspace(1) inreg %ptr) { 4060; GFX900-LABEL: v_shuffle_v3i16_v4i16__6_3_3: 4061; GFX900: ; %bb.0: 4062; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4063; GFX900-NEXT: ;;#ASMSTART 4064; GFX900-NEXT: ; def v[0:1] 4065; GFX900-NEXT: ;;#ASMEND 4066; GFX900-NEXT: s_mov_b32 s4, 0xffff 4067; GFX900-NEXT: v_mov_b32_e32 v4, 0 4068; GFX900-NEXT: ;;#ASMSTART 4069; GFX900-NEXT: ; def v[2:3] 4070; GFX900-NEXT: ;;#ASMEND 4071; GFX900-NEXT: v_bfi_b32 v0, s4, v3, v1 4072; GFX900-NEXT: v_lshrrev_b32_e32 v1, 16, v1 4073; GFX900-NEXT: global_store_short v4, v1, s[16:17] offset:4 4074; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 4075; GFX900-NEXT: s_waitcnt vmcnt(0) 4076; GFX900-NEXT: s_setpc_b64 s[30:31] 4077; 4078; GFX90A-LABEL: v_shuffle_v3i16_v4i16__6_3_3: 4079; GFX90A: ; %bb.0: 4080; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4081; GFX90A-NEXT: ;;#ASMSTART 4082; GFX90A-NEXT: ; def v[0:1] 4083; GFX90A-NEXT: ;;#ASMEND 4084; GFX90A-NEXT: s_mov_b32 s4, 0xffff 4085; GFX90A-NEXT: v_mov_b32_e32 v4, 0 4086; GFX90A-NEXT: ;;#ASMSTART 4087; GFX90A-NEXT: ; def v[2:3] 4088; GFX90A-NEXT: ;;#ASMEND 4089; GFX90A-NEXT: v_bfi_b32 v0, s4, v3, v1 4090; GFX90A-NEXT: v_lshrrev_b32_e32 v1, 16, v1 4091; GFX90A-NEXT: global_store_short v4, v1, s[16:17] offset:4 4092; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 4093; GFX90A-NEXT: s_waitcnt vmcnt(0) 4094; GFX90A-NEXT: s_setpc_b64 s[30:31] 4095; 4096; GFX940-LABEL: v_shuffle_v3i16_v4i16__6_3_3: 4097; GFX940: ; %bb.0: 4098; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4099; GFX940-NEXT: ;;#ASMSTART 4100; GFX940-NEXT: ; def v[0:1] 4101; GFX940-NEXT: ;;#ASMEND 4102; GFX940-NEXT: s_mov_b32 s2, 0xffff 4103; GFX940-NEXT: v_mov_b32_e32 
v4, 0 4104; GFX940-NEXT: ;;#ASMSTART 4105; GFX940-NEXT: ; def v[2:3] 4106; GFX940-NEXT: ;;#ASMEND 4107; GFX940-NEXT: s_nop 0 4108; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v1 4109; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1 4110; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 4111; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 4112; GFX940-NEXT: s_waitcnt vmcnt(0) 4113; GFX940-NEXT: s_setpc_b64 s[30:31] 4114 %vec0 = call <4 x i16> asm "; def $0", "=v"() 4115 %vec1 = call <4 x i16> asm "; def $0", "=v"() 4116 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 6, i32 3, i32 3> 4117 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 4118 ret void 4119} 4120 4121define void @v_shuffle_v3i16_v4i16__7_3_3(ptr addrspace(1) inreg %ptr) { 4122; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_3_3: 4123; GFX900: ; %bb.0: 4124; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4125; GFX900-NEXT: ;;#ASMSTART 4126; GFX900-NEXT: ; def v[0:1] 4127; GFX900-NEXT: ;;#ASMEND 4128; GFX900-NEXT: s_mov_b32 s4, 0x7060302 4129; GFX900-NEXT: v_mov_b32_e32 v4, 0 4130; GFX900-NEXT: ;;#ASMSTART 4131; GFX900-NEXT: ; def v[2:3] 4132; GFX900-NEXT: ;;#ASMEND 4133; GFX900-NEXT: v_perm_b32 v0, v1, v3, s4 4134; GFX900-NEXT: v_lshrrev_b32_e32 v1, 16, v1 4135; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 4136; GFX900-NEXT: global_store_short v4, v1, s[16:17] offset:4 4137; GFX900-NEXT: s_waitcnt vmcnt(0) 4138; GFX900-NEXT: s_setpc_b64 s[30:31] 4139; 4140; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_3_3: 4141; GFX90A: ; %bb.0: 4142; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4143; GFX90A-NEXT: ;;#ASMSTART 4144; GFX90A-NEXT: ; def v[0:1] 4145; GFX90A-NEXT: ;;#ASMEND 4146; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 4147; GFX90A-NEXT: v_mov_b32_e32 v4, 0 4148; GFX90A-NEXT: ;;#ASMSTART 4149; GFX90A-NEXT: ; def v[2:3] 4150; GFX90A-NEXT: ;;#ASMEND 4151; GFX90A-NEXT: v_perm_b32 v0, v1, v3, s4 4152; GFX90A-NEXT: v_lshrrev_b32_e32 v1, 16, v1 4153; GFX90A-NEXT: 
global_store_dword v4, v0, s[16:17] 4154; GFX90A-NEXT: global_store_short v4, v1, s[16:17] offset:4 4155; GFX90A-NEXT: s_waitcnt vmcnt(0) 4156; GFX90A-NEXT: s_setpc_b64 s[30:31] 4157; 4158; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_3_3: 4159; GFX940: ; %bb.0: 4160; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4161; GFX940-NEXT: ;;#ASMSTART 4162; GFX940-NEXT: ; def v[0:1] 4163; GFX940-NEXT: ;;#ASMEND 4164; GFX940-NEXT: s_mov_b32 s2, 0x7060302 4165; GFX940-NEXT: v_mov_b32_e32 v4, 0 4166; GFX940-NEXT: ;;#ASMSTART 4167; GFX940-NEXT: ; def v[2:3] 4168; GFX940-NEXT: ;;#ASMEND 4169; GFX940-NEXT: s_nop 0 4170; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 4171; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1 4172; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 4173; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 4174; GFX940-NEXT: s_waitcnt vmcnt(0) 4175; GFX940-NEXT: s_setpc_b64 s[30:31] 4176 %vec0 = call <4 x i16> asm "; def $0", "=v"() 4177 %vec1 = call <4 x i16> asm "; def $0", "=v"() 4178 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 3, i32 3> 4179 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 4180 ret void 4181} 4182 4183define void @v_shuffle_v3i16_v4i16__7_u_3(ptr addrspace(1) inreg %ptr) { 4184; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_u_3: 4185; GFX900: ; %bb.0: 4186; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4187; GFX900-NEXT: v_mov_b32_e32 v4, 0 4188; GFX900-NEXT: ;;#ASMSTART 4189; GFX900-NEXT: ; def v[0:1] 4190; GFX900-NEXT: ;;#ASMEND 4191; GFX900-NEXT: ;;#ASMSTART 4192; GFX900-NEXT: ; def v[2:3] 4193; GFX900-NEXT: ;;#ASMEND 4194; GFX900-NEXT: v_alignbit_b32 v0, s4, v3, 16 4195; GFX900-NEXT: global_store_short_d16_hi v4, v1, s[16:17] offset:4 4196; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 4197; GFX900-NEXT: s_waitcnt vmcnt(0) 4198; GFX900-NEXT: s_setpc_b64 s[30:31] 4199; 4200; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_u_3: 4201; GFX90A: ; %bb.0: 4202; GFX90A-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:1]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:3]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_alignbit_b32 v0, s4, v3, 16
; GFX90A-NEXT: global_store_short_d16_hi v4, v1, s[16:17] offset:4
; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_u_3:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: v_mov_b32_e32 v4, 0
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def v[0:1]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def v[2:3]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_nop 0
; GFX940-NEXT: v_alignbit_b32 v0, s0, v3, 16
; GFX940-NEXT: global_store_short_d16_hi v4, v1, s[0:1] offset:4 sc0 sc1
; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
; GFX940-NEXT: s_waitcnt vmcnt(0)
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 poison, i32 3>
  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
  ret void
}

; Shuffle mask <7, 0, 3> over (%vec0, %vec1): result = { %vec1[3], %vec0[0], %vec0[3] },
; stored as <3 x i16> through %ptr.
define void @v_shuffle_v3i16_v4i16__7_0_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_0_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[2:3]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_alignbit_b32 v0, v0, v3, 16
; GFX900-NEXT: global_store_short_d16_hi v4, v1, s[16:17] offset:4
; GFX900-NEXT: global_store_dword v4, v0, s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_0_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:1]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:3]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_alignbit_b32 v0, v0, v3, 16
; GFX90A-NEXT: global_store_short_d16_hi v4, v1, s[16:17] offset:4
; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_0_3:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: v_mov_b32_e32 v4, 0
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def v[0:1]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def v[2:3]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_nop 0
; GFX940-NEXT: v_alignbit_b32 v0, v0, v3, 16
; GFX940-NEXT: global_store_short_d16_hi v4, v1, s[0:1] offset:4 sc0 sc1
; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
; GFX940-NEXT: s_waitcnt vmcnt(0)
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 0, i32 3>
  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
  ret void
}

; Shuffle mask <7, 1, 3> over (%vec0, %vec1): result = { %vec1[3], %vec0[1], %vec0[3] },
; stored as <3 x i16> through %ptr.
define void @v_shuffle_v3i16_v4i16__7_1_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_1_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s4, 0x7060302
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[2:3]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_perm_b32 v0, v0, v3, s4
; GFX900-NEXT: global_store_short_d16_hi v4, v1, s[16:17] offset:4
; GFX900-NEXT: global_store_dword v4, v0, s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_1_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:1]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:3]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_perm_b32 v0, v0, v3, s4
; GFX90A-NEXT: global_store_short_d16_hi v4, v1, s[16:17] offset:4
; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_1_3:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: v_mov_b32_e32 v4, 0
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def v[0:1]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_mov_b32 s2, 0x7060302
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def v[2:3]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_nop 0
; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2
; GFX940-NEXT: global_store_short_d16_hi v4, v1, s[0:1] offset:4 sc0 sc1
; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
; GFX940-NEXT: s_waitcnt vmcnt(0)
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 1, i32 3>
  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
  ret void
}

; Shuffle mask <7, 2, 3> over (%vec0, %vec1): result = { %vec1[3], %vec0[2], %vec0[3] },
; stored as <3 x i16> through %ptr.
define void @v_shuffle_v3i16_v4i16__7_2_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_2_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[2:3]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_alignbit_b32 v0, v1, v3, 16
; GFX900-NEXT: global_store_short_d16_hi v4, v1, s[16:17] offset:4
; GFX900-NEXT: global_store_dword v4, v0, s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_2_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:1]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:3]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_alignbit_b32 v0, v1, v3, 16
; GFX90A-NEXT: global_store_short_d16_hi v4, v1, s[16:17] offset:4
; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_2_3:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: v_mov_b32_e32 v4, 0
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def v[0:1]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def v[2:3]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_nop 0
; GFX940-NEXT: v_alignbit_b32 v0, v1, v3, 16
; GFX940-NEXT: global_store_short_d16_hi v4, v1, s[0:1] offset:4 sc0 sc1
; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
; GFX940-NEXT: s_waitcnt vmcnt(0)
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 2, i32 3>
  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
  ret void
}

; Shuffle mask <7, 4, 3> over (%vec0, %vec1): result = { %vec1[3], %vec1[0], %vec0[3] },
; stored as <3 x i16> through %ptr.
define void @v_shuffle_v3i16_v4i16__7_4_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_4_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[2:3]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_alignbit_b32 v0, v2, v3, 16
; GFX900-NEXT: global_store_short_d16_hi v4, v1, s[16:17] offset:4
; GFX900-NEXT: global_store_dword v4, v0, s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_4_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:1]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:3]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_alignbit_b32 v0, v2, v3, 16
; GFX90A-NEXT: global_store_short_d16_hi v4, v1, s[16:17] offset:4
; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_4_3:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: v_mov_b32_e32 v4, 0
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def v[0:1]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def v[2:3]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_nop 0
; GFX940-NEXT: v_alignbit_b32 v0, v2, v3, 16
; GFX940-NEXT: global_store_short_d16_hi v4, v1, s[0:1] offset:4 sc0 sc1
; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
; GFX940-NEXT: s_waitcnt vmcnt(0)
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 4, i32 3>
  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
  ret void
}

; Shuffle mask <7, 5, 3> over (%vec0, %vec1): result = { %vec1[3], %vec1[1], %vec0[3] },
; stored as <3 x i16> through %ptr.
define void @v_shuffle_v3i16_v4i16__7_5_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_5_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s4, 0x7060302
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[2:3]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_perm_b32 v0, v2, v3, s4
; GFX900-NEXT: global_store_short_d16_hi v4, v1, s[16:17] offset:4
; GFX900-NEXT: global_store_dword v4, v0, s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_5_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:1]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:3]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_perm_b32 v0, v2, v3, s4
; GFX90A-NEXT: global_store_short_d16_hi v4, v1, s[16:17] offset:4
; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_5_3:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: v_mov_b32_e32 v4, 0
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def v[0:1]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_mov_b32 s2, 0x7060302
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def v[2:3]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_nop 0
; GFX940-NEXT: v_perm_b32 v0, v2, v3, s2
; GFX940-NEXT: global_store_short_d16_hi v4, v1, s[0:1] offset:4 sc0 sc1
; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
; GFX940-NEXT: s_waitcnt vmcnt(0)
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 5, i32 3>
  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
  ret void
}

; Shuffle mask <7, 6, 3> over (%vec0, %vec1): result = { %vec1[3], %vec1[2], %vec0[3] },
; stored as <3 x i16> through %ptr.
define void @v_shuffle_v3i16_v4i16__7_6_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_6_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[2:3]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_alignbit_b32 v0, v3, v3, 16
; GFX900-NEXT: global_store_short_d16_hi v4, v1, s[16:17] offset:4
; GFX900-NEXT: global_store_dword v4, v0, s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_6_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:1]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:3]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_alignbit_b32 v0, v3, v3, 16
; GFX90A-NEXT: global_store_short_d16_hi v4, v1, s[16:17] offset:4
; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_6_3:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: v_mov_b32_e32 v4, 0
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def v[0:1]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def v[2:3]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_nop 0
; GFX940-NEXT: v_alignbit_b32 v0, v3, v3, 16
; GFX940-NEXT: global_store_short_d16_hi v4, v1, s[0:1] offset:4 sc0 sc1
; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
; GFX940-NEXT: s_waitcnt vmcnt(0)
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 6, i32 3>
  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
  ret void
}

; Shuffle mask <poison, 4, 4> over (%vec0, poison): no lane selects from %vec0; lanes 1-2
; index the poison operand. The expected output below contains no store.
define void @v_shuffle_v3i16_v4i16__u_4_4(ptr addrspace(1) inreg %ptr) {
; GFX9-LABEL: v_shuffle_v3i16_v4i16__u_4_4:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 poison, i32 4, i32 4>
  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
  ret void
}

; Shuffle mask <0, 4, 4> over (%vec0, poison): lane 0 = %vec0[0]; lanes 1-2 index the
; poison operand.
define void @v_shuffle_v3i16_v4i16__0_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i16_v4i16__0_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_mov_b32_e32 v2, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: global_store_short v2, v1, s[16:17] offset:4
; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i16_v4i16__0_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: v_mov_b32_e32 v2, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:1]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: global_store_short v2, v1, s[16:17] offset:4
; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v3i16_v4i16__0_4_4:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: v_mov_b32_e32 v2, 0
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def v[0:1]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1
; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
; GFX940-NEXT: s_waitcnt vmcnt(0)
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 4, i32 4>
  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
  ret void
}

; Shuffle mask <1, 4, 4> over (%vec0, poison): lane 0 = %vec0[1]; lanes 1-2 index the
; poison operand.
define void @v_shuffle_v3i16_v4i16__1_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i16_v4i16__1_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v2, 0
; GFX900-NEXT: v_alignbit_b32 v0, s4, v0, 16
; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i16_v4i16__1_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:1]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v2, 0
; GFX90A-NEXT: v_alignbit_b32 v0, s4, v0, 16
; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v3i16_v4i16__1_4_4:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def v[0:1]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: v_mov_b32_e32 v2, 0
; GFX940-NEXT: v_alignbit_b32 v0, s0, v0, 16
; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
; GFX940-NEXT: s_waitcnt vmcnt(0)
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 1, i32 4, i32 4>
  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
  ret void
}

; Shuffle mask <2, 4, 4> over (%vec0, poison): lane 0 = %vec0[2]; lanes 1-2 index the
; poison operand.
define void @v_shuffle_v3i16_v4i16__2_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i16_v4i16__2_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_mov_b32_e32 v2, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: global_store_dword v2, v1, s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i16_v4i16__2_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: v_mov_b32_e32 v2, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:1]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: global_store_dword v2, v1, s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v3i16_v4i16__2_4_4:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: v_mov_b32_e32 v2, 0
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def v[0:1]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1
; GFX940-NEXT: s_waitcnt vmcnt(0)
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 2, i32 4, i32 4>
  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
  ret void
}

; Shuffle mask <3, 4, 4> over (%vec0, poison): lane 0 = %vec0[3]; lanes 1-2 index the
; poison operand.
define void @v_shuffle_v3i16_v4i16__3_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i16_v4i16__3_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v2, 0
; GFX900-NEXT: v_alignbit_b32 v0, s4, v1, 16
; GFX900-NEXT: global_store_dword v2, v0, s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i16_v4i16__3_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:1]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v2, 0
; GFX90A-NEXT: v_alignbit_b32 v0, s4, v1, 16
; GFX90A-NEXT: global_store_dword v2, v0, s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v3i16_v4i16__3_4_4:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def v[0:1]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: v_mov_b32_e32 v2, 0
; GFX940-NEXT: v_alignbit_b32 v0, s0, v1, 16
; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1
; GFX940-NEXT: s_waitcnt vmcnt(0)
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 3, i32 4, i32 4>
  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
  ret void
}

; Shuffle mask <4, 4, 4> over (%vec0, poison): every lane indexes the poison operand.
; The expected output below contains no store.
define void @v_shuffle_v3i16_v4i16__4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX9-LABEL: v_shuffle_v3i16_v4i16__4_4_4:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 4, i32 4, i32 4>
  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
  ret void
}

; Shuffle mask <5, 4, 4> over (%vec0, %vec1): result = { %vec1[1], %vec1[0], %vec1[0] };
; no lane selects from %vec0.
define void @v_shuffle_v3i16_v4i16__5_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i16_v4i16__5_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_mov_b32_e32 v2, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_alignbit_b32 v1, v0, v0, 16
; GFX900-NEXT: global_store_short v2, v0, s[16:17] offset:4
; GFX900-NEXT: global_store_dword v2, v1, s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i16_v4i16__5_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: v_mov_b32_e32 v2, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:1]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_alignbit_b32 v1, v0, v0, 16
; GFX90A-NEXT: global_store_short v2, v0, s[16:17] offset:4
; GFX90A-NEXT: global_store_dword v2, v1, s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v3i16_v4i16__5_4_4:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: v_mov_b32_e32 v2, 0
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def v[0:1]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_nop 0
; GFX940-NEXT: v_alignbit_b32 v1, v0, v0, 16
; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1
; GFX940-NEXT: s_waitcnt vmcnt(0)
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 5, i32 4, i32 4>
  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
  ret void
}

; Shuffle mask <6, 4, 4> over (%vec0, %vec1): result = { %vec1[2], %vec1[0], %vec1[0] };
; no lane selects from %vec0.
define void @v_shuffle_v3i16_v4i16__6_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i16_v4i16__6_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_mov_b32_e32 v2, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s4, 0x5040100
; GFX900-NEXT: v_perm_b32 v1, v0, v1, s4
; GFX900-NEXT: global_store_short v2, v0, s[16:17] offset:4
; GFX900-NEXT: global_store_dword v2, v1, s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i16_v4i16__6_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: v_mov_b32_e32 v2, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:1]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s4, 0x5040100
; GFX90A-NEXT: v_perm_b32 v1, v0, v1, s4
; GFX90A-NEXT: global_store_short v2, v0, s[16:17] offset:4
; GFX90A-NEXT: global_store_dword v2, v1, s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v3i16_v4i16__6_4_4:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: v_mov_b32_e32 v2, 0
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def v[0:1]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_mov_b32 s2, 0x5040100
; GFX940-NEXT: v_perm_b32 v1, v0, v1, s2
; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1
; GFX940-NEXT: s_waitcnt vmcnt(0)
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 6, i32 4, i32 4>
  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
  ret void
}

; Shuffle mask <7, 4, 4> over (%vec0, %vec1): result = { %vec1[3], %vec1[0], %vec1[0] };
; no lane selects from %vec0.
define void @v_shuffle_v3i16_v4i16__7_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_mov_b32_e32 v2, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_alignbit_b32 v1, v0, v1, 16
; GFX900-NEXT: global_store_short v2, v0, s[16:17] offset:4
; GFX900-NEXT: global_store_dword v2, v1, s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: v_mov_b32_e32 v2, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:1]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_alignbit_b32 v1, v0, v1, 16
; GFX90A-NEXT: global_store_short v2, v0, s[16:17] offset:4
; GFX90A-NEXT: global_store_dword v2, v1, s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_4_4:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: v_mov_b32_e32 v2, 0
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def v[0:1]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_nop 0
; GFX940-NEXT: v_alignbit_b32 v1, v0, v1, 16
; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1
; GFX940-NEXT: s_waitcnt vmcnt(0)
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 4, i32 4>
  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
  ret void
}

; Shuffle mask <7, poison, 4> over (%vec0, %vec1): lane 0 = %vec1[3], lane 1 is poison,
; lane 2 = %vec1[0]; no lane selects from %vec0.
define void @v_shuffle_v3i16_v4i16__7_u_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_u_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_mov_b32_e32 v2, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_alignbit_b32 v1, s4, v1, 16
; GFX900-NEXT: global_store_short v2, v0, s[16:17] offset:4
; GFX900-NEXT: global_store_dword v2, v1, s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_u_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: v_mov_b32_e32 v2, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:1]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_alignbit_b32 v1, s4, v1, 16
; GFX90A-NEXT: global_store_short v2, v0, s[16:17] offset:4
; GFX90A-NEXT: global_store_dword v2, v1, s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_u_4:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: v_mov_b32_e32 v2, 0
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def v[0:1]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_nop 0
; GFX940-NEXT: v_alignbit_b32 v1, s0, v1, 16
; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1
; GFX940-NEXT: s_waitcnt vmcnt(0)
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 poison, i32 4>
  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
  ret void
}

; Shuffle mask <7, 0, 4> over (%vec0, %vec1): result = { %vec1[3], %vec0[0], %vec1[0] },
; stored as <3 x i16> through %ptr.
define void @v_shuffle_v3i16_v4i16__7_0_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_0_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v3, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[1:2]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_alignbit_b32 v0, v0, v2, 16
; GFX900-NEXT: global_store_short v3, v1, s[16:17] offset:4
; GFX900-NEXT: global_store_dword v3, v0, s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_0_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:1]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:3]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_alignbit_b32 v0, v0, v3, 16
; GFX90A-NEXT: global_store_short v4, v2, s[16:17] offset:4
; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_0_4:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: v_mov_b32_e32 v4, 0
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def v[0:1]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def v[2:3]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_nop 0
; GFX940-NEXT: v_alignbit_b32 v0, v0, v3, 16
; GFX940-NEXT: global_store_short v4, v2, s[0:1] offset:4 sc0 sc1
; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
; GFX940-NEXT: s_waitcnt vmcnt(0)
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 0, i32 4>
  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
  ret void
}

; Shuffle mask <7, 1, 4> over (%vec0, %vec1): result = { %vec1[3], %vec0[1], %vec1[0] },
; stored as <3 x i16> through %ptr.
define void @v_shuffle_v3i16_v4i16__7_1_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_1_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v3, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[1:2]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s4, 0x7060302
; GFX900-NEXT: v_perm_b32 v0, v0, v2, s4
; GFX900-NEXT: global_store_short v3, v1, s[16:17] offset:4
; GFX900-NEXT: global_store_dword v3, v0, s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_1_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:1]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:3]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
; GFX90A-NEXT: v_perm_b32 v0, v0, v3, s4
; GFX90A-NEXT: global_store_short v4, v2, s[16:17] offset:4
; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_1_4:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: v_mov_b32_e32 v4, 0
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def v[0:1]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def v[2:3]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_mov_b32 s2, 0x7060302
; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2
; GFX940-NEXT: global_store_short v4, v2, s[0:1] offset:4 sc0 sc1
; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
; GFX940-NEXT: s_waitcnt vmcnt(0)
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 1, i32 4>
  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
  ret void
}

; Shuffle mask <7, 2, 4> over (%vec0, %vec1): result = { %vec1[3], %vec0[2], %vec1[0] },
; stored as <3 x i16> through %ptr.
define void @v_shuffle_v3i16_v4i16__7_2_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_2_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[2:3]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_alignbit_b32 v0, v1, v3, 16
; GFX900-NEXT: global_store_short v4, v2, s[16:17] offset:4
; GFX900-NEXT: global_store_dword v4, v0, s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_2_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:1]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:3]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_alignbit_b32 v0, v1, v3, 16
; GFX90A-NEXT: global_store_short v4, v2, s[16:17] offset:4
; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_2_4:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: v_mov_b32_e32 v4, 0
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def v[0:1]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def v[2:3]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_nop 0
; GFX940-NEXT: v_alignbit_b32 v0, v1, v3, 16
; GFX940-NEXT: global_store_short v4, v2, s[0:1] offset:4 sc0 sc1
; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
; GFX940-NEXT: s_waitcnt vmcnt(0)
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 2, i32 4>
  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
  ret void
}

; Shuffle mask <7, 3, 4> over (%vec0, %vec1): result = { %vec1[3], %vec0[3], %vec1[0] },
; stored as <3 x i16> through %ptr.
define void @v_shuffle_v3i16_v4i16__7_3_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_3_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[2:3]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s4, 0x7060302
; GFX900-NEXT: v_perm_b32 v0, v1, v3, s4
; GFX900-NEXT: global_store_short v4, v2, s[16:17] offset:4
; GFX900-NEXT: global_store_dword v4, v0, s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_3_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:1]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:3]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
; GFX90A-NEXT: v_perm_b32 v0, v1, v3, s4
; GFX90A-NEXT: global_store_short v4, v2, s[16:17] offset:4
; GFX90A-NEXT: global_store_dword v4, v0, s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_3_4:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: v_mov_b32_e32 v4, 0
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def v[0:1]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def v[2:3]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_mov_b32 s2, 0x7060302
; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2
; GFX940-NEXT: global_store_short v4, v2, s[0:1] offset:4 sc0 sc1
; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1
; GFX940-NEXT: s_waitcnt vmcnt(0)
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 3, i32 4>
  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
  ret void
}

; Shuffle mask <7, 5, 4> over (%vec0, %vec1): result = { %vec1[3], %vec1[1], %vec1[0] };
; no lane selects from %vec0.
define void @v_shuffle_v3i16_v4i16__7_5_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_5_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_mov_b32_e32 v2, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s4, 0x7060302
; GFX900-NEXT: v_perm_b32 v1, v0, v1, s4
; GFX900-NEXT: global_store_short v2, v0, s[16:17] offset:4
; GFX900-NEXT: global_store_dword v2, v1, s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_5_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: v_mov_b32_e32 v2, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:1]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s4, 0x7060302
; GFX90A-NEXT: v_perm_b32 v1, v0, v1, s4
; GFX90A-NEXT: global_store_short v2, v0, s[16:17] offset:4
; GFX90A-NEXT: global_store_dword v2, v1, s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_5_4:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: v_mov_b32_e32 v2, 0
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def v[0:1]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_mov_b32 s2, 0x7060302
; GFX940-NEXT: v_perm_b32 v1, v0, v1, s2
; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1
; GFX940-NEXT: s_waitcnt vmcnt(0)
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 5, i32 4>
  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
  ret void
}

define void @v_shuffle_v3i16_v4i16__7_6_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_6_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_mov_b32_e32 v2, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_alignbit_b32 v1, v1, v1, 16
; GFX900-NEXT: global_store_short v2, v0, s[16:17] offset:4
; GFX900-NEXT:
global_store_dword v2, v1, s[16:17] 5246; GFX900-NEXT: s_waitcnt vmcnt(0) 5247; GFX900-NEXT: s_setpc_b64 s[30:31] 5248; 5249; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_6_4: 5250; GFX90A: ; %bb.0: 5251; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5252; GFX90A-NEXT: v_mov_b32_e32 v2, 0 5253; GFX90A-NEXT: ;;#ASMSTART 5254; GFX90A-NEXT: ; def v[0:1] 5255; GFX90A-NEXT: ;;#ASMEND 5256; GFX90A-NEXT: v_alignbit_b32 v1, v1, v1, 16 5257; GFX90A-NEXT: global_store_short v2, v0, s[16:17] offset:4 5258; GFX90A-NEXT: global_store_dword v2, v1, s[16:17] 5259; GFX90A-NEXT: s_waitcnt vmcnt(0) 5260; GFX90A-NEXT: s_setpc_b64 s[30:31] 5261; 5262; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_6_4: 5263; GFX940: ; %bb.0: 5264; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5265; GFX940-NEXT: v_mov_b32_e32 v2, 0 5266; GFX940-NEXT: ;;#ASMSTART 5267; GFX940-NEXT: ; def v[0:1] 5268; GFX940-NEXT: ;;#ASMEND 5269; GFX940-NEXT: s_nop 0 5270; GFX940-NEXT: v_alignbit_b32 v1, v1, v1, 16 5271; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 5272; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 5273; GFX940-NEXT: s_waitcnt vmcnt(0) 5274; GFX940-NEXT: s_setpc_b64 s[30:31] 5275 %vec0 = call <4 x i16> asm "; def $0", "=v"() 5276 %vec1 = call <4 x i16> asm "; def $0", "=v"() 5277 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 6, i32 4> 5278 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 5279 ret void 5280} 5281 5282define void @v_shuffle_v3i16_v4i16__u_5_5(ptr addrspace(1) inreg %ptr) { 5283; GFX900-LABEL: v_shuffle_v3i16_v4i16__u_5_5: 5284; GFX900: ; %bb.0: 5285; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5286; GFX900-NEXT: v_mov_b32_e32 v2, 0 5287; GFX900-NEXT: ;;#ASMSTART 5288; GFX900-NEXT: ; def v[0:1] 5289; GFX900-NEXT: ;;#ASMEND 5290; GFX900-NEXT: global_store_short_d16_hi v2, v0, s[16:17] offset:4 5291; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 5292; GFX900-NEXT: s_waitcnt vmcnt(0) 5293; GFX900-NEXT: s_setpc_b64 
s[30:31] 5294; 5295; GFX90A-LABEL: v_shuffle_v3i16_v4i16__u_5_5: 5296; GFX90A: ; %bb.0: 5297; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5298; GFX90A-NEXT: v_mov_b32_e32 v2, 0 5299; GFX90A-NEXT: ;;#ASMSTART 5300; GFX90A-NEXT: ; def v[0:1] 5301; GFX90A-NEXT: ;;#ASMEND 5302; GFX90A-NEXT: global_store_short_d16_hi v2, v0, s[16:17] offset:4 5303; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 5304; GFX90A-NEXT: s_waitcnt vmcnt(0) 5305; GFX90A-NEXT: s_setpc_b64 s[30:31] 5306; 5307; GFX940-LABEL: v_shuffle_v3i16_v4i16__u_5_5: 5308; GFX940: ; %bb.0: 5309; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5310; GFX940-NEXT: v_mov_b32_e32 v2, 0 5311; GFX940-NEXT: ;;#ASMSTART 5312; GFX940-NEXT: ; def v[0:1] 5313; GFX940-NEXT: ;;#ASMEND 5314; GFX940-NEXT: global_store_short_d16_hi v2, v0, s[0:1] offset:4 sc0 sc1 5315; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 5316; GFX940-NEXT: s_waitcnt vmcnt(0) 5317; GFX940-NEXT: s_setpc_b64 s[30:31] 5318 %vec0 = call <4 x i16> asm "; def $0", "=v"() 5319 %vec1 = call <4 x i16> asm "; def $0", "=v"() 5320 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 poison, i32 5, i32 5> 5321 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 5322 ret void 5323} 5324 5325define void @v_shuffle_v3i16_v4i16__0_5_5(ptr addrspace(1) inreg %ptr) { 5326; GFX900-LABEL: v_shuffle_v3i16_v4i16__0_5_5: 5327; GFX900: ; %bb.0: 5328; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5329; GFX900-NEXT: ;;#ASMSTART 5330; GFX900-NEXT: ; def v[0:1] 5331; GFX900-NEXT: ;;#ASMEND 5332; GFX900-NEXT: s_mov_b32 s4, 0xffff 5333; GFX900-NEXT: v_mov_b32_e32 v3, 0 5334; GFX900-NEXT: ;;#ASMSTART 5335; GFX900-NEXT: ; def v[1:2] 5336; GFX900-NEXT: ;;#ASMEND 5337; GFX900-NEXT: v_bfi_b32 v0, s4, v0, v1 5338; GFX900-NEXT: global_store_dword v3, v0, s[16:17] 5339; GFX900-NEXT: v_lshrrev_b32_e32 v0, 16, v1 5340; GFX900-NEXT: global_store_short v3, v0, s[16:17] offset:4 5341; GFX900-NEXT: s_waitcnt vmcnt(0) 5342; GFX900-NEXT: 
s_setpc_b64 s[30:31] 5343; 5344; GFX90A-LABEL: v_shuffle_v3i16_v4i16__0_5_5: 5345; GFX90A: ; %bb.0: 5346; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5347; GFX90A-NEXT: ;;#ASMSTART 5348; GFX90A-NEXT: ; def v[0:1] 5349; GFX90A-NEXT: ;;#ASMEND 5350; GFX90A-NEXT: s_mov_b32 s4, 0xffff 5351; GFX90A-NEXT: v_mov_b32_e32 v4, 0 5352; GFX90A-NEXT: ;;#ASMSTART 5353; GFX90A-NEXT: ; def v[2:3] 5354; GFX90A-NEXT: ;;#ASMEND 5355; GFX90A-NEXT: v_bfi_b32 v0, s4, v0, v2 5356; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 5357; GFX90A-NEXT: v_lshrrev_b32_e32 v0, 16, v2 5358; GFX90A-NEXT: global_store_short v4, v0, s[16:17] offset:4 5359; GFX90A-NEXT: s_waitcnt vmcnt(0) 5360; GFX90A-NEXT: s_setpc_b64 s[30:31] 5361; 5362; GFX940-LABEL: v_shuffle_v3i16_v4i16__0_5_5: 5363; GFX940: ; %bb.0: 5364; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5365; GFX940-NEXT: ;;#ASMSTART 5366; GFX940-NEXT: ; def v[0:1] 5367; GFX940-NEXT: ;;#ASMEND 5368; GFX940-NEXT: s_mov_b32 s2, 0xffff 5369; GFX940-NEXT: v_mov_b32_e32 v4, 0 5370; GFX940-NEXT: ;;#ASMSTART 5371; GFX940-NEXT: ; def v[2:3] 5372; GFX940-NEXT: ;;#ASMEND 5373; GFX940-NEXT: s_nop 0 5374; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v2 5375; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 5376; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v2 5377; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 5378; GFX940-NEXT: s_waitcnt vmcnt(0) 5379; GFX940-NEXT: s_setpc_b64 s[30:31] 5380 %vec0 = call <4 x i16> asm "; def $0", "=v"() 5381 %vec1 = call <4 x i16> asm "; def $0", "=v"() 5382 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 0, i32 5, i32 5> 5383 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 5384 ret void 5385} 5386 5387define void @v_shuffle_v3i16_v4i16__1_5_5(ptr addrspace(1) inreg %ptr) { 5388; GFX900-LABEL: v_shuffle_v3i16_v4i16__1_5_5: 5389; GFX900: ; %bb.0: 5390; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5391; GFX900-NEXT: ;;#ASMSTART 5392; GFX900-NEXT: ; def v[0:1] 5393; 
GFX900-NEXT: ;;#ASMEND 5394; GFX900-NEXT: s_mov_b32 s4, 0x7060302 5395; GFX900-NEXT: v_mov_b32_e32 v3, 0 5396; GFX900-NEXT: ;;#ASMSTART 5397; GFX900-NEXT: ; def v[1:2] 5398; GFX900-NEXT: ;;#ASMEND 5399; GFX900-NEXT: v_perm_b32 v0, v1, v0, s4 5400; GFX900-NEXT: global_store_dword v3, v0, s[16:17] 5401; GFX900-NEXT: v_lshrrev_b32_e32 v0, 16, v1 5402; GFX900-NEXT: global_store_short v3, v0, s[16:17] offset:4 5403; GFX900-NEXT: s_waitcnt vmcnt(0) 5404; GFX900-NEXT: s_setpc_b64 s[30:31] 5405; 5406; GFX90A-LABEL: v_shuffle_v3i16_v4i16__1_5_5: 5407; GFX90A: ; %bb.0: 5408; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5409; GFX90A-NEXT: ;;#ASMSTART 5410; GFX90A-NEXT: ; def v[0:1] 5411; GFX90A-NEXT: ;;#ASMEND 5412; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 5413; GFX90A-NEXT: v_mov_b32_e32 v4, 0 5414; GFX90A-NEXT: ;;#ASMSTART 5415; GFX90A-NEXT: ; def v[2:3] 5416; GFX90A-NEXT: ;;#ASMEND 5417; GFX90A-NEXT: v_perm_b32 v0, v2, v0, s4 5418; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 5419; GFX90A-NEXT: v_lshrrev_b32_e32 v0, 16, v2 5420; GFX90A-NEXT: global_store_short v4, v0, s[16:17] offset:4 5421; GFX90A-NEXT: s_waitcnt vmcnt(0) 5422; GFX90A-NEXT: s_setpc_b64 s[30:31] 5423; 5424; GFX940-LABEL: v_shuffle_v3i16_v4i16__1_5_5: 5425; GFX940: ; %bb.0: 5426; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5427; GFX940-NEXT: ;;#ASMSTART 5428; GFX940-NEXT: ; def v[0:1] 5429; GFX940-NEXT: ;;#ASMEND 5430; GFX940-NEXT: s_mov_b32 s2, 0x7060302 5431; GFX940-NEXT: v_mov_b32_e32 v4, 0 5432; GFX940-NEXT: ;;#ASMSTART 5433; GFX940-NEXT: ; def v[2:3] 5434; GFX940-NEXT: ;;#ASMEND 5435; GFX940-NEXT: s_nop 0 5436; GFX940-NEXT: v_perm_b32 v0, v2, v0, s2 5437; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 5438; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v2 5439; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 5440; GFX940-NEXT: s_waitcnt vmcnt(0) 5441; GFX940-NEXT: s_setpc_b64 s[30:31] 5442 %vec0 = call <4 x i16> asm "; def $0", "=v"() 5443 %vec1 = call <4 x i16> asm "; 
def $0", "=v"() 5444 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 1, i32 5, i32 5> 5445 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 5446 ret void 5447} 5448 5449define void @v_shuffle_v3i16_v4i16__2_5_5(ptr addrspace(1) inreg %ptr) { 5450; GFX900-LABEL: v_shuffle_v3i16_v4i16__2_5_5: 5451; GFX900: ; %bb.0: 5452; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5453; GFX900-NEXT: ;;#ASMSTART 5454; GFX900-NEXT: ; def v[0:1] 5455; GFX900-NEXT: ;;#ASMEND 5456; GFX900-NEXT: s_mov_b32 s4, 0xffff 5457; GFX900-NEXT: v_mov_b32_e32 v4, 0 5458; GFX900-NEXT: ;;#ASMSTART 5459; GFX900-NEXT: ; def v[2:3] 5460; GFX900-NEXT: ;;#ASMEND 5461; GFX900-NEXT: v_bfi_b32 v0, s4, v1, v2 5462; GFX900-NEXT: v_lshrrev_b32_e32 v1, 16, v2 5463; GFX900-NEXT: global_store_short v4, v1, s[16:17] offset:4 5464; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 5465; GFX900-NEXT: s_waitcnt vmcnt(0) 5466; GFX900-NEXT: s_setpc_b64 s[30:31] 5467; 5468; GFX90A-LABEL: v_shuffle_v3i16_v4i16__2_5_5: 5469; GFX90A: ; %bb.0: 5470; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5471; GFX90A-NEXT: ;;#ASMSTART 5472; GFX90A-NEXT: ; def v[0:1] 5473; GFX90A-NEXT: ;;#ASMEND 5474; GFX90A-NEXT: s_mov_b32 s4, 0xffff 5475; GFX90A-NEXT: v_mov_b32_e32 v4, 0 5476; GFX90A-NEXT: ;;#ASMSTART 5477; GFX90A-NEXT: ; def v[2:3] 5478; GFX90A-NEXT: ;;#ASMEND 5479; GFX90A-NEXT: v_bfi_b32 v0, s4, v1, v2 5480; GFX90A-NEXT: v_lshrrev_b32_e32 v1, 16, v2 5481; GFX90A-NEXT: global_store_short v4, v1, s[16:17] offset:4 5482; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 5483; GFX90A-NEXT: s_waitcnt vmcnt(0) 5484; GFX90A-NEXT: s_setpc_b64 s[30:31] 5485; 5486; GFX940-LABEL: v_shuffle_v3i16_v4i16__2_5_5: 5487; GFX940: ; %bb.0: 5488; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5489; GFX940-NEXT: ;;#ASMSTART 5490; GFX940-NEXT: ; def v[0:1] 5491; GFX940-NEXT: ;;#ASMEND 5492; GFX940-NEXT: s_mov_b32 s2, 0xffff 5493; GFX940-NEXT: v_mov_b32_e32 v4, 0 5494; GFX940-NEXT: ;;#ASMSTART 5495; 
GFX940-NEXT: ; def v[2:3] 5496; GFX940-NEXT: ;;#ASMEND 5497; GFX940-NEXT: s_nop 0 5498; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v2 5499; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v2 5500; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 5501; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 5502; GFX940-NEXT: s_waitcnt vmcnt(0) 5503; GFX940-NEXT: s_setpc_b64 s[30:31] 5504 %vec0 = call <4 x i16> asm "; def $0", "=v"() 5505 %vec1 = call <4 x i16> asm "; def $0", "=v"() 5506 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 2, i32 5, i32 5> 5507 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 5508 ret void 5509} 5510 5511define void @v_shuffle_v3i16_v4i16__3_5_5(ptr addrspace(1) inreg %ptr) { 5512; GFX900-LABEL: v_shuffle_v3i16_v4i16__3_5_5: 5513; GFX900: ; %bb.0: 5514; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5515; GFX900-NEXT: ;;#ASMSTART 5516; GFX900-NEXT: ; def v[0:1] 5517; GFX900-NEXT: ;;#ASMEND 5518; GFX900-NEXT: s_mov_b32 s4, 0x7060302 5519; GFX900-NEXT: v_mov_b32_e32 v4, 0 5520; GFX900-NEXT: ;;#ASMSTART 5521; GFX900-NEXT: ; def v[2:3] 5522; GFX900-NEXT: ;;#ASMEND 5523; GFX900-NEXT: v_perm_b32 v0, v2, v1, s4 5524; GFX900-NEXT: v_lshrrev_b32_e32 v1, 16, v2 5525; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 5526; GFX900-NEXT: global_store_short v4, v1, s[16:17] offset:4 5527; GFX900-NEXT: s_waitcnt vmcnt(0) 5528; GFX900-NEXT: s_setpc_b64 s[30:31] 5529; 5530; GFX90A-LABEL: v_shuffle_v3i16_v4i16__3_5_5: 5531; GFX90A: ; %bb.0: 5532; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5533; GFX90A-NEXT: ;;#ASMSTART 5534; GFX90A-NEXT: ; def v[0:1] 5535; GFX90A-NEXT: ;;#ASMEND 5536; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 5537; GFX90A-NEXT: v_mov_b32_e32 v4, 0 5538; GFX90A-NEXT: ;;#ASMSTART 5539; GFX90A-NEXT: ; def v[2:3] 5540; GFX90A-NEXT: ;;#ASMEND 5541; GFX90A-NEXT: v_perm_b32 v0, v2, v1, s4 5542; GFX90A-NEXT: v_lshrrev_b32_e32 v1, 16, v2 5543; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 5544; GFX90A-NEXT: 
global_store_short v4, v1, s[16:17] offset:4 5545; GFX90A-NEXT: s_waitcnt vmcnt(0) 5546; GFX90A-NEXT: s_setpc_b64 s[30:31] 5547; 5548; GFX940-LABEL: v_shuffle_v3i16_v4i16__3_5_5: 5549; GFX940: ; %bb.0: 5550; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5551; GFX940-NEXT: ;;#ASMSTART 5552; GFX940-NEXT: ; def v[0:1] 5553; GFX940-NEXT: ;;#ASMEND 5554; GFX940-NEXT: s_mov_b32 s2, 0x7060302 5555; GFX940-NEXT: v_mov_b32_e32 v4, 0 5556; GFX940-NEXT: ;;#ASMSTART 5557; GFX940-NEXT: ; def v[2:3] 5558; GFX940-NEXT: ;;#ASMEND 5559; GFX940-NEXT: s_nop 0 5560; GFX940-NEXT: v_perm_b32 v0, v2, v1, s2 5561; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v2 5562; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 5563; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 5564; GFX940-NEXT: s_waitcnt vmcnt(0) 5565; GFX940-NEXT: s_setpc_b64 s[30:31] 5566 %vec0 = call <4 x i16> asm "; def $0", "=v"() 5567 %vec1 = call <4 x i16> asm "; def $0", "=v"() 5568 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 3, i32 5, i32 5> 5569 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 5570 ret void 5571} 5572 5573define void @v_shuffle_v3i16_v4i16__4_5_5(ptr addrspace(1) inreg %ptr) { 5574; GFX900-LABEL: v_shuffle_v3i16_v4i16__4_5_5: 5575; GFX900: ; %bb.0: 5576; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5577; GFX900-NEXT: v_mov_b32_e32 v2, 0 5578; GFX900-NEXT: ;;#ASMSTART 5579; GFX900-NEXT: ; def v[0:1] 5580; GFX900-NEXT: ;;#ASMEND 5581; GFX900-NEXT: global_store_short_d16_hi v2, v0, s[16:17] offset:4 5582; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 5583; GFX900-NEXT: s_waitcnt vmcnt(0) 5584; GFX900-NEXT: s_setpc_b64 s[30:31] 5585; 5586; GFX90A-LABEL: v_shuffle_v3i16_v4i16__4_5_5: 5587; GFX90A: ; %bb.0: 5588; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5589; GFX90A-NEXT: v_mov_b32_e32 v2, 0 5590; GFX90A-NEXT: ;;#ASMSTART 5591; GFX90A-NEXT: ; def v[0:1] 5592; GFX90A-NEXT: ;;#ASMEND 5593; GFX90A-NEXT: global_store_short_d16_hi v2, 
v0, s[16:17] offset:4 5594; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 5595; GFX90A-NEXT: s_waitcnt vmcnt(0) 5596; GFX90A-NEXT: s_setpc_b64 s[30:31] 5597; 5598; GFX940-LABEL: v_shuffle_v3i16_v4i16__4_5_5: 5599; GFX940: ; %bb.0: 5600; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5601; GFX940-NEXT: v_mov_b32_e32 v2, 0 5602; GFX940-NEXT: ;;#ASMSTART 5603; GFX940-NEXT: ; def v[0:1] 5604; GFX940-NEXT: ;;#ASMEND 5605; GFX940-NEXT: global_store_short_d16_hi v2, v0, s[0:1] offset:4 sc0 sc1 5606; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 5607; GFX940-NEXT: s_waitcnt vmcnt(0) 5608; GFX940-NEXT: s_setpc_b64 s[30:31] 5609 %vec0 = call <4 x i16> asm "; def $0", "=v"() 5610 %vec1 = call <4 x i16> asm "; def $0", "=v"() 5611 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 4, i32 5, i32 5> 5612 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 5613 ret void 5614} 5615 5616define void @v_shuffle_v3i16_v4i16__5_5_5(ptr addrspace(1) inreg %ptr) { 5617; GFX900-LABEL: v_shuffle_v3i16_v4i16__5_5_5: 5618; GFX900: ; %bb.0: 5619; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5620; GFX900-NEXT: ;;#ASMSTART 5621; GFX900-NEXT: ; def v[0:1] 5622; GFX900-NEXT: ;;#ASMEND 5623; GFX900-NEXT: s_mov_b32 s4, 0x7060302 5624; GFX900-NEXT: v_mov_b32_e32 v2, 0 5625; GFX900-NEXT: v_perm_b32 v1, v0, v0, s4 5626; GFX900-NEXT: v_lshrrev_b32_e32 v0, 16, v0 5627; GFX900-NEXT: global_store_dword v2, v1, s[16:17] 5628; GFX900-NEXT: global_store_short v2, v0, s[16:17] offset:4 5629; GFX900-NEXT: s_waitcnt vmcnt(0) 5630; GFX900-NEXT: s_setpc_b64 s[30:31] 5631; 5632; GFX90A-LABEL: v_shuffle_v3i16_v4i16__5_5_5: 5633; GFX90A: ; %bb.0: 5634; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5635; GFX90A-NEXT: ;;#ASMSTART 5636; GFX90A-NEXT: ; def v[0:1] 5637; GFX90A-NEXT: ;;#ASMEND 5638; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 5639; GFX90A-NEXT: v_mov_b32_e32 v2, 0 5640; GFX90A-NEXT: v_perm_b32 v1, v0, v0, s4 5641; GFX90A-NEXT: v_lshrrev_b32_e32 v0, 16, v0 
5642; GFX90A-NEXT: global_store_dword v2, v1, s[16:17] 5643; GFX90A-NEXT: global_store_short v2, v0, s[16:17] offset:4 5644; GFX90A-NEXT: s_waitcnt vmcnt(0) 5645; GFX90A-NEXT: s_setpc_b64 s[30:31] 5646; 5647; GFX940-LABEL: v_shuffle_v3i16_v4i16__5_5_5: 5648; GFX940: ; %bb.0: 5649; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5650; GFX940-NEXT: ;;#ASMSTART 5651; GFX940-NEXT: ; def v[0:1] 5652; GFX940-NEXT: ;;#ASMEND 5653; GFX940-NEXT: s_mov_b32 s2, 0x7060302 5654; GFX940-NEXT: v_mov_b32_e32 v2, 0 5655; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 5656; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 5657; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 5658; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 5659; GFX940-NEXT: s_waitcnt vmcnt(0) 5660; GFX940-NEXT: s_setpc_b64 s[30:31] 5661 %vec0 = call <4 x i16> asm "; def $0", "=v"() 5662 %vec1 = call <4 x i16> asm "; def $0", "=v"() 5663 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 5, i32 5, i32 5> 5664 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 5665 ret void 5666} 5667 5668define void @v_shuffle_v3i16_v4i16__6_5_5(ptr addrspace(1) inreg %ptr) { 5669; GFX900-LABEL: v_shuffle_v3i16_v4i16__6_5_5: 5670; GFX900: ; %bb.0: 5671; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5672; GFX900-NEXT: ;;#ASMSTART 5673; GFX900-NEXT: ; def v[0:1] 5674; GFX900-NEXT: ;;#ASMEND 5675; GFX900-NEXT: s_mov_b32 s4, 0xffff 5676; GFX900-NEXT: v_mov_b32_e32 v2, 0 5677; GFX900-NEXT: v_bfi_b32 v1, s4, v1, v0 5678; GFX900-NEXT: v_lshrrev_b32_e32 v0, 16, v0 5679; GFX900-NEXT: global_store_short v2, v0, s[16:17] offset:4 5680; GFX900-NEXT: global_store_dword v2, v1, s[16:17] 5681; GFX900-NEXT: s_waitcnt vmcnt(0) 5682; GFX900-NEXT: s_setpc_b64 s[30:31] 5683; 5684; GFX90A-LABEL: v_shuffle_v3i16_v4i16__6_5_5: 5685; GFX90A: ; %bb.0: 5686; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5687; GFX90A-NEXT: ;;#ASMSTART 5688; GFX90A-NEXT: ; def v[0:1] 5689; GFX90A-NEXT: ;;#ASMEND 5690; 
GFX90A-NEXT: s_mov_b32 s4, 0xffff 5691; GFX90A-NEXT: v_mov_b32_e32 v2, 0 5692; GFX90A-NEXT: v_bfi_b32 v1, s4, v1, v0 5693; GFX90A-NEXT: v_lshrrev_b32_e32 v0, 16, v0 5694; GFX90A-NEXT: global_store_short v2, v0, s[16:17] offset:4 5695; GFX90A-NEXT: global_store_dword v2, v1, s[16:17] 5696; GFX90A-NEXT: s_waitcnt vmcnt(0) 5697; GFX90A-NEXT: s_setpc_b64 s[30:31] 5698; 5699; GFX940-LABEL: v_shuffle_v3i16_v4i16__6_5_5: 5700; GFX940: ; %bb.0: 5701; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5702; GFX940-NEXT: ;;#ASMSTART 5703; GFX940-NEXT: ; def v[0:1] 5704; GFX940-NEXT: ;;#ASMEND 5705; GFX940-NEXT: s_mov_b32 s2, 0xffff 5706; GFX940-NEXT: v_mov_b32_e32 v2, 0 5707; GFX940-NEXT: v_bfi_b32 v1, s2, v1, v0 5708; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 5709; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 5710; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 5711; GFX940-NEXT: s_waitcnt vmcnt(0) 5712; GFX940-NEXT: s_setpc_b64 s[30:31] 5713 %vec0 = call <4 x i16> asm "; def $0", "=v"() 5714 %vec1 = call <4 x i16> asm "; def $0", "=v"() 5715 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 6, i32 5, i32 5> 5716 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 5717 ret void 5718} 5719 5720define void @v_shuffle_v3i16_v4i16__7_5_5(ptr addrspace(1) inreg %ptr) { 5721; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_5_5: 5722; GFX900: ; %bb.0: 5723; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5724; GFX900-NEXT: ;;#ASMSTART 5725; GFX900-NEXT: ; def v[0:1] 5726; GFX900-NEXT: ;;#ASMEND 5727; GFX900-NEXT: s_mov_b32 s4, 0x7060302 5728; GFX900-NEXT: v_mov_b32_e32 v2, 0 5729; GFX900-NEXT: v_perm_b32 v1, v0, v1, s4 5730; GFX900-NEXT: v_lshrrev_b32_e32 v0, 16, v0 5731; GFX900-NEXT: global_store_dword v2, v1, s[16:17] 5732; GFX900-NEXT: global_store_short v2, v0, s[16:17] offset:4 5733; GFX900-NEXT: s_waitcnt vmcnt(0) 5734; GFX900-NEXT: s_setpc_b64 s[30:31] 5735; 5736; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_5_5: 5737; GFX90A: ; 
%bb.0: 5738; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5739; GFX90A-NEXT: ;;#ASMSTART 5740; GFX90A-NEXT: ; def v[0:1] 5741; GFX90A-NEXT: ;;#ASMEND 5742; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 5743; GFX90A-NEXT: v_mov_b32_e32 v2, 0 5744; GFX90A-NEXT: v_perm_b32 v1, v0, v1, s4 5745; GFX90A-NEXT: v_lshrrev_b32_e32 v0, 16, v0 5746; GFX90A-NEXT: global_store_dword v2, v1, s[16:17] 5747; GFX90A-NEXT: global_store_short v2, v0, s[16:17] offset:4 5748; GFX90A-NEXT: s_waitcnt vmcnt(0) 5749; GFX90A-NEXT: s_setpc_b64 s[30:31] 5750; 5751; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_5_5: 5752; GFX940: ; %bb.0: 5753; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5754; GFX940-NEXT: ;;#ASMSTART 5755; GFX940-NEXT: ; def v[0:1] 5756; GFX940-NEXT: ;;#ASMEND 5757; GFX940-NEXT: s_mov_b32 s2, 0x7060302 5758; GFX940-NEXT: v_mov_b32_e32 v2, 0 5759; GFX940-NEXT: v_perm_b32 v1, v0, v1, s2 5760; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 5761; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 5762; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 5763; GFX940-NEXT: s_waitcnt vmcnt(0) 5764; GFX940-NEXT: s_setpc_b64 s[30:31] 5765 %vec0 = call <4 x i16> asm "; def $0", "=v"() 5766 %vec1 = call <4 x i16> asm "; def $0", "=v"() 5767 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 5, i32 5> 5768 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 5769 ret void 5770} 5771 5772define void @v_shuffle_v3i16_v4i16__7_u_5(ptr addrspace(1) inreg %ptr) { 5773; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_u_5: 5774; GFX900: ; %bb.0: 5775; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5776; GFX900-NEXT: v_mov_b32_e32 v2, 0 5777; GFX900-NEXT: ;;#ASMSTART 5778; GFX900-NEXT: ; def v[0:1] 5779; GFX900-NEXT: ;;#ASMEND 5780; GFX900-NEXT: v_alignbit_b32 v1, s4, v1, 16 5781; GFX900-NEXT: global_store_short_d16_hi v2, v0, s[16:17] offset:4 5782; GFX900-NEXT: global_store_dword v2, v1, s[16:17] 5783; GFX900-NEXT: s_waitcnt vmcnt(0) 5784; GFX900-NEXT: 
s_setpc_b64 s[30:31] 5785; 5786; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_u_5: 5787; GFX90A: ; %bb.0: 5788; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5789; GFX90A-NEXT: v_mov_b32_e32 v2, 0 5790; GFX90A-NEXT: ;;#ASMSTART 5791; GFX90A-NEXT: ; def v[0:1] 5792; GFX90A-NEXT: ;;#ASMEND 5793; GFX90A-NEXT: v_alignbit_b32 v1, s4, v1, 16 5794; GFX90A-NEXT: global_store_short_d16_hi v2, v0, s[16:17] offset:4 5795; GFX90A-NEXT: global_store_dword v2, v1, s[16:17] 5796; GFX90A-NEXT: s_waitcnt vmcnt(0) 5797; GFX90A-NEXT: s_setpc_b64 s[30:31] 5798; 5799; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_u_5: 5800; GFX940: ; %bb.0: 5801; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5802; GFX940-NEXT: v_mov_b32_e32 v2, 0 5803; GFX940-NEXT: ;;#ASMSTART 5804; GFX940-NEXT: ; def v[0:1] 5805; GFX940-NEXT: ;;#ASMEND 5806; GFX940-NEXT: s_nop 0 5807; GFX940-NEXT: v_alignbit_b32 v1, s0, v1, 16 5808; GFX940-NEXT: global_store_short_d16_hi v2, v0, s[0:1] offset:4 sc0 sc1 5809; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 5810; GFX940-NEXT: s_waitcnt vmcnt(0) 5811; GFX940-NEXT: s_setpc_b64 s[30:31] 5812 %vec0 = call <4 x i16> asm "; def $0", "=v"() 5813 %vec1 = call <4 x i16> asm "; def $0", "=v"() 5814 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 poison, i32 5> 5815 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 5816 ret void 5817} 5818 5819define void @v_shuffle_v3i16_v4i16__7_0_5(ptr addrspace(1) inreg %ptr) { 5820; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_0_5: 5821; GFX900: ; %bb.0: 5822; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5823; GFX900-NEXT: ;;#ASMSTART 5824; GFX900-NEXT: ; def v[0:1] 5825; GFX900-NEXT: ;;#ASMEND 5826; GFX900-NEXT: v_mov_b32_e32 v3, 0 5827; GFX900-NEXT: ;;#ASMSTART 5828; GFX900-NEXT: ; def v[1:2] 5829; GFX900-NEXT: ;;#ASMEND 5830; GFX900-NEXT: v_alignbit_b32 v0, v0, v2, 16 5831; GFX900-NEXT: global_store_short_d16_hi v3, v1, s[16:17] offset:4 5832; GFX900-NEXT: global_store_dword v3, v0, s[16:17] 
5833; GFX900-NEXT: s_waitcnt vmcnt(0) 5834; GFX900-NEXT: s_setpc_b64 s[30:31] 5835; 5836; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_0_5: 5837; GFX90A: ; %bb.0: 5838; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5839; GFX90A-NEXT: v_mov_b32_e32 v4, 0 5840; GFX90A-NEXT: ;;#ASMSTART 5841; GFX90A-NEXT: ; def v[0:1] 5842; GFX90A-NEXT: ;;#ASMEND 5843; GFX90A-NEXT: ;;#ASMSTART 5844; GFX90A-NEXT: ; def v[2:3] 5845; GFX90A-NEXT: ;;#ASMEND 5846; GFX90A-NEXT: v_alignbit_b32 v0, v0, v3, 16 5847; GFX90A-NEXT: global_store_short_d16_hi v4, v2, s[16:17] offset:4 5848; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 5849; GFX90A-NEXT: s_waitcnt vmcnt(0) 5850; GFX90A-NEXT: s_setpc_b64 s[30:31] 5851; 5852; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_0_5: 5853; GFX940: ; %bb.0: 5854; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5855; GFX940-NEXT: v_mov_b32_e32 v4, 0 5856; GFX940-NEXT: ;;#ASMSTART 5857; GFX940-NEXT: ; def v[0:1] 5858; GFX940-NEXT: ;;#ASMEND 5859; GFX940-NEXT: ;;#ASMSTART 5860; GFX940-NEXT: ; def v[2:3] 5861; GFX940-NEXT: ;;#ASMEND 5862; GFX940-NEXT: s_nop 0 5863; GFX940-NEXT: v_alignbit_b32 v0, v0, v3, 16 5864; GFX940-NEXT: global_store_short_d16_hi v4, v2, s[0:1] offset:4 sc0 sc1 5865; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 5866; GFX940-NEXT: s_waitcnt vmcnt(0) 5867; GFX940-NEXT: s_setpc_b64 s[30:31] 5868 %vec0 = call <4 x i16> asm "; def $0", "=v"() 5869 %vec1 = call <4 x i16> asm "; def $0", "=v"() 5870 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 0, i32 5> 5871 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 5872 ret void 5873} 5874 5875define void @v_shuffle_v3i16_v4i16__7_1_5(ptr addrspace(1) inreg %ptr) { 5876; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_1_5: 5877; GFX900: ; %bb.0: 5878; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5879; GFX900-NEXT: ;;#ASMSTART 5880; GFX900-NEXT: ; def v[0:1] 5881; GFX900-NEXT: ;;#ASMEND 5882; GFX900-NEXT: v_mov_b32_e32 v3, 0 5883; GFX900-NEXT: ;;#ASMSTART 
5884; GFX900-NEXT: ; def v[1:2] 5885; GFX900-NEXT: ;;#ASMEND 5886; GFX900-NEXT: s_mov_b32 s4, 0x7060302 5887; GFX900-NEXT: v_perm_b32 v0, v0, v2, s4 5888; GFX900-NEXT: global_store_short_d16_hi v3, v1, s[16:17] offset:4 5889; GFX900-NEXT: global_store_dword v3, v0, s[16:17] 5890; GFX900-NEXT: s_waitcnt vmcnt(0) 5891; GFX900-NEXT: s_setpc_b64 s[30:31] 5892; 5893; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_1_5: 5894; GFX90A: ; %bb.0: 5895; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5896; GFX90A-NEXT: v_mov_b32_e32 v4, 0 5897; GFX90A-NEXT: ;;#ASMSTART 5898; GFX90A-NEXT: ; def v[0:1] 5899; GFX90A-NEXT: ;;#ASMEND 5900; GFX90A-NEXT: ;;#ASMSTART 5901; GFX90A-NEXT: ; def v[2:3] 5902; GFX90A-NEXT: ;;#ASMEND 5903; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 5904; GFX90A-NEXT: v_perm_b32 v0, v0, v3, s4 5905; GFX90A-NEXT: global_store_short_d16_hi v4, v2, s[16:17] offset:4 5906; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 5907; GFX90A-NEXT: s_waitcnt vmcnt(0) 5908; GFX90A-NEXT: s_setpc_b64 s[30:31] 5909; 5910; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_1_5: 5911; GFX940: ; %bb.0: 5912; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5913; GFX940-NEXT: v_mov_b32_e32 v4, 0 5914; GFX940-NEXT: ;;#ASMSTART 5915; GFX940-NEXT: ; def v[0:1] 5916; GFX940-NEXT: ;;#ASMEND 5917; GFX940-NEXT: ;;#ASMSTART 5918; GFX940-NEXT: ; def v[2:3] 5919; GFX940-NEXT: ;;#ASMEND 5920; GFX940-NEXT: s_mov_b32 s2, 0x7060302 5921; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 5922; GFX940-NEXT: global_store_short_d16_hi v4, v2, s[0:1] offset:4 sc0 sc1 5923; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 5924; GFX940-NEXT: s_waitcnt vmcnt(0) 5925; GFX940-NEXT: s_setpc_b64 s[30:31] 5926 %vec0 = call <4 x i16> asm "; def $0", "=v"() 5927 %vec1 = call <4 x i16> asm "; def $0", "=v"() 5928 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 1, i32 5> 5929 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 5930 ret void 5931} 5932 5933define void @v_shuffle_v3i16_v4i16__7_2_5(ptr 
addrspace(1) inreg %ptr) { 5934; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_2_5: 5935; GFX900: ; %bb.0: 5936; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5937; GFX900-NEXT: v_mov_b32_e32 v4, 0 5938; GFX900-NEXT: ;;#ASMSTART 5939; GFX900-NEXT: ; def v[0:1] 5940; GFX900-NEXT: ;;#ASMEND 5941; GFX900-NEXT: ;;#ASMSTART 5942; GFX900-NEXT: ; def v[2:3] 5943; GFX900-NEXT: ;;#ASMEND 5944; GFX900-NEXT: v_alignbit_b32 v0, v1, v3, 16 5945; GFX900-NEXT: global_store_short_d16_hi v4, v2, s[16:17] offset:4 5946; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 5947; GFX900-NEXT: s_waitcnt vmcnt(0) 5948; GFX900-NEXT: s_setpc_b64 s[30:31] 5949; 5950; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_2_5: 5951; GFX90A: ; %bb.0: 5952; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5953; GFX90A-NEXT: v_mov_b32_e32 v4, 0 5954; GFX90A-NEXT: ;;#ASMSTART 5955; GFX90A-NEXT: ; def v[0:1] 5956; GFX90A-NEXT: ;;#ASMEND 5957; GFX90A-NEXT: ;;#ASMSTART 5958; GFX90A-NEXT: ; def v[2:3] 5959; GFX90A-NEXT: ;;#ASMEND 5960; GFX90A-NEXT: v_alignbit_b32 v0, v1, v3, 16 5961; GFX90A-NEXT: global_store_short_d16_hi v4, v2, s[16:17] offset:4 5962; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 5963; GFX90A-NEXT: s_waitcnt vmcnt(0) 5964; GFX90A-NEXT: s_setpc_b64 s[30:31] 5965; 5966; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_2_5: 5967; GFX940: ; %bb.0: 5968; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5969; GFX940-NEXT: v_mov_b32_e32 v4, 0 5970; GFX940-NEXT: ;;#ASMSTART 5971; GFX940-NEXT: ; def v[0:1] 5972; GFX940-NEXT: ;;#ASMEND 5973; GFX940-NEXT: ;;#ASMSTART 5974; GFX940-NEXT: ; def v[2:3] 5975; GFX940-NEXT: ;;#ASMEND 5976; GFX940-NEXT: s_nop 0 5977; GFX940-NEXT: v_alignbit_b32 v0, v1, v3, 16 5978; GFX940-NEXT: global_store_short_d16_hi v4, v2, s[0:1] offset:4 sc0 sc1 5979; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 5980; GFX940-NEXT: s_waitcnt vmcnt(0) 5981; GFX940-NEXT: s_setpc_b64 s[30:31] 5982 %vec0 = call <4 x i16> asm "; def $0", "=v"() 5983 %vec1 = call <4 x i16> asm "; def $0", 
"=v"() 5984 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 2, i32 5> 5985 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 5986 ret void 5987} 5988 5989define void @v_shuffle_v3i16_v4i16__7_3_5(ptr addrspace(1) inreg %ptr) { 5990; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_3_5: 5991; GFX900: ; %bb.0: 5992; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5993; GFX900-NEXT: v_mov_b32_e32 v4, 0 5994; GFX900-NEXT: ;;#ASMSTART 5995; GFX900-NEXT: ; def v[0:1] 5996; GFX900-NEXT: ;;#ASMEND 5997; GFX900-NEXT: ;;#ASMSTART 5998; GFX900-NEXT: ; def v[2:3] 5999; GFX900-NEXT: ;;#ASMEND 6000; GFX900-NEXT: s_mov_b32 s4, 0x7060302 6001; GFX900-NEXT: v_perm_b32 v0, v1, v3, s4 6002; GFX900-NEXT: global_store_short_d16_hi v4, v2, s[16:17] offset:4 6003; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 6004; GFX900-NEXT: s_waitcnt vmcnt(0) 6005; GFX900-NEXT: s_setpc_b64 s[30:31] 6006; 6007; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_3_5: 6008; GFX90A: ; %bb.0: 6009; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6010; GFX90A-NEXT: v_mov_b32_e32 v4, 0 6011; GFX90A-NEXT: ;;#ASMSTART 6012; GFX90A-NEXT: ; def v[0:1] 6013; GFX90A-NEXT: ;;#ASMEND 6014; GFX90A-NEXT: ;;#ASMSTART 6015; GFX90A-NEXT: ; def v[2:3] 6016; GFX90A-NEXT: ;;#ASMEND 6017; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 6018; GFX90A-NEXT: v_perm_b32 v0, v1, v3, s4 6019; GFX90A-NEXT: global_store_short_d16_hi v4, v2, s[16:17] offset:4 6020; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 6021; GFX90A-NEXT: s_waitcnt vmcnt(0) 6022; GFX90A-NEXT: s_setpc_b64 s[30:31] 6023; 6024; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_3_5: 6025; GFX940: ; %bb.0: 6026; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6027; GFX940-NEXT: v_mov_b32_e32 v4, 0 6028; GFX940-NEXT: ;;#ASMSTART 6029; GFX940-NEXT: ; def v[0:1] 6030; GFX940-NEXT: ;;#ASMEND 6031; GFX940-NEXT: ;;#ASMSTART 6032; GFX940-NEXT: ; def v[2:3] 6033; GFX940-NEXT: ;;#ASMEND 6034; GFX940-NEXT: s_mov_b32 s2, 0x7060302 6035; GFX940-NEXT: v_perm_b32 v0, 
v1, v3, s2 6036; GFX940-NEXT: global_store_short_d16_hi v4, v2, s[0:1] offset:4 sc0 sc1 6037; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 6038; GFX940-NEXT: s_waitcnt vmcnt(0) 6039; GFX940-NEXT: s_setpc_b64 s[30:31] 6040 %vec0 = call <4 x i16> asm "; def $0", "=v"() 6041 %vec1 = call <4 x i16> asm "; def $0", "=v"() 6042 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 3, i32 5> 6043 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 6044 ret void 6045} 6046 6047define void @v_shuffle_v3i16_v4i16__7_4_5(ptr addrspace(1) inreg %ptr) { 6048; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_4_5: 6049; GFX900: ; %bb.0: 6050; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6051; GFX900-NEXT: v_mov_b32_e32 v2, 0 6052; GFX900-NEXT: ;;#ASMSTART 6053; GFX900-NEXT: ; def v[0:1] 6054; GFX900-NEXT: ;;#ASMEND 6055; GFX900-NEXT: v_alignbit_b32 v1, v0, v1, 16 6056; GFX900-NEXT: global_store_short_d16_hi v2, v0, s[16:17] offset:4 6057; GFX900-NEXT: global_store_dword v2, v1, s[16:17] 6058; GFX900-NEXT: s_waitcnt vmcnt(0) 6059; GFX900-NEXT: s_setpc_b64 s[30:31] 6060; 6061; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_4_5: 6062; GFX90A: ; %bb.0: 6063; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6064; GFX90A-NEXT: v_mov_b32_e32 v2, 0 6065; GFX90A-NEXT: ;;#ASMSTART 6066; GFX90A-NEXT: ; def v[0:1] 6067; GFX90A-NEXT: ;;#ASMEND 6068; GFX90A-NEXT: v_alignbit_b32 v1, v0, v1, 16 6069; GFX90A-NEXT: global_store_short_d16_hi v2, v0, s[16:17] offset:4 6070; GFX90A-NEXT: global_store_dword v2, v1, s[16:17] 6071; GFX90A-NEXT: s_waitcnt vmcnt(0) 6072; GFX90A-NEXT: s_setpc_b64 s[30:31] 6073; 6074; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_4_5: 6075; GFX940: ; %bb.0: 6076; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6077; GFX940-NEXT: v_mov_b32_e32 v2, 0 6078; GFX940-NEXT: ;;#ASMSTART 6079; GFX940-NEXT: ; def v[0:1] 6080; GFX940-NEXT: ;;#ASMEND 6081; GFX940-NEXT: s_nop 0 6082; GFX940-NEXT: v_alignbit_b32 v1, v0, v1, 16 6083; GFX940-NEXT: 
global_store_short_d16_hi v2, v0, s[0:1] offset:4 sc0 sc1 6084; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 6085; GFX940-NEXT: s_waitcnt vmcnt(0) 6086; GFX940-NEXT: s_setpc_b64 s[30:31] 6087 %vec0 = call <4 x i16> asm "; def $0", "=v"() 6088 %vec1 = call <4 x i16> asm "; def $0", "=v"() 6089 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 4, i32 5> 6090 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 6091 ret void 6092} 6093 6094define void @v_shuffle_v3i16_v4i16__7_6_5(ptr addrspace(1) inreg %ptr) { 6095; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_6_5: 6096; GFX900: ; %bb.0: 6097; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6098; GFX900-NEXT: v_mov_b32_e32 v2, 0 6099; GFX900-NEXT: ;;#ASMSTART 6100; GFX900-NEXT: ; def v[0:1] 6101; GFX900-NEXT: ;;#ASMEND 6102; GFX900-NEXT: v_alignbit_b32 v1, v1, v1, 16 6103; GFX900-NEXT: global_store_short_d16_hi v2, v0, s[16:17] offset:4 6104; GFX900-NEXT: global_store_dword v2, v1, s[16:17] 6105; GFX900-NEXT: s_waitcnt vmcnt(0) 6106; GFX900-NEXT: s_setpc_b64 s[30:31] 6107; 6108; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_6_5: 6109; GFX90A: ; %bb.0: 6110; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6111; GFX90A-NEXT: v_mov_b32_e32 v2, 0 6112; GFX90A-NEXT: ;;#ASMSTART 6113; GFX90A-NEXT: ; def v[0:1] 6114; GFX90A-NEXT: ;;#ASMEND 6115; GFX90A-NEXT: v_alignbit_b32 v1, v1, v1, 16 6116; GFX90A-NEXT: global_store_short_d16_hi v2, v0, s[16:17] offset:4 6117; GFX90A-NEXT: global_store_dword v2, v1, s[16:17] 6118; GFX90A-NEXT: s_waitcnt vmcnt(0) 6119; GFX90A-NEXT: s_setpc_b64 s[30:31] 6120; 6121; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_6_5: 6122; GFX940: ; %bb.0: 6123; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6124; GFX940-NEXT: v_mov_b32_e32 v2, 0 6125; GFX940-NEXT: ;;#ASMSTART 6126; GFX940-NEXT: ; def v[0:1] 6127; GFX940-NEXT: ;;#ASMEND 6128; GFX940-NEXT: s_nop 0 6129; GFX940-NEXT: v_alignbit_b32 v1, v1, v1, 16 6130; GFX940-NEXT: global_store_short_d16_hi v2, v0, s[0:1] 
offset:4 sc0 sc1 6131; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 6132; GFX940-NEXT: s_waitcnt vmcnt(0) 6133; GFX940-NEXT: s_setpc_b64 s[30:31] 6134 %vec0 = call <4 x i16> asm "; def $0", "=v"() 6135 %vec1 = call <4 x i16> asm "; def $0", "=v"() 6136 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 6, i32 5> 6137 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 6138 ret void 6139} 6140 6141define void @v_shuffle_v3i16_v4i16__u_6_6(ptr addrspace(1) inreg %ptr) { 6142; GFX900-LABEL: v_shuffle_v3i16_v4i16__u_6_6: 6143; GFX900: ; %bb.0: 6144; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6145; GFX900-NEXT: v_mov_b32_e32 v2, 0 6146; GFX900-NEXT: ;;#ASMSTART 6147; GFX900-NEXT: ; def v[0:1] 6148; GFX900-NEXT: ;;#ASMEND 6149; GFX900-NEXT: v_lshlrev_b32_e32 v0, 16, v1 6150; GFX900-NEXT: global_store_short v2, v1, s[16:17] offset:4 6151; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 6152; GFX900-NEXT: s_waitcnt vmcnt(0) 6153; GFX900-NEXT: s_setpc_b64 s[30:31] 6154; 6155; GFX90A-LABEL: v_shuffle_v3i16_v4i16__u_6_6: 6156; GFX90A: ; %bb.0: 6157; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6158; GFX90A-NEXT: v_mov_b32_e32 v2, 0 6159; GFX90A-NEXT: ;;#ASMSTART 6160; GFX90A-NEXT: ; def v[0:1] 6161; GFX90A-NEXT: ;;#ASMEND 6162; GFX90A-NEXT: v_lshlrev_b32_e32 v0, 16, v1 6163; GFX90A-NEXT: global_store_short v2, v1, s[16:17] offset:4 6164; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 6165; GFX90A-NEXT: s_waitcnt vmcnt(0) 6166; GFX90A-NEXT: s_setpc_b64 s[30:31] 6167; 6168; GFX940-LABEL: v_shuffle_v3i16_v4i16__u_6_6: 6169; GFX940: ; %bb.0: 6170; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6171; GFX940-NEXT: v_mov_b32_e32 v2, 0 6172; GFX940-NEXT: ;;#ASMSTART 6173; GFX940-NEXT: ; def v[0:1] 6174; GFX940-NEXT: ;;#ASMEND 6175; GFX940-NEXT: s_nop 0 6176; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v1 6177; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 6178; GFX940-NEXT: global_store_dword v2, v0, 
s[0:1] sc0 sc1 6179; GFX940-NEXT: s_waitcnt vmcnt(0) 6180; GFX940-NEXT: s_setpc_b64 s[30:31] 6181 %vec0 = call <4 x i16> asm "; def $0", "=v"() 6182 %vec1 = call <4 x i16> asm "; def $0", "=v"() 6183 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 poison, i32 6, i32 6> 6184 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 6185 ret void 6186} 6187 6188define void @v_shuffle_v3i16_v4i16__0_6_6(ptr addrspace(1) inreg %ptr) { 6189; GFX900-LABEL: v_shuffle_v3i16_v4i16__0_6_6: 6190; GFX900: ; %bb.0: 6191; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6192; GFX900-NEXT: ;;#ASMSTART 6193; GFX900-NEXT: ; def v[0:1] 6194; GFX900-NEXT: ;;#ASMEND 6195; GFX900-NEXT: v_mov_b32_e32 v3, 0 6196; GFX900-NEXT: ;;#ASMSTART 6197; GFX900-NEXT: ; def v[1:2] 6198; GFX900-NEXT: ;;#ASMEND 6199; GFX900-NEXT: s_mov_b32 s4, 0x5040100 6200; GFX900-NEXT: v_perm_b32 v0, v2, v0, s4 6201; GFX900-NEXT: global_store_short v3, v2, s[16:17] offset:4 6202; GFX900-NEXT: global_store_dword v3, v0, s[16:17] 6203; GFX900-NEXT: s_waitcnt vmcnt(0) 6204; GFX900-NEXT: s_setpc_b64 s[30:31] 6205; 6206; GFX90A-LABEL: v_shuffle_v3i16_v4i16__0_6_6: 6207; GFX90A: ; %bb.0: 6208; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6209; GFX90A-NEXT: v_mov_b32_e32 v4, 0 6210; GFX90A-NEXT: ;;#ASMSTART 6211; GFX90A-NEXT: ; def v[0:1] 6212; GFX90A-NEXT: ;;#ASMEND 6213; GFX90A-NEXT: ;;#ASMSTART 6214; GFX90A-NEXT: ; def v[2:3] 6215; GFX90A-NEXT: ;;#ASMEND 6216; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 6217; GFX90A-NEXT: v_perm_b32 v0, v3, v0, s4 6218; GFX90A-NEXT: global_store_short v4, v3, s[16:17] offset:4 6219; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 6220; GFX90A-NEXT: s_waitcnt vmcnt(0) 6221; GFX90A-NEXT: s_setpc_b64 s[30:31] 6222; 6223; GFX940-LABEL: v_shuffle_v3i16_v4i16__0_6_6: 6224; GFX940: ; %bb.0: 6225; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6226; GFX940-NEXT: v_mov_b32_e32 v4, 0 6227; GFX940-NEXT: ;;#ASMSTART 6228; GFX940-NEXT: ; def v[0:1] 6229; GFX940-NEXT: 
;;#ASMEND 6230; GFX940-NEXT: ;;#ASMSTART 6231; GFX940-NEXT: ; def v[2:3] 6232; GFX940-NEXT: ;;#ASMEND 6233; GFX940-NEXT: s_mov_b32 s2, 0x5040100 6234; GFX940-NEXT: v_perm_b32 v0, v3, v0, s2 6235; GFX940-NEXT: global_store_short v4, v3, s[0:1] offset:4 sc0 sc1 6236; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 6237; GFX940-NEXT: s_waitcnt vmcnt(0) 6238; GFX940-NEXT: s_setpc_b64 s[30:31] 6239 %vec0 = call <4 x i16> asm "; def $0", "=v"() 6240 %vec1 = call <4 x i16> asm "; def $0", "=v"() 6241 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 0, i32 6, i32 6> 6242 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 6243 ret void 6244} 6245 6246define void @v_shuffle_v3i16_v4i16__1_6_6(ptr addrspace(1) inreg %ptr) { 6247; GFX900-LABEL: v_shuffle_v3i16_v4i16__1_6_6: 6248; GFX900: ; %bb.0: 6249; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6250; GFX900-NEXT: ;;#ASMSTART 6251; GFX900-NEXT: ; def v[0:1] 6252; GFX900-NEXT: ;;#ASMEND 6253; GFX900-NEXT: v_mov_b32_e32 v3, 0 6254; GFX900-NEXT: ;;#ASMSTART 6255; GFX900-NEXT: ; def v[1:2] 6256; GFX900-NEXT: ;;#ASMEND 6257; GFX900-NEXT: v_alignbit_b32 v0, v2, v0, 16 6258; GFX900-NEXT: global_store_short v3, v2, s[16:17] offset:4 6259; GFX900-NEXT: global_store_dword v3, v0, s[16:17] 6260; GFX900-NEXT: s_waitcnt vmcnt(0) 6261; GFX900-NEXT: s_setpc_b64 s[30:31] 6262; 6263; GFX90A-LABEL: v_shuffle_v3i16_v4i16__1_6_6: 6264; GFX90A: ; %bb.0: 6265; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6266; GFX90A-NEXT: v_mov_b32_e32 v4, 0 6267; GFX90A-NEXT: ;;#ASMSTART 6268; GFX90A-NEXT: ; def v[0:1] 6269; GFX90A-NEXT: ;;#ASMEND 6270; GFX90A-NEXT: ;;#ASMSTART 6271; GFX90A-NEXT: ; def v[2:3] 6272; GFX90A-NEXT: ;;#ASMEND 6273; GFX90A-NEXT: v_alignbit_b32 v0, v3, v0, 16 6274; GFX90A-NEXT: global_store_short v4, v3, s[16:17] offset:4 6275; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 6276; GFX90A-NEXT: s_waitcnt vmcnt(0) 6277; GFX90A-NEXT: s_setpc_b64 s[30:31] 6278; 6279; GFX940-LABEL: 
v_shuffle_v3i16_v4i16__1_6_6: 6280; GFX940: ; %bb.0: 6281; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6282; GFX940-NEXT: v_mov_b32_e32 v4, 0 6283; GFX940-NEXT: ;;#ASMSTART 6284; GFX940-NEXT: ; def v[0:1] 6285; GFX940-NEXT: ;;#ASMEND 6286; GFX940-NEXT: ;;#ASMSTART 6287; GFX940-NEXT: ; def v[2:3] 6288; GFX940-NEXT: ;;#ASMEND 6289; GFX940-NEXT: s_nop 0 6290; GFX940-NEXT: v_alignbit_b32 v0, v3, v0, 16 6291; GFX940-NEXT: global_store_short v4, v3, s[0:1] offset:4 sc0 sc1 6292; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 6293; GFX940-NEXT: s_waitcnt vmcnt(0) 6294; GFX940-NEXT: s_setpc_b64 s[30:31] 6295 %vec0 = call <4 x i16> asm "; def $0", "=v"() 6296 %vec1 = call <4 x i16> asm "; def $0", "=v"() 6297 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 1, i32 6, i32 6> 6298 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 6299 ret void 6300} 6301 6302define void @v_shuffle_v3i16_v4i16__2_6_6(ptr addrspace(1) inreg %ptr) { 6303; GFX900-LABEL: v_shuffle_v3i16_v4i16__2_6_6: 6304; GFX900: ; %bb.0: 6305; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6306; GFX900-NEXT: v_mov_b32_e32 v4, 0 6307; GFX900-NEXT: ;;#ASMSTART 6308; GFX900-NEXT: ; def v[0:1] 6309; GFX900-NEXT: ;;#ASMEND 6310; GFX900-NEXT: ;;#ASMSTART 6311; GFX900-NEXT: ; def v[2:3] 6312; GFX900-NEXT: ;;#ASMEND 6313; GFX900-NEXT: s_mov_b32 s4, 0x5040100 6314; GFX900-NEXT: v_perm_b32 v0, v3, v1, s4 6315; GFX900-NEXT: global_store_short v4, v3, s[16:17] offset:4 6316; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 6317; GFX900-NEXT: s_waitcnt vmcnt(0) 6318; GFX900-NEXT: s_setpc_b64 s[30:31] 6319; 6320; GFX90A-LABEL: v_shuffle_v3i16_v4i16__2_6_6: 6321; GFX90A: ; %bb.0: 6322; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6323; GFX90A-NEXT: v_mov_b32_e32 v4, 0 6324; GFX90A-NEXT: ;;#ASMSTART 6325; GFX90A-NEXT: ; def v[0:1] 6326; GFX90A-NEXT: ;;#ASMEND 6327; GFX90A-NEXT: ;;#ASMSTART 6328; GFX90A-NEXT: ; def v[2:3] 6329; GFX90A-NEXT: ;;#ASMEND 6330; GFX90A-NEXT: 
s_mov_b32 s4, 0x5040100 6331; GFX90A-NEXT: v_perm_b32 v0, v3, v1, s4 6332; GFX90A-NEXT: global_store_short v4, v3, s[16:17] offset:4 6333; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 6334; GFX90A-NEXT: s_waitcnt vmcnt(0) 6335; GFX90A-NEXT: s_setpc_b64 s[30:31] 6336; 6337; GFX940-LABEL: v_shuffle_v3i16_v4i16__2_6_6: 6338; GFX940: ; %bb.0: 6339; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6340; GFX940-NEXT: v_mov_b32_e32 v4, 0 6341; GFX940-NEXT: ;;#ASMSTART 6342; GFX940-NEXT: ; def v[0:1] 6343; GFX940-NEXT: ;;#ASMEND 6344; GFX940-NEXT: ;;#ASMSTART 6345; GFX940-NEXT: ; def v[2:3] 6346; GFX940-NEXT: ;;#ASMEND 6347; GFX940-NEXT: s_mov_b32 s2, 0x5040100 6348; GFX940-NEXT: v_perm_b32 v0, v3, v1, s2 6349; GFX940-NEXT: global_store_short v4, v3, s[0:1] offset:4 sc0 sc1 6350; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 6351; GFX940-NEXT: s_waitcnt vmcnt(0) 6352; GFX940-NEXT: s_setpc_b64 s[30:31] 6353 %vec0 = call <4 x i16> asm "; def $0", "=v"() 6354 %vec1 = call <4 x i16> asm "; def $0", "=v"() 6355 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 2, i32 6, i32 6> 6356 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 6357 ret void 6358} 6359 6360define void @v_shuffle_v3i16_v4i16__3_6_6(ptr addrspace(1) inreg %ptr) { 6361; GFX900-LABEL: v_shuffle_v3i16_v4i16__3_6_6: 6362; GFX900: ; %bb.0: 6363; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6364; GFX900-NEXT: v_mov_b32_e32 v4, 0 6365; GFX900-NEXT: ;;#ASMSTART 6366; GFX900-NEXT: ; def v[0:1] 6367; GFX900-NEXT: ;;#ASMEND 6368; GFX900-NEXT: ;;#ASMSTART 6369; GFX900-NEXT: ; def v[2:3] 6370; GFX900-NEXT: ;;#ASMEND 6371; GFX900-NEXT: v_alignbit_b32 v0, v3, v1, 16 6372; GFX900-NEXT: global_store_short v4, v3, s[16:17] offset:4 6373; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 6374; GFX900-NEXT: s_waitcnt vmcnt(0) 6375; GFX900-NEXT: s_setpc_b64 s[30:31] 6376; 6377; GFX90A-LABEL: v_shuffle_v3i16_v4i16__3_6_6: 6378; GFX90A: ; %bb.0: 6379; GFX90A-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) 6380; GFX90A-NEXT: v_mov_b32_e32 v4, 0 6381; GFX90A-NEXT: ;;#ASMSTART 6382; GFX90A-NEXT: ; def v[0:1] 6383; GFX90A-NEXT: ;;#ASMEND 6384; GFX90A-NEXT: ;;#ASMSTART 6385; GFX90A-NEXT: ; def v[2:3] 6386; GFX90A-NEXT: ;;#ASMEND 6387; GFX90A-NEXT: v_alignbit_b32 v0, v3, v1, 16 6388; GFX90A-NEXT: global_store_short v4, v3, s[16:17] offset:4 6389; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 6390; GFX90A-NEXT: s_waitcnt vmcnt(0) 6391; GFX90A-NEXT: s_setpc_b64 s[30:31] 6392; 6393; GFX940-LABEL: v_shuffle_v3i16_v4i16__3_6_6: 6394; GFX940: ; %bb.0: 6395; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6396; GFX940-NEXT: v_mov_b32_e32 v4, 0 6397; GFX940-NEXT: ;;#ASMSTART 6398; GFX940-NEXT: ; def v[0:1] 6399; GFX940-NEXT: ;;#ASMEND 6400; GFX940-NEXT: ;;#ASMSTART 6401; GFX940-NEXT: ; def v[2:3] 6402; GFX940-NEXT: ;;#ASMEND 6403; GFX940-NEXT: s_nop 0 6404; GFX940-NEXT: v_alignbit_b32 v0, v3, v1, 16 6405; GFX940-NEXT: global_store_short v4, v3, s[0:1] offset:4 sc0 sc1 6406; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 6407; GFX940-NEXT: s_waitcnt vmcnt(0) 6408; GFX940-NEXT: s_setpc_b64 s[30:31] 6409 %vec0 = call <4 x i16> asm "; def $0", "=v"() 6410 %vec1 = call <4 x i16> asm "; def $0", "=v"() 6411 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 3, i32 6, i32 6> 6412 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 6413 ret void 6414} 6415 6416define void @v_shuffle_v3i16_v4i16__4_6_6(ptr addrspace(1) inreg %ptr) { 6417; GFX900-LABEL: v_shuffle_v3i16_v4i16__4_6_6: 6418; GFX900: ; %bb.0: 6419; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6420; GFX900-NEXT: v_mov_b32_e32 v2, 0 6421; GFX900-NEXT: ;;#ASMSTART 6422; GFX900-NEXT: ; def v[0:1] 6423; GFX900-NEXT: ;;#ASMEND 6424; GFX900-NEXT: s_mov_b32 s4, 0x5040100 6425; GFX900-NEXT: v_perm_b32 v0, v1, v0, s4 6426; GFX900-NEXT: global_store_short v2, v1, s[16:17] offset:4 6427; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 6428; GFX900-NEXT: s_waitcnt vmcnt(0) 
6429; GFX900-NEXT: s_setpc_b64 s[30:31] 6430; 6431; GFX90A-LABEL: v_shuffle_v3i16_v4i16__4_6_6: 6432; GFX90A: ; %bb.0: 6433; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6434; GFX90A-NEXT: v_mov_b32_e32 v2, 0 6435; GFX90A-NEXT: ;;#ASMSTART 6436; GFX90A-NEXT: ; def v[0:1] 6437; GFX90A-NEXT: ;;#ASMEND 6438; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 6439; GFX90A-NEXT: v_perm_b32 v0, v1, v0, s4 6440; GFX90A-NEXT: global_store_short v2, v1, s[16:17] offset:4 6441; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 6442; GFX90A-NEXT: s_waitcnt vmcnt(0) 6443; GFX90A-NEXT: s_setpc_b64 s[30:31] 6444; 6445; GFX940-LABEL: v_shuffle_v3i16_v4i16__4_6_6: 6446; GFX940: ; %bb.0: 6447; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6448; GFX940-NEXT: v_mov_b32_e32 v2, 0 6449; GFX940-NEXT: ;;#ASMSTART 6450; GFX940-NEXT: ; def v[0:1] 6451; GFX940-NEXT: ;;#ASMEND 6452; GFX940-NEXT: s_mov_b32 s2, 0x5040100 6453; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2 6454; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 6455; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 6456; GFX940-NEXT: s_waitcnt vmcnt(0) 6457; GFX940-NEXT: s_setpc_b64 s[30:31] 6458 %vec0 = call <4 x i16> asm "; def $0", "=v"() 6459 %vec1 = call <4 x i16> asm "; def $0", "=v"() 6460 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 4, i32 6, i32 6> 6461 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 6462 ret void 6463} 6464 6465define void @v_shuffle_v3i16_v4i16__5_6_6(ptr addrspace(1) inreg %ptr) { 6466; GFX900-LABEL: v_shuffle_v3i16_v4i16__5_6_6: 6467; GFX900: ; %bb.0: 6468; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6469; GFX900-NEXT: v_mov_b32_e32 v2, 0 6470; GFX900-NEXT: ;;#ASMSTART 6471; GFX900-NEXT: ; def v[0:1] 6472; GFX900-NEXT: ;;#ASMEND 6473; GFX900-NEXT: v_alignbit_b32 v0, v1, v0, 16 6474; GFX900-NEXT: global_store_short v2, v1, s[16:17] offset:4 6475; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 6476; GFX900-NEXT: s_waitcnt vmcnt(0) 6477; 
GFX900-NEXT: s_setpc_b64 s[30:31] 6478; 6479; GFX90A-LABEL: v_shuffle_v3i16_v4i16__5_6_6: 6480; GFX90A: ; %bb.0: 6481; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6482; GFX90A-NEXT: v_mov_b32_e32 v2, 0 6483; GFX90A-NEXT: ;;#ASMSTART 6484; GFX90A-NEXT: ; def v[0:1] 6485; GFX90A-NEXT: ;;#ASMEND 6486; GFX90A-NEXT: v_alignbit_b32 v0, v1, v0, 16 6487; GFX90A-NEXT: global_store_short v2, v1, s[16:17] offset:4 6488; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 6489; GFX90A-NEXT: s_waitcnt vmcnt(0) 6490; GFX90A-NEXT: s_setpc_b64 s[30:31] 6491; 6492; GFX940-LABEL: v_shuffle_v3i16_v4i16__5_6_6: 6493; GFX940: ; %bb.0: 6494; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6495; GFX940-NEXT: v_mov_b32_e32 v2, 0 6496; GFX940-NEXT: ;;#ASMSTART 6497; GFX940-NEXT: ; def v[0:1] 6498; GFX940-NEXT: ;;#ASMEND 6499; GFX940-NEXT: s_nop 0 6500; GFX940-NEXT: v_alignbit_b32 v0, v1, v0, 16 6501; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 6502; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 6503; GFX940-NEXT: s_waitcnt vmcnt(0) 6504; GFX940-NEXT: s_setpc_b64 s[30:31] 6505 %vec0 = call <4 x i16> asm "; def $0", "=v"() 6506 %vec1 = call <4 x i16> asm "; def $0", "=v"() 6507 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 5, i32 6, i32 6> 6508 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 6509 ret void 6510} 6511 6512define void @v_shuffle_v3i16_v4i16__6_6_6(ptr addrspace(1) inreg %ptr) { 6513; GFX900-LABEL: v_shuffle_v3i16_v4i16__6_6_6: 6514; GFX900: ; %bb.0: 6515; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6516; GFX900-NEXT: v_mov_b32_e32 v2, 0 6517; GFX900-NEXT: ;;#ASMSTART 6518; GFX900-NEXT: ; def v[0:1] 6519; GFX900-NEXT: ;;#ASMEND 6520; GFX900-NEXT: s_mov_b32 s4, 0x5040100 6521; GFX900-NEXT: v_perm_b32 v0, v1, v1, s4 6522; GFX900-NEXT: global_store_short v2, v1, s[16:17] offset:4 6523; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 6524; GFX900-NEXT: s_waitcnt vmcnt(0) 6525; GFX900-NEXT: s_setpc_b64 
s[30:31] 6526; 6527; GFX90A-LABEL: v_shuffle_v3i16_v4i16__6_6_6: 6528; GFX90A: ; %bb.0: 6529; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6530; GFX90A-NEXT: v_mov_b32_e32 v2, 0 6531; GFX90A-NEXT: ;;#ASMSTART 6532; GFX90A-NEXT: ; def v[0:1] 6533; GFX90A-NEXT: ;;#ASMEND 6534; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 6535; GFX90A-NEXT: v_perm_b32 v0, v1, v1, s4 6536; GFX90A-NEXT: global_store_short v2, v1, s[16:17] offset:4 6537; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 6538; GFX90A-NEXT: s_waitcnt vmcnt(0) 6539; GFX90A-NEXT: s_setpc_b64 s[30:31] 6540; 6541; GFX940-LABEL: v_shuffle_v3i16_v4i16__6_6_6: 6542; GFX940: ; %bb.0: 6543; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6544; GFX940-NEXT: v_mov_b32_e32 v2, 0 6545; GFX940-NEXT: ;;#ASMSTART 6546; GFX940-NEXT: ; def v[0:1] 6547; GFX940-NEXT: ;;#ASMEND 6548; GFX940-NEXT: s_mov_b32 s2, 0x5040100 6549; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 6550; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 6551; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 6552; GFX940-NEXT: s_waitcnt vmcnt(0) 6553; GFX940-NEXT: s_setpc_b64 s[30:31] 6554 %vec0 = call <4 x i16> asm "; def $0", "=v"() 6555 %vec1 = call <4 x i16> asm "; def $0", "=v"() 6556 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 6, i32 6, i32 6> 6557 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 6558 ret void 6559} 6560 6561define void @v_shuffle_v3i16_v4i16__7_6_6(ptr addrspace(1) inreg %ptr) { 6562; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_6_6: 6563; GFX900: ; %bb.0: 6564; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6565; GFX900-NEXT: v_mov_b32_e32 v2, 0 6566; GFX900-NEXT: ;;#ASMSTART 6567; GFX900-NEXT: ; def v[0:1] 6568; GFX900-NEXT: ;;#ASMEND 6569; GFX900-NEXT: v_alignbit_b32 v0, v1, v1, 16 6570; GFX900-NEXT: global_store_short v2, v1, s[16:17] offset:4 6571; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 6572; GFX900-NEXT: s_waitcnt vmcnt(0) 6573; GFX900-NEXT: s_setpc_b64 s[30:31] 6574; 
6575; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_6_6: 6576; GFX90A: ; %bb.0: 6577; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6578; GFX90A-NEXT: v_mov_b32_e32 v2, 0 6579; GFX90A-NEXT: ;;#ASMSTART 6580; GFX90A-NEXT: ; def v[0:1] 6581; GFX90A-NEXT: ;;#ASMEND 6582; GFX90A-NEXT: v_alignbit_b32 v0, v1, v1, 16 6583; GFX90A-NEXT: global_store_short v2, v1, s[16:17] offset:4 6584; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 6585; GFX90A-NEXT: s_waitcnt vmcnt(0) 6586; GFX90A-NEXT: s_setpc_b64 s[30:31] 6587; 6588; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_6_6: 6589; GFX940: ; %bb.0: 6590; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6591; GFX940-NEXT: v_mov_b32_e32 v2, 0 6592; GFX940-NEXT: ;;#ASMSTART 6593; GFX940-NEXT: ; def v[0:1] 6594; GFX940-NEXT: ;;#ASMEND 6595; GFX940-NEXT: s_nop 0 6596; GFX940-NEXT: v_alignbit_b32 v0, v1, v1, 16 6597; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 6598; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 6599; GFX940-NEXT: s_waitcnt vmcnt(0) 6600; GFX940-NEXT: s_setpc_b64 s[30:31] 6601 %vec0 = call <4 x i16> asm "; def $0", "=v"() 6602 %vec1 = call <4 x i16> asm "; def $0", "=v"() 6603 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 6, i32 6> 6604 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 6605 ret void 6606} 6607 6608define void @v_shuffle_v3i16_v4i16__7_u_6(ptr addrspace(1) inreg %ptr) { 6609; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_u_6: 6610; GFX900: ; %bb.0: 6611; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6612; GFX900-NEXT: v_mov_b32_e32 v2, 0 6613; GFX900-NEXT: ;;#ASMSTART 6614; GFX900-NEXT: ; def v[0:1] 6615; GFX900-NEXT: ;;#ASMEND 6616; GFX900-NEXT: v_alignbit_b32 v0, s4, v1, 16 6617; GFX900-NEXT: global_store_short v2, v1, s[16:17] offset:4 6618; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 6619; GFX900-NEXT: s_waitcnt vmcnt(0) 6620; GFX900-NEXT: s_setpc_b64 s[30:31] 6621; 6622; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_u_6: 6623; GFX90A: ; 
%bb.0: 6624; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6625; GFX90A-NEXT: v_mov_b32_e32 v2, 0 6626; GFX90A-NEXT: ;;#ASMSTART 6627; GFX90A-NEXT: ; def v[0:1] 6628; GFX90A-NEXT: ;;#ASMEND 6629; GFX90A-NEXT: v_alignbit_b32 v0, s4, v1, 16 6630; GFX90A-NEXT: global_store_short v2, v1, s[16:17] offset:4 6631; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 6632; GFX90A-NEXT: s_waitcnt vmcnt(0) 6633; GFX90A-NEXT: s_setpc_b64 s[30:31] 6634; 6635; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_u_6: 6636; GFX940: ; %bb.0: 6637; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6638; GFX940-NEXT: v_mov_b32_e32 v2, 0 6639; GFX940-NEXT: ;;#ASMSTART 6640; GFX940-NEXT: ; def v[0:1] 6641; GFX940-NEXT: ;;#ASMEND 6642; GFX940-NEXT: s_nop 0 6643; GFX940-NEXT: v_alignbit_b32 v0, s0, v1, 16 6644; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 6645; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 6646; GFX940-NEXT: s_waitcnt vmcnt(0) 6647; GFX940-NEXT: s_setpc_b64 s[30:31] 6648 %vec0 = call <4 x i16> asm "; def $0", "=v"() 6649 %vec1 = call <4 x i16> asm "; def $0", "=v"() 6650 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 poison, i32 6> 6651 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 6652 ret void 6653} 6654 6655define void @v_shuffle_v3i16_v4i16__7_0_6(ptr addrspace(1) inreg %ptr) { 6656; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_0_6: 6657; GFX900: ; %bb.0: 6658; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6659; GFX900-NEXT: ;;#ASMSTART 6660; GFX900-NEXT: ; def v[0:1] 6661; GFX900-NEXT: ;;#ASMEND 6662; GFX900-NEXT: v_mov_b32_e32 v3, 0 6663; GFX900-NEXT: ;;#ASMSTART 6664; GFX900-NEXT: ; def v[1:2] 6665; GFX900-NEXT: ;;#ASMEND 6666; GFX900-NEXT: v_alignbit_b32 v0, v0, v2, 16 6667; GFX900-NEXT: global_store_short v3, v2, s[16:17] offset:4 6668; GFX900-NEXT: global_store_dword v3, v0, s[16:17] 6669; GFX900-NEXT: s_waitcnt vmcnt(0) 6670; GFX900-NEXT: s_setpc_b64 s[30:31] 6671; 6672; GFX90A-LABEL: 
v_shuffle_v3i16_v4i16__7_0_6: 6673; GFX90A: ; %bb.0: 6674; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6675; GFX90A-NEXT: v_mov_b32_e32 v4, 0 6676; GFX90A-NEXT: ;;#ASMSTART 6677; GFX90A-NEXT: ; def v[0:1] 6678; GFX90A-NEXT: ;;#ASMEND 6679; GFX90A-NEXT: ;;#ASMSTART 6680; GFX90A-NEXT: ; def v[2:3] 6681; GFX90A-NEXT: ;;#ASMEND 6682; GFX90A-NEXT: v_alignbit_b32 v0, v0, v3, 16 6683; GFX90A-NEXT: global_store_short v4, v3, s[16:17] offset:4 6684; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 6685; GFX90A-NEXT: s_waitcnt vmcnt(0) 6686; GFX90A-NEXT: s_setpc_b64 s[30:31] 6687; 6688; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_0_6: 6689; GFX940: ; %bb.0: 6690; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6691; GFX940-NEXT: v_mov_b32_e32 v4, 0 6692; GFX940-NEXT: ;;#ASMSTART 6693; GFX940-NEXT: ; def v[0:1] 6694; GFX940-NEXT: ;;#ASMEND 6695; GFX940-NEXT: ;;#ASMSTART 6696; GFX940-NEXT: ; def v[2:3] 6697; GFX940-NEXT: ;;#ASMEND 6698; GFX940-NEXT: s_nop 0 6699; GFX940-NEXT: v_alignbit_b32 v0, v0, v3, 16 6700; GFX940-NEXT: global_store_short v4, v3, s[0:1] offset:4 sc0 sc1 6701; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 6702; GFX940-NEXT: s_waitcnt vmcnt(0) 6703; GFX940-NEXT: s_setpc_b64 s[30:31] 6704 %vec0 = call <4 x i16> asm "; def $0", "=v"() 6705 %vec1 = call <4 x i16> asm "; def $0", "=v"() 6706 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 0, i32 6> 6707 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 6708 ret void 6709} 6710 6711define void @v_shuffle_v3i16_v4i16__7_1_6(ptr addrspace(1) inreg %ptr) { 6712; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_1_6: 6713; GFX900: ; %bb.0: 6714; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6715; GFX900-NEXT: ;;#ASMSTART 6716; GFX900-NEXT: ; def v[0:1] 6717; GFX900-NEXT: ;;#ASMEND 6718; GFX900-NEXT: v_mov_b32_e32 v3, 0 6719; GFX900-NEXT: ;;#ASMSTART 6720; GFX900-NEXT: ; def v[1:2] 6721; GFX900-NEXT: ;;#ASMEND 6722; GFX900-NEXT: s_mov_b32 s4, 0x7060302 6723; GFX900-NEXT: 
v_perm_b32 v0, v0, v2, s4 6724; GFX900-NEXT: global_store_short v3, v2, s[16:17] offset:4 6725; GFX900-NEXT: global_store_dword v3, v0, s[16:17] 6726; GFX900-NEXT: s_waitcnt vmcnt(0) 6727; GFX900-NEXT: s_setpc_b64 s[30:31] 6728; 6729; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_1_6: 6730; GFX90A: ; %bb.0: 6731; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6732; GFX90A-NEXT: v_mov_b32_e32 v4, 0 6733; GFX90A-NEXT: ;;#ASMSTART 6734; GFX90A-NEXT: ; def v[0:1] 6735; GFX90A-NEXT: ;;#ASMEND 6736; GFX90A-NEXT: ;;#ASMSTART 6737; GFX90A-NEXT: ; def v[2:3] 6738; GFX90A-NEXT: ;;#ASMEND 6739; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 6740; GFX90A-NEXT: v_perm_b32 v0, v0, v3, s4 6741; GFX90A-NEXT: global_store_short v4, v3, s[16:17] offset:4 6742; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 6743; GFX90A-NEXT: s_waitcnt vmcnt(0) 6744; GFX90A-NEXT: s_setpc_b64 s[30:31] 6745; 6746; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_1_6: 6747; GFX940: ; %bb.0: 6748; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6749; GFX940-NEXT: v_mov_b32_e32 v4, 0 6750; GFX940-NEXT: ;;#ASMSTART 6751; GFX940-NEXT: ; def v[0:1] 6752; GFX940-NEXT: ;;#ASMEND 6753; GFX940-NEXT: ;;#ASMSTART 6754; GFX940-NEXT: ; def v[2:3] 6755; GFX940-NEXT: ;;#ASMEND 6756; GFX940-NEXT: s_mov_b32 s2, 0x7060302 6757; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 6758; GFX940-NEXT: global_store_short v4, v3, s[0:1] offset:4 sc0 sc1 6759; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 6760; GFX940-NEXT: s_waitcnt vmcnt(0) 6761; GFX940-NEXT: s_setpc_b64 s[30:31] 6762 %vec0 = call <4 x i16> asm "; def $0", "=v"() 6763 %vec1 = call <4 x i16> asm "; def $0", "=v"() 6764 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 1, i32 6> 6765 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 6766 ret void 6767} 6768 6769define void @v_shuffle_v3i16_v4i16__7_2_6(ptr addrspace(1) inreg %ptr) { 6770; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_2_6: 6771; GFX900: ; %bb.0: 6772; GFX900-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) 6773; GFX900-NEXT: v_mov_b32_e32 v4, 0 6774; GFX900-NEXT: ;;#ASMSTART 6775; GFX900-NEXT: ; def v[0:1] 6776; GFX900-NEXT: ;;#ASMEND 6777; GFX900-NEXT: ;;#ASMSTART 6778; GFX900-NEXT: ; def v[2:3] 6779; GFX900-NEXT: ;;#ASMEND 6780; GFX900-NEXT: v_alignbit_b32 v0, v1, v3, 16 6781; GFX900-NEXT: global_store_short v4, v3, s[16:17] offset:4 6782; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 6783; GFX900-NEXT: s_waitcnt vmcnt(0) 6784; GFX900-NEXT: s_setpc_b64 s[30:31] 6785; 6786; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_2_6: 6787; GFX90A: ; %bb.0: 6788; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6789; GFX90A-NEXT: v_mov_b32_e32 v4, 0 6790; GFX90A-NEXT: ;;#ASMSTART 6791; GFX90A-NEXT: ; def v[0:1] 6792; GFX90A-NEXT: ;;#ASMEND 6793; GFX90A-NEXT: ;;#ASMSTART 6794; GFX90A-NEXT: ; def v[2:3] 6795; GFX90A-NEXT: ;;#ASMEND 6796; GFX90A-NEXT: v_alignbit_b32 v0, v1, v3, 16 6797; GFX90A-NEXT: global_store_short v4, v3, s[16:17] offset:4 6798; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 6799; GFX90A-NEXT: s_waitcnt vmcnt(0) 6800; GFX90A-NEXT: s_setpc_b64 s[30:31] 6801; 6802; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_2_6: 6803; GFX940: ; %bb.0: 6804; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6805; GFX940-NEXT: v_mov_b32_e32 v4, 0 6806; GFX940-NEXT: ;;#ASMSTART 6807; GFX940-NEXT: ; def v[0:1] 6808; GFX940-NEXT: ;;#ASMEND 6809; GFX940-NEXT: ;;#ASMSTART 6810; GFX940-NEXT: ; def v[2:3] 6811; GFX940-NEXT: ;;#ASMEND 6812; GFX940-NEXT: s_nop 0 6813; GFX940-NEXT: v_alignbit_b32 v0, v1, v3, 16 6814; GFX940-NEXT: global_store_short v4, v3, s[0:1] offset:4 sc0 sc1 6815; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 6816; GFX940-NEXT: s_waitcnt vmcnt(0) 6817; GFX940-NEXT: s_setpc_b64 s[30:31] 6818 %vec0 = call <4 x i16> asm "; def $0", "=v"() 6819 %vec1 = call <4 x i16> asm "; def $0", "=v"() 6820 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 2, i32 6> 6821 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 
6822 ret void 6823} 6824 6825define void @v_shuffle_v3i16_v4i16__7_3_6(ptr addrspace(1) inreg %ptr) { 6826; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_3_6: 6827; GFX900: ; %bb.0: 6828; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6829; GFX900-NEXT: v_mov_b32_e32 v4, 0 6830; GFX900-NEXT: ;;#ASMSTART 6831; GFX900-NEXT: ; def v[0:1] 6832; GFX900-NEXT: ;;#ASMEND 6833; GFX900-NEXT: ;;#ASMSTART 6834; GFX900-NEXT: ; def v[2:3] 6835; GFX900-NEXT: ;;#ASMEND 6836; GFX900-NEXT: s_mov_b32 s4, 0x7060302 6837; GFX900-NEXT: v_perm_b32 v0, v1, v3, s4 6838; GFX900-NEXT: global_store_short v4, v3, s[16:17] offset:4 6839; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 6840; GFX900-NEXT: s_waitcnt vmcnt(0) 6841; GFX900-NEXT: s_setpc_b64 s[30:31] 6842; 6843; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_3_6: 6844; GFX90A: ; %bb.0: 6845; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6846; GFX90A-NEXT: v_mov_b32_e32 v4, 0 6847; GFX90A-NEXT: ;;#ASMSTART 6848; GFX90A-NEXT: ; def v[0:1] 6849; GFX90A-NEXT: ;;#ASMEND 6850; GFX90A-NEXT: ;;#ASMSTART 6851; GFX90A-NEXT: ; def v[2:3] 6852; GFX90A-NEXT: ;;#ASMEND 6853; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 6854; GFX90A-NEXT: v_perm_b32 v0, v1, v3, s4 6855; GFX90A-NEXT: global_store_short v4, v3, s[16:17] offset:4 6856; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 6857; GFX90A-NEXT: s_waitcnt vmcnt(0) 6858; GFX90A-NEXT: s_setpc_b64 s[30:31] 6859; 6860; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_3_6: 6861; GFX940: ; %bb.0: 6862; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6863; GFX940-NEXT: v_mov_b32_e32 v4, 0 6864; GFX940-NEXT: ;;#ASMSTART 6865; GFX940-NEXT: ; def v[0:1] 6866; GFX940-NEXT: ;;#ASMEND 6867; GFX940-NEXT: ;;#ASMSTART 6868; GFX940-NEXT: ; def v[2:3] 6869; GFX940-NEXT: ;;#ASMEND 6870; GFX940-NEXT: s_mov_b32 s2, 0x7060302 6871; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 6872; GFX940-NEXT: global_store_short v4, v3, s[0:1] offset:4 sc0 sc1 6873; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 6874; GFX940-NEXT: s_waitcnt 
vmcnt(0) 6875; GFX940-NEXT: s_setpc_b64 s[30:31] 6876 %vec0 = call <4 x i16> asm "; def $0", "=v"() 6877 %vec1 = call <4 x i16> asm "; def $0", "=v"() 6878 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 3, i32 6> 6879 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 6880 ret void 6881} 6882 6883define void @v_shuffle_v3i16_v4i16__7_4_6(ptr addrspace(1) inreg %ptr) { 6884; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_4_6: 6885; GFX900: ; %bb.0: 6886; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6887; GFX900-NEXT: v_mov_b32_e32 v2, 0 6888; GFX900-NEXT: ;;#ASMSTART 6889; GFX900-NEXT: ; def v[0:1] 6890; GFX900-NEXT: ;;#ASMEND 6891; GFX900-NEXT: v_alignbit_b32 v0, v0, v1, 16 6892; GFX900-NEXT: global_store_short v2, v1, s[16:17] offset:4 6893; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 6894; GFX900-NEXT: s_waitcnt vmcnt(0) 6895; GFX900-NEXT: s_setpc_b64 s[30:31] 6896; 6897; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_4_6: 6898; GFX90A: ; %bb.0: 6899; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6900; GFX90A-NEXT: v_mov_b32_e32 v2, 0 6901; GFX90A-NEXT: ;;#ASMSTART 6902; GFX90A-NEXT: ; def v[0:1] 6903; GFX90A-NEXT: ;;#ASMEND 6904; GFX90A-NEXT: v_alignbit_b32 v0, v0, v1, 16 6905; GFX90A-NEXT: global_store_short v2, v1, s[16:17] offset:4 6906; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 6907; GFX90A-NEXT: s_waitcnt vmcnt(0) 6908; GFX90A-NEXT: s_setpc_b64 s[30:31] 6909; 6910; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_4_6: 6911; GFX940: ; %bb.0: 6912; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6913; GFX940-NEXT: v_mov_b32_e32 v2, 0 6914; GFX940-NEXT: ;;#ASMSTART 6915; GFX940-NEXT: ; def v[0:1] 6916; GFX940-NEXT: ;;#ASMEND 6917; GFX940-NEXT: s_nop 0 6918; GFX940-NEXT: v_alignbit_b32 v0, v0, v1, 16 6919; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 6920; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 6921; GFX940-NEXT: s_waitcnt vmcnt(0) 6922; GFX940-NEXT: s_setpc_b64 s[30:31] 6923 %vec0 = 
call <4 x i16> asm "; def $0", "=v"() 6924 %vec1 = call <4 x i16> asm "; def $0", "=v"() 6925 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 4, i32 6> 6926 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 6927 ret void 6928} 6929 6930define void @v_shuffle_v3i16_v4i16__7_5_6(ptr addrspace(1) inreg %ptr) { 6931; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_5_6: 6932; GFX900: ; %bb.0: 6933; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6934; GFX900-NEXT: v_mov_b32_e32 v2, 0 6935; GFX900-NEXT: ;;#ASMSTART 6936; GFX900-NEXT: ; def v[0:1] 6937; GFX900-NEXT: ;;#ASMEND 6938; GFX900-NEXT: s_mov_b32 s4, 0x7060302 6939; GFX900-NEXT: v_perm_b32 v0, v0, v1, s4 6940; GFX900-NEXT: global_store_short v2, v1, s[16:17] offset:4 6941; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 6942; GFX900-NEXT: s_waitcnt vmcnt(0) 6943; GFX900-NEXT: s_setpc_b64 s[30:31] 6944; 6945; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_5_6: 6946; GFX90A: ; %bb.0: 6947; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6948; GFX90A-NEXT: v_mov_b32_e32 v2, 0 6949; GFX90A-NEXT: ;;#ASMSTART 6950; GFX90A-NEXT: ; def v[0:1] 6951; GFX90A-NEXT: ;;#ASMEND 6952; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 6953; GFX90A-NEXT: v_perm_b32 v0, v0, v1, s4 6954; GFX90A-NEXT: global_store_short v2, v1, s[16:17] offset:4 6955; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 6956; GFX90A-NEXT: s_waitcnt vmcnt(0) 6957; GFX90A-NEXT: s_setpc_b64 s[30:31] 6958; 6959; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_5_6: 6960; GFX940: ; %bb.0: 6961; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6962; GFX940-NEXT: v_mov_b32_e32 v2, 0 6963; GFX940-NEXT: ;;#ASMSTART 6964; GFX940-NEXT: ; def v[0:1] 6965; GFX940-NEXT: ;;#ASMEND 6966; GFX940-NEXT: s_mov_b32 s2, 0x7060302 6967; GFX940-NEXT: v_perm_b32 v0, v0, v1, s2 6968; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 6969; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 6970; GFX940-NEXT: s_waitcnt vmcnt(0) 6971; GFX940-NEXT: 
s_setpc_b64 s[30:31] 6972 %vec0 = call <4 x i16> asm "; def $0", "=v"() 6973 %vec1 = call <4 x i16> asm "; def $0", "=v"() 6974 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 5, i32 6> 6975 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 6976 ret void 6977} 6978 6979define void @v_shuffle_v3i16_v4i16__u_7_7(ptr addrspace(1) inreg %ptr) { 6980; GFX900-LABEL: v_shuffle_v3i16_v4i16__u_7_7: 6981; GFX900: ; %bb.0: 6982; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6983; GFX900-NEXT: v_mov_b32_e32 v2, 0 6984; GFX900-NEXT: ;;#ASMSTART 6985; GFX900-NEXT: ; def v[0:1] 6986; GFX900-NEXT: ;;#ASMEND 6987; GFX900-NEXT: global_store_short_d16_hi v2, v1, s[16:17] offset:4 6988; GFX900-NEXT: global_store_dword v2, v1, s[16:17] 6989; GFX900-NEXT: s_waitcnt vmcnt(0) 6990; GFX900-NEXT: s_setpc_b64 s[30:31] 6991; 6992; GFX90A-LABEL: v_shuffle_v3i16_v4i16__u_7_7: 6993; GFX90A: ; %bb.0: 6994; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6995; GFX90A-NEXT: v_mov_b32_e32 v2, 0 6996; GFX90A-NEXT: ;;#ASMSTART 6997; GFX90A-NEXT: ; def v[0:1] 6998; GFX90A-NEXT: ;;#ASMEND 6999; GFX90A-NEXT: global_store_short_d16_hi v2, v1, s[16:17] offset:4 7000; GFX90A-NEXT: global_store_dword v2, v1, s[16:17] 7001; GFX90A-NEXT: s_waitcnt vmcnt(0) 7002; GFX90A-NEXT: s_setpc_b64 s[30:31] 7003; 7004; GFX940-LABEL: v_shuffle_v3i16_v4i16__u_7_7: 7005; GFX940: ; %bb.0: 7006; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7007; GFX940-NEXT: v_mov_b32_e32 v2, 0 7008; GFX940-NEXT: ;;#ASMSTART 7009; GFX940-NEXT: ; def v[0:1] 7010; GFX940-NEXT: ;;#ASMEND 7011; GFX940-NEXT: global_store_short_d16_hi v2, v1, s[0:1] offset:4 sc0 sc1 7012; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 7013; GFX940-NEXT: s_waitcnt vmcnt(0) 7014; GFX940-NEXT: s_setpc_b64 s[30:31] 7015 %vec0 = call <4 x i16> asm "; def $0", "=v"() 7016 %vec1 = call <4 x i16> asm "; def $0", "=v"() 7017 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 poison, i32 7, i32 
7> 7018 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 7019 ret void 7020} 7021 7022define void @v_shuffle_v3i16_v4i16__0_7_7(ptr addrspace(1) inreg %ptr) { 7023; GFX900-LABEL: v_shuffle_v3i16_v4i16__0_7_7: 7024; GFX900: ; %bb.0: 7025; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7026; GFX900-NEXT: ;;#ASMSTART 7027; GFX900-NEXT: ; def v[0:1] 7028; GFX900-NEXT: ;;#ASMEND 7029; GFX900-NEXT: s_mov_b32 s4, 0xffff 7030; GFX900-NEXT: v_mov_b32_e32 v3, 0 7031; GFX900-NEXT: ;;#ASMSTART 7032; GFX900-NEXT: ; def v[1:2] 7033; GFX900-NEXT: ;;#ASMEND 7034; GFX900-NEXT: v_bfi_b32 v0, s4, v0, v2 7035; GFX900-NEXT: v_lshrrev_b32_e32 v1, 16, v2 7036; GFX900-NEXT: global_store_dword v3, v0, s[16:17] 7037; GFX900-NEXT: global_store_short v3, v1, s[16:17] offset:4 7038; GFX900-NEXT: s_waitcnt vmcnt(0) 7039; GFX900-NEXT: s_setpc_b64 s[30:31] 7040; 7041; GFX90A-LABEL: v_shuffle_v3i16_v4i16__0_7_7: 7042; GFX90A: ; %bb.0: 7043; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7044; GFX90A-NEXT: ;;#ASMSTART 7045; GFX90A-NEXT: ; def v[0:1] 7046; GFX90A-NEXT: ;;#ASMEND 7047; GFX90A-NEXT: s_mov_b32 s4, 0xffff 7048; GFX90A-NEXT: v_mov_b32_e32 v4, 0 7049; GFX90A-NEXT: ;;#ASMSTART 7050; GFX90A-NEXT: ; def v[2:3] 7051; GFX90A-NEXT: ;;#ASMEND 7052; GFX90A-NEXT: v_bfi_b32 v0, s4, v0, v3 7053; GFX90A-NEXT: v_lshrrev_b32_e32 v1, 16, v3 7054; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 7055; GFX90A-NEXT: global_store_short v4, v1, s[16:17] offset:4 7056; GFX90A-NEXT: s_waitcnt vmcnt(0) 7057; GFX90A-NEXT: s_setpc_b64 s[30:31] 7058; 7059; GFX940-LABEL: v_shuffle_v3i16_v4i16__0_7_7: 7060; GFX940: ; %bb.0: 7061; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7062; GFX940-NEXT: ;;#ASMSTART 7063; GFX940-NEXT: ; def v[0:1] 7064; GFX940-NEXT: ;;#ASMEND 7065; GFX940-NEXT: s_mov_b32 s2, 0xffff 7066; GFX940-NEXT: v_mov_b32_e32 v4, 0 7067; GFX940-NEXT: ;;#ASMSTART 7068; GFX940-NEXT: ; def v[2:3] 7069; GFX940-NEXT: ;;#ASMEND 7070; GFX940-NEXT: s_nop 0 7071; GFX940-NEXT: v_bfi_b32 v0, 
s2, v0, v3 7072; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v3 7073; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 7074; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 7075; GFX940-NEXT: s_waitcnt vmcnt(0) 7076; GFX940-NEXT: s_setpc_b64 s[30:31] 7077 %vec0 = call <4 x i16> asm "; def $0", "=v"() 7078 %vec1 = call <4 x i16> asm "; def $0", "=v"() 7079 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 0, i32 7, i32 7> 7080 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 7081 ret void 7082} 7083 7084define void @v_shuffle_v3i16_v4i16__1_7_7(ptr addrspace(1) inreg %ptr) { 7085; GFX900-LABEL: v_shuffle_v3i16_v4i16__1_7_7: 7086; GFX900: ; %bb.0: 7087; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7088; GFX900-NEXT: ;;#ASMSTART 7089; GFX900-NEXT: ; def v[0:1] 7090; GFX900-NEXT: ;;#ASMEND 7091; GFX900-NEXT: s_mov_b32 s4, 0x7060302 7092; GFX900-NEXT: v_mov_b32_e32 v3, 0 7093; GFX900-NEXT: ;;#ASMSTART 7094; GFX900-NEXT: ; def v[1:2] 7095; GFX900-NEXT: ;;#ASMEND 7096; GFX900-NEXT: v_perm_b32 v0, v2, v0, s4 7097; GFX900-NEXT: v_lshrrev_b32_e32 v1, 16, v2 7098; GFX900-NEXT: global_store_dword v3, v0, s[16:17] 7099; GFX900-NEXT: global_store_short v3, v1, s[16:17] offset:4 7100; GFX900-NEXT: s_waitcnt vmcnt(0) 7101; GFX900-NEXT: s_setpc_b64 s[30:31] 7102; 7103; GFX90A-LABEL: v_shuffle_v3i16_v4i16__1_7_7: 7104; GFX90A: ; %bb.0: 7105; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7106; GFX90A-NEXT: ;;#ASMSTART 7107; GFX90A-NEXT: ; def v[0:1] 7108; GFX90A-NEXT: ;;#ASMEND 7109; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 7110; GFX90A-NEXT: v_mov_b32_e32 v4, 0 7111; GFX90A-NEXT: ;;#ASMSTART 7112; GFX90A-NEXT: ; def v[2:3] 7113; GFX90A-NEXT: ;;#ASMEND 7114; GFX90A-NEXT: v_perm_b32 v0, v3, v0, s4 7115; GFX90A-NEXT: v_lshrrev_b32_e32 v1, 16, v3 7116; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 7117; GFX90A-NEXT: global_store_short v4, v1, s[16:17] offset:4 7118; GFX90A-NEXT: s_waitcnt vmcnt(0) 7119; GFX90A-NEXT: s_setpc_b64 
s[30:31] 7120; 7121; GFX940-LABEL: v_shuffle_v3i16_v4i16__1_7_7: 7122; GFX940: ; %bb.0: 7123; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7124; GFX940-NEXT: ;;#ASMSTART 7125; GFX940-NEXT: ; def v[0:1] 7126; GFX940-NEXT: ;;#ASMEND 7127; GFX940-NEXT: s_mov_b32 s2, 0x7060302 7128; GFX940-NEXT: v_mov_b32_e32 v4, 0 7129; GFX940-NEXT: ;;#ASMSTART 7130; GFX940-NEXT: ; def v[2:3] 7131; GFX940-NEXT: ;;#ASMEND 7132; GFX940-NEXT: s_nop 0 7133; GFX940-NEXT: v_perm_b32 v0, v3, v0, s2 7134; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v3 7135; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 7136; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 7137; GFX940-NEXT: s_waitcnt vmcnt(0) 7138; GFX940-NEXT: s_setpc_b64 s[30:31] 7139 %vec0 = call <4 x i16> asm "; def $0", "=v"() 7140 %vec1 = call <4 x i16> asm "; def $0", "=v"() 7141 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 1, i32 7, i32 7> 7142 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 7143 ret void 7144} 7145 7146define void @v_shuffle_v3i16_v4i16__2_7_7(ptr addrspace(1) inreg %ptr) { 7147; GFX900-LABEL: v_shuffle_v3i16_v4i16__2_7_7: 7148; GFX900: ; %bb.0: 7149; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7150; GFX900-NEXT: ;;#ASMSTART 7151; GFX900-NEXT: ; def v[0:1] 7152; GFX900-NEXT: ;;#ASMEND 7153; GFX900-NEXT: s_mov_b32 s4, 0xffff 7154; GFX900-NEXT: v_mov_b32_e32 v4, 0 7155; GFX900-NEXT: ;;#ASMSTART 7156; GFX900-NEXT: ; def v[2:3] 7157; GFX900-NEXT: ;;#ASMEND 7158; GFX900-NEXT: v_bfi_b32 v0, s4, v1, v3 7159; GFX900-NEXT: v_lshrrev_b32_e32 v1, 16, v3 7160; GFX900-NEXT: global_store_short v4, v1, s[16:17] offset:4 7161; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 7162; GFX900-NEXT: s_waitcnt vmcnt(0) 7163; GFX900-NEXT: s_setpc_b64 s[30:31] 7164; 7165; GFX90A-LABEL: v_shuffle_v3i16_v4i16__2_7_7: 7166; GFX90A: ; %bb.0: 7167; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7168; GFX90A-NEXT: ;;#ASMSTART 7169; GFX90A-NEXT: ; def v[0:1] 7170; 
GFX90A-NEXT: ;;#ASMEND 7171; GFX90A-NEXT: s_mov_b32 s4, 0xffff 7172; GFX90A-NEXT: v_mov_b32_e32 v4, 0 7173; GFX90A-NEXT: ;;#ASMSTART 7174; GFX90A-NEXT: ; def v[2:3] 7175; GFX90A-NEXT: ;;#ASMEND 7176; GFX90A-NEXT: v_bfi_b32 v0, s4, v1, v3 7177; GFX90A-NEXT: v_lshrrev_b32_e32 v1, 16, v3 7178; GFX90A-NEXT: global_store_short v4, v1, s[16:17] offset:4 7179; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 7180; GFX90A-NEXT: s_waitcnt vmcnt(0) 7181; GFX90A-NEXT: s_setpc_b64 s[30:31] 7182; 7183; GFX940-LABEL: v_shuffle_v3i16_v4i16__2_7_7: 7184; GFX940: ; %bb.0: 7185; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7186; GFX940-NEXT: ;;#ASMSTART 7187; GFX940-NEXT: ; def v[0:1] 7188; GFX940-NEXT: ;;#ASMEND 7189; GFX940-NEXT: s_mov_b32 s2, 0xffff 7190; GFX940-NEXT: v_mov_b32_e32 v4, 0 7191; GFX940-NEXT: ;;#ASMSTART 7192; GFX940-NEXT: ; def v[2:3] 7193; GFX940-NEXT: ;;#ASMEND 7194; GFX940-NEXT: s_nop 0 7195; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v3 7196; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v3 7197; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 7198; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 7199; GFX940-NEXT: s_waitcnt vmcnt(0) 7200; GFX940-NEXT: s_setpc_b64 s[30:31] 7201 %vec0 = call <4 x i16> asm "; def $0", "=v"() 7202 %vec1 = call <4 x i16> asm "; def $0", "=v"() 7203 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 2, i32 7, i32 7> 7204 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 7205 ret void 7206} 7207 7208define void @v_shuffle_v3i16_v4i16__3_7_7(ptr addrspace(1) inreg %ptr) { 7209; GFX900-LABEL: v_shuffle_v3i16_v4i16__3_7_7: 7210; GFX900: ; %bb.0: 7211; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7212; GFX900-NEXT: ;;#ASMSTART 7213; GFX900-NEXT: ; def v[0:1] 7214; GFX900-NEXT: ;;#ASMEND 7215; GFX900-NEXT: s_mov_b32 s4, 0x7060302 7216; GFX900-NEXT: v_mov_b32_e32 v4, 0 7217; GFX900-NEXT: ;;#ASMSTART 7218; GFX900-NEXT: ; def v[2:3] 7219; GFX900-NEXT: ;;#ASMEND 7220; GFX900-NEXT: v_perm_b32 v0, 
v3, v1, s4 7221; GFX900-NEXT: v_lshrrev_b32_e32 v1, 16, v3 7222; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 7223; GFX900-NEXT: global_store_short v4, v1, s[16:17] offset:4 7224; GFX900-NEXT: s_waitcnt vmcnt(0) 7225; GFX900-NEXT: s_setpc_b64 s[30:31] 7226; 7227; GFX90A-LABEL: v_shuffle_v3i16_v4i16__3_7_7: 7228; GFX90A: ; %bb.0: 7229; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7230; GFX90A-NEXT: ;;#ASMSTART 7231; GFX90A-NEXT: ; def v[0:1] 7232; GFX90A-NEXT: ;;#ASMEND 7233; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 7234; GFX90A-NEXT: v_mov_b32_e32 v4, 0 7235; GFX90A-NEXT: ;;#ASMSTART 7236; GFX90A-NEXT: ; def v[2:3] 7237; GFX90A-NEXT: ;;#ASMEND 7238; GFX90A-NEXT: v_perm_b32 v0, v3, v1, s4 7239; GFX90A-NEXT: v_lshrrev_b32_e32 v1, 16, v3 7240; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 7241; GFX90A-NEXT: global_store_short v4, v1, s[16:17] offset:4 7242; GFX90A-NEXT: s_waitcnt vmcnt(0) 7243; GFX90A-NEXT: s_setpc_b64 s[30:31] 7244; 7245; GFX940-LABEL: v_shuffle_v3i16_v4i16__3_7_7: 7246; GFX940: ; %bb.0: 7247; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7248; GFX940-NEXT: ;;#ASMSTART 7249; GFX940-NEXT: ; def v[0:1] 7250; GFX940-NEXT: ;;#ASMEND 7251; GFX940-NEXT: s_mov_b32 s2, 0x7060302 7252; GFX940-NEXT: v_mov_b32_e32 v4, 0 7253; GFX940-NEXT: ;;#ASMSTART 7254; GFX940-NEXT: ; def v[2:3] 7255; GFX940-NEXT: ;;#ASMEND 7256; GFX940-NEXT: s_nop 0 7257; GFX940-NEXT: v_perm_b32 v0, v3, v1, s2 7258; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v3 7259; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 7260; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 7261; GFX940-NEXT: s_waitcnt vmcnt(0) 7262; GFX940-NEXT: s_setpc_b64 s[30:31] 7263 %vec0 = call <4 x i16> asm "; def $0", "=v"() 7264 %vec1 = call <4 x i16> asm "; def $0", "=v"() 7265 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 3, i32 7, i32 7> 7266 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 7267 ret void 7268} 7269 7270define void 
@v_shuffle_v3i16_v4i16__4_7_7(ptr addrspace(1) inreg %ptr) { 7271; GFX900-LABEL: v_shuffle_v3i16_v4i16__4_7_7: 7272; GFX900: ; %bb.0: 7273; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7274; GFX900-NEXT: ;;#ASMSTART 7275; GFX900-NEXT: ; def v[0:1] 7276; GFX900-NEXT: ;;#ASMEND 7277; GFX900-NEXT: s_mov_b32 s4, 0xffff 7278; GFX900-NEXT: v_mov_b32_e32 v2, 0 7279; GFX900-NEXT: v_bfi_b32 v0, s4, v0, v1 7280; GFX900-NEXT: v_lshrrev_b32_e32 v1, 16, v1 7281; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 7282; GFX900-NEXT: global_store_short v2, v1, s[16:17] offset:4 7283; GFX900-NEXT: s_waitcnt vmcnt(0) 7284; GFX900-NEXT: s_setpc_b64 s[30:31] 7285; 7286; GFX90A-LABEL: v_shuffle_v3i16_v4i16__4_7_7: 7287; GFX90A: ; %bb.0: 7288; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7289; GFX90A-NEXT: ;;#ASMSTART 7290; GFX90A-NEXT: ; def v[0:1] 7291; GFX90A-NEXT: ;;#ASMEND 7292; GFX90A-NEXT: s_mov_b32 s4, 0xffff 7293; GFX90A-NEXT: v_mov_b32_e32 v2, 0 7294; GFX90A-NEXT: v_bfi_b32 v0, s4, v0, v1 7295; GFX90A-NEXT: v_lshrrev_b32_e32 v1, 16, v1 7296; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 7297; GFX90A-NEXT: global_store_short v2, v1, s[16:17] offset:4 7298; GFX90A-NEXT: s_waitcnt vmcnt(0) 7299; GFX90A-NEXT: s_setpc_b64 s[30:31] 7300; 7301; GFX940-LABEL: v_shuffle_v3i16_v4i16__4_7_7: 7302; GFX940: ; %bb.0: 7303; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7304; GFX940-NEXT: ;;#ASMSTART 7305; GFX940-NEXT: ; def v[0:1] 7306; GFX940-NEXT: ;;#ASMEND 7307; GFX940-NEXT: s_mov_b32 s2, 0xffff 7308; GFX940-NEXT: v_mov_b32_e32 v2, 0 7309; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v1 7310; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1 7311; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 7312; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 7313; GFX940-NEXT: s_waitcnt vmcnt(0) 7314; GFX940-NEXT: s_setpc_b64 s[30:31] 7315 %vec0 = call <4 x i16> asm "; def $0", "=v"() 7316 %vec1 = call <4 x i16> asm "; def $0", "=v"() 7317 %shuf = shufflevector <4 x i16> 
%vec0, <4 x i16> %vec1, <3 x i32> <i32 4, i32 7, i32 7> 7318 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 7319 ret void 7320} 7321 7322define void @v_shuffle_v3i16_v4i16__5_7_7(ptr addrspace(1) inreg %ptr) { 7323; GFX900-LABEL: v_shuffle_v3i16_v4i16__5_7_7: 7324; GFX900: ; %bb.0: 7325; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7326; GFX900-NEXT: ;;#ASMSTART 7327; GFX900-NEXT: ; def v[0:1] 7328; GFX900-NEXT: ;;#ASMEND 7329; GFX900-NEXT: s_mov_b32 s4, 0x7060302 7330; GFX900-NEXT: v_mov_b32_e32 v2, 0 7331; GFX900-NEXT: v_perm_b32 v0, v1, v0, s4 7332; GFX900-NEXT: v_lshrrev_b32_e32 v1, 16, v1 7333; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 7334; GFX900-NEXT: global_store_short v2, v1, s[16:17] offset:4 7335; GFX900-NEXT: s_waitcnt vmcnt(0) 7336; GFX900-NEXT: s_setpc_b64 s[30:31] 7337; 7338; GFX90A-LABEL: v_shuffle_v3i16_v4i16__5_7_7: 7339; GFX90A: ; %bb.0: 7340; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7341; GFX90A-NEXT: ;;#ASMSTART 7342; GFX90A-NEXT: ; def v[0:1] 7343; GFX90A-NEXT: ;;#ASMEND 7344; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 7345; GFX90A-NEXT: v_mov_b32_e32 v2, 0 7346; GFX90A-NEXT: v_perm_b32 v0, v1, v0, s4 7347; GFX90A-NEXT: v_lshrrev_b32_e32 v1, 16, v1 7348; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 7349; GFX90A-NEXT: global_store_short v2, v1, s[16:17] offset:4 7350; GFX90A-NEXT: s_waitcnt vmcnt(0) 7351; GFX90A-NEXT: s_setpc_b64 s[30:31] 7352; 7353; GFX940-LABEL: v_shuffle_v3i16_v4i16__5_7_7: 7354; GFX940: ; %bb.0: 7355; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7356; GFX940-NEXT: ;;#ASMSTART 7357; GFX940-NEXT: ; def v[0:1] 7358; GFX940-NEXT: ;;#ASMEND 7359; GFX940-NEXT: s_mov_b32 s2, 0x7060302 7360; GFX940-NEXT: v_mov_b32_e32 v2, 0 7361; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2 7362; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1 7363; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 7364; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 7365; GFX940-NEXT: s_waitcnt vmcnt(0) 7366; 
GFX940-NEXT: s_setpc_b64 s[30:31] 7367 %vec0 = call <4 x i16> asm "; def $0", "=v"() 7368 %vec1 = call <4 x i16> asm "; def $0", "=v"() 7369 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 5, i32 7, i32 7> 7370 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 7371 ret void 7372} 7373 7374define void @v_shuffle_v3i16_v4i16__6_7_7(ptr addrspace(1) inreg %ptr) { 7375; GFX900-LABEL: v_shuffle_v3i16_v4i16__6_7_7: 7376; GFX900: ; %bb.0: 7377; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7378; GFX900-NEXT: v_mov_b32_e32 v2, 0 7379; GFX900-NEXT: ;;#ASMSTART 7380; GFX900-NEXT: ; def v[0:1] 7381; GFX900-NEXT: ;;#ASMEND 7382; GFX900-NEXT: global_store_short_d16_hi v2, v1, s[16:17] offset:4 7383; GFX900-NEXT: global_store_dword v2, v1, s[16:17] 7384; GFX900-NEXT: s_waitcnt vmcnt(0) 7385; GFX900-NEXT: s_setpc_b64 s[30:31] 7386; 7387; GFX90A-LABEL: v_shuffle_v3i16_v4i16__6_7_7: 7388; GFX90A: ; %bb.0: 7389; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7390; GFX90A-NEXT: v_mov_b32_e32 v2, 0 7391; GFX90A-NEXT: ;;#ASMSTART 7392; GFX90A-NEXT: ; def v[0:1] 7393; GFX90A-NEXT: ;;#ASMEND 7394; GFX90A-NEXT: global_store_short_d16_hi v2, v1, s[16:17] offset:4 7395; GFX90A-NEXT: global_store_dword v2, v1, s[16:17] 7396; GFX90A-NEXT: s_waitcnt vmcnt(0) 7397; GFX90A-NEXT: s_setpc_b64 s[30:31] 7398; 7399; GFX940-LABEL: v_shuffle_v3i16_v4i16__6_7_7: 7400; GFX940: ; %bb.0: 7401; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7402; GFX940-NEXT: v_mov_b32_e32 v2, 0 7403; GFX940-NEXT: ;;#ASMSTART 7404; GFX940-NEXT: ; def v[0:1] 7405; GFX940-NEXT: ;;#ASMEND 7406; GFX940-NEXT: global_store_short_d16_hi v2, v1, s[0:1] offset:4 sc0 sc1 7407; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 7408; GFX940-NEXT: s_waitcnt vmcnt(0) 7409; GFX940-NEXT: s_setpc_b64 s[30:31] 7410 %vec0 = call <4 x i16> asm "; def $0", "=v"() 7411 %vec1 = call <4 x i16> asm "; def $0", "=v"() 7412 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 6, i32 
7, i32 7> 7413 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 7414 ret void 7415} 7416 7417define void @v_shuffle_v3i16_v4i16__7_u_7(ptr addrspace(1) inreg %ptr) { 7418; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_u_7: 7419; GFX900: ; %bb.0: 7420; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7421; GFX900-NEXT: ;;#ASMSTART 7422; GFX900-NEXT: ; def v[0:1] 7423; GFX900-NEXT: ;;#ASMEND 7424; GFX900-NEXT: v_mov_b32_e32 v2, 0 7425; GFX900-NEXT: v_lshrrev_b32_e32 v0, 16, v1 7426; GFX900-NEXT: v_alignbit_b32 v1, s4, v1, 16 7427; GFX900-NEXT: global_store_dword v2, v1, s[16:17] 7428; GFX900-NEXT: global_store_short v2, v0, s[16:17] offset:4 7429; GFX900-NEXT: s_waitcnt vmcnt(0) 7430; GFX900-NEXT: s_setpc_b64 s[30:31] 7431; 7432; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_u_7: 7433; GFX90A: ; %bb.0: 7434; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7435; GFX90A-NEXT: ;;#ASMSTART 7436; GFX90A-NEXT: ; def v[0:1] 7437; GFX90A-NEXT: ;;#ASMEND 7438; GFX90A-NEXT: v_mov_b32_e32 v2, 0 7439; GFX90A-NEXT: v_lshrrev_b32_e32 v0, 16, v1 7440; GFX90A-NEXT: v_alignbit_b32 v1, s4, v1, 16 7441; GFX90A-NEXT: global_store_dword v2, v1, s[16:17] 7442; GFX90A-NEXT: global_store_short v2, v0, s[16:17] offset:4 7443; GFX90A-NEXT: s_waitcnt vmcnt(0) 7444; GFX90A-NEXT: s_setpc_b64 s[30:31] 7445; 7446; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_u_7: 7447; GFX940: ; %bb.0: 7448; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7449; GFX940-NEXT: ;;#ASMSTART 7450; GFX940-NEXT: ; def v[0:1] 7451; GFX940-NEXT: ;;#ASMEND 7452; GFX940-NEXT: v_mov_b32_e32 v2, 0 7453; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v1 7454; GFX940-NEXT: v_alignbit_b32 v1, s0, v1, 16 7455; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 7456; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 7457; GFX940-NEXT: s_waitcnt vmcnt(0) 7458; GFX940-NEXT: s_setpc_b64 s[30:31] 7459 %vec0 = call <4 x i16> asm "; def $0", "=v"() 7460 %vec1 = call <4 x i16> asm "; def $0", "=v"() 7461 %shuf = shufflevector <4 x 
i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 poison, i32 7> 7462 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 7463 ret void 7464} 7465 7466define void @v_shuffle_v3i16_v4i16__7_0_7(ptr addrspace(1) inreg %ptr) { 7467; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_0_7: 7468; GFX900: ; %bb.0: 7469; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7470; GFX900-NEXT: ;;#ASMSTART 7471; GFX900-NEXT: ; def v[0:1] 7472; GFX900-NEXT: ;;#ASMEND 7473; GFX900-NEXT: v_mov_b32_e32 v3, 0 7474; GFX900-NEXT: ;;#ASMSTART 7475; GFX900-NEXT: ; def v[1:2] 7476; GFX900-NEXT: ;;#ASMEND 7477; GFX900-NEXT: v_alignbit_b32 v0, v0, v2, 16 7478; GFX900-NEXT: v_lshrrev_b32_e32 v1, 16, v2 7479; GFX900-NEXT: global_store_dword v3, v0, s[16:17] 7480; GFX900-NEXT: global_store_short v3, v1, s[16:17] offset:4 7481; GFX900-NEXT: s_waitcnt vmcnt(0) 7482; GFX900-NEXT: s_setpc_b64 s[30:31] 7483; 7484; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_0_7: 7485; GFX90A: ; %bb.0: 7486; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7487; GFX90A-NEXT: ;;#ASMSTART 7488; GFX90A-NEXT: ; def v[0:1] 7489; GFX90A-NEXT: ;;#ASMEND 7490; GFX90A-NEXT: v_mov_b32_e32 v4, 0 7491; GFX90A-NEXT: ;;#ASMSTART 7492; GFX90A-NEXT: ; def v[2:3] 7493; GFX90A-NEXT: ;;#ASMEND 7494; GFX90A-NEXT: v_alignbit_b32 v0, v0, v3, 16 7495; GFX90A-NEXT: v_lshrrev_b32_e32 v1, 16, v3 7496; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 7497; GFX90A-NEXT: global_store_short v4, v1, s[16:17] offset:4 7498; GFX90A-NEXT: s_waitcnt vmcnt(0) 7499; GFX90A-NEXT: s_setpc_b64 s[30:31] 7500; 7501; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_0_7: 7502; GFX940: ; %bb.0: 7503; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7504; GFX940-NEXT: ;;#ASMSTART 7505; GFX940-NEXT: ; def v[0:1] 7506; GFX940-NEXT: ;;#ASMEND 7507; GFX940-NEXT: v_mov_b32_e32 v4, 0 7508; GFX940-NEXT: ;;#ASMSTART 7509; GFX940-NEXT: ; def v[2:3] 7510; GFX940-NEXT: ;;#ASMEND 7511; GFX940-NEXT: s_nop 0 7512; GFX940-NEXT: v_alignbit_b32 v0, v0, v3, 16 7513; GFX940-NEXT: 
v_lshrrev_b32_e32 v1, 16, v3 7514; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 7515; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 7516; GFX940-NEXT: s_waitcnt vmcnt(0) 7517; GFX940-NEXT: s_setpc_b64 s[30:31] 7518 %vec0 = call <4 x i16> asm "; def $0", "=v"() 7519 %vec1 = call <4 x i16> asm "; def $0", "=v"() 7520 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 0, i32 7> 7521 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 7522 ret void 7523} 7524 7525define void @v_shuffle_v3i16_v4i16__7_1_7(ptr addrspace(1) inreg %ptr) { 7526; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_1_7: 7527; GFX900: ; %bb.0: 7528; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7529; GFX900-NEXT: ;;#ASMSTART 7530; GFX900-NEXT: ; def v[0:1] 7531; GFX900-NEXT: ;;#ASMEND 7532; GFX900-NEXT: s_mov_b32 s4, 0x7060302 7533; GFX900-NEXT: v_mov_b32_e32 v3, 0 7534; GFX900-NEXT: ;;#ASMSTART 7535; GFX900-NEXT: ; def v[1:2] 7536; GFX900-NEXT: ;;#ASMEND 7537; GFX900-NEXT: v_perm_b32 v0, v0, v2, s4 7538; GFX900-NEXT: v_lshrrev_b32_e32 v1, 16, v2 7539; GFX900-NEXT: global_store_dword v3, v0, s[16:17] 7540; GFX900-NEXT: global_store_short v3, v1, s[16:17] offset:4 7541; GFX900-NEXT: s_waitcnt vmcnt(0) 7542; GFX900-NEXT: s_setpc_b64 s[30:31] 7543; 7544; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_1_7: 7545; GFX90A: ; %bb.0: 7546; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7547; GFX90A-NEXT: ;;#ASMSTART 7548; GFX90A-NEXT: ; def v[0:1] 7549; GFX90A-NEXT: ;;#ASMEND 7550; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 7551; GFX90A-NEXT: v_mov_b32_e32 v4, 0 7552; GFX90A-NEXT: ;;#ASMSTART 7553; GFX90A-NEXT: ; def v[2:3] 7554; GFX90A-NEXT: ;;#ASMEND 7555; GFX90A-NEXT: v_perm_b32 v0, v0, v3, s4 7556; GFX90A-NEXT: v_lshrrev_b32_e32 v1, 16, v3 7557; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 7558; GFX90A-NEXT: global_store_short v4, v1, s[16:17] offset:4 7559; GFX90A-NEXT: s_waitcnt vmcnt(0) 7560; GFX90A-NEXT: s_setpc_b64 s[30:31] 7561; 7562; 
GFX940-LABEL: v_shuffle_v3i16_v4i16__7_1_7: 7563; GFX940: ; %bb.0: 7564; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7565; GFX940-NEXT: ;;#ASMSTART 7566; GFX940-NEXT: ; def v[0:1] 7567; GFX940-NEXT: ;;#ASMEND 7568; GFX940-NEXT: s_mov_b32 s2, 0x7060302 7569; GFX940-NEXT: v_mov_b32_e32 v4, 0 7570; GFX940-NEXT: ;;#ASMSTART 7571; GFX940-NEXT: ; def v[2:3] 7572; GFX940-NEXT: ;;#ASMEND 7573; GFX940-NEXT: s_nop 0 7574; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 7575; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v3 7576; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 7577; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 7578; GFX940-NEXT: s_waitcnt vmcnt(0) 7579; GFX940-NEXT: s_setpc_b64 s[30:31] 7580 %vec0 = call <4 x i16> asm "; def $0", "=v"() 7581 %vec1 = call <4 x i16> asm "; def $0", "=v"() 7582 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 1, i32 7> 7583 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 7584 ret void 7585} 7586 7587define void @v_shuffle_v3i16_v4i16__7_2_7(ptr addrspace(1) inreg %ptr) { 7588; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_2_7: 7589; GFX900: ; %bb.0: 7590; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7591; GFX900-NEXT: ;;#ASMSTART 7592; GFX900-NEXT: ; def v[0:1] 7593; GFX900-NEXT: ;;#ASMEND 7594; GFX900-NEXT: v_mov_b32_e32 v4, 0 7595; GFX900-NEXT: ;;#ASMSTART 7596; GFX900-NEXT: ; def v[2:3] 7597; GFX900-NEXT: ;;#ASMEND 7598; GFX900-NEXT: v_alignbit_b32 v0, v1, v3, 16 7599; GFX900-NEXT: v_lshrrev_b32_e32 v1, 16, v3 7600; GFX900-NEXT: global_store_short v4, v1, s[16:17] offset:4 7601; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 7602; GFX900-NEXT: s_waitcnt vmcnt(0) 7603; GFX900-NEXT: s_setpc_b64 s[30:31] 7604; 7605; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_2_7: 7606; GFX90A: ; %bb.0: 7607; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7608; GFX90A-NEXT: ;;#ASMSTART 7609; GFX90A-NEXT: ; def v[0:1] 7610; GFX90A-NEXT: ;;#ASMEND 7611; GFX90A-NEXT: v_mov_b32_e32 v4, 0 
7612; GFX90A-NEXT: ;;#ASMSTART 7613; GFX90A-NEXT: ; def v[2:3] 7614; GFX90A-NEXT: ;;#ASMEND 7615; GFX90A-NEXT: v_alignbit_b32 v0, v1, v3, 16 7616; GFX90A-NEXT: v_lshrrev_b32_e32 v1, 16, v3 7617; GFX90A-NEXT: global_store_short v4, v1, s[16:17] offset:4 7618; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 7619; GFX90A-NEXT: s_waitcnt vmcnt(0) 7620; GFX90A-NEXT: s_setpc_b64 s[30:31] 7621; 7622; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_2_7: 7623; GFX940: ; %bb.0: 7624; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7625; GFX940-NEXT: ;;#ASMSTART 7626; GFX940-NEXT: ; def v[0:1] 7627; GFX940-NEXT: ;;#ASMEND 7628; GFX940-NEXT: v_mov_b32_e32 v4, 0 7629; GFX940-NEXT: ;;#ASMSTART 7630; GFX940-NEXT: ; def v[2:3] 7631; GFX940-NEXT: ;;#ASMEND 7632; GFX940-NEXT: s_nop 0 7633; GFX940-NEXT: v_alignbit_b32 v0, v1, v3, 16 7634; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v3 7635; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 7636; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 7637; GFX940-NEXT: s_waitcnt vmcnt(0) 7638; GFX940-NEXT: s_setpc_b64 s[30:31] 7639 %vec0 = call <4 x i16> asm "; def $0", "=v"() 7640 %vec1 = call <4 x i16> asm "; def $0", "=v"() 7641 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 2, i32 7> 7642 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 7643 ret void 7644} 7645 7646define void @v_shuffle_v3i16_v4i16__7_3_7(ptr addrspace(1) inreg %ptr) { 7647; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_3_7: 7648; GFX900: ; %bb.0: 7649; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7650; GFX900-NEXT: ;;#ASMSTART 7651; GFX900-NEXT: ; def v[0:1] 7652; GFX900-NEXT: ;;#ASMEND 7653; GFX900-NEXT: s_mov_b32 s4, 0x7060302 7654; GFX900-NEXT: v_mov_b32_e32 v4, 0 7655; GFX900-NEXT: ;;#ASMSTART 7656; GFX900-NEXT: ; def v[2:3] 7657; GFX900-NEXT: ;;#ASMEND 7658; GFX900-NEXT: v_perm_b32 v0, v1, v3, s4 7659; GFX900-NEXT: v_lshrrev_b32_e32 v1, 16, v3 7660; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 7661; 
GFX900-NEXT: global_store_short v4, v1, s[16:17] offset:4 7662; GFX900-NEXT: s_waitcnt vmcnt(0) 7663; GFX900-NEXT: s_setpc_b64 s[30:31] 7664; 7665; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_3_7: 7666; GFX90A: ; %bb.0: 7667; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7668; GFX90A-NEXT: ;;#ASMSTART 7669; GFX90A-NEXT: ; def v[0:1] 7670; GFX90A-NEXT: ;;#ASMEND 7671; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 7672; GFX90A-NEXT: v_mov_b32_e32 v4, 0 7673; GFX90A-NEXT: ;;#ASMSTART 7674; GFX90A-NEXT: ; def v[2:3] 7675; GFX90A-NEXT: ;;#ASMEND 7676; GFX90A-NEXT: v_perm_b32 v0, v1, v3, s4 7677; GFX90A-NEXT: v_lshrrev_b32_e32 v1, 16, v3 7678; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 7679; GFX90A-NEXT: global_store_short v4, v1, s[16:17] offset:4 7680; GFX90A-NEXT: s_waitcnt vmcnt(0) 7681; GFX90A-NEXT: s_setpc_b64 s[30:31] 7682; 7683; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_3_7: 7684; GFX940: ; %bb.0: 7685; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7686; GFX940-NEXT: ;;#ASMSTART 7687; GFX940-NEXT: ; def v[0:1] 7688; GFX940-NEXT: ;;#ASMEND 7689; GFX940-NEXT: s_mov_b32 s2, 0x7060302 7690; GFX940-NEXT: v_mov_b32_e32 v4, 0 7691; GFX940-NEXT: ;;#ASMSTART 7692; GFX940-NEXT: ; def v[2:3] 7693; GFX940-NEXT: ;;#ASMEND 7694; GFX940-NEXT: s_nop 0 7695; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 7696; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v3 7697; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 7698; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 7699; GFX940-NEXT: s_waitcnt vmcnt(0) 7700; GFX940-NEXT: s_setpc_b64 s[30:31] 7701 %vec0 = call <4 x i16> asm "; def $0", "=v"() 7702 %vec1 = call <4 x i16> asm "; def $0", "=v"() 7703 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 3, i32 7> 7704 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 7705 ret void 7706} 7707 7708define void @v_shuffle_v3i16_v4i16__7_4_7(ptr addrspace(1) inreg %ptr) { 7709; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_4_7: 7710; GFX900: ; %bb.0: 7711; 
GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7712; GFX900-NEXT: ;;#ASMSTART 7713; GFX900-NEXT: ; def v[0:1] 7714; GFX900-NEXT: ;;#ASMEND 7715; GFX900-NEXT: v_mov_b32_e32 v2, 0 7716; GFX900-NEXT: v_alignbit_b32 v0, v0, v1, 16 7717; GFX900-NEXT: v_lshrrev_b32_e32 v3, 16, v1 7718; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 7719; GFX900-NEXT: global_store_short v2, v3, s[16:17] offset:4 7720; GFX900-NEXT: s_waitcnt vmcnt(0) 7721; GFX900-NEXT: s_setpc_b64 s[30:31] 7722; 7723; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_4_7: 7724; GFX90A: ; %bb.0: 7725; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7726; GFX90A-NEXT: ;;#ASMSTART 7727; GFX90A-NEXT: ; def v[0:1] 7728; GFX90A-NEXT: ;;#ASMEND 7729; GFX90A-NEXT: v_mov_b32_e32 v2, 0 7730; GFX90A-NEXT: v_alignbit_b32 v0, v0, v1, 16 7731; GFX90A-NEXT: v_lshrrev_b32_e32 v3, 16, v1 7732; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 7733; GFX90A-NEXT: global_store_short v2, v3, s[16:17] offset:4 7734; GFX90A-NEXT: s_waitcnt vmcnt(0) 7735; GFX90A-NEXT: s_setpc_b64 s[30:31] 7736; 7737; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_4_7: 7738; GFX940: ; %bb.0: 7739; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7740; GFX940-NEXT: ;;#ASMSTART 7741; GFX940-NEXT: ; def v[0:1] 7742; GFX940-NEXT: ;;#ASMEND 7743; GFX940-NEXT: v_mov_b32_e32 v2, 0 7744; GFX940-NEXT: v_alignbit_b32 v0, v0, v1, 16 7745; GFX940-NEXT: v_lshrrev_b32_e32 v3, 16, v1 7746; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 7747; GFX940-NEXT: global_store_short v2, v3, s[0:1] offset:4 sc0 sc1 7748; GFX940-NEXT: s_waitcnt vmcnt(0) 7749; GFX940-NEXT: s_setpc_b64 s[30:31] 7750 %vec0 = call <4 x i16> asm "; def $0", "=v"() 7751 %vec1 = call <4 x i16> asm "; def $0", "=v"() 7752 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 4, i32 7> 7753 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 7754 ret void 7755} 7756 7757define void @v_shuffle_v3i16_v4i16__7_5_7(ptr addrspace(1) inreg %ptr) { 7758; GFX900-LABEL: 
v_shuffle_v3i16_v4i16__7_5_7: 7759; GFX900: ; %bb.0: 7760; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7761; GFX900-NEXT: ;;#ASMSTART 7762; GFX900-NEXT: ; def v[0:1] 7763; GFX900-NEXT: ;;#ASMEND 7764; GFX900-NEXT: s_mov_b32 s4, 0x7060302 7765; GFX900-NEXT: v_mov_b32_e32 v2, 0 7766; GFX900-NEXT: v_perm_b32 v0, v0, v1, s4 7767; GFX900-NEXT: v_lshrrev_b32_e32 v1, 16, v1 7768; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 7769; GFX900-NEXT: global_store_short v2, v1, s[16:17] offset:4 7770; GFX900-NEXT: s_waitcnt vmcnt(0) 7771; GFX900-NEXT: s_setpc_b64 s[30:31] 7772; 7773; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_5_7: 7774; GFX90A: ; %bb.0: 7775; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7776; GFX90A-NEXT: ;;#ASMSTART 7777; GFX90A-NEXT: ; def v[0:1] 7778; GFX90A-NEXT: ;;#ASMEND 7779; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 7780; GFX90A-NEXT: v_mov_b32_e32 v2, 0 7781; GFX90A-NEXT: v_perm_b32 v0, v0, v1, s4 7782; GFX90A-NEXT: v_lshrrev_b32_e32 v1, 16, v1 7783; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 7784; GFX90A-NEXT: global_store_short v2, v1, s[16:17] offset:4 7785; GFX90A-NEXT: s_waitcnt vmcnt(0) 7786; GFX90A-NEXT: s_setpc_b64 s[30:31] 7787; 7788; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_5_7: 7789; GFX940: ; %bb.0: 7790; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7791; GFX940-NEXT: ;;#ASMSTART 7792; GFX940-NEXT: ; def v[0:1] 7793; GFX940-NEXT: ;;#ASMEND 7794; GFX940-NEXT: s_mov_b32 s2, 0x7060302 7795; GFX940-NEXT: v_mov_b32_e32 v2, 0 7796; GFX940-NEXT: v_perm_b32 v0, v0, v1, s2 7797; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1 7798; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 7799; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 7800; GFX940-NEXT: s_waitcnt vmcnt(0) 7801; GFX940-NEXT: s_setpc_b64 s[30:31] 7802 %vec0 = call <4 x i16> asm "; def $0", "=v"() 7803 %vec1 = call <4 x i16> asm "; def $0", "=v"() 7804 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 5, i32 7> 7805 store 
<3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 7806 ret void 7807} 7808 7809define void @v_shuffle_v3i16_v4i16__7_6_7(ptr addrspace(1) inreg %ptr) { 7810; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_6_7: 7811; GFX900: ; %bb.0: 7812; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7813; GFX900-NEXT: ;;#ASMSTART 7814; GFX900-NEXT: ; def v[0:1] 7815; GFX900-NEXT: ;;#ASMEND 7816; GFX900-NEXT: v_mov_b32_e32 v2, 0 7817; GFX900-NEXT: v_alignbit_b32 v0, v1, v1, 16 7818; GFX900-NEXT: v_lshrrev_b32_e32 v1, 16, v1 7819; GFX900-NEXT: global_store_short v2, v1, s[16:17] offset:4 7820; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 7821; GFX900-NEXT: s_waitcnt vmcnt(0) 7822; GFX900-NEXT: s_setpc_b64 s[30:31] 7823; 7824; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_6_7: 7825; GFX90A: ; %bb.0: 7826; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7827; GFX90A-NEXT: ;;#ASMSTART 7828; GFX90A-NEXT: ; def v[0:1] 7829; GFX90A-NEXT: ;;#ASMEND 7830; GFX90A-NEXT: v_mov_b32_e32 v2, 0 7831; GFX90A-NEXT: v_alignbit_b32 v0, v1, v1, 16 7832; GFX90A-NEXT: v_lshrrev_b32_e32 v1, 16, v1 7833; GFX90A-NEXT: global_store_short v2, v1, s[16:17] offset:4 7834; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 7835; GFX90A-NEXT: s_waitcnt vmcnt(0) 7836; GFX90A-NEXT: s_setpc_b64 s[30:31] 7837; 7838; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_6_7: 7839; GFX940: ; %bb.0: 7840; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7841; GFX940-NEXT: ;;#ASMSTART 7842; GFX940-NEXT: ; def v[0:1] 7843; GFX940-NEXT: ;;#ASMEND 7844; GFX940-NEXT: v_mov_b32_e32 v2, 0 7845; GFX940-NEXT: v_alignbit_b32 v0, v1, v1, 16 7846; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1 7847; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 7848; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 7849; GFX940-NEXT: s_waitcnt vmcnt(0) 7850; GFX940-NEXT: s_setpc_b64 s[30:31] 7851 %vec0 = call <4 x i16> asm "; def $0", "=v"() 7852 %vec1 = call <4 x i16> asm "; def $0", "=v"() 7853 %shuf = shufflevector <4 x i16> %vec0, <4 x 
i16> %vec1, <3 x i32> <i32 7, i32 6, i32 7> 7854 store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8 7855 ret void 7856} 7857 7858define void @s_shuffle_v3i16_v4i16__u_u_u() { 7859; GFX9-LABEL: s_shuffle_v3i16_v4i16__u_u_u: 7860; GFX9: ; %bb.0: 7861; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7862; GFX9-NEXT: ;;#ASMSTART 7863; GFX9-NEXT: ; use s[8:9] 7864; GFX9-NEXT: ;;#ASMEND 7865; GFX9-NEXT: s_setpc_b64 s[30:31] 7866 %vec0 = call <4 x i16> asm "; def $0", "=s"() 7867 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> poison 7868 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 7869 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 7870 ret void 7871} 7872 7873define void @s_shuffle_v3i16_v4i16__0_u_u() { 7874; GFX900-LABEL: s_shuffle_v3i16_v4i16__0_u_u: 7875; GFX900: ; %bb.0: 7876; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7877; GFX900-NEXT: ;;#ASMSTART 7878; GFX900-NEXT: ; def s[8:9] 7879; GFX900-NEXT: ;;#ASMEND 7880; GFX900-NEXT: ;;#ASMSTART 7881; GFX900-NEXT: ; use s[8:9] 7882; GFX900-NEXT: ;;#ASMEND 7883; GFX900-NEXT: s_setpc_b64 s[30:31] 7884; 7885; GFX90A-LABEL: s_shuffle_v3i16_v4i16__0_u_u: 7886; GFX90A: ; %bb.0: 7887; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7888; GFX90A-NEXT: ;;#ASMSTART 7889; GFX90A-NEXT: ; def s[8:9] 7890; GFX90A-NEXT: ;;#ASMEND 7891; GFX90A-NEXT: ;;#ASMSTART 7892; GFX90A-NEXT: ; use s[8:9] 7893; GFX90A-NEXT: ;;#ASMEND 7894; GFX90A-NEXT: s_setpc_b64 s[30:31] 7895; 7896; GFX940-LABEL: s_shuffle_v3i16_v4i16__0_u_u: 7897; GFX940: ; %bb.0: 7898; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7899; GFX940-NEXT: ;;#ASMSTART 7900; GFX940-NEXT: ; def s[8:9] 7901; GFX940-NEXT: ;;#ASMEND 7902; GFX940-NEXT: s_nop 0 7903; GFX940-NEXT: ;;#ASMSTART 7904; GFX940-NEXT: ; use s[8:9] 7905; GFX940-NEXT: ;;#ASMEND 7906; GFX940-NEXT: s_setpc_b64 s[30:31] 7907 %vec0 = call <4 x i16> asm "; def $0", "=s"() 7908 %shuf = shufflevector 
<4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 poison, i32 poison> 7909 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 7910 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 7911 ret void 7912} 7913 7914define void @s_shuffle_v3i16_v4i16__1_u_u() { 7915; GFX900-LABEL: s_shuffle_v3i16_v4i16__1_u_u: 7916; GFX900: ; %bb.0: 7917; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7918; GFX900-NEXT: ;;#ASMSTART 7919; GFX900-NEXT: ; def s[4:5] 7920; GFX900-NEXT: ;;#ASMEND 7921; GFX900-NEXT: s_lshr_b32 s8, s4, 16 7922; GFX900-NEXT: ;;#ASMSTART 7923; GFX900-NEXT: ; use s[8:9] 7924; GFX900-NEXT: ;;#ASMEND 7925; GFX900-NEXT: s_setpc_b64 s[30:31] 7926; 7927; GFX90A-LABEL: s_shuffle_v3i16_v4i16__1_u_u: 7928; GFX90A: ; %bb.0: 7929; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7930; GFX90A-NEXT: ;;#ASMSTART 7931; GFX90A-NEXT: ; def s[4:5] 7932; GFX90A-NEXT: ;;#ASMEND 7933; GFX90A-NEXT: s_lshr_b32 s8, s4, 16 7934; GFX90A-NEXT: ;;#ASMSTART 7935; GFX90A-NEXT: ; use s[8:9] 7936; GFX90A-NEXT: ;;#ASMEND 7937; GFX90A-NEXT: s_setpc_b64 s[30:31] 7938; 7939; GFX940-LABEL: s_shuffle_v3i16_v4i16__1_u_u: 7940; GFX940: ; %bb.0: 7941; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7942; GFX940-NEXT: ;;#ASMSTART 7943; GFX940-NEXT: ; def s[0:1] 7944; GFX940-NEXT: ;;#ASMEND 7945; GFX940-NEXT: s_lshr_b32 s8, s0, 16 7946; GFX940-NEXT: ;;#ASMSTART 7947; GFX940-NEXT: ; use s[8:9] 7948; GFX940-NEXT: ;;#ASMEND 7949; GFX940-NEXT: s_setpc_b64 s[30:31] 7950 %vec0 = call <4 x i16> asm "; def $0", "=s"() 7951 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 1, i32 poison, i32 poison> 7952 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 7953 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 7954 ret void 7955} 7956 7957define void @s_shuffle_v3i16_v4i16__2_u_u() { 7958; GFX900-LABEL: s_shuffle_v3i16_v4i16__2_u_u: 
7959; GFX900: ; %bb.0: 7960; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7961; GFX900-NEXT: ;;#ASMSTART 7962; GFX900-NEXT: ; def s[4:5] 7963; GFX900-NEXT: ;;#ASMEND 7964; GFX900-NEXT: s_mov_b32 s8, s5 7965; GFX900-NEXT: ;;#ASMSTART 7966; GFX900-NEXT: ; use s[8:9] 7967; GFX900-NEXT: ;;#ASMEND 7968; GFX900-NEXT: s_setpc_b64 s[30:31] 7969; 7970; GFX90A-LABEL: s_shuffle_v3i16_v4i16__2_u_u: 7971; GFX90A: ; %bb.0: 7972; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7973; GFX90A-NEXT: ;;#ASMSTART 7974; GFX90A-NEXT: ; def s[4:5] 7975; GFX90A-NEXT: ;;#ASMEND 7976; GFX90A-NEXT: s_mov_b32 s8, s5 7977; GFX90A-NEXT: ;;#ASMSTART 7978; GFX90A-NEXT: ; use s[8:9] 7979; GFX90A-NEXT: ;;#ASMEND 7980; GFX90A-NEXT: s_setpc_b64 s[30:31] 7981; 7982; GFX940-LABEL: s_shuffle_v3i16_v4i16__2_u_u: 7983; GFX940: ; %bb.0: 7984; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7985; GFX940-NEXT: ;;#ASMSTART 7986; GFX940-NEXT: ; def s[0:1] 7987; GFX940-NEXT: ;;#ASMEND 7988; GFX940-NEXT: s_mov_b32 s8, s1 7989; GFX940-NEXT: ;;#ASMSTART 7990; GFX940-NEXT: ; use s[8:9] 7991; GFX940-NEXT: ;;#ASMEND 7992; GFX940-NEXT: s_setpc_b64 s[30:31] 7993 %vec0 = call <4 x i16> asm "; def $0", "=s"() 7994 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 2, i32 poison, i32 poison> 7995 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 7996 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 7997 ret void 7998} 7999 8000define void @s_shuffle_v3i16_v4i16__3_u_u() { 8001; GFX900-LABEL: s_shuffle_v3i16_v4i16__3_u_u: 8002; GFX900: ; %bb.0: 8003; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8004; GFX900-NEXT: ;;#ASMSTART 8005; GFX900-NEXT: ; def s[4:5] 8006; GFX900-NEXT: ;;#ASMEND 8007; GFX900-NEXT: s_lshr_b32 s8, s5, 16 8008; GFX900-NEXT: ;;#ASMSTART 8009; GFX900-NEXT: ; use s[8:9] 8010; GFX900-NEXT: ;;#ASMEND 8011; GFX900-NEXT: s_setpc_b64 s[30:31] 8012; 8013; GFX90A-LABEL: 
s_shuffle_v3i16_v4i16__3_u_u: 8014; GFX90A: ; %bb.0: 8015; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8016; GFX90A-NEXT: ;;#ASMSTART 8017; GFX90A-NEXT: ; def s[4:5] 8018; GFX90A-NEXT: ;;#ASMEND 8019; GFX90A-NEXT: s_lshr_b32 s8, s5, 16 8020; GFX90A-NEXT: ;;#ASMSTART 8021; GFX90A-NEXT: ; use s[8:9] 8022; GFX90A-NEXT: ;;#ASMEND 8023; GFX90A-NEXT: s_setpc_b64 s[30:31] 8024; 8025; GFX940-LABEL: s_shuffle_v3i16_v4i16__3_u_u: 8026; GFX940: ; %bb.0: 8027; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8028; GFX940-NEXT: ;;#ASMSTART 8029; GFX940-NEXT: ; def s[0:1] 8030; GFX940-NEXT: ;;#ASMEND 8031; GFX940-NEXT: s_lshr_b32 s8, s1, 16 8032; GFX940-NEXT: ;;#ASMSTART 8033; GFX940-NEXT: ; use s[8:9] 8034; GFX940-NEXT: ;;#ASMEND 8035; GFX940-NEXT: s_setpc_b64 s[30:31] 8036 %vec0 = call <4 x i16> asm "; def $0", "=s"() 8037 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 3, i32 poison, i32 poison> 8038 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 8039 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 8040 ret void 8041} 8042 8043define void @s_shuffle_v3i16_v4i16__4_u_u() { 8044; GFX9-LABEL: s_shuffle_v3i16_v4i16__4_u_u: 8045; GFX9: ; %bb.0: 8046; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8047; GFX9-NEXT: ;;#ASMSTART 8048; GFX9-NEXT: ; use s[8:9] 8049; GFX9-NEXT: ;;#ASMEND 8050; GFX9-NEXT: s_setpc_b64 s[30:31] 8051 %vec0 = call <4 x i16> asm "; def $0", "=s"() 8052 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 4, i32 poison, i32 poison> 8053 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 8054 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 8055 ret void 8056} 8057 8058define void @s_shuffle_v3i16_v4i16__5_u_u() { 8059; GFX900-LABEL: s_shuffle_v3i16_v4i16__5_u_u: 8060; GFX900: ; %bb.0: 8061; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8062; 
GFX900-NEXT: ;;#ASMSTART 8063; GFX900-NEXT: ; def s[4:5] 8064; GFX900-NEXT: ;;#ASMEND 8065; GFX900-NEXT: s_lshr_b32 s8, s4, 16 8066; GFX900-NEXT: ;;#ASMSTART 8067; GFX900-NEXT: ; use s[8:9] 8068; GFX900-NEXT: ;;#ASMEND 8069; GFX900-NEXT: s_setpc_b64 s[30:31] 8070; 8071; GFX90A-LABEL: s_shuffle_v3i16_v4i16__5_u_u: 8072; GFX90A: ; %bb.0: 8073; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8074; GFX90A-NEXT: ;;#ASMSTART 8075; GFX90A-NEXT: ; def s[4:5] 8076; GFX90A-NEXT: ;;#ASMEND 8077; GFX90A-NEXT: s_lshr_b32 s8, s4, 16 8078; GFX90A-NEXT: ;;#ASMSTART 8079; GFX90A-NEXT: ; use s[8:9] 8080; GFX90A-NEXT: ;;#ASMEND 8081; GFX90A-NEXT: s_setpc_b64 s[30:31] 8082; 8083; GFX940-LABEL: s_shuffle_v3i16_v4i16__5_u_u: 8084; GFX940: ; %bb.0: 8085; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8086; GFX940-NEXT: ;;#ASMSTART 8087; GFX940-NEXT: ; def s[0:1] 8088; GFX940-NEXT: ;;#ASMEND 8089; GFX940-NEXT: s_lshr_b32 s8, s0, 16 8090; GFX940-NEXT: ;;#ASMSTART 8091; GFX940-NEXT: ; use s[8:9] 8092; GFX940-NEXT: ;;#ASMEND 8093; GFX940-NEXT: s_setpc_b64 s[30:31] 8094 %vec0 = call <4 x i16> asm "; def $0", "=s"() 8095 %vec1 = call <4 x i16> asm "; def $0", "=s"() 8096 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 5, i32 poison, i32 poison> 8097 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 8098 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 8099 ret void 8100} 8101 8102define void @s_shuffle_v3i16_v4i16__6_u_u() { 8103; GFX900-LABEL: s_shuffle_v3i16_v4i16__6_u_u: 8104; GFX900: ; %bb.0: 8105; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8106; GFX900-NEXT: ;;#ASMSTART 8107; GFX900-NEXT: ; def s[4:5] 8108; GFX900-NEXT: ;;#ASMEND 8109; GFX900-NEXT: s_mov_b32 s8, s5 8110; GFX900-NEXT: ;;#ASMSTART 8111; GFX900-NEXT: ; use s[8:9] 8112; GFX900-NEXT: ;;#ASMEND 8113; GFX900-NEXT: s_setpc_b64 s[30:31] 8114; 8115; GFX90A-LABEL: s_shuffle_v3i16_v4i16__6_u_u: 8116; GFX90A: ; %bb.0: 
8117; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8118; GFX90A-NEXT: ;;#ASMSTART 8119; GFX90A-NEXT: ; def s[4:5] 8120; GFX90A-NEXT: ;;#ASMEND 8121; GFX90A-NEXT: s_mov_b32 s8, s5 8122; GFX90A-NEXT: ;;#ASMSTART 8123; GFX90A-NEXT: ; use s[8:9] 8124; GFX90A-NEXT: ;;#ASMEND 8125; GFX90A-NEXT: s_setpc_b64 s[30:31] 8126; 8127; GFX940-LABEL: s_shuffle_v3i16_v4i16__6_u_u: 8128; GFX940: ; %bb.0: 8129; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8130; GFX940-NEXT: ;;#ASMSTART 8131; GFX940-NEXT: ; def s[0:1] 8132; GFX940-NEXT: ;;#ASMEND 8133; GFX940-NEXT: s_mov_b32 s8, s1 8134; GFX940-NEXT: ;;#ASMSTART 8135; GFX940-NEXT: ; use s[8:9] 8136; GFX940-NEXT: ;;#ASMEND 8137; GFX940-NEXT: s_setpc_b64 s[30:31] 8138 %vec0 = call <4 x i16> asm "; def $0", "=s"() 8139 %vec1 = call <4 x i16> asm "; def $0", "=s"() 8140 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 6, i32 poison, i32 poison> 8141 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 8142 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 8143 ret void 8144} 8145 8146define void @s_shuffle_v3i16_v4i16__7_u_u() { 8147; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_u_u: 8148; GFX900: ; %bb.0: 8149; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8150; GFX900-NEXT: ;;#ASMSTART 8151; GFX900-NEXT: ; def s[4:5] 8152; GFX900-NEXT: ;;#ASMEND 8153; GFX900-NEXT: s_lshr_b32 s8, s5, 16 8154; GFX900-NEXT: ;;#ASMSTART 8155; GFX900-NEXT: ; use s[8:9] 8156; GFX900-NEXT: ;;#ASMEND 8157; GFX900-NEXT: s_setpc_b64 s[30:31] 8158; 8159; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_u_u: 8160; GFX90A: ; %bb.0: 8161; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8162; GFX90A-NEXT: ;;#ASMSTART 8163; GFX90A-NEXT: ; def s[4:5] 8164; GFX90A-NEXT: ;;#ASMEND 8165; GFX90A-NEXT: s_lshr_b32 s8, s5, 16 8166; GFX90A-NEXT: ;;#ASMSTART 8167; GFX90A-NEXT: ; use s[8:9] 8168; GFX90A-NEXT: ;;#ASMEND 8169; GFX90A-NEXT: s_setpc_b64 s[30:31] 8170; 8171; 
GFX940-LABEL: s_shuffle_v3i16_v4i16__7_u_u: 8172; GFX940: ; %bb.0: 8173; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8174; GFX940-NEXT: ;;#ASMSTART 8175; GFX940-NEXT: ; def s[0:1] 8176; GFX940-NEXT: ;;#ASMEND 8177; GFX940-NEXT: s_lshr_b32 s8, s1, 16 8178; GFX940-NEXT: ;;#ASMSTART 8179; GFX940-NEXT: ; use s[8:9] 8180; GFX940-NEXT: ;;#ASMEND 8181; GFX940-NEXT: s_setpc_b64 s[30:31] 8182 %vec0 = call <4 x i16> asm "; def $0", "=s"() 8183 %vec1 = call <4 x i16> asm "; def $0", "=s"() 8184 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 poison, i32 poison> 8185 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 8186 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 8187 ret void 8188} 8189 8190define void @s_shuffle_v3i16_v4i16__7_0_u() { 8191; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_0_u: 8192; GFX900: ; %bb.0: 8193; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8194; GFX900-NEXT: ;;#ASMSTART 8195; GFX900-NEXT: ; def s[4:5] 8196; GFX900-NEXT: ;;#ASMEND 8197; GFX900-NEXT: ;;#ASMSTART 8198; GFX900-NEXT: ; def s[6:7] 8199; GFX900-NEXT: ;;#ASMEND 8200; GFX900-NEXT: s_lshr_b32 s5, s7, 16 8201; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4 8202; GFX900-NEXT: ;;#ASMSTART 8203; GFX900-NEXT: ; use s[8:9] 8204; GFX900-NEXT: ;;#ASMEND 8205; GFX900-NEXT: s_setpc_b64 s[30:31] 8206; 8207; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_0_u: 8208; GFX90A: ; %bb.0: 8209; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8210; GFX90A-NEXT: ;;#ASMSTART 8211; GFX90A-NEXT: ; def s[4:5] 8212; GFX90A-NEXT: ;;#ASMEND 8213; GFX90A-NEXT: ;;#ASMSTART 8214; GFX90A-NEXT: ; def s[6:7] 8215; GFX90A-NEXT: ;;#ASMEND 8216; GFX90A-NEXT: s_lshr_b32 s5, s7, 16 8217; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4 8218; GFX90A-NEXT: ;;#ASMSTART 8219; GFX90A-NEXT: ; use s[8:9] 8220; GFX90A-NEXT: ;;#ASMEND 8221; GFX90A-NEXT: s_setpc_b64 s[30:31] 8222; 8223; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_0_u: 8224; 
GFX940: ; %bb.0: 8225; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8226; GFX940-NEXT: ;;#ASMSTART 8227; GFX940-NEXT: ; def s[0:1] 8228; GFX940-NEXT: ;;#ASMEND 8229; GFX940-NEXT: ;;#ASMSTART 8230; GFX940-NEXT: ; def s[2:3] 8231; GFX940-NEXT: ;;#ASMEND 8232; GFX940-NEXT: s_lshr_b32 s1, s3, 16 8233; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 8234; GFX940-NEXT: ;;#ASMSTART 8235; GFX940-NEXT: ; use s[8:9] 8236; GFX940-NEXT: ;;#ASMEND 8237; GFX940-NEXT: s_setpc_b64 s[30:31] 8238 %vec0 = call <4 x i16> asm "; def $0", "=s"() 8239 %vec1 = call <4 x i16> asm "; def $0", "=s"() 8240 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 0, i32 poison> 8241 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 8242 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 8243 ret void 8244} 8245 8246define void @s_shuffle_v3i16_v4i16__7_1_u() { 8247; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_1_u: 8248; GFX900: ; %bb.0: 8249; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8250; GFX900-NEXT: ;;#ASMSTART 8251; GFX900-NEXT: ; def s[4:5] 8252; GFX900-NEXT: ;;#ASMEND 8253; GFX900-NEXT: ;;#ASMSTART 8254; GFX900-NEXT: ; def s[6:7] 8255; GFX900-NEXT: ;;#ASMEND 8256; GFX900-NEXT: s_pack_hh_b32_b16 s8, s7, s4 8257; GFX900-NEXT: ;;#ASMSTART 8258; GFX900-NEXT: ; use s[8:9] 8259; GFX900-NEXT: ;;#ASMEND 8260; GFX900-NEXT: s_setpc_b64 s[30:31] 8261; 8262; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_1_u: 8263; GFX90A: ; %bb.0: 8264; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8265; GFX90A-NEXT: ;;#ASMSTART 8266; GFX90A-NEXT: ; def s[4:5] 8267; GFX90A-NEXT: ;;#ASMEND 8268; GFX90A-NEXT: ;;#ASMSTART 8269; GFX90A-NEXT: ; def s[6:7] 8270; GFX90A-NEXT: ;;#ASMEND 8271; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s7, s4 8272; GFX90A-NEXT: ;;#ASMSTART 8273; GFX90A-NEXT: ; use s[8:9] 8274; GFX90A-NEXT: ;;#ASMEND 8275; GFX90A-NEXT: s_setpc_b64 s[30:31] 8276; 8277; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_1_u: 8278; 
GFX940: ; %bb.0: 8279; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8280; GFX940-NEXT: ;;#ASMSTART 8281; GFX940-NEXT: ; def s[0:1] 8282; GFX940-NEXT: ;;#ASMEND 8283; GFX940-NEXT: ;;#ASMSTART 8284; GFX940-NEXT: ; def s[2:3] 8285; GFX940-NEXT: ;;#ASMEND 8286; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s0 8287; GFX940-NEXT: ;;#ASMSTART 8288; GFX940-NEXT: ; use s[8:9] 8289; GFX940-NEXT: ;;#ASMEND 8290; GFX940-NEXT: s_setpc_b64 s[30:31] 8291 %vec0 = call <4 x i16> asm "; def $0", "=s"() 8292 %vec1 = call <4 x i16> asm "; def $0", "=s"() 8293 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 1, i32 poison> 8294 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 8295 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 8296 ret void 8297} 8298 8299define void @s_shuffle_v3i16_v4i16__7_2_u() { 8300; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_2_u: 8301; GFX900: ; %bb.0: 8302; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8303; GFX900-NEXT: ;;#ASMSTART 8304; GFX900-NEXT: ; def s[4:5] 8305; GFX900-NEXT: ;;#ASMEND 8306; GFX900-NEXT: ;;#ASMSTART 8307; GFX900-NEXT: ; def s[6:7] 8308; GFX900-NEXT: ;;#ASMEND 8309; GFX900-NEXT: s_lshr_b32 s4, s7, 16 8310; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s5 8311; GFX900-NEXT: ;;#ASMSTART 8312; GFX900-NEXT: ; use s[8:9] 8313; GFX900-NEXT: ;;#ASMEND 8314; GFX900-NEXT: s_setpc_b64 s[30:31] 8315; 8316; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_2_u: 8317; GFX90A: ; %bb.0: 8318; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8319; GFX90A-NEXT: ;;#ASMSTART 8320; GFX90A-NEXT: ; def s[4:5] 8321; GFX90A-NEXT: ;;#ASMEND 8322; GFX90A-NEXT: ;;#ASMSTART 8323; GFX90A-NEXT: ; def s[6:7] 8324; GFX90A-NEXT: ;;#ASMEND 8325; GFX90A-NEXT: s_lshr_b32 s4, s7, 16 8326; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s5 8327; GFX90A-NEXT: ;;#ASMSTART 8328; GFX90A-NEXT: ; use s[8:9] 8329; GFX90A-NEXT: ;;#ASMEND 8330; GFX90A-NEXT: s_setpc_b64 s[30:31] 8331; 8332; 
GFX940-LABEL: s_shuffle_v3i16_v4i16__7_2_u: 8333; GFX940: ; %bb.0: 8334; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8335; GFX940-NEXT: ;;#ASMSTART 8336; GFX940-NEXT: ; def s[0:1] 8337; GFX940-NEXT: ;;#ASMEND 8338; GFX940-NEXT: ;;#ASMSTART 8339; GFX940-NEXT: ; def s[2:3] 8340; GFX940-NEXT: ;;#ASMEND 8341; GFX940-NEXT: s_lshr_b32 s0, s3, 16 8342; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 8343; GFX940-NEXT: ;;#ASMSTART 8344; GFX940-NEXT: ; use s[8:9] 8345; GFX940-NEXT: ;;#ASMEND 8346; GFX940-NEXT: s_setpc_b64 s[30:31] 8347 %vec0 = call <4 x i16> asm "; def $0", "=s"() 8348 %vec1 = call <4 x i16> asm "; def $0", "=s"() 8349 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 2, i32 poison> 8350 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 8351 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 8352 ret void 8353} 8354 8355define void @s_shuffle_v3i16_v4i16__7_3_u() { 8356; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_3_u: 8357; GFX900: ; %bb.0: 8358; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8359; GFX900-NEXT: ;;#ASMSTART 8360; GFX900-NEXT: ; def s[4:5] 8361; GFX900-NEXT: ;;#ASMEND 8362; GFX900-NEXT: ;;#ASMSTART 8363; GFX900-NEXT: ; def s[6:7] 8364; GFX900-NEXT: ;;#ASMEND 8365; GFX900-NEXT: s_pack_hh_b32_b16 s8, s7, s5 8366; GFX900-NEXT: ;;#ASMSTART 8367; GFX900-NEXT: ; use s[8:9] 8368; GFX900-NEXT: ;;#ASMEND 8369; GFX900-NEXT: s_setpc_b64 s[30:31] 8370; 8371; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_3_u: 8372; GFX90A: ; %bb.0: 8373; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8374; GFX90A-NEXT: ;;#ASMSTART 8375; GFX90A-NEXT: ; def s[4:5] 8376; GFX90A-NEXT: ;;#ASMEND 8377; GFX90A-NEXT: ;;#ASMSTART 8378; GFX90A-NEXT: ; def s[6:7] 8379; GFX90A-NEXT: ;;#ASMEND 8380; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s7, s5 8381; GFX90A-NEXT: ;;#ASMSTART 8382; GFX90A-NEXT: ; use s[8:9] 8383; GFX90A-NEXT: ;;#ASMEND 8384; GFX90A-NEXT: s_setpc_b64 s[30:31] 8385; 8386; 
GFX940-LABEL: s_shuffle_v3i16_v4i16__7_3_u: 8387; GFX940: ; %bb.0: 8388; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8389; GFX940-NEXT: ;;#ASMSTART 8390; GFX940-NEXT: ; def s[0:1] 8391; GFX940-NEXT: ;;#ASMEND 8392; GFX940-NEXT: ;;#ASMSTART 8393; GFX940-NEXT: ; def s[2:3] 8394; GFX940-NEXT: ;;#ASMEND 8395; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s1 8396; GFX940-NEXT: ;;#ASMSTART 8397; GFX940-NEXT: ; use s[8:9] 8398; GFX940-NEXT: ;;#ASMEND 8399; GFX940-NEXT: s_setpc_b64 s[30:31] 8400 %vec0 = call <4 x i16> asm "; def $0", "=s"() 8401 %vec1 = call <4 x i16> asm "; def $0", "=s"() 8402 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 3, i32 poison> 8403 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 8404 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 8405 ret void 8406} 8407 8408define void @s_shuffle_v3i16_v4i16__7_4_u() { 8409; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_4_u: 8410; GFX900: ; %bb.0: 8411; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8412; GFX900-NEXT: ;;#ASMSTART 8413; GFX900-NEXT: ; def s[4:5] 8414; GFX900-NEXT: ;;#ASMEND 8415; GFX900-NEXT: s_lshr_b32 s5, s5, 16 8416; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4 8417; GFX900-NEXT: ;;#ASMSTART 8418; GFX900-NEXT: ; use s[8:9] 8419; GFX900-NEXT: ;;#ASMEND 8420; GFX900-NEXT: s_setpc_b64 s[30:31] 8421; 8422; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_4_u: 8423; GFX90A: ; %bb.0: 8424; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8425; GFX90A-NEXT: ;;#ASMSTART 8426; GFX90A-NEXT: ; def s[4:5] 8427; GFX90A-NEXT: ;;#ASMEND 8428; GFX90A-NEXT: s_lshr_b32 s5, s5, 16 8429; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4 8430; GFX90A-NEXT: ;;#ASMSTART 8431; GFX90A-NEXT: ; use s[8:9] 8432; GFX90A-NEXT: ;;#ASMEND 8433; GFX90A-NEXT: s_setpc_b64 s[30:31] 8434; 8435; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_4_u: 8436; GFX940: ; %bb.0: 8437; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8438; GFX940-NEXT: 
;;#ASMSTART 8439; GFX940-NEXT: ; def s[0:1] 8440; GFX940-NEXT: ;;#ASMEND 8441; GFX940-NEXT: s_lshr_b32 s1, s1, 16 8442; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 8443; GFX940-NEXT: ;;#ASMSTART 8444; GFX940-NEXT: ; use s[8:9] 8445; GFX940-NEXT: ;;#ASMEND 8446; GFX940-NEXT: s_setpc_b64 s[30:31] 8447 %vec0 = call <4 x i16> asm "; def $0", "=s"() 8448 %vec1 = call <4 x i16> asm "; def $0", "=s"() 8449 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 4, i32 poison> 8450 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 8451 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 8452 ret void 8453} 8454 8455define void @s_shuffle_v3i16_v4i16__7_5_u() { 8456; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_5_u: 8457; GFX900: ; %bb.0: 8458; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8459; GFX900-NEXT: ;;#ASMSTART 8460; GFX900-NEXT: ; def s[4:5] 8461; GFX900-NEXT: ;;#ASMEND 8462; GFX900-NEXT: s_pack_hh_b32_b16 s8, s5, s4 8463; GFX900-NEXT: ;;#ASMSTART 8464; GFX900-NEXT: ; use s[8:9] 8465; GFX900-NEXT: ;;#ASMEND 8466; GFX900-NEXT: s_setpc_b64 s[30:31] 8467; 8468; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_5_u: 8469; GFX90A: ; %bb.0: 8470; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8471; GFX90A-NEXT: ;;#ASMSTART 8472; GFX90A-NEXT: ; def s[4:5] 8473; GFX90A-NEXT: ;;#ASMEND 8474; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s5, s4 8475; GFX90A-NEXT: ;;#ASMSTART 8476; GFX90A-NEXT: ; use s[8:9] 8477; GFX90A-NEXT: ;;#ASMEND 8478; GFX90A-NEXT: s_setpc_b64 s[30:31] 8479; 8480; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_5_u: 8481; GFX940: ; %bb.0: 8482; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8483; GFX940-NEXT: ;;#ASMSTART 8484; GFX940-NEXT: ; def s[0:1] 8485; GFX940-NEXT: ;;#ASMEND 8486; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s0 8487; GFX940-NEXT: ;;#ASMSTART 8488; GFX940-NEXT: ; use s[8:9] 8489; GFX940-NEXT: ;;#ASMEND 8490; GFX940-NEXT: s_setpc_b64 s[30:31] 8491 %vec0 = call <4 x 
i16> asm "; def $0", "=s"() 8492 %vec1 = call <4 x i16> asm "; def $0", "=s"() 8493 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 5, i32 poison> 8494 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 8495 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 8496 ret void 8497} 8498 8499define void @s_shuffle_v3i16_v4i16__7_6_u() { 8500; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_6_u: 8501; GFX900: ; %bb.0: 8502; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8503; GFX900-NEXT: ;;#ASMSTART 8504; GFX900-NEXT: ; def s[4:5] 8505; GFX900-NEXT: ;;#ASMEND 8506; GFX900-NEXT: s_lshr_b32 s4, s5, 16 8507; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s5 8508; GFX900-NEXT: ;;#ASMSTART 8509; GFX900-NEXT: ; use s[8:9] 8510; GFX900-NEXT: ;;#ASMEND 8511; GFX900-NEXT: s_setpc_b64 s[30:31] 8512; 8513; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_6_u: 8514; GFX90A: ; %bb.0: 8515; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8516; GFX90A-NEXT: ;;#ASMSTART 8517; GFX90A-NEXT: ; def s[4:5] 8518; GFX90A-NEXT: ;;#ASMEND 8519; GFX90A-NEXT: s_lshr_b32 s4, s5, 16 8520; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s5 8521; GFX90A-NEXT: ;;#ASMSTART 8522; GFX90A-NEXT: ; use s[8:9] 8523; GFX90A-NEXT: ;;#ASMEND 8524; GFX90A-NEXT: s_setpc_b64 s[30:31] 8525; 8526; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_6_u: 8527; GFX940: ; %bb.0: 8528; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8529; GFX940-NEXT: ;;#ASMSTART 8530; GFX940-NEXT: ; def s[0:1] 8531; GFX940-NEXT: ;;#ASMEND 8532; GFX940-NEXT: s_lshr_b32 s0, s1, 16 8533; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 8534; GFX940-NEXT: ;;#ASMSTART 8535; GFX940-NEXT: ; use s[8:9] 8536; GFX940-NEXT: ;;#ASMEND 8537; GFX940-NEXT: s_setpc_b64 s[30:31] 8538 %vec0 = call <4 x i16> asm "; def $0", "=s"() 8539 %vec1 = call <4 x i16> asm "; def $0", "=s"() 8540 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 6, i32 poison> 8541 %extend3 = 
shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 8542 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 8543 ret void 8544} 8545
; Scalar-register test for mask <7, 7, poison>: lanes 0 and 1 both read element 3 of %vec1, lane 2 is poison.
; %vec0 is never selected; the expected output has a single def followed by one s_pack_hh_b32_b16 into s8.
; The 3-element result is widened to <4 x i16> and handed to inline asm pinned to s[8:9].
; CHECK lines are autogenerated (see the NOTE on the first line of the file): regenerate, do not hand-edit.
8546define void @s_shuffle_v3i16_v4i16__7_7_u() { 8547; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_7_u: 8548; GFX900: ; %bb.0: 8549; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8550; GFX900-NEXT: ;;#ASMSTART 8551; GFX900-NEXT: ; def s[4:5] 8552; GFX900-NEXT: ;;#ASMEND 8553; GFX900-NEXT: s_pack_hh_b32_b16 s8, s5, s5 8554; GFX900-NEXT: ;;#ASMSTART 8555; GFX900-NEXT: ; use s[8:9] 8556; GFX900-NEXT: ;;#ASMEND 8557; GFX900-NEXT: s_setpc_b64 s[30:31] 8558; 8559; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_7_u: 8560; GFX90A: ; %bb.0: 8561; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8562; GFX90A-NEXT: ;;#ASMSTART 8563; GFX90A-NEXT: ; def s[4:5] 8564; GFX90A-NEXT: ;;#ASMEND 8565; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s5, s5 8566; GFX90A-NEXT: ;;#ASMSTART 8567; GFX90A-NEXT: ; use s[8:9] 8568; GFX90A-NEXT: ;;#ASMEND 8569; GFX90A-NEXT: s_setpc_b64 s[30:31] 8570; 8571; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_7_u: 8572; GFX940: ; %bb.0: 8573; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8574; GFX940-NEXT: ;;#ASMSTART 8575; GFX940-NEXT: ; def s[0:1] 8576; GFX940-NEXT: ;;#ASMEND 8577; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s1 8578; GFX940-NEXT: ;;#ASMSTART 8579; GFX940-NEXT: ; use s[8:9] 8580; GFX940-NEXT: ;;#ASMEND 8581; GFX940-NEXT: s_setpc_b64 s[30:31] 8582 %vec0 = call <4 x i16> asm "; def $0", "=s"() 8583 %vec1 = call <4 x i16> asm "; def $0", "=s"() 8584 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 7, i32 poison> 8585 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 8586 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 8587 ret void 8588} 8589 8590define void @s_shuffle_v3i16_v4i16__7_7_0() { 8591; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_7_0: 8592; GFX900: ; %bb.0: 
8593; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8594; GFX900-NEXT: ;;#ASMSTART 8595; GFX900-NEXT: ; def s[4:5] 8596; GFX900-NEXT: ;;#ASMEND 8597; GFX900-NEXT: ;;#ASMSTART 8598; GFX900-NEXT: ; def s[6:7] 8599; GFX900-NEXT: ;;#ASMEND 8600; GFX900-NEXT: s_pack_hh_b32_b16 s8, s7, s7 8601; GFX900-NEXT: s_mov_b32 s9, s4 8602; GFX900-NEXT: ;;#ASMSTART 8603; GFX900-NEXT: ; use s[8:9] 8604; GFX900-NEXT: ;;#ASMEND 8605; GFX900-NEXT: s_setpc_b64 s[30:31] 8606; 8607; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_7_0: 8608; GFX90A: ; %bb.0: 8609; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8610; GFX90A-NEXT: ;;#ASMSTART 8611; GFX90A-NEXT: ; def s[4:5] 8612; GFX90A-NEXT: ;;#ASMEND 8613; GFX90A-NEXT: ;;#ASMSTART 8614; GFX90A-NEXT: ; def s[6:7] 8615; GFX90A-NEXT: ;;#ASMEND 8616; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s7, s7 8617; GFX90A-NEXT: s_mov_b32 s9, s4 8618; GFX90A-NEXT: ;;#ASMSTART 8619; GFX90A-NEXT: ; use s[8:9] 8620; GFX90A-NEXT: ;;#ASMEND 8621; GFX90A-NEXT: s_setpc_b64 s[30:31] 8622; 8623; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_7_0: 8624; GFX940: ; %bb.0: 8625; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8626; GFX940-NEXT: ;;#ASMSTART 8627; GFX940-NEXT: ; def s[0:1] 8628; GFX940-NEXT: ;;#ASMEND 8629; GFX940-NEXT: ;;#ASMSTART 8630; GFX940-NEXT: ; def s[2:3] 8631; GFX940-NEXT: ;;#ASMEND 8632; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3 8633; GFX940-NEXT: s_mov_b32 s9, s0 8634; GFX940-NEXT: ;;#ASMSTART 8635; GFX940-NEXT: ; use s[8:9] 8636; GFX940-NEXT: ;;#ASMEND 8637; GFX940-NEXT: s_setpc_b64 s[30:31] 8638 %vec0 = call <4 x i16> asm "; def $0", "=s"() 8639 %vec1 = call <4 x i16> asm "; def $0", "=s"() 8640 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 7, i32 0> 8641 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 8642 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 8643 ret void 8644} 8645 8646define void @s_shuffle_v3i16_v4i16__7_7_1() { 8647; 
GFX900-LABEL: s_shuffle_v3i16_v4i16__7_7_1: 8648; GFX900: ; %bb.0: 8649; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8650; GFX900-NEXT: ;;#ASMSTART 8651; GFX900-NEXT: ; def s[4:5] 8652; GFX900-NEXT: ;;#ASMEND 8653; GFX900-NEXT: ;;#ASMSTART 8654; GFX900-NEXT: ; def s[6:7] 8655; GFX900-NEXT: ;;#ASMEND 8656; GFX900-NEXT: s_lshr_b32 s9, s4, 16 8657; GFX900-NEXT: s_pack_hh_b32_b16 s8, s7, s7 8658; GFX900-NEXT: ;;#ASMSTART 8659; GFX900-NEXT: ; use s[8:9] 8660; GFX900-NEXT: ;;#ASMEND 8661; GFX900-NEXT: s_setpc_b64 s[30:31] 8662; 8663; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_7_1: 8664; GFX90A: ; %bb.0: 8665; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8666; GFX90A-NEXT: ;;#ASMSTART 8667; GFX90A-NEXT: ; def s[4:5] 8668; GFX90A-NEXT: ;;#ASMEND 8669; GFX90A-NEXT: ;;#ASMSTART 8670; GFX90A-NEXT: ; def s[6:7] 8671; GFX90A-NEXT: ;;#ASMEND 8672; GFX90A-NEXT: s_lshr_b32 s9, s4, 16 8673; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s7, s7 8674; GFX90A-NEXT: ;;#ASMSTART 8675; GFX90A-NEXT: ; use s[8:9] 8676; GFX90A-NEXT: ;;#ASMEND 8677; GFX90A-NEXT: s_setpc_b64 s[30:31] 8678; 8679; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_7_1: 8680; GFX940: ; %bb.0: 8681; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8682; GFX940-NEXT: ;;#ASMSTART 8683; GFX940-NEXT: ; def s[0:1] 8684; GFX940-NEXT: ;;#ASMEND 8685; GFX940-NEXT: ;;#ASMSTART 8686; GFX940-NEXT: ; def s[2:3] 8687; GFX940-NEXT: ;;#ASMEND 8688; GFX940-NEXT: s_lshr_b32 s9, s0, 16 8689; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3 8690; GFX940-NEXT: ;;#ASMSTART 8691; GFX940-NEXT: ; use s[8:9] 8692; GFX940-NEXT: ;;#ASMEND 8693; GFX940-NEXT: s_setpc_b64 s[30:31] 8694 %vec0 = call <4 x i16> asm "; def $0", "=s"() 8695 %vec1 = call <4 x i16> asm "; def $0", "=s"() 8696 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 7, i32 1> 8697 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 8698 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 8699 
ret void 8700} 8701
; Scalar-register test for mask <7, 7, 2>: lanes 0 and 1 read element 3 of %vec1, lane 2 reads element 2 of %vec0.
; In the expected output one def lands directly in s[8:9] and only s8 is rewritten (s_pack_hh_b32_b16);
; s9 is passed through to the "use" asm unchanged.
; CHECK lines are autogenerated (see the NOTE on the first line of the file): regenerate, do not hand-edit.
8702define void @s_shuffle_v3i16_v4i16__7_7_2() { 8703; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_7_2: 8704; GFX900: ; %bb.0: 8705; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8706; GFX900-NEXT: ;;#ASMSTART 8707; GFX900-NEXT: ; def s[8:9] 8708; GFX900-NEXT: ;;#ASMEND 8709; GFX900-NEXT: ;;#ASMSTART 8710; GFX900-NEXT: ; def s[4:5] 8711; GFX900-NEXT: ;;#ASMEND 8712; GFX900-NEXT: s_pack_hh_b32_b16 s8, s5, s5 8713; GFX900-NEXT: ;;#ASMSTART 8714; GFX900-NEXT: ; use s[8:9] 8715; GFX900-NEXT: ;;#ASMEND 8716; GFX900-NEXT: s_setpc_b64 s[30:31] 8717; 8718; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_7_2: 8719; GFX90A: ; %bb.0: 8720; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8721; GFX90A-NEXT: ;;#ASMSTART 8722; GFX90A-NEXT: ; def s[8:9] 8723; GFX90A-NEXT: ;;#ASMEND 8724; GFX90A-NEXT: ;;#ASMSTART 8725; GFX90A-NEXT: ; def s[4:5] 8726; GFX90A-NEXT: ;;#ASMEND 8727; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s5, s5 8728; GFX90A-NEXT: ;;#ASMSTART 8729; GFX90A-NEXT: ; use s[8:9] 8730; GFX90A-NEXT: ;;#ASMEND 8731; GFX90A-NEXT: s_setpc_b64 s[30:31] 8732; 8733; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_7_2: 8734; GFX940: ; %bb.0: 8735; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8736; GFX940-NEXT: ;;#ASMSTART 8737; GFX940-NEXT: ; def s[8:9] 8738; GFX940-NEXT: ;;#ASMEND 8739; GFX940-NEXT: ;;#ASMSTART 8740; GFX940-NEXT: ; def s[0:1] 8741; GFX940-NEXT: ;;#ASMEND 8742; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s1 8743; GFX940-NEXT: ;;#ASMSTART 8744; GFX940-NEXT: ; use s[8:9] 8745; GFX940-NEXT: ;;#ASMEND 8746; GFX940-NEXT: s_setpc_b64 s[30:31] 8747 %vec0 = call <4 x i16> asm "; def $0", "=s"() 8748 %vec1 = call <4 x i16> asm "; def $0", "=s"() 8749 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 7, i32 2> 8750 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 8751 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 8752 ret void 8753} 8754 8755define void 
@s_shuffle_v3i16_v4i16__7_7_3() { 8756; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_7_3: 8757; GFX900: ; %bb.0: 8758; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8759; GFX900-NEXT: ;;#ASMSTART 8760; GFX900-NEXT: ; def s[4:5] 8761; GFX900-NEXT: ;;#ASMEND 8762; GFX900-NEXT: ;;#ASMSTART 8763; GFX900-NEXT: ; def s[6:7] 8764; GFX900-NEXT: ;;#ASMEND 8765; GFX900-NEXT: s_lshr_b32 s9, s5, 16 8766; GFX900-NEXT: s_pack_hh_b32_b16 s8, s7, s7 8767; GFX900-NEXT: ;;#ASMSTART 8768; GFX900-NEXT: ; use s[8:9] 8769; GFX900-NEXT: ;;#ASMEND 8770; GFX900-NEXT: s_setpc_b64 s[30:31] 8771; 8772; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_7_3: 8773; GFX90A: ; %bb.0: 8774; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8775; GFX90A-NEXT: ;;#ASMSTART 8776; GFX90A-NEXT: ; def s[4:5] 8777; GFX90A-NEXT: ;;#ASMEND 8778; GFX90A-NEXT: ;;#ASMSTART 8779; GFX90A-NEXT: ; def s[6:7] 8780; GFX90A-NEXT: ;;#ASMEND 8781; GFX90A-NEXT: s_lshr_b32 s9, s5, 16 8782; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s7, s7 8783; GFX90A-NEXT: ;;#ASMSTART 8784; GFX90A-NEXT: ; use s[8:9] 8785; GFX90A-NEXT: ;;#ASMEND 8786; GFX90A-NEXT: s_setpc_b64 s[30:31] 8787; 8788; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_7_3: 8789; GFX940: ; %bb.0: 8790; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8791; GFX940-NEXT: ;;#ASMSTART 8792; GFX940-NEXT: ; def s[0:1] 8793; GFX940-NEXT: ;;#ASMEND 8794; GFX940-NEXT: ;;#ASMSTART 8795; GFX940-NEXT: ; def s[2:3] 8796; GFX940-NEXT: ;;#ASMEND 8797; GFX940-NEXT: s_lshr_b32 s9, s1, 16 8798; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3 8799; GFX940-NEXT: ;;#ASMSTART 8800; GFX940-NEXT: ; use s[8:9] 8801; GFX940-NEXT: ;;#ASMEND 8802; GFX940-NEXT: s_setpc_b64 s[30:31] 8803 %vec0 = call <4 x i16> asm "; def $0", "=s"() 8804 %vec1 = call <4 x i16> asm "; def $0", "=s"() 8805 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 7, i32 3> 8806 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 8807 call void asm sideeffect "; use 
$0", "{s[8:9]}"(<4 x i16> %extend3) 8808 ret void 8809} 8810
; Scalar-register test for mask <7, 7, 4>: lanes 0 and 1 read element 3 of %vec1, lane 2 reads element 0 of %vec1.
; %vec0 is never selected; the expected output has a single def, s_pack_hh_b32_b16 for s8 and s_mov_b32 for s9.
; CHECK lines are autogenerated (see the NOTE on the first line of the file): regenerate, do not hand-edit.
8811define void @s_shuffle_v3i16_v4i16__7_7_4() { 8812; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_7_4: 8813; GFX900: ; %bb.0: 8814; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8815; GFX900-NEXT: ;;#ASMSTART 8816; GFX900-NEXT: ; def s[4:5] 8817; GFX900-NEXT: ;;#ASMEND 8818; GFX900-NEXT: s_pack_hh_b32_b16 s8, s5, s5 8819; GFX900-NEXT: s_mov_b32 s9, s4 8820; GFX900-NEXT: ;;#ASMSTART 8821; GFX900-NEXT: ; use s[8:9] 8822; GFX900-NEXT: ;;#ASMEND 8823; GFX900-NEXT: s_setpc_b64 s[30:31] 8824; 8825; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_7_4: 8826; GFX90A: ; %bb.0: 8827; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8828; GFX90A-NEXT: ;;#ASMSTART 8829; GFX90A-NEXT: ; def s[4:5] 8830; GFX90A-NEXT: ;;#ASMEND 8831; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s5, s5 8832; GFX90A-NEXT: s_mov_b32 s9, s4 8833; GFX90A-NEXT: ;;#ASMSTART 8834; GFX90A-NEXT: ; use s[8:9] 8835; GFX90A-NEXT: ;;#ASMEND 8836; GFX90A-NEXT: s_setpc_b64 s[30:31] 8837; 8838; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_7_4: 8839; GFX940: ; %bb.0: 8840; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8841; GFX940-NEXT: ;;#ASMSTART 8842; GFX940-NEXT: ; def s[0:1] 8843; GFX940-NEXT: ;;#ASMEND 8844; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s1 8845; GFX940-NEXT: s_mov_b32 s9, s0 8846; GFX940-NEXT: ;;#ASMSTART 8847; GFX940-NEXT: ; use s[8:9] 8848; GFX940-NEXT: ;;#ASMEND 8849; GFX940-NEXT: s_setpc_b64 s[30:31] 8850 %vec0 = call <4 x i16> asm "; def $0", "=s"() 8851 %vec1 = call <4 x i16> asm "; def $0", "=s"() 8852 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 7, i32 4> 8853 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 8854 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 8855 ret void 8856} 8857 8858define void @s_shuffle_v3i16_v4i16__7_7_5() { 8859; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_7_5: 8860; GFX900: ; %bb.0: 8861; GFX900-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) 8862; GFX900-NEXT: ;;#ASMSTART 8863; GFX900-NEXT: ; def s[4:5] 8864; GFX900-NEXT: ;;#ASMEND 8865; GFX900-NEXT: s_lshr_b32 s9, s4, 16 8866; GFX900-NEXT: s_pack_hh_b32_b16 s8, s5, s5 8867; GFX900-NEXT: ;;#ASMSTART 8868; GFX900-NEXT: ; use s[8:9] 8869; GFX900-NEXT: ;;#ASMEND 8870; GFX900-NEXT: s_setpc_b64 s[30:31] 8871; 8872; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_7_5: 8873; GFX90A: ; %bb.0: 8874; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8875; GFX90A-NEXT: ;;#ASMSTART 8876; GFX90A-NEXT: ; def s[4:5] 8877; GFX90A-NEXT: ;;#ASMEND 8878; GFX90A-NEXT: s_lshr_b32 s9, s4, 16 8879; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s5, s5 8880; GFX90A-NEXT: ;;#ASMSTART 8881; GFX90A-NEXT: ; use s[8:9] 8882; GFX90A-NEXT: ;;#ASMEND 8883; GFX90A-NEXT: s_setpc_b64 s[30:31] 8884; 8885; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_7_5: 8886; GFX940: ; %bb.0: 8887; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8888; GFX940-NEXT: ;;#ASMSTART 8889; GFX940-NEXT: ; def s[0:1] 8890; GFX940-NEXT: ;;#ASMEND 8891; GFX940-NEXT: s_lshr_b32 s9, s0, 16 8892; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s1 8893; GFX940-NEXT: ;;#ASMSTART 8894; GFX940-NEXT: ; use s[8:9] 8895; GFX940-NEXT: ;;#ASMEND 8896; GFX940-NEXT: s_setpc_b64 s[30:31] 8897 %vec0 = call <4 x i16> asm "; def $0", "=s"() 8898 %vec1 = call <4 x i16> asm "; def $0", "=s"() 8899 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 7, i32 5> 8900 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 8901 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 8902 ret void 8903} 8904 8905define void @s_shuffle_v3i16_v4i16__7_7_6() { 8906; GFX9-LABEL: s_shuffle_v3i16_v4i16__7_7_6: 8907; GFX9: ; %bb.0: 8908; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8909; GFX9-NEXT: ;;#ASMSTART 8910; GFX9-NEXT: ; def s[8:9] 8911; GFX9-NEXT: ;;#ASMEND 8912; GFX9-NEXT: s_pack_hh_b32_b16 s8, s9, s9 8913; GFX9-NEXT: ;;#ASMSTART 8914; 
GFX9-NEXT: ; use s[8:9] 8915; GFX9-NEXT: ;;#ASMEND 8916; GFX9-NEXT: s_setpc_b64 s[30:31] 8917 %vec0 = call <4 x i16> asm "; def $0", "=s"() 8918 %vec1 = call <4 x i16> asm "; def $0", "=s"() 8919 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 7, i32 6> 8920 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 8921 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 8922 ret void 8923} 8924 8925define void @s_shuffle_v3i16_v4i16__7_7_7() { 8926; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_7_7: 8927; GFX900: ; %bb.0: 8928; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8929; GFX900-NEXT: ;;#ASMSTART 8930; GFX900-NEXT: ; def s[4:5] 8931; GFX900-NEXT: ;;#ASMEND 8932; GFX900-NEXT: s_lshr_b32 s9, s5, 16 8933; GFX900-NEXT: s_pack_hh_b32_b16 s8, s5, s5 8934; GFX900-NEXT: ;;#ASMSTART 8935; GFX900-NEXT: ; use s[8:9] 8936; GFX900-NEXT: ;;#ASMEND 8937; GFX900-NEXT: s_setpc_b64 s[30:31] 8938; 8939; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_7_7: 8940; GFX90A: ; %bb.0: 8941; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8942; GFX90A-NEXT: ;;#ASMSTART 8943; GFX90A-NEXT: ; def s[4:5] 8944; GFX90A-NEXT: ;;#ASMEND 8945; GFX90A-NEXT: s_lshr_b32 s9, s5, 16 8946; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s5, s5 8947; GFX90A-NEXT: ;;#ASMSTART 8948; GFX90A-NEXT: ; use s[8:9] 8949; GFX90A-NEXT: ;;#ASMEND 8950; GFX90A-NEXT: s_setpc_b64 s[30:31] 8951; 8952; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_7_7: 8953; GFX940: ; %bb.0: 8954; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8955; GFX940-NEXT: ;;#ASMSTART 8956; GFX940-NEXT: ; def s[0:1] 8957; GFX940-NEXT: ;;#ASMEND 8958; GFX940-NEXT: s_lshr_b32 s9, s1, 16 8959; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s1 8960; GFX940-NEXT: ;;#ASMSTART 8961; GFX940-NEXT: ; use s[8:9] 8962; GFX940-NEXT: ;;#ASMEND 8963; GFX940-NEXT: s_setpc_b64 s[30:31] 8964 %vec0 = call <4 x i16> asm "; def $0", "=s"() 8965 %vec1 = call <4 x i16> asm "; def $0", "=s"() 8966 
%shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 7, i32 7> 8967 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 8968 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 8969 ret void 8970} 8971 8972define void @s_shuffle_v3i16_v4i16__u_0_0() { 8973; GFX900-LABEL: s_shuffle_v3i16_v4i16__u_0_0: 8974; GFX900: ; %bb.0: 8975; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8976; GFX900-NEXT: ;;#ASMSTART 8977; GFX900-NEXT: ; def s[4:5] 8978; GFX900-NEXT: ;;#ASMEND 8979; GFX900-NEXT: s_lshl_b32 s8, s4, 16 8980; GFX900-NEXT: s_mov_b32 s9, s4 8981; GFX900-NEXT: ;;#ASMSTART 8982; GFX900-NEXT: ; use s[8:9] 8983; GFX900-NEXT: ;;#ASMEND 8984; GFX900-NEXT: s_setpc_b64 s[30:31] 8985; 8986; GFX90A-LABEL: s_shuffle_v3i16_v4i16__u_0_0: 8987; GFX90A: ; %bb.0: 8988; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8989; GFX90A-NEXT: ;;#ASMSTART 8990; GFX90A-NEXT: ; def s[4:5] 8991; GFX90A-NEXT: ;;#ASMEND 8992; GFX90A-NEXT: s_lshl_b32 s8, s4, 16 8993; GFX90A-NEXT: s_mov_b32 s9, s4 8994; GFX90A-NEXT: ;;#ASMSTART 8995; GFX90A-NEXT: ; use s[8:9] 8996; GFX90A-NEXT: ;;#ASMEND 8997; GFX90A-NEXT: s_setpc_b64 s[30:31] 8998; 8999; GFX940-LABEL: s_shuffle_v3i16_v4i16__u_0_0: 9000; GFX940: ; %bb.0: 9001; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9002; GFX940-NEXT: ;;#ASMSTART 9003; GFX940-NEXT: ; def s[0:1] 9004; GFX940-NEXT: ;;#ASMEND 9005; GFX940-NEXT: s_lshl_b32 s8, s0, 16 9006; GFX940-NEXT: s_mov_b32 s9, s0 9007; GFX940-NEXT: ;;#ASMSTART 9008; GFX940-NEXT: ; use s[8:9] 9009; GFX940-NEXT: ;;#ASMEND 9010; GFX940-NEXT: s_setpc_b64 s[30:31] 9011 %vec0 = call <4 x i16> asm "; def $0", "=s"() 9012 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 poison, i32 0, i32 0> 9013 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 9014 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 9015 ret 
void 9016} 9017
; Scalar-register test for an all-zero mask: every result lane reads element 0 of %vec0 (second operand is poison).
; The expected output builds s8 with s_pack_ll_b32_b16 from the same source register twice and copies it into s9.
; CHECK lines are autogenerated (see the NOTE on the first line of the file): regenerate, do not hand-edit.
9018define void @s_shuffle_v3i16_v4i16__0_0_0() { 9019; GFX900-LABEL: s_shuffle_v3i16_v4i16__0_0_0: 9020; GFX900: ; %bb.0: 9021; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9022; GFX900-NEXT: ;;#ASMSTART 9023; GFX900-NEXT: ; def s[4:5] 9024; GFX900-NEXT: ;;#ASMEND 9025; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s4 9026; GFX900-NEXT: s_mov_b32 s9, s4 9027; GFX900-NEXT: ;;#ASMSTART 9028; GFX900-NEXT: ; use s[8:9] 9029; GFX900-NEXT: ;;#ASMEND 9030; GFX900-NEXT: s_setpc_b64 s[30:31] 9031; 9032; GFX90A-LABEL: s_shuffle_v3i16_v4i16__0_0_0: 9033; GFX90A: ; %bb.0: 9034; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9035; GFX90A-NEXT: ;;#ASMSTART 9036; GFX90A-NEXT: ; def s[4:5] 9037; GFX90A-NEXT: ;;#ASMEND 9038; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s4 9039; GFX90A-NEXT: s_mov_b32 s9, s4 9040; GFX90A-NEXT: ;;#ASMSTART 9041; GFX90A-NEXT: ; use s[8:9] 9042; GFX90A-NEXT: ;;#ASMEND 9043; GFX90A-NEXT: s_setpc_b64 s[30:31] 9044; 9045; GFX940-LABEL: s_shuffle_v3i16_v4i16__0_0_0: 9046; GFX940: ; %bb.0: 9047; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9048; GFX940-NEXT: ;;#ASMSTART 9049; GFX940-NEXT: ; def s[0:1] 9050; GFX940-NEXT: ;;#ASMEND 9051; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 9052; GFX940-NEXT: s_mov_b32 s9, s0 9053; GFX940-NEXT: ;;#ASMSTART 9054; GFX940-NEXT: ; use s[8:9] 9055; GFX940-NEXT: ;;#ASMEND 9056; GFX940-NEXT: s_setpc_b64 s[30:31] 9057 %vec0 = call <4 x i16> asm "; def $0", "=s"() 9058 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> zeroinitializer 9059 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 9060 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 9061 ret void 9062} 9063 9064define void @s_shuffle_v3i16_v4i16__1_0_0() { 9065; GFX900-LABEL: s_shuffle_v3i16_v4i16__1_0_0: 9066; GFX900: ; %bb.0: 9067; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9068; GFX900-NEXT: ;;#ASMSTART 9069; GFX900-NEXT: ; def s[4:5] 9070; 
GFX900-NEXT: ;;#ASMEND 9071; GFX900-NEXT: s_lshr_b32 s5, s4, 16 9072; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4 9073; GFX900-NEXT: s_mov_b32 s9, s4 9074; GFX900-NEXT: ;;#ASMSTART 9075; GFX900-NEXT: ; use s[8:9] 9076; GFX900-NEXT: ;;#ASMEND 9077; GFX900-NEXT: s_setpc_b64 s[30:31] 9078; 9079; GFX90A-LABEL: s_shuffle_v3i16_v4i16__1_0_0: 9080; GFX90A: ; %bb.0: 9081; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9082; GFX90A-NEXT: ;;#ASMSTART 9083; GFX90A-NEXT: ; def s[4:5] 9084; GFX90A-NEXT: ;;#ASMEND 9085; GFX90A-NEXT: s_lshr_b32 s5, s4, 16 9086; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4 9087; GFX90A-NEXT: s_mov_b32 s9, s4 9088; GFX90A-NEXT: ;;#ASMSTART 9089; GFX90A-NEXT: ; use s[8:9] 9090; GFX90A-NEXT: ;;#ASMEND 9091; GFX90A-NEXT: s_setpc_b64 s[30:31] 9092; 9093; GFX940-LABEL: s_shuffle_v3i16_v4i16__1_0_0: 9094; GFX940: ; %bb.0: 9095; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9096; GFX940-NEXT: ;;#ASMSTART 9097; GFX940-NEXT: ; def s[0:1] 9098; GFX940-NEXT: ;;#ASMEND 9099; GFX940-NEXT: s_lshr_b32 s1, s0, 16 9100; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 9101; GFX940-NEXT: s_mov_b32 s9, s0 9102; GFX940-NEXT: ;;#ASMSTART 9103; GFX940-NEXT: ; use s[8:9] 9104; GFX940-NEXT: ;;#ASMEND 9105; GFX940-NEXT: s_setpc_b64 s[30:31] 9106 %vec0 = call <4 x i16> asm "; def $0", "=s"() 9107 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 1, i32 0, i32 0> 9108 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 9109 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 9110 ret void 9111} 9112 9113define void @s_shuffle_v3i16_v4i16__2_0_0() { 9114; GFX900-LABEL: s_shuffle_v3i16_v4i16__2_0_0: 9115; GFX900: ; %bb.0: 9116; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9117; GFX900-NEXT: ;;#ASMSTART 9118; GFX900-NEXT: ; def s[4:5] 9119; GFX900-NEXT: ;;#ASMEND 9120; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4 9121; GFX900-NEXT: s_mov_b32 s9, s4 9122; GFX900-NEXT: ;;#ASMSTART 
9123; GFX900-NEXT: ; use s[8:9] 9124; GFX900-NEXT: ;;#ASMEND 9125; GFX900-NEXT: s_setpc_b64 s[30:31] 9126; 9127; GFX90A-LABEL: s_shuffle_v3i16_v4i16__2_0_0: 9128; GFX90A: ; %bb.0: 9129; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9130; GFX90A-NEXT: ;;#ASMSTART 9131; GFX90A-NEXT: ; def s[4:5] 9132; GFX90A-NEXT: ;;#ASMEND 9133; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4 9134; GFX90A-NEXT: s_mov_b32 s9, s4 9135; GFX90A-NEXT: ;;#ASMSTART 9136; GFX90A-NEXT: ; use s[8:9] 9137; GFX90A-NEXT: ;;#ASMEND 9138; GFX90A-NEXT: s_setpc_b64 s[30:31] 9139; 9140; GFX940-LABEL: s_shuffle_v3i16_v4i16__2_0_0: 9141; GFX940: ; %bb.0: 9142; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9143; GFX940-NEXT: ;;#ASMSTART 9144; GFX940-NEXT: ; def s[0:1] 9145; GFX940-NEXT: ;;#ASMEND 9146; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 9147; GFX940-NEXT: s_mov_b32 s9, s0 9148; GFX940-NEXT: ;;#ASMSTART 9149; GFX940-NEXT: ; use s[8:9] 9150; GFX940-NEXT: ;;#ASMEND 9151; GFX940-NEXT: s_setpc_b64 s[30:31] 9152 %vec0 = call <4 x i16> asm "; def $0", "=s"() 9153 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 2, i32 0, i32 0> 9154 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 9155 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 9156 ret void 9157} 9158 9159define void @s_shuffle_v3i16_v4i16__3_0_0() { 9160; GFX900-LABEL: s_shuffle_v3i16_v4i16__3_0_0: 9161; GFX900: ; %bb.0: 9162; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9163; GFX900-NEXT: ;;#ASMSTART 9164; GFX900-NEXT: ; def s[4:5] 9165; GFX900-NEXT: ;;#ASMEND 9166; GFX900-NEXT: s_lshr_b32 s5, s5, 16 9167; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4 9168; GFX900-NEXT: s_mov_b32 s9, s4 9169; GFX900-NEXT: ;;#ASMSTART 9170; GFX900-NEXT: ; use s[8:9] 9171; GFX900-NEXT: ;;#ASMEND 9172; GFX900-NEXT: s_setpc_b64 s[30:31] 9173; 9174; GFX90A-LABEL: s_shuffle_v3i16_v4i16__3_0_0: 9175; GFX90A: ; %bb.0: 9176; GFX90A-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) 9177; GFX90A-NEXT: ;;#ASMSTART 9178; GFX90A-NEXT: ; def s[4:5] 9179; GFX90A-NEXT: ;;#ASMEND 9180; GFX90A-NEXT: s_lshr_b32 s5, s5, 16 9181; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4 9182; GFX90A-NEXT: s_mov_b32 s9, s4 9183; GFX90A-NEXT: ;;#ASMSTART 9184; GFX90A-NEXT: ; use s[8:9] 9185; GFX90A-NEXT: ;;#ASMEND 9186; GFX90A-NEXT: s_setpc_b64 s[30:31] 9187; 9188; GFX940-LABEL: s_shuffle_v3i16_v4i16__3_0_0: 9189; GFX940: ; %bb.0: 9190; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9191; GFX940-NEXT: ;;#ASMSTART 9192; GFX940-NEXT: ; def s[0:1] 9193; GFX940-NEXT: ;;#ASMEND 9194; GFX940-NEXT: s_lshr_b32 s1, s1, 16 9195; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 9196; GFX940-NEXT: s_mov_b32 s9, s0 9197; GFX940-NEXT: ;;#ASMSTART 9198; GFX940-NEXT: ; use s[8:9] 9199; GFX940-NEXT: ;;#ASMEND 9200; GFX940-NEXT: s_setpc_b64 s[30:31] 9201 %vec0 = call <4 x i16> asm "; def $0", "=s"() 9202 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 3, i32 0, i32 0> 9203 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 9204 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 9205 ret void 9206} 9207 9208define void @s_shuffle_v3i16_v4i16__4_0_0() { 9209; GFX900-LABEL: s_shuffle_v3i16_v4i16__4_0_0: 9210; GFX900: ; %bb.0: 9211; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9212; GFX900-NEXT: ;;#ASMSTART 9213; GFX900-NEXT: ; def s[4:5] 9214; GFX900-NEXT: ;;#ASMEND 9215; GFX900-NEXT: s_lshl_b32 s8, s4, 16 9216; GFX900-NEXT: s_mov_b32 s9, s4 9217; GFX900-NEXT: ;;#ASMSTART 9218; GFX900-NEXT: ; use s[8:9] 9219; GFX900-NEXT: ;;#ASMEND 9220; GFX900-NEXT: s_setpc_b64 s[30:31] 9221; 9222; GFX90A-LABEL: s_shuffle_v3i16_v4i16__4_0_0: 9223; GFX90A: ; %bb.0: 9224; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9225; GFX90A-NEXT: ;;#ASMSTART 9226; GFX90A-NEXT: ; def s[4:5] 9227; GFX90A-NEXT: ;;#ASMEND 9228; GFX90A-NEXT: s_lshl_b32 s8, s4, 16 9229; GFX90A-NEXT: s_mov_b32 s9, s4 
9230; GFX90A-NEXT: ;;#ASMSTART 9231; GFX90A-NEXT: ; use s[8:9] 9232; GFX90A-NEXT: ;;#ASMEND 9233; GFX90A-NEXT: s_setpc_b64 s[30:31] 9234; 9235; GFX940-LABEL: s_shuffle_v3i16_v4i16__4_0_0: 9236; GFX940: ; %bb.0: 9237; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9238; GFX940-NEXT: ;;#ASMSTART 9239; GFX940-NEXT: ; def s[0:1] 9240; GFX940-NEXT: ;;#ASMEND 9241; GFX940-NEXT: s_lshl_b32 s8, s0, 16 9242; GFX940-NEXT: s_mov_b32 s9, s0 9243; GFX940-NEXT: ;;#ASMSTART 9244; GFX940-NEXT: ; use s[8:9] 9245; GFX940-NEXT: ;;#ASMEND 9246; GFX940-NEXT: s_setpc_b64 s[30:31] 9247 %vec0 = call <4 x i16> asm "; def $0", "=s"() 9248 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 4, i32 0, i32 0> 9249 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 9250 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 9251 ret void 9252} 9253 9254define void @s_shuffle_v3i16_v4i16__5_0_0() { 9255; GFX900-LABEL: s_shuffle_v3i16_v4i16__5_0_0: 9256; GFX900: ; %bb.0: 9257; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9258; GFX900-NEXT: ;;#ASMSTART 9259; GFX900-NEXT: ; def s[4:5] 9260; GFX900-NEXT: ;;#ASMEND 9261; GFX900-NEXT: ;;#ASMSTART 9262; GFX900-NEXT: ; def s[6:7] 9263; GFX900-NEXT: ;;#ASMEND 9264; GFX900-NEXT: s_lshr_b32 s5, s6, 16 9265; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4 9266; GFX900-NEXT: s_mov_b32 s9, s4 9267; GFX900-NEXT: ;;#ASMSTART 9268; GFX900-NEXT: ; use s[8:9] 9269; GFX900-NEXT: ;;#ASMEND 9270; GFX900-NEXT: s_setpc_b64 s[30:31] 9271; 9272; GFX90A-LABEL: s_shuffle_v3i16_v4i16__5_0_0: 9273; GFX90A: ; %bb.0: 9274; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9275; GFX90A-NEXT: ;;#ASMSTART 9276; GFX90A-NEXT: ; def s[4:5] 9277; GFX90A-NEXT: ;;#ASMEND 9278; GFX90A-NEXT: ;;#ASMSTART 9279; GFX90A-NEXT: ; def s[6:7] 9280; GFX90A-NEXT: ;;#ASMEND 9281; GFX90A-NEXT: s_lshr_b32 s5, s6, 16 9282; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4 9283; GFX90A-NEXT: s_mov_b32 s9, s4 9284; 
GFX90A-NEXT: ;;#ASMSTART 9285; GFX90A-NEXT: ; use s[8:9] 9286; GFX90A-NEXT: ;;#ASMEND 9287; GFX90A-NEXT: s_setpc_b64 s[30:31] 9288; 9289; GFX940-LABEL: s_shuffle_v3i16_v4i16__5_0_0: 9290; GFX940: ; %bb.0: 9291; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9292; GFX940-NEXT: ;;#ASMSTART 9293; GFX940-NEXT: ; def s[0:1] 9294; GFX940-NEXT: ;;#ASMEND 9295; GFX940-NEXT: ;;#ASMSTART 9296; GFX940-NEXT: ; def s[2:3] 9297; GFX940-NEXT: ;;#ASMEND 9298; GFX940-NEXT: s_lshr_b32 s1, s2, 16 9299; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 9300; GFX940-NEXT: s_mov_b32 s9, s0 9301; GFX940-NEXT: ;;#ASMSTART 9302; GFX940-NEXT: ; use s[8:9] 9303; GFX940-NEXT: ;;#ASMEND 9304; GFX940-NEXT: s_setpc_b64 s[30:31] 9305 %vec0 = call <4 x i16> asm "; def $0", "=s"() 9306 %vec1 = call <4 x i16> asm "; def $0", "=s"() 9307 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 5, i32 0, i32 0> 9308 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 9309 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 9310 ret void 9311} 9312 9313define void @s_shuffle_v3i16_v4i16__6_0_0() { 9314; GFX900-LABEL: s_shuffle_v3i16_v4i16__6_0_0: 9315; GFX900: ; %bb.0: 9316; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9317; GFX900-NEXT: ;;#ASMSTART 9318; GFX900-NEXT: ; def s[4:5] 9319; GFX900-NEXT: ;;#ASMEND 9320; GFX900-NEXT: ;;#ASMSTART 9321; GFX900-NEXT: ; def s[6:7] 9322; GFX900-NEXT: ;;#ASMEND 9323; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s4 9324; GFX900-NEXT: s_mov_b32 s9, s4 9325; GFX900-NEXT: ;;#ASMSTART 9326; GFX900-NEXT: ; use s[8:9] 9327; GFX900-NEXT: ;;#ASMEND 9328; GFX900-NEXT: s_setpc_b64 s[30:31] 9329; 9330; GFX90A-LABEL: s_shuffle_v3i16_v4i16__6_0_0: 9331; GFX90A: ; %bb.0: 9332; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9333; GFX90A-NEXT: ;;#ASMSTART 9334; GFX90A-NEXT: ; def s[4:5] 9335; GFX90A-NEXT: ;;#ASMEND 9336; GFX90A-NEXT: ;;#ASMSTART 9337; GFX90A-NEXT: ; def s[6:7] 9338; 
GFX90A-NEXT: ;;#ASMEND 9339; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s4 9340; GFX90A-NEXT: s_mov_b32 s9, s4 9341; GFX90A-NEXT: ;;#ASMSTART 9342; GFX90A-NEXT: ; use s[8:9] 9343; GFX90A-NEXT: ;;#ASMEND 9344; GFX90A-NEXT: s_setpc_b64 s[30:31] 9345; 9346; GFX940-LABEL: s_shuffle_v3i16_v4i16__6_0_0: 9347; GFX940: ; %bb.0: 9348; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9349; GFX940-NEXT: ;;#ASMSTART 9350; GFX940-NEXT: ; def s[0:1] 9351; GFX940-NEXT: ;;#ASMEND 9352; GFX940-NEXT: ;;#ASMSTART 9353; GFX940-NEXT: ; def s[2:3] 9354; GFX940-NEXT: ;;#ASMEND 9355; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 9356; GFX940-NEXT: s_mov_b32 s9, s0 9357; GFX940-NEXT: ;;#ASMSTART 9358; GFX940-NEXT: ; use s[8:9] 9359; GFX940-NEXT: ;;#ASMEND 9360; GFX940-NEXT: s_setpc_b64 s[30:31] 9361 %vec0 = call <4 x i16> asm "; def $0", "=s"() 9362 %vec1 = call <4 x i16> asm "; def $0", "=s"() 9363 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 6, i32 0, i32 0> 9364 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 9365 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 9366 ret void 9367} 9368 9369define void @s_shuffle_v3i16_v4i16__7_0_0() { 9370; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_0_0: 9371; GFX900: ; %bb.0: 9372; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9373; GFX900-NEXT: ;;#ASMSTART 9374; GFX900-NEXT: ; def s[4:5] 9375; GFX900-NEXT: ;;#ASMEND 9376; GFX900-NEXT: ;;#ASMSTART 9377; GFX900-NEXT: ; def s[6:7] 9378; GFX900-NEXT: ;;#ASMEND 9379; GFX900-NEXT: s_lshr_b32 s5, s7, 16 9380; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4 9381; GFX900-NEXT: s_mov_b32 s9, s4 9382; GFX900-NEXT: ;;#ASMSTART 9383; GFX900-NEXT: ; use s[8:9] 9384; GFX900-NEXT: ;;#ASMEND 9385; GFX900-NEXT: s_setpc_b64 s[30:31] 9386; 9387; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_0_0: 9388; GFX90A: ; %bb.0: 9389; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9390; GFX90A-NEXT: ;;#ASMSTART 9391; GFX90A-NEXT: ; def 
s[4:5] 9392; GFX90A-NEXT: ;;#ASMEND 9393; GFX90A-NEXT: ;;#ASMSTART 9394; GFX90A-NEXT: ; def s[6:7] 9395; GFX90A-NEXT: ;;#ASMEND 9396; GFX90A-NEXT: s_lshr_b32 s5, s7, 16 9397; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4 9398; GFX90A-NEXT: s_mov_b32 s9, s4 9399; GFX90A-NEXT: ;;#ASMSTART 9400; GFX90A-NEXT: ; use s[8:9] 9401; GFX90A-NEXT: ;;#ASMEND 9402; GFX90A-NEXT: s_setpc_b64 s[30:31] 9403; 9404; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_0_0: 9405; GFX940: ; %bb.0: 9406; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9407; GFX940-NEXT: ;;#ASMSTART 9408; GFX940-NEXT: ; def s[0:1] 9409; GFX940-NEXT: ;;#ASMEND 9410; GFX940-NEXT: ;;#ASMSTART 9411; GFX940-NEXT: ; def s[2:3] 9412; GFX940-NEXT: ;;#ASMEND 9413; GFX940-NEXT: s_lshr_b32 s1, s3, 16 9414; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 9415; GFX940-NEXT: s_mov_b32 s9, s0 9416; GFX940-NEXT: ;;#ASMSTART 9417; GFX940-NEXT: ; use s[8:9] 9418; GFX940-NEXT: ;;#ASMEND 9419; GFX940-NEXT: s_setpc_b64 s[30:31] 9420 %vec0 = call <4 x i16> asm "; def $0", "=s"() 9421 %vec1 = call <4 x i16> asm "; def $0", "=s"() 9422 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 0, i32 0> 9423 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 9424 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 9425 ret void 9426} 9427 9428define void @s_shuffle_v3i16_v4i16__7_u_0() { 9429; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_u_0: 9430; GFX900: ; %bb.0: 9431; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9432; GFX900-NEXT: ;;#ASMSTART 9433; GFX900-NEXT: ; def s[4:5] 9434; GFX900-NEXT: ;;#ASMEND 9435; GFX900-NEXT: ;;#ASMSTART 9436; GFX900-NEXT: ; def s[6:7] 9437; GFX900-NEXT: ;;#ASMEND 9438; GFX900-NEXT: s_lshr_b32 s8, s7, 16 9439; GFX900-NEXT: s_mov_b32 s9, s4 9440; GFX900-NEXT: ;;#ASMSTART 9441; GFX900-NEXT: ; use s[8:9] 9442; GFX900-NEXT: ;;#ASMEND 9443; GFX900-NEXT: s_setpc_b64 s[30:31] 9444; 9445; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_u_0: 
9446; GFX90A: ; %bb.0: 9447; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9448; GFX90A-NEXT: ;;#ASMSTART 9449; GFX90A-NEXT: ; def s[4:5] 9450; GFX90A-NEXT: ;;#ASMEND 9451; GFX90A-NEXT: ;;#ASMSTART 9452; GFX90A-NEXT: ; def s[6:7] 9453; GFX90A-NEXT: ;;#ASMEND 9454; GFX90A-NEXT: s_lshr_b32 s8, s7, 16 9455; GFX90A-NEXT: s_mov_b32 s9, s4 9456; GFX90A-NEXT: ;;#ASMSTART 9457; GFX90A-NEXT: ; use s[8:9] 9458; GFX90A-NEXT: ;;#ASMEND 9459; GFX90A-NEXT: s_setpc_b64 s[30:31] 9460; 9461; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_u_0: 9462; GFX940: ; %bb.0: 9463; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9464; GFX940-NEXT: ;;#ASMSTART 9465; GFX940-NEXT: ; def s[0:1] 9466; GFX940-NEXT: ;;#ASMEND 9467; GFX940-NEXT: ;;#ASMSTART 9468; GFX940-NEXT: ; def s[2:3] 9469; GFX940-NEXT: ;;#ASMEND 9470; GFX940-NEXT: s_lshr_b32 s8, s3, 16 9471; GFX940-NEXT: s_mov_b32 s9, s0 9472; GFX940-NEXT: ;;#ASMSTART 9473; GFX940-NEXT: ; use s[8:9] 9474; GFX940-NEXT: ;;#ASMEND 9475; GFX940-NEXT: s_setpc_b64 s[30:31] 9476 %vec0 = call <4 x i16> asm "; def $0", "=s"() 9477 %vec1 = call <4 x i16> asm "; def $0", "=s"() 9478 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 poison, i32 0> 9479 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 9480 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 9481 ret void 9482} 9483 9484define void @s_shuffle_v3i16_v4i16__7_1_0() { 9485; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_1_0: 9486; GFX900: ; %bb.0: 9487; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9488; GFX900-NEXT: ;;#ASMSTART 9489; GFX900-NEXT: ; def s[4:5] 9490; GFX900-NEXT: ;;#ASMEND 9491; GFX900-NEXT: ;;#ASMSTART 9492; GFX900-NEXT: ; def s[6:7] 9493; GFX900-NEXT: ;;#ASMEND 9494; GFX900-NEXT: s_pack_hh_b32_b16 s8, s7, s4 9495; GFX900-NEXT: s_mov_b32 s9, s4 9496; GFX900-NEXT: ;;#ASMSTART 9497; GFX900-NEXT: ; use s[8:9] 9498; GFX900-NEXT: ;;#ASMEND 9499; GFX900-NEXT: s_setpc_b64 s[30:31] 
9500; 9501; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_1_0: 9502; GFX90A: ; %bb.0: 9503; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9504; GFX90A-NEXT: ;;#ASMSTART 9505; GFX90A-NEXT: ; def s[4:5] 9506; GFX90A-NEXT: ;;#ASMEND 9507; GFX90A-NEXT: ;;#ASMSTART 9508; GFX90A-NEXT: ; def s[6:7] 9509; GFX90A-NEXT: ;;#ASMEND 9510; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s7, s4 9511; GFX90A-NEXT: s_mov_b32 s9, s4 9512; GFX90A-NEXT: ;;#ASMSTART 9513; GFX90A-NEXT: ; use s[8:9] 9514; GFX90A-NEXT: ;;#ASMEND 9515; GFX90A-NEXT: s_setpc_b64 s[30:31] 9516; 9517; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_1_0: 9518; GFX940: ; %bb.0: 9519; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9520; GFX940-NEXT: ;;#ASMSTART 9521; GFX940-NEXT: ; def s[0:1] 9522; GFX940-NEXT: ;;#ASMEND 9523; GFX940-NEXT: ;;#ASMSTART 9524; GFX940-NEXT: ; def s[2:3] 9525; GFX940-NEXT: ;;#ASMEND 9526; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s0 9527; GFX940-NEXT: s_mov_b32 s9, s0 9528; GFX940-NEXT: ;;#ASMSTART 9529; GFX940-NEXT: ; use s[8:9] 9530; GFX940-NEXT: ;;#ASMEND 9531; GFX940-NEXT: s_setpc_b64 s[30:31] 9532 %vec0 = call <4 x i16> asm "; def $0", "=s"() 9533 %vec1 = call <4 x i16> asm "; def $0", "=s"() 9534 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 1, i32 0> 9535 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 9536 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 9537 ret void 9538} 9539 9540define void @s_shuffle_v3i16_v4i16__7_2_0() { 9541; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_2_0: 9542; GFX900: ; %bb.0: 9543; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9544; GFX900-NEXT: ;;#ASMSTART 9545; GFX900-NEXT: ; def s[6:7] 9546; GFX900-NEXT: ;;#ASMEND 9547; GFX900-NEXT: s_lshr_b32 s6, s7, 16 9548; GFX900-NEXT: ;;#ASMSTART 9549; GFX900-NEXT: ; def s[4:5] 9550; GFX900-NEXT: ;;#ASMEND 9551; GFX900-NEXT: s_pack_ll_b32_b16 s8, s6, s5 9552; GFX900-NEXT: s_mov_b32 s9, s4 9553; GFX900-NEXT: ;;#ASMSTART 
9554; GFX900-NEXT: ; use s[8:9] 9555; GFX900-NEXT: ;;#ASMEND 9556; GFX900-NEXT: s_setpc_b64 s[30:31] 9557; 9558; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_2_0: 9559; GFX90A: ; %bb.0: 9560; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9561; GFX90A-NEXT: ;;#ASMSTART 9562; GFX90A-NEXT: ; def s[6:7] 9563; GFX90A-NEXT: ;;#ASMEND 9564; GFX90A-NEXT: s_lshr_b32 s6, s7, 16 9565; GFX90A-NEXT: ;;#ASMSTART 9566; GFX90A-NEXT: ; def s[4:5] 9567; GFX90A-NEXT: ;;#ASMEND 9568; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s6, s5 9569; GFX90A-NEXT: s_mov_b32 s9, s4 9570; GFX90A-NEXT: ;;#ASMSTART 9571; GFX90A-NEXT: ; use s[8:9] 9572; GFX90A-NEXT: ;;#ASMEND 9573; GFX90A-NEXT: s_setpc_b64 s[30:31] 9574; 9575; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_2_0: 9576; GFX940: ; %bb.0: 9577; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9578; GFX940-NEXT: ;;#ASMSTART 9579; GFX940-NEXT: ; def s[2:3] 9580; GFX940-NEXT: ;;#ASMEND 9581; GFX940-NEXT: s_lshr_b32 s2, s3, 16 9582; GFX940-NEXT: ;;#ASMSTART 9583; GFX940-NEXT: ; def s[0:1] 9584; GFX940-NEXT: ;;#ASMEND 9585; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s1 9586; GFX940-NEXT: s_mov_b32 s9, s0 9587; GFX940-NEXT: ;;#ASMSTART 9588; GFX940-NEXT: ; use s[8:9] 9589; GFX940-NEXT: ;;#ASMEND 9590; GFX940-NEXT: s_setpc_b64 s[30:31] 9591 %vec0 = call <4 x i16> asm "; def $0", "=s"() 9592 %vec1 = call <4 x i16> asm "; def $0", "=s"() 9593 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 2, i32 0> 9594 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 9595 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 9596 ret void 9597} 9598 9599define void @s_shuffle_v3i16_v4i16__7_3_0() { 9600; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_3_0: 9601; GFX900: ; %bb.0: 9602; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9603; GFX900-NEXT: ;;#ASMSTART 9604; GFX900-NEXT: ; def s[4:5] 9605; GFX900-NEXT: ;;#ASMEND 9606; GFX900-NEXT: ;;#ASMSTART 9607; GFX900-NEXT: ; def s[6:7] 
9608; GFX900-NEXT: ;;#ASMEND 9609; GFX900-NEXT: s_pack_hh_b32_b16 s8, s7, s5 9610; GFX900-NEXT: s_mov_b32 s9, s4 9611; GFX900-NEXT: ;;#ASMSTART 9612; GFX900-NEXT: ; use s[8:9] 9613; GFX900-NEXT: ;;#ASMEND 9614; GFX900-NEXT: s_setpc_b64 s[30:31] 9615; 9616; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_3_0: 9617; GFX90A: ; %bb.0: 9618; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9619; GFX90A-NEXT: ;;#ASMSTART 9620; GFX90A-NEXT: ; def s[4:5] 9621; GFX90A-NEXT: ;;#ASMEND 9622; GFX90A-NEXT: ;;#ASMSTART 9623; GFX90A-NEXT: ; def s[6:7] 9624; GFX90A-NEXT: ;;#ASMEND 9625; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s7, s5 9626; GFX90A-NEXT: s_mov_b32 s9, s4 9627; GFX90A-NEXT: ;;#ASMSTART 9628; GFX90A-NEXT: ; use s[8:9] 9629; GFX90A-NEXT: ;;#ASMEND 9630; GFX90A-NEXT: s_setpc_b64 s[30:31] 9631; 9632; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_3_0: 9633; GFX940: ; %bb.0: 9634; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9635; GFX940-NEXT: ;;#ASMSTART 9636; GFX940-NEXT: ; def s[0:1] 9637; GFX940-NEXT: ;;#ASMEND 9638; GFX940-NEXT: ;;#ASMSTART 9639; GFX940-NEXT: ; def s[2:3] 9640; GFX940-NEXT: ;;#ASMEND 9641; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s1 9642; GFX940-NEXT: s_mov_b32 s9, s0 9643; GFX940-NEXT: ;;#ASMSTART 9644; GFX940-NEXT: ; use s[8:9] 9645; GFX940-NEXT: ;;#ASMEND 9646; GFX940-NEXT: s_setpc_b64 s[30:31] 9647 %vec0 = call <4 x i16> asm "; def $0", "=s"() 9648 %vec1 = call <4 x i16> asm "; def $0", "=s"() 9649 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 3, i32 0> 9650 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 9651 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 9652 ret void 9653} 9654 9655define void @s_shuffle_v3i16_v4i16__7_4_0() { 9656; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_4_0: 9657; GFX900: ; %bb.0: 9658; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9659; GFX900-NEXT: ;;#ASMSTART 9660; GFX900-NEXT: ; def s[4:5] 9661; GFX900-NEXT: ;;#ASMEND 
9662; GFX900-NEXT: ;;#ASMSTART 9663; GFX900-NEXT: ; def s[6:7] 9664; GFX900-NEXT: ;;#ASMEND 9665; GFX900-NEXT: s_lshr_b32 s5, s7, 16 9666; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s6 9667; GFX900-NEXT: s_mov_b32 s9, s4 9668; GFX900-NEXT: ;;#ASMSTART 9669; GFX900-NEXT: ; use s[8:9] 9670; GFX900-NEXT: ;;#ASMEND 9671; GFX900-NEXT: s_setpc_b64 s[30:31] 9672; 9673; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_4_0: 9674; GFX90A: ; %bb.0: 9675; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9676; GFX90A-NEXT: ;;#ASMSTART 9677; GFX90A-NEXT: ; def s[4:5] 9678; GFX90A-NEXT: ;;#ASMEND 9679; GFX90A-NEXT: ;;#ASMSTART 9680; GFX90A-NEXT: ; def s[6:7] 9681; GFX90A-NEXT: ;;#ASMEND 9682; GFX90A-NEXT: s_lshr_b32 s5, s7, 16 9683; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s6 9684; GFX90A-NEXT: s_mov_b32 s9, s4 9685; GFX90A-NEXT: ;;#ASMSTART 9686; GFX90A-NEXT: ; use s[8:9] 9687; GFX90A-NEXT: ;;#ASMEND 9688; GFX90A-NEXT: s_setpc_b64 s[30:31] 9689; 9690; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_4_0: 9691; GFX940: ; %bb.0: 9692; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9693; GFX940-NEXT: ;;#ASMSTART 9694; GFX940-NEXT: ; def s[0:1] 9695; GFX940-NEXT: ;;#ASMEND 9696; GFX940-NEXT: ;;#ASMSTART 9697; GFX940-NEXT: ; def s[2:3] 9698; GFX940-NEXT: ;;#ASMEND 9699; GFX940-NEXT: s_lshr_b32 s1, s3, 16 9700; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s2 9701; GFX940-NEXT: s_mov_b32 s9, s0 9702; GFX940-NEXT: ;;#ASMSTART 9703; GFX940-NEXT: ; use s[8:9] 9704; GFX940-NEXT: ;;#ASMEND 9705; GFX940-NEXT: s_setpc_b64 s[30:31] 9706 %vec0 = call <4 x i16> asm "; def $0", "=s"() 9707 %vec1 = call <4 x i16> asm "; def $0", "=s"() 9708 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 4, i32 0> 9709 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 9710 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 9711 ret void 9712} 9713 9714define void @s_shuffle_v3i16_v4i16__7_5_0() { 9715; GFX900-LABEL: 
s_shuffle_v3i16_v4i16__7_5_0: 9716; GFX900: ; %bb.0: 9717; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9718; GFX900-NEXT: ;;#ASMSTART 9719; GFX900-NEXT: ; def s[4:5] 9720; GFX900-NEXT: ;;#ASMEND 9721; GFX900-NEXT: ;;#ASMSTART 9722; GFX900-NEXT: ; def s[6:7] 9723; GFX900-NEXT: ;;#ASMEND 9724; GFX900-NEXT: s_pack_hh_b32_b16 s8, s7, s6 9725; GFX900-NEXT: s_mov_b32 s9, s4 9726; GFX900-NEXT: ;;#ASMSTART 9727; GFX900-NEXT: ; use s[8:9] 9728; GFX900-NEXT: ;;#ASMEND 9729; GFX900-NEXT: s_setpc_b64 s[30:31] 9730; 9731; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_5_0: 9732; GFX90A: ; %bb.0: 9733; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9734; GFX90A-NEXT: ;;#ASMSTART 9735; GFX90A-NEXT: ; def s[4:5] 9736; GFX90A-NEXT: ;;#ASMEND 9737; GFX90A-NEXT: ;;#ASMSTART 9738; GFX90A-NEXT: ; def s[6:7] 9739; GFX90A-NEXT: ;;#ASMEND 9740; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s7, s6 9741; GFX90A-NEXT: s_mov_b32 s9, s4 9742; GFX90A-NEXT: ;;#ASMSTART 9743; GFX90A-NEXT: ; use s[8:9] 9744; GFX90A-NEXT: ;;#ASMEND 9745; GFX90A-NEXT: s_setpc_b64 s[30:31] 9746; 9747; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_5_0: 9748; GFX940: ; %bb.0: 9749; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9750; GFX940-NEXT: ;;#ASMSTART 9751; GFX940-NEXT: ; def s[0:1] 9752; GFX940-NEXT: ;;#ASMEND 9753; GFX940-NEXT: ;;#ASMSTART 9754; GFX940-NEXT: ; def s[2:3] 9755; GFX940-NEXT: ;;#ASMEND 9756; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s2 9757; GFX940-NEXT: s_mov_b32 s9, s0 9758; GFX940-NEXT: ;;#ASMSTART 9759; GFX940-NEXT: ; use s[8:9] 9760; GFX940-NEXT: ;;#ASMEND 9761; GFX940-NEXT: s_setpc_b64 s[30:31] 9762 %vec0 = call <4 x i16> asm "; def $0", "=s"() 9763 %vec1 = call <4 x i16> asm "; def $0", "=s"() 9764 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 5, i32 0> 9765 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 9766 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 9767 ret void 9768} 9769 
9770define void @s_shuffle_v3i16_v4i16__7_6_0() { 9771; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_6_0: 9772; GFX900: ; %bb.0: 9773; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9774; GFX900-NEXT: ;;#ASMSTART 9775; GFX900-NEXT: ; def s[4:5] 9776; GFX900-NEXT: ;;#ASMEND 9777; GFX900-NEXT: ;;#ASMSTART 9778; GFX900-NEXT: ; def s[6:7] 9779; GFX900-NEXT: ;;#ASMEND 9780; GFX900-NEXT: s_lshr_b32 s5, s7, 16 9781; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s7 9782; GFX900-NEXT: s_mov_b32 s9, s4 9783; GFX900-NEXT: ;;#ASMSTART 9784; GFX900-NEXT: ; use s[8:9] 9785; GFX900-NEXT: ;;#ASMEND 9786; GFX900-NEXT: s_setpc_b64 s[30:31] 9787; 9788; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_6_0: 9789; GFX90A: ; %bb.0: 9790; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9791; GFX90A-NEXT: ;;#ASMSTART 9792; GFX90A-NEXT: ; def s[4:5] 9793; GFX90A-NEXT: ;;#ASMEND 9794; GFX90A-NEXT: ;;#ASMSTART 9795; GFX90A-NEXT: ; def s[6:7] 9796; GFX90A-NEXT: ;;#ASMEND 9797; GFX90A-NEXT: s_lshr_b32 s5, s7, 16 9798; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s7 9799; GFX90A-NEXT: s_mov_b32 s9, s4 9800; GFX90A-NEXT: ;;#ASMSTART 9801; GFX90A-NEXT: ; use s[8:9] 9802; GFX90A-NEXT: ;;#ASMEND 9803; GFX90A-NEXT: s_setpc_b64 s[30:31] 9804; 9805; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_6_0: 9806; GFX940: ; %bb.0: 9807; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9808; GFX940-NEXT: ;;#ASMSTART 9809; GFX940-NEXT: ; def s[0:1] 9810; GFX940-NEXT: ;;#ASMEND 9811; GFX940-NEXT: ;;#ASMSTART 9812; GFX940-NEXT: ; def s[2:3] 9813; GFX940-NEXT: ;;#ASMEND 9814; GFX940-NEXT: s_lshr_b32 s1, s3, 16 9815; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s3 9816; GFX940-NEXT: s_mov_b32 s9, s0 9817; GFX940-NEXT: ;;#ASMSTART 9818; GFX940-NEXT: ; use s[8:9] 9819; GFX940-NEXT: ;;#ASMEND 9820; GFX940-NEXT: s_setpc_b64 s[30:31] 9821 %vec0 = call <4 x i16> asm "; def $0", "=s"() 9822 %vec1 = call <4 x i16> asm "; def $0", "=s"() 9823 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 6, i32 0> 9824 %extend3 = 
shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 9825 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 9826 ret void 9827} 9828 9829define void @s_shuffle_v3i16_v4i16__u_1_1() { 9830; GFX9-LABEL: s_shuffle_v3i16_v4i16__u_1_1: 9831; GFX9: ; %bb.0: 9832; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9833; GFX9-NEXT: ;;#ASMSTART 9834; GFX9-NEXT: ; def s[8:9] 9835; GFX9-NEXT: ;;#ASMEND 9836; GFX9-NEXT: s_lshr_b32 s9, s8, 16 9837; GFX9-NEXT: ;;#ASMSTART 9838; GFX9-NEXT: ; use s[8:9] 9839; GFX9-NEXT: ;;#ASMEND 9840; GFX9-NEXT: s_setpc_b64 s[30:31] 9841 %vec0 = call <4 x i16> asm "; def $0", "=s"() 9842 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 poison, i32 1, i32 1> 9843 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 9844 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 9845 ret void 9846} 9847 9848define void @s_shuffle_v3i16_v4i16__0_1_1() { 9849; GFX9-LABEL: s_shuffle_v3i16_v4i16__0_1_1: 9850; GFX9: ; %bb.0: 9851; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9852; GFX9-NEXT: ;;#ASMSTART 9853; GFX9-NEXT: ; def s[8:9] 9854; GFX9-NEXT: ;;#ASMEND 9855; GFX9-NEXT: s_lshr_b32 s9, s8, 16 9856; GFX9-NEXT: ;;#ASMSTART 9857; GFX9-NEXT: ; use s[8:9] 9858; GFX9-NEXT: ;;#ASMEND 9859; GFX9-NEXT: s_setpc_b64 s[30:31] 9860 %vec0 = call <4 x i16> asm "; def $0", "=s"() 9861 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 1> 9862 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 9863 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 9864 ret void 9865} 9866 9867define void @s_shuffle_v3i16_v4i16__1_1_1() { 9868; GFX900-LABEL: s_shuffle_v3i16_v4i16__1_1_1: 9869; GFX900: ; %bb.0: 9870; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9871; GFX900-NEXT: ;;#ASMSTART 9872; GFX900-NEXT: ; def s[4:5] 
9873; GFX900-NEXT: ;;#ASMEND 9874; GFX900-NEXT: s_lshr_b32 s9, s4, 16 9875; GFX900-NEXT: s_pack_hh_b32_b16 s8, s4, s4 9876; GFX900-NEXT: ;;#ASMSTART 9877; GFX900-NEXT: ; use s[8:9] 9878; GFX900-NEXT: ;;#ASMEND 9879; GFX900-NEXT: s_setpc_b64 s[30:31] 9880; 9881; GFX90A-LABEL: s_shuffle_v3i16_v4i16__1_1_1: 9882; GFX90A: ; %bb.0: 9883; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9884; GFX90A-NEXT: ;;#ASMSTART 9885; GFX90A-NEXT: ; def s[4:5] 9886; GFX90A-NEXT: ;;#ASMEND 9887; GFX90A-NEXT: s_lshr_b32 s9, s4, 16 9888; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s4, s4 9889; GFX90A-NEXT: ;;#ASMSTART 9890; GFX90A-NEXT: ; use s[8:9] 9891; GFX90A-NEXT: ;;#ASMEND 9892; GFX90A-NEXT: s_setpc_b64 s[30:31] 9893; 9894; GFX940-LABEL: s_shuffle_v3i16_v4i16__1_1_1: 9895; GFX940: ; %bb.0: 9896; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9897; GFX940-NEXT: ;;#ASMSTART 9898; GFX940-NEXT: ; def s[0:1] 9899; GFX940-NEXT: ;;#ASMEND 9900; GFX940-NEXT: s_lshr_b32 s9, s0, 16 9901; GFX940-NEXT: s_pack_hh_b32_b16 s8, s0, s0 9902; GFX940-NEXT: ;;#ASMSTART 9903; GFX940-NEXT: ; use s[8:9] 9904; GFX940-NEXT: ;;#ASMEND 9905; GFX940-NEXT: s_setpc_b64 s[30:31] 9906 %vec0 = call <4 x i16> asm "; def $0", "=s"() 9907 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 1, i32 1, i32 1> 9908 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 9909 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 9910 ret void 9911} 9912 9913define void @s_shuffle_v3i16_v4i16__2_1_1() { 9914; GFX900-LABEL: s_shuffle_v3i16_v4i16__2_1_1: 9915; GFX900: ; %bb.0: 9916; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9917; GFX900-NEXT: ;;#ASMSTART 9918; GFX900-NEXT: ; def s[4:5] 9919; GFX900-NEXT: ;;#ASMEND 9920; GFX900-NEXT: s_pack_lh_b32_b16 s8, s5, s4 9921; GFX900-NEXT: s_lshr_b32 s9, s4, 16 9922; GFX900-NEXT: ;;#ASMSTART 9923; GFX900-NEXT: ; use s[8:9] 9924; GFX900-NEXT: ;;#ASMEND 9925; GFX900-NEXT: s_setpc_b64 s[30:31] 
9926; 9927; GFX90A-LABEL: s_shuffle_v3i16_v4i16__2_1_1: 9928; GFX90A: ; %bb.0: 9929; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9930; GFX90A-NEXT: ;;#ASMSTART 9931; GFX90A-NEXT: ; def s[4:5] 9932; GFX90A-NEXT: ;;#ASMEND 9933; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s5, s4 9934; GFX90A-NEXT: s_lshr_b32 s9, s4, 16 9935; GFX90A-NEXT: ;;#ASMSTART 9936; GFX90A-NEXT: ; use s[8:9] 9937; GFX90A-NEXT: ;;#ASMEND 9938; GFX90A-NEXT: s_setpc_b64 s[30:31] 9939; 9940; GFX940-LABEL: s_shuffle_v3i16_v4i16__2_1_1: 9941; GFX940: ; %bb.0: 9942; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9943; GFX940-NEXT: ;;#ASMSTART 9944; GFX940-NEXT: ; def s[0:1] 9945; GFX940-NEXT: ;;#ASMEND 9946; GFX940-NEXT: s_pack_lh_b32_b16 s8, s1, s0 9947; GFX940-NEXT: s_lshr_b32 s9, s0, 16 9948; GFX940-NEXT: ;;#ASMSTART 9949; GFX940-NEXT: ; use s[8:9] 9950; GFX940-NEXT: ;;#ASMEND 9951; GFX940-NEXT: s_setpc_b64 s[30:31] 9952 %vec0 = call <4 x i16> asm "; def $0", "=s"() 9953 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 2, i32 1, i32 1> 9954 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 9955 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 9956 ret void 9957} 9958 9959define void @s_shuffle_v3i16_v4i16__3_1_1() { 9960; GFX900-LABEL: s_shuffle_v3i16_v4i16__3_1_1: 9961; GFX900: ; %bb.0: 9962; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9963; GFX900-NEXT: ;;#ASMSTART 9964; GFX900-NEXT: ; def s[4:5] 9965; GFX900-NEXT: ;;#ASMEND 9966; GFX900-NEXT: s_pack_hh_b32_b16 s8, s5, s4 9967; GFX900-NEXT: s_lshr_b32 s9, s4, 16 9968; GFX900-NEXT: ;;#ASMSTART 9969; GFX900-NEXT: ; use s[8:9] 9970; GFX900-NEXT: ;;#ASMEND 9971; GFX900-NEXT: s_setpc_b64 s[30:31] 9972; 9973; GFX90A-LABEL: s_shuffle_v3i16_v4i16__3_1_1: 9974; GFX90A: ; %bb.0: 9975; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9976; GFX90A-NEXT: ;;#ASMSTART 9977; GFX90A-NEXT: ; def s[4:5] 9978; GFX90A-NEXT: ;;#ASMEND 9979; GFX90A-NEXT: 
s_pack_hh_b32_b16 s8, s5, s4 9980; GFX90A-NEXT: s_lshr_b32 s9, s4, 16 9981; GFX90A-NEXT: ;;#ASMSTART 9982; GFX90A-NEXT: ; use s[8:9] 9983; GFX90A-NEXT: ;;#ASMEND 9984; GFX90A-NEXT: s_setpc_b64 s[30:31] 9985; 9986; GFX940-LABEL: s_shuffle_v3i16_v4i16__3_1_1: 9987; GFX940: ; %bb.0: 9988; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9989; GFX940-NEXT: ;;#ASMSTART 9990; GFX940-NEXT: ; def s[0:1] 9991; GFX940-NEXT: ;;#ASMEND 9992; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s0 9993; GFX940-NEXT: s_lshr_b32 s9, s0, 16 9994; GFX940-NEXT: ;;#ASMSTART 9995; GFX940-NEXT: ; use s[8:9] 9996; GFX940-NEXT: ;;#ASMEND 9997; GFX940-NEXT: s_setpc_b64 s[30:31] 9998 %vec0 = call <4 x i16> asm "; def $0", "=s"() 9999 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 3, i32 1, i32 1> 10000 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 10001 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 10002 ret void 10003} 10004 10005define void @s_shuffle_v3i16_v4i16__4_1_1() { 10006; GFX9-LABEL: s_shuffle_v3i16_v4i16__4_1_1: 10007; GFX9: ; %bb.0: 10008; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10009; GFX9-NEXT: ;;#ASMSTART 10010; GFX9-NEXT: ; def s[8:9] 10011; GFX9-NEXT: ;;#ASMEND 10012; GFX9-NEXT: s_lshr_b32 s9, s8, 16 10013; GFX9-NEXT: ;;#ASMSTART 10014; GFX9-NEXT: ; use s[8:9] 10015; GFX9-NEXT: ;;#ASMEND 10016; GFX9-NEXT: s_setpc_b64 s[30:31] 10017 %vec0 = call <4 x i16> asm "; def $0", "=s"() 10018 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 4, i32 1, i32 1> 10019 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 10020 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 10021 ret void 10022} 10023 10024define void @s_shuffle_v3i16_v4i16__5_1_1() { 10025; GFX900-LABEL: s_shuffle_v3i16_v4i16__5_1_1: 10026; GFX900: ; %bb.0: 10027; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10028; 
GFX900-NEXT: ;;#ASMSTART 10029; GFX900-NEXT: ; def s[4:5] 10030; GFX900-NEXT: ;;#ASMEND 10031; GFX900-NEXT: ;;#ASMSTART 10032; GFX900-NEXT: ; def s[6:7] 10033; GFX900-NEXT: ;;#ASMEND 10034; GFX900-NEXT: s_pack_hh_b32_b16 s8, s6, s4 10035; GFX900-NEXT: s_lshr_b32 s9, s4, 16 10036; GFX900-NEXT: ;;#ASMSTART 10037; GFX900-NEXT: ; use s[8:9] 10038; GFX900-NEXT: ;;#ASMEND 10039; GFX900-NEXT: s_setpc_b64 s[30:31] 10040; 10041; GFX90A-LABEL: s_shuffle_v3i16_v4i16__5_1_1: 10042; GFX90A: ; %bb.0: 10043; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10044; GFX90A-NEXT: ;;#ASMSTART 10045; GFX90A-NEXT: ; def s[4:5] 10046; GFX90A-NEXT: ;;#ASMEND 10047; GFX90A-NEXT: ;;#ASMSTART 10048; GFX90A-NEXT: ; def s[6:7] 10049; GFX90A-NEXT: ;;#ASMEND 10050; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s6, s4 10051; GFX90A-NEXT: s_lshr_b32 s9, s4, 16 10052; GFX90A-NEXT: ;;#ASMSTART 10053; GFX90A-NEXT: ; use s[8:9] 10054; GFX90A-NEXT: ;;#ASMEND 10055; GFX90A-NEXT: s_setpc_b64 s[30:31] 10056; 10057; GFX940-LABEL: s_shuffle_v3i16_v4i16__5_1_1: 10058; GFX940: ; %bb.0: 10059; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10060; GFX940-NEXT: ;;#ASMSTART 10061; GFX940-NEXT: ; def s[0:1] 10062; GFX940-NEXT: ;;#ASMEND 10063; GFX940-NEXT: ;;#ASMSTART 10064; GFX940-NEXT: ; def s[2:3] 10065; GFX940-NEXT: ;;#ASMEND 10066; GFX940-NEXT: s_pack_hh_b32_b16 s8, s2, s0 10067; GFX940-NEXT: s_lshr_b32 s9, s0, 16 10068; GFX940-NEXT: ;;#ASMSTART 10069; GFX940-NEXT: ; use s[8:9] 10070; GFX940-NEXT: ;;#ASMEND 10071; GFX940-NEXT: s_setpc_b64 s[30:31] 10072 %vec0 = call <4 x i16> asm "; def $0", "=s"() 10073 %vec1 = call <4 x i16> asm "; def $0", "=s"() 10074 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 5, i32 1, i32 1> 10075 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 10076 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 10077 ret void 10078} 10079 10080define void @s_shuffle_v3i16_v4i16__6_1_1() { 10081; 
GFX900-LABEL: s_shuffle_v3i16_v4i16__6_1_1: 10082; GFX900: ; %bb.0: 10083; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10084; GFX900-NEXT: ;;#ASMSTART 10085; GFX900-NEXT: ; def s[4:5] 10086; GFX900-NEXT: ;;#ASMEND 10087; GFX900-NEXT: ;;#ASMSTART 10088; GFX900-NEXT: ; def s[6:7] 10089; GFX900-NEXT: ;;#ASMEND 10090; GFX900-NEXT: s_pack_lh_b32_b16 s8, s7, s4 10091; GFX900-NEXT: s_lshr_b32 s9, s4, 16 10092; GFX900-NEXT: ;;#ASMSTART 10093; GFX900-NEXT: ; use s[8:9] 10094; GFX900-NEXT: ;;#ASMEND 10095; GFX900-NEXT: s_setpc_b64 s[30:31] 10096; 10097; GFX90A-LABEL: s_shuffle_v3i16_v4i16__6_1_1: 10098; GFX90A: ; %bb.0: 10099; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10100; GFX90A-NEXT: ;;#ASMSTART 10101; GFX90A-NEXT: ; def s[4:5] 10102; GFX90A-NEXT: ;;#ASMEND 10103; GFX90A-NEXT: ;;#ASMSTART 10104; GFX90A-NEXT: ; def s[6:7] 10105; GFX90A-NEXT: ;;#ASMEND 10106; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s7, s4 10107; GFX90A-NEXT: s_lshr_b32 s9, s4, 16 10108; GFX90A-NEXT: ;;#ASMSTART 10109; GFX90A-NEXT: ; use s[8:9] 10110; GFX90A-NEXT: ;;#ASMEND 10111; GFX90A-NEXT: s_setpc_b64 s[30:31] 10112; 10113; GFX940-LABEL: s_shuffle_v3i16_v4i16__6_1_1: 10114; GFX940: ; %bb.0: 10115; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10116; GFX940-NEXT: ;;#ASMSTART 10117; GFX940-NEXT: ; def s[0:1] 10118; GFX940-NEXT: ;;#ASMEND 10119; GFX940-NEXT: ;;#ASMSTART 10120; GFX940-NEXT: ; def s[2:3] 10121; GFX940-NEXT: ;;#ASMEND 10122; GFX940-NEXT: s_pack_lh_b32_b16 s8, s3, s0 10123; GFX940-NEXT: s_lshr_b32 s9, s0, 16 10124; GFX940-NEXT: ;;#ASMSTART 10125; GFX940-NEXT: ; use s[8:9] 10126; GFX940-NEXT: ;;#ASMEND 10127; GFX940-NEXT: s_setpc_b64 s[30:31] 10128 %vec0 = call <4 x i16> asm "; def $0", "=s"() 10129 %vec1 = call <4 x i16> asm "; def $0", "=s"() 10130 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 6, i32 1, i32 1> 10131 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 10132 call void asm 
sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 10133 ret void 10134} 10135 10136define void @s_shuffle_v3i16_v4i16__7_1_1() { 10137; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_1_1: 10138; GFX900: ; %bb.0: 10139; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10140; GFX900-NEXT: ;;#ASMSTART 10141; GFX900-NEXT: ; def s[4:5] 10142; GFX900-NEXT: ;;#ASMEND 10143; GFX900-NEXT: ;;#ASMSTART 10144; GFX900-NEXT: ; def s[6:7] 10145; GFX900-NEXT: ;;#ASMEND 10146; GFX900-NEXT: s_pack_hh_b32_b16 s8, s7, s4 10147; GFX900-NEXT: s_lshr_b32 s9, s4, 16 10148; GFX900-NEXT: ;;#ASMSTART 10149; GFX900-NEXT: ; use s[8:9] 10150; GFX900-NEXT: ;;#ASMEND 10151; GFX900-NEXT: s_setpc_b64 s[30:31] 10152; 10153; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_1_1: 10154; GFX90A: ; %bb.0: 10155; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10156; GFX90A-NEXT: ;;#ASMSTART 10157; GFX90A-NEXT: ; def s[4:5] 10158; GFX90A-NEXT: ;;#ASMEND 10159; GFX90A-NEXT: ;;#ASMSTART 10160; GFX90A-NEXT: ; def s[6:7] 10161; GFX90A-NEXT: ;;#ASMEND 10162; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s7, s4 10163; GFX90A-NEXT: s_lshr_b32 s9, s4, 16 10164; GFX90A-NEXT: ;;#ASMSTART 10165; GFX90A-NEXT: ; use s[8:9] 10166; GFX90A-NEXT: ;;#ASMEND 10167; GFX90A-NEXT: s_setpc_b64 s[30:31] 10168; 10169; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_1_1: 10170; GFX940: ; %bb.0: 10171; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10172; GFX940-NEXT: ;;#ASMSTART 10173; GFX940-NEXT: ; def s[0:1] 10174; GFX940-NEXT: ;;#ASMEND 10175; GFX940-NEXT: ;;#ASMSTART 10176; GFX940-NEXT: ; def s[2:3] 10177; GFX940-NEXT: ;;#ASMEND 10178; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s0 10179; GFX940-NEXT: s_lshr_b32 s9, s0, 16 10180; GFX940-NEXT: ;;#ASMSTART 10181; GFX940-NEXT: ; use s[8:9] 10182; GFX940-NEXT: ;;#ASMEND 10183; GFX940-NEXT: s_setpc_b64 s[30:31] 10184 %vec0 = call <4 x i16> asm "; def $0", "=s"() 10185 %vec1 = call <4 x i16> asm "; def $0", "=s"() 10186 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 1, i32 
1> 10187 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 10188 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 10189 ret void 10190} 10191 10192define void @s_shuffle_v3i16_v4i16__7_u_1() { 10193; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_u_1: 10194; GFX900: ; %bb.0: 10195; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10196; GFX900-NEXT: ;;#ASMSTART 10197; GFX900-NEXT: ; def s[4:5] 10198; GFX900-NEXT: ;;#ASMEND 10199; GFX900-NEXT: ;;#ASMSTART 10200; GFX900-NEXT: ; def s[6:7] 10201; GFX900-NEXT: ;;#ASMEND 10202; GFX900-NEXT: s_lshr_b32 s9, s4, 16 10203; GFX900-NEXT: s_lshr_b32 s8, s7, 16 10204; GFX900-NEXT: ;;#ASMSTART 10205; GFX900-NEXT: ; use s[8:9] 10206; GFX900-NEXT: ;;#ASMEND 10207; GFX900-NEXT: s_setpc_b64 s[30:31] 10208; 10209; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_u_1: 10210; GFX90A: ; %bb.0: 10211; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10212; GFX90A-NEXT: ;;#ASMSTART 10213; GFX90A-NEXT: ; def s[4:5] 10214; GFX90A-NEXT: ;;#ASMEND 10215; GFX90A-NEXT: ;;#ASMSTART 10216; GFX90A-NEXT: ; def s[6:7] 10217; GFX90A-NEXT: ;;#ASMEND 10218; GFX90A-NEXT: s_lshr_b32 s9, s4, 16 10219; GFX90A-NEXT: s_lshr_b32 s8, s7, 16 10220; GFX90A-NEXT: ;;#ASMSTART 10221; GFX90A-NEXT: ; use s[8:9] 10222; GFX90A-NEXT: ;;#ASMEND 10223; GFX90A-NEXT: s_setpc_b64 s[30:31] 10224; 10225; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_u_1: 10226; GFX940: ; %bb.0: 10227; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10228; GFX940-NEXT: ;;#ASMSTART 10229; GFX940-NEXT: ; def s[0:1] 10230; GFX940-NEXT: ;;#ASMEND 10231; GFX940-NEXT: ;;#ASMSTART 10232; GFX940-NEXT: ; def s[2:3] 10233; GFX940-NEXT: ;;#ASMEND 10234; GFX940-NEXT: s_lshr_b32 s9, s0, 16 10235; GFX940-NEXT: s_lshr_b32 s8, s3, 16 10236; GFX940-NEXT: ;;#ASMSTART 10237; GFX940-NEXT: ; use s[8:9] 10238; GFX940-NEXT: ;;#ASMEND 10239; GFX940-NEXT: s_setpc_b64 s[30:31] 10240 %vec0 = call <4 x i16> asm "; def $0", "=s"() 10241 %vec1 = call <4 x i16> asm 
"; def $0", "=s"() 10242 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 poison, i32 1> 10243 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 10244 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 10245 ret void 10246} 10247 10248define void @s_shuffle_v3i16_v4i16__7_0_1() { 10249; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_0_1: 10250; GFX900: ; %bb.0: 10251; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10252; GFX900-NEXT: ;;#ASMSTART 10253; GFX900-NEXT: ; def s[4:5] 10254; GFX900-NEXT: ;;#ASMEND 10255; GFX900-NEXT: ;;#ASMSTART 10256; GFX900-NEXT: ; def s[6:7] 10257; GFX900-NEXT: ;;#ASMEND 10258; GFX900-NEXT: s_lshr_b32 s5, s7, 16 10259; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4 10260; GFX900-NEXT: s_lshr_b32 s9, s4, 16 10261; GFX900-NEXT: ;;#ASMSTART 10262; GFX900-NEXT: ; use s[8:9] 10263; GFX900-NEXT: ;;#ASMEND 10264; GFX900-NEXT: s_setpc_b64 s[30:31] 10265; 10266; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_0_1: 10267; GFX90A: ; %bb.0: 10268; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10269; GFX90A-NEXT: ;;#ASMSTART 10270; GFX90A-NEXT: ; def s[4:5] 10271; GFX90A-NEXT: ;;#ASMEND 10272; GFX90A-NEXT: ;;#ASMSTART 10273; GFX90A-NEXT: ; def s[6:7] 10274; GFX90A-NEXT: ;;#ASMEND 10275; GFX90A-NEXT: s_lshr_b32 s5, s7, 16 10276; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4 10277; GFX90A-NEXT: s_lshr_b32 s9, s4, 16 10278; GFX90A-NEXT: ;;#ASMSTART 10279; GFX90A-NEXT: ; use s[8:9] 10280; GFX90A-NEXT: ;;#ASMEND 10281; GFX90A-NEXT: s_setpc_b64 s[30:31] 10282; 10283; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_0_1: 10284; GFX940: ; %bb.0: 10285; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10286; GFX940-NEXT: ;;#ASMSTART 10287; GFX940-NEXT: ; def s[0:1] 10288; GFX940-NEXT: ;;#ASMEND 10289; GFX940-NEXT: ;;#ASMSTART 10290; GFX940-NEXT: ; def s[2:3] 10291; GFX940-NEXT: ;;#ASMEND 10292; GFX940-NEXT: s_lshr_b32 s1, s3, 16 10293; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 
10294; GFX940-NEXT: s_lshr_b32 s9, s0, 16 10295; GFX940-NEXT: ;;#ASMSTART 10296; GFX940-NEXT: ; use s[8:9] 10297; GFX940-NEXT: ;;#ASMEND 10298; GFX940-NEXT: s_setpc_b64 s[30:31] 10299 %vec0 = call <4 x i16> asm "; def $0", "=s"() 10300 %vec1 = call <4 x i16> asm "; def $0", "=s"() 10301 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 0, i32 1> 10302 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 10303 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 10304 ret void 10305} 10306 10307define void @s_shuffle_v3i16_v4i16__7_2_1() { 10308; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_2_1: 10309; GFX900: ; %bb.0: 10310; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10311; GFX900-NEXT: ;;#ASMSTART 10312; GFX900-NEXT: ; def s[6:7] 10313; GFX900-NEXT: ;;#ASMEND 10314; GFX900-NEXT: s_lshr_b32 s6, s7, 16 10315; GFX900-NEXT: ;;#ASMSTART 10316; GFX900-NEXT: ; def s[4:5] 10317; GFX900-NEXT: ;;#ASMEND 10318; GFX900-NEXT: s_pack_ll_b32_b16 s8, s6, s5 10319; GFX900-NEXT: s_lshr_b32 s9, s4, 16 10320; GFX900-NEXT: ;;#ASMSTART 10321; GFX900-NEXT: ; use s[8:9] 10322; GFX900-NEXT: ;;#ASMEND 10323; GFX900-NEXT: s_setpc_b64 s[30:31] 10324; 10325; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_2_1: 10326; GFX90A: ; %bb.0: 10327; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10328; GFX90A-NEXT: ;;#ASMSTART 10329; GFX90A-NEXT: ; def s[6:7] 10330; GFX90A-NEXT: ;;#ASMEND 10331; GFX90A-NEXT: s_lshr_b32 s6, s7, 16 10332; GFX90A-NEXT: ;;#ASMSTART 10333; GFX90A-NEXT: ; def s[4:5] 10334; GFX90A-NEXT: ;;#ASMEND 10335; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s6, s5 10336; GFX90A-NEXT: s_lshr_b32 s9, s4, 16 10337; GFX90A-NEXT: ;;#ASMSTART 10338; GFX90A-NEXT: ; use s[8:9] 10339; GFX90A-NEXT: ;;#ASMEND 10340; GFX90A-NEXT: s_setpc_b64 s[30:31] 10341; 10342; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_2_1: 10343; GFX940: ; %bb.0: 10344; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10345; GFX940-NEXT: 
; Closes @s_shuffle_v3i16_v4i16__7_2_1: mask <7, 2, 1> selects <%vec1[3], %vec0[2], %vec0[1]> from two "=s" asm defs; the result is padded to <4 x i16> and passed to asm in s[8:9].
; Opens @s_shuffle_v3i16_v4i16__7_3_1; its expected llc output starts here.
;;#ASMSTART 10346; GFX940-NEXT: ; def s[2:3] 10347; GFX940-NEXT: ;;#ASMEND 10348; GFX940-NEXT: s_lshr_b32 s2, s3, 16 10349; GFX940-NEXT: ;;#ASMSTART 10350; GFX940-NEXT: ; def s[0:1] 10351; GFX940-NEXT: ;;#ASMEND 10352; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s1 10353; GFX940-NEXT: s_lshr_b32 s9, s0, 16 10354; GFX940-NEXT: ;;#ASMSTART 10355; GFX940-NEXT: ; use s[8:9] 10356; GFX940-NEXT: ;;#ASMEND 10357; GFX940-NEXT: s_setpc_b64 s[30:31] 10358 %vec0 = call <4 x i16> asm "; def $0", "=s"() 10359 %vec1 = call <4 x i16> asm "; def $0", "=s"() 10360 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 2, i32 1> 10361 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 10362 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 10363 ret void 10364} 10365 10366define void @s_shuffle_v3i16_v4i16__7_3_1() { 10367; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_3_1: 10368; GFX900: ; %bb.0: 10369; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10370; GFX900-NEXT: ;;#ASMSTART 10371; GFX900-NEXT: ; def s[4:5] 10372; GFX900-NEXT: ;;#ASMEND 10373; GFX900-NEXT: ;;#ASMSTART 10374; GFX900-NEXT: ; def s[6:7] 10375; GFX900-NEXT: ;;#ASMEND 10376; GFX900-NEXT: s_pack_hh_b32_b16 s8, s7, s5 10377; GFX900-NEXT: s_lshr_b32 s9, s4, 16 10378; GFX900-NEXT: ;;#ASMSTART 10379; GFX900-NEXT: ; use s[8:9] 10380; GFX900-NEXT: ;;#ASMEND 10381; GFX900-NEXT: s_setpc_b64 s[30:31] 10382; 10383; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_3_1: 10384; GFX90A: ; %bb.0: 10385; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10386; GFX90A-NEXT: ;;#ASMSTART 10387; GFX90A-NEXT: ; def s[4:5] 10388; GFX90A-NEXT: ;;#ASMEND 10389; GFX90A-NEXT: ;;#ASMSTART 10390; GFX90A-NEXT: ; def s[6:7] 10391; GFX90A-NEXT: ;;#ASMEND 10392; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s7, s5 10393; GFX90A-NEXT: s_lshr_b32 s9, s4, 16 10394; GFX90A-NEXT: ;;#ASMSTART 10395; GFX90A-NEXT: ; use s[8:9] 10396; GFX90A-NEXT: ;;#ASMEND 10397; GFX90A-NEXT: 
; Closes @s_shuffle_v3i16_v4i16__7_3_1: mask <7, 3, 1> selects <%vec1[3], %vec0[3], %vec0[1]> from two "=s" asm defs; the result is padded to <4 x i16> and passed to asm in s[8:9].
; Opens @s_shuffle_v3i16_v4i16__7_4_1; its expected llc output starts here.
s_setpc_b64 s[30:31] 10398; 10399; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_3_1: 10400; GFX940: ; %bb.0: 10401; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10402; GFX940-NEXT: ;;#ASMSTART 10403; GFX940-NEXT: ; def s[0:1] 10404; GFX940-NEXT: ;;#ASMEND 10405; GFX940-NEXT: ;;#ASMSTART 10406; GFX940-NEXT: ; def s[2:3] 10407; GFX940-NEXT: ;;#ASMEND 10408; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s1 10409; GFX940-NEXT: s_lshr_b32 s9, s0, 16 10410; GFX940-NEXT: ;;#ASMSTART 10411; GFX940-NEXT: ; use s[8:9] 10412; GFX940-NEXT: ;;#ASMEND 10413; GFX940-NEXT: s_setpc_b64 s[30:31] 10414 %vec0 = call <4 x i16> asm "; def $0", "=s"() 10415 %vec1 = call <4 x i16> asm "; def $0", "=s"() 10416 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 3, i32 1> 10417 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 10418 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 10419 ret void 10420} 10421 10422define void @s_shuffle_v3i16_v4i16__7_4_1() { 10423; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_4_1: 10424; GFX900: ; %bb.0: 10425; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10426; GFX900-NEXT: ;;#ASMSTART 10427; GFX900-NEXT: ; def s[4:5] 10428; GFX900-NEXT: ;;#ASMEND 10429; GFX900-NEXT: ;;#ASMSTART 10430; GFX900-NEXT: ; def s[6:7] 10431; GFX900-NEXT: ;;#ASMEND 10432; GFX900-NEXT: s_lshr_b32 s5, s7, 16 10433; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s6 10434; GFX900-NEXT: s_lshr_b32 s9, s4, 16 10435; GFX900-NEXT: ;;#ASMSTART 10436; GFX900-NEXT: ; use s[8:9] 10437; GFX900-NEXT: ;;#ASMEND 10438; GFX900-NEXT: s_setpc_b64 s[30:31] 10439; 10440; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_4_1: 10441; GFX90A: ; %bb.0: 10442; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10443; GFX90A-NEXT: ;;#ASMSTART 10444; GFX90A-NEXT: ; def s[4:5] 10445; GFX90A-NEXT: ;;#ASMEND 10446; GFX90A-NEXT: ;;#ASMSTART 10447; GFX90A-NEXT: ; def s[6:7] 10448; GFX90A-NEXT: ;;#ASMEND 10449; GFX90A-NEXT: s_lshr_b32 
; Closes @s_shuffle_v3i16_v4i16__7_4_1: mask <7, 4, 1> selects <%vec1[3], %vec1[0], %vec0[1]> from two "=s" asm defs; the result is padded to <4 x i16> and passed to asm in s[8:9].
; Opens @s_shuffle_v3i16_v4i16__7_5_1; its expected llc output starts here.
s5, s7, 16 10450; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s6 10451; GFX90A-NEXT: s_lshr_b32 s9, s4, 16 10452; GFX90A-NEXT: ;;#ASMSTART 10453; GFX90A-NEXT: ; use s[8:9] 10454; GFX90A-NEXT: ;;#ASMEND 10455; GFX90A-NEXT: s_setpc_b64 s[30:31] 10456; 10457; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_4_1: 10458; GFX940: ; %bb.0: 10459; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10460; GFX940-NEXT: ;;#ASMSTART 10461; GFX940-NEXT: ; def s[0:1] 10462; GFX940-NEXT: ;;#ASMEND 10463; GFX940-NEXT: ;;#ASMSTART 10464; GFX940-NEXT: ; def s[2:3] 10465; GFX940-NEXT: ;;#ASMEND 10466; GFX940-NEXT: s_lshr_b32 s1, s3, 16 10467; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s2 10468; GFX940-NEXT: s_lshr_b32 s9, s0, 16 10469; GFX940-NEXT: ;;#ASMSTART 10470; GFX940-NEXT: ; use s[8:9] 10471; GFX940-NEXT: ;;#ASMEND 10472; GFX940-NEXT: s_setpc_b64 s[30:31] 10473 %vec0 = call <4 x i16> asm "; def $0", "=s"() 10474 %vec1 = call <4 x i16> asm "; def $0", "=s"() 10475 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 4, i32 1> 10476 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 10477 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 10478 ret void 10479} 10480 10481define void @s_shuffle_v3i16_v4i16__7_5_1() { 10482; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_5_1: 10483; GFX900: ; %bb.0: 10484; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10485; GFX900-NEXT: ;;#ASMSTART 10486; GFX900-NEXT: ; def s[4:5] 10487; GFX900-NEXT: ;;#ASMEND 10488; GFX900-NEXT: ;;#ASMSTART 10489; GFX900-NEXT: ; def s[6:7] 10490; GFX900-NEXT: ;;#ASMEND 10491; GFX900-NEXT: s_pack_hh_b32_b16 s8, s7, s6 10492; GFX900-NEXT: s_lshr_b32 s9, s4, 16 10493; GFX900-NEXT: ;;#ASMSTART 10494; GFX900-NEXT: ; use s[8:9] 10495; GFX900-NEXT: ;;#ASMEND 10496; GFX900-NEXT: s_setpc_b64 s[30:31] 10497; 10498; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_5_1: 10499; GFX90A: ; %bb.0: 10500; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 
; Closes @s_shuffle_v3i16_v4i16__7_5_1: mask <7, 5, 1> selects <%vec1[3], %vec1[1], %vec0[1]> from two "=s" asm defs; the result is padded to <4 x i16> and passed to asm in s[8:9].
; Opens @s_shuffle_v3i16_v4i16__7_6_1; its expected llc output starts here.
10501; GFX90A-NEXT: ;;#ASMSTART 10502; GFX90A-NEXT: ; def s[4:5] 10503; GFX90A-NEXT: ;;#ASMEND 10504; GFX90A-NEXT: ;;#ASMSTART 10505; GFX90A-NEXT: ; def s[6:7] 10506; GFX90A-NEXT: ;;#ASMEND 10507; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s7, s6 10508; GFX90A-NEXT: s_lshr_b32 s9, s4, 16 10509; GFX90A-NEXT: ;;#ASMSTART 10510; GFX90A-NEXT: ; use s[8:9] 10511; GFX90A-NEXT: ;;#ASMEND 10512; GFX90A-NEXT: s_setpc_b64 s[30:31] 10513; 10514; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_5_1: 10515; GFX940: ; %bb.0: 10516; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10517; GFX940-NEXT: ;;#ASMSTART 10518; GFX940-NEXT: ; def s[0:1] 10519; GFX940-NEXT: ;;#ASMEND 10520; GFX940-NEXT: ;;#ASMSTART 10521; GFX940-NEXT: ; def s[2:3] 10522; GFX940-NEXT: ;;#ASMEND 10523; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s2 10524; GFX940-NEXT: s_lshr_b32 s9, s0, 16 10525; GFX940-NEXT: ;;#ASMSTART 10526; GFX940-NEXT: ; use s[8:9] 10527; GFX940-NEXT: ;;#ASMEND 10528; GFX940-NEXT: s_setpc_b64 s[30:31] 10529 %vec0 = call <4 x i16> asm "; def $0", "=s"() 10530 %vec1 = call <4 x i16> asm "; def $0", "=s"() 10531 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 5, i32 1> 10532 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 10533 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 10534 ret void 10535} 10536 10537define void @s_shuffle_v3i16_v4i16__7_6_1() { 10538; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_6_1: 10539; GFX900: ; %bb.0: 10540; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10541; GFX900-NEXT: ;;#ASMSTART 10542; GFX900-NEXT: ; def s[4:5] 10543; GFX900-NEXT: ;;#ASMEND 10544; GFX900-NEXT: ;;#ASMSTART 10545; GFX900-NEXT: ; def s[6:7] 10546; GFX900-NEXT: ;;#ASMEND 10547; GFX900-NEXT: s_lshr_b32 s5, s7, 16 10548; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s7 10549; GFX900-NEXT: s_lshr_b32 s9, s4, 16 10550; GFX900-NEXT: ;;#ASMSTART 10551; GFX900-NEXT: ; use s[8:9] 10552; GFX900-NEXT: ;;#ASMEND 10553; 
; Closes @s_shuffle_v3i16_v4i16__7_6_1: mask <7, 6, 1> selects <%vec1[3], %vec1[2], %vec0[1]> from two "=s" asm defs; the result is padded to <4 x i16> and passed to asm in s[8:9].
; Opens @s_shuffle_v3i16_v4i16__u_2_2, whose expected output is a single check block shared by all three llc targets.
GFX900-NEXT: s_setpc_b64 s[30:31] 10554; 10555; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_6_1: 10556; GFX90A: ; %bb.0: 10557; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10558; GFX90A-NEXT: ;;#ASMSTART 10559; GFX90A-NEXT: ; def s[4:5] 10560; GFX90A-NEXT: ;;#ASMEND 10561; GFX90A-NEXT: ;;#ASMSTART 10562; GFX90A-NEXT: ; def s[6:7] 10563; GFX90A-NEXT: ;;#ASMEND 10564; GFX90A-NEXT: s_lshr_b32 s5, s7, 16 10565; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s7 10566; GFX90A-NEXT: s_lshr_b32 s9, s4, 16 10567; GFX90A-NEXT: ;;#ASMSTART 10568; GFX90A-NEXT: ; use s[8:9] 10569; GFX90A-NEXT: ;;#ASMEND 10570; GFX90A-NEXT: s_setpc_b64 s[30:31] 10571; 10572; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_6_1: 10573; GFX940: ; %bb.0: 10574; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10575; GFX940-NEXT: ;;#ASMSTART 10576; GFX940-NEXT: ; def s[0:1] 10577; GFX940-NEXT: ;;#ASMEND 10578; GFX940-NEXT: ;;#ASMSTART 10579; GFX940-NEXT: ; def s[2:3] 10580; GFX940-NEXT: ;;#ASMEND 10581; GFX940-NEXT: s_lshr_b32 s1, s3, 16 10582; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s3 10583; GFX940-NEXT: s_lshr_b32 s9, s0, 16 10584; GFX940-NEXT: ;;#ASMSTART 10585; GFX940-NEXT: ; use s[8:9] 10586; GFX940-NEXT: ;;#ASMEND 10587; GFX940-NEXT: s_setpc_b64 s[30:31] 10588 %vec0 = call <4 x i16> asm "; def $0", "=s"() 10589 %vec1 = call <4 x i16> asm "; def $0", "=s"() 10590 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 6, i32 1> 10591 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 10592 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 10593 ret void 10594} 10595 10596define void @s_shuffle_v3i16_v4i16__u_2_2() { 10597; GFX9-LABEL: s_shuffle_v3i16_v4i16__u_2_2: 10598; GFX9: ; %bb.0: 10599; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10600; GFX9-NEXT: ;;#ASMSTART 10601; GFX9-NEXT: ; def s[8:9] 10602; GFX9-NEXT: ;;#ASMEND 10603; GFX9-NEXT: s_lshl_b32 s8, s9, 16 10604; GFX9-NEXT: ;;#ASMSTART 10605; 
; Closes @s_shuffle_v3i16_v4i16__u_2_2: single "=s" asm def shuffled against poison with mask <poison, 2, 2>, padded to <4 x i16> and passed to asm in s[8:9].
; Holds all of @s_shuffle_v3i16_v4i16__0_2_2: same shape with mask <0, 2, 2>, i.e. <%vec0[0], %vec0[2], %vec0[2]>; one check block shared by all three llc targets.
; Opens @s_shuffle_v3i16_v4i16__1_2_2, which has per-target check blocks.
GFX9-NEXT: ; use s[8:9] 10606; GFX9-NEXT: ;;#ASMEND 10607; GFX9-NEXT: s_setpc_b64 s[30:31] 10608 %vec0 = call <4 x i16> asm "; def $0", "=s"() 10609 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 poison, i32 2, i32 2> 10610 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 10611 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 10612 ret void 10613} 10614 10615define void @s_shuffle_v3i16_v4i16__0_2_2() { 10616; GFX9-LABEL: s_shuffle_v3i16_v4i16__0_2_2: 10617; GFX9: ; %bb.0: 10618; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10619; GFX9-NEXT: ;;#ASMSTART 10620; GFX9-NEXT: ; def s[8:9] 10621; GFX9-NEXT: ;;#ASMEND 10622; GFX9-NEXT: s_pack_ll_b32_b16 s8, s8, s9 10623; GFX9-NEXT: ;;#ASMSTART 10624; GFX9-NEXT: ; use s[8:9] 10625; GFX9-NEXT: ;;#ASMEND 10626; GFX9-NEXT: s_setpc_b64 s[30:31] 10627 %vec0 = call <4 x i16> asm "; def $0", "=s"() 10628 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 2, i32 2> 10629 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 10630 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 10631 ret void 10632} 10633 10634define void @s_shuffle_v3i16_v4i16__1_2_2() { 10635; GFX900-LABEL: s_shuffle_v3i16_v4i16__1_2_2: 10636; GFX900: ; %bb.0: 10637; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10638; GFX900-NEXT: ;;#ASMSTART 10639; GFX900-NEXT: ; def s[8:9] 10640; GFX900-NEXT: ;;#ASMEND 10641; GFX900-NEXT: s_lshr_b32 s4, s8, 16 10642; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s9 10643; GFX900-NEXT: ;;#ASMSTART 10644; GFX900-NEXT: ; use s[8:9] 10645; GFX900-NEXT: ;;#ASMEND 10646; GFX900-NEXT: s_setpc_b64 s[30:31] 10647; 10648; GFX90A-LABEL: s_shuffle_v3i16_v4i16__1_2_2: 10649; GFX90A: ; %bb.0: 10650; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10651; GFX90A-NEXT: ;;#ASMSTART 10652; GFX90A-NEXT: ; def s[8:9] 10653; GFX90A-NEXT: 
; Closes @s_shuffle_v3i16_v4i16__1_2_2: single "=s" asm def shuffled against poison with mask <1, 2, 2>, padded to <4 x i16> and passed to asm in s[8:9].
; Holds all of @s_shuffle_v3i16_v4i16__2_2_2: mask <2, 2, 2> repeats %vec0[2] in every lane; one check block shared by all three llc targets.
; Opens @s_shuffle_v3i16_v4i16__3_2_2, which has per-target check blocks.
;;#ASMEND 10654; GFX90A-NEXT: s_lshr_b32 s4, s8, 16 10655; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s9 10656; GFX90A-NEXT: ;;#ASMSTART 10657; GFX90A-NEXT: ; use s[8:9] 10658; GFX90A-NEXT: ;;#ASMEND 10659; GFX90A-NEXT: s_setpc_b64 s[30:31] 10660; 10661; GFX940-LABEL: s_shuffle_v3i16_v4i16__1_2_2: 10662; GFX940: ; %bb.0: 10663; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10664; GFX940-NEXT: ;;#ASMSTART 10665; GFX940-NEXT: ; def s[8:9] 10666; GFX940-NEXT: ;;#ASMEND 10667; GFX940-NEXT: s_lshr_b32 s0, s8, 16 10668; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 10669; GFX940-NEXT: ;;#ASMSTART 10670; GFX940-NEXT: ; use s[8:9] 10671; GFX940-NEXT: ;;#ASMEND 10672; GFX940-NEXT: s_setpc_b64 s[30:31] 10673 %vec0 = call <4 x i16> asm "; def $0", "=s"() 10674 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 1, i32 2, i32 2> 10675 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 10676 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 10677 ret void 10678} 10679 10680define void @s_shuffle_v3i16_v4i16__2_2_2() { 10681; GFX9-LABEL: s_shuffle_v3i16_v4i16__2_2_2: 10682; GFX9: ; %bb.0: 10683; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10684; GFX9-NEXT: ;;#ASMSTART 10685; GFX9-NEXT: ; def s[8:9] 10686; GFX9-NEXT: ;;#ASMEND 10687; GFX9-NEXT: s_pack_ll_b32_b16 s8, s9, s9 10688; GFX9-NEXT: ;;#ASMSTART 10689; GFX9-NEXT: ; use s[8:9] 10690; GFX9-NEXT: ;;#ASMEND 10691; GFX9-NEXT: s_setpc_b64 s[30:31] 10692 %vec0 = call <4 x i16> asm "; def $0", "=s"() 10693 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 2, i32 2, i32 2> 10694 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 10695 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 10696 ret void 10697} 10698 10699define void @s_shuffle_v3i16_v4i16__3_2_2() { 10700; GFX900-LABEL: s_shuffle_v3i16_v4i16__3_2_2: 10701; GFX900: ; %bb.0: 10702; 
; Closes @s_shuffle_v3i16_v4i16__3_2_2: single "=s" asm def shuffled against poison with mask <3, 2, 2>, padded to <4 x i16> and passed to asm in s[8:9].
; Opens @s_shuffle_v3i16_v4i16__4_2_2, whose expected output is a single check block shared by all three llc targets.
GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10703; GFX900-NEXT: ;;#ASMSTART 10704; GFX900-NEXT: ; def s[8:9] 10705; GFX900-NEXT: ;;#ASMEND 10706; GFX900-NEXT: s_lshr_b32 s4, s9, 16 10707; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s9 10708; GFX900-NEXT: ;;#ASMSTART 10709; GFX900-NEXT: ; use s[8:9] 10710; GFX900-NEXT: ;;#ASMEND 10711; GFX900-NEXT: s_setpc_b64 s[30:31] 10712; 10713; GFX90A-LABEL: s_shuffle_v3i16_v4i16__3_2_2: 10714; GFX90A: ; %bb.0: 10715; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10716; GFX90A-NEXT: ;;#ASMSTART 10717; GFX90A-NEXT: ; def s[8:9] 10718; GFX90A-NEXT: ;;#ASMEND 10719; GFX90A-NEXT: s_lshr_b32 s4, s9, 16 10720; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s9 10721; GFX90A-NEXT: ;;#ASMSTART 10722; GFX90A-NEXT: ; use s[8:9] 10723; GFX90A-NEXT: ;;#ASMEND 10724; GFX90A-NEXT: s_setpc_b64 s[30:31] 10725; 10726; GFX940-LABEL: s_shuffle_v3i16_v4i16__3_2_2: 10727; GFX940: ; %bb.0: 10728; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10729; GFX940-NEXT: ;;#ASMSTART 10730; GFX940-NEXT: ; def s[8:9] 10731; GFX940-NEXT: ;;#ASMEND 10732; GFX940-NEXT: s_lshr_b32 s0, s9, 16 10733; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 10734; GFX940-NEXT: ;;#ASMSTART 10735; GFX940-NEXT: ; use s[8:9] 10736; GFX940-NEXT: ;;#ASMEND 10737; GFX940-NEXT: s_setpc_b64 s[30:31] 10738 %vec0 = call <4 x i16> asm "; def $0", "=s"() 10739 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 3, i32 2, i32 2> 10740 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 10741 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 10742 ret void 10743} 10744 10745define void @s_shuffle_v3i16_v4i16__4_2_2() { 10746; GFX9-LABEL: s_shuffle_v3i16_v4i16__4_2_2: 10747; GFX9: ; %bb.0: 10748; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10749; GFX9-NEXT: ;;#ASMSTART 10750; GFX9-NEXT: ; def s[8:9] 10751; GFX9-NEXT: ;;#ASMEND 10752; GFX9-NEXT: s_lshl_b32 s8, s9, 16 10753; GFX9-NEXT: 
; Closes @s_shuffle_v3i16_v4i16__4_2_2: mask <4, 2, 2> with a poison second operand, so index 4 names a lane of the poison vector; only one "=s" asm def is used.
; Opens @s_shuffle_v3i16_v4i16__5_2_2; its expected llc output for two asm-defined inputs starts here.
;;#ASMSTART 10754; GFX9-NEXT: ; use s[8:9] 10755; GFX9-NEXT: ;;#ASMEND 10756; GFX9-NEXT: s_setpc_b64 s[30:31] 10757 %vec0 = call <4 x i16> asm "; def $0", "=s"() 10758 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 4, i32 2, i32 2> 10759 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 10760 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 10761 ret void 10762} 10763 10764define void @s_shuffle_v3i16_v4i16__5_2_2() { 10765; GFX900-LABEL: s_shuffle_v3i16_v4i16__5_2_2: 10766; GFX900: ; %bb.0: 10767; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10768; GFX900-NEXT: ;;#ASMSTART 10769; GFX900-NEXT: ; def s[4:5] 10770; GFX900-NEXT: ;;#ASMEND 10771; GFX900-NEXT: ;;#ASMSTART 10772; GFX900-NEXT: ; def s[8:9] 10773; GFX900-NEXT: ;;#ASMEND 10774; GFX900-NEXT: s_lshr_b32 s4, s4, 16 10775; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s9 10776; GFX900-NEXT: ;;#ASMSTART 10777; GFX900-NEXT: ; use s[8:9] 10778; GFX900-NEXT: ;;#ASMEND 10779; GFX900-NEXT: s_setpc_b64 s[30:31] 10780; 10781; GFX90A-LABEL: s_shuffle_v3i16_v4i16__5_2_2: 10782; GFX90A: ; %bb.0: 10783; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10784; GFX90A-NEXT: ;;#ASMSTART 10785; GFX90A-NEXT: ; def s[4:5] 10786; GFX90A-NEXT: ;;#ASMEND 10787; GFX90A-NEXT: ;;#ASMSTART 10788; GFX90A-NEXT: ; def s[8:9] 10789; GFX90A-NEXT: ;;#ASMEND 10790; GFX90A-NEXT: s_lshr_b32 s4, s4, 16 10791; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s9 10792; GFX90A-NEXT: ;;#ASMSTART 10793; GFX90A-NEXT: ; use s[8:9] 10794; GFX90A-NEXT: ;;#ASMEND 10795; GFX90A-NEXT: s_setpc_b64 s[30:31] 10796; 10797; GFX940-LABEL: s_shuffle_v3i16_v4i16__5_2_2: 10798; GFX940: ; %bb.0: 10799; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10800; GFX940-NEXT: ;;#ASMSTART 10801; GFX940-NEXT: ; def s[0:1] 10802; GFX940-NEXT: ;;#ASMEND 10803; GFX940-NEXT: ;;#ASMSTART 10804; GFX940-NEXT: ; def s[8:9] 10805; GFX940-NEXT: ;;#ASMEND 10806; GFX940-NEXT: s_lshr_b32 s0, 
; Closes @s_shuffle_v3i16_v4i16__5_2_2: mask <5, 2, 2> selects <%vec1[1], %vec0[2], %vec0[2]> from two "=s" asm defs; the result is padded to <4 x i16> and passed to asm in s[8:9].
; Opens @s_shuffle_v3i16_v4i16__6_2_2; its expected llc output starts here.
s0, 16 10807; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 10808; GFX940-NEXT: ;;#ASMSTART 10809; GFX940-NEXT: ; use s[8:9] 10810; GFX940-NEXT: ;;#ASMEND 10811; GFX940-NEXT: s_setpc_b64 s[30:31] 10812 %vec0 = call <4 x i16> asm "; def $0", "=s"() 10813 %vec1 = call <4 x i16> asm "; def $0", "=s"() 10814 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 5, i32 2, i32 2> 10815 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 10816 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 10817 ret void 10818} 10819 10820define void @s_shuffle_v3i16_v4i16__6_2_2() { 10821; GFX900-LABEL: s_shuffle_v3i16_v4i16__6_2_2: 10822; GFX900: ; %bb.0: 10823; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10824; GFX900-NEXT: ;;#ASMSTART 10825; GFX900-NEXT: ; def s[8:9] 10826; GFX900-NEXT: ;;#ASMEND 10827; GFX900-NEXT: ;;#ASMSTART 10828; GFX900-NEXT: ; def s[4:5] 10829; GFX900-NEXT: ;;#ASMEND 10830; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s9 10831; GFX900-NEXT: ;;#ASMSTART 10832; GFX900-NEXT: ; use s[8:9] 10833; GFX900-NEXT: ;;#ASMEND 10834; GFX900-NEXT: s_setpc_b64 s[30:31] 10835; 10836; GFX90A-LABEL: s_shuffle_v3i16_v4i16__6_2_2: 10837; GFX90A: ; %bb.0: 10838; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10839; GFX90A-NEXT: ;;#ASMSTART 10840; GFX90A-NEXT: ; def s[8:9] 10841; GFX90A-NEXT: ;;#ASMEND 10842; GFX90A-NEXT: ;;#ASMSTART 10843; GFX90A-NEXT: ; def s[4:5] 10844; GFX90A-NEXT: ;;#ASMEND 10845; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s9 10846; GFX90A-NEXT: ;;#ASMSTART 10847; GFX90A-NEXT: ; use s[8:9] 10848; GFX90A-NEXT: ;;#ASMEND 10849; GFX90A-NEXT: s_setpc_b64 s[30:31] 10850; 10851; GFX940-LABEL: s_shuffle_v3i16_v4i16__6_2_2: 10852; GFX940: ; %bb.0: 10853; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10854; GFX940-NEXT: ;;#ASMSTART 10855; GFX940-NEXT: ; def s[8:9] 10856; GFX940-NEXT: ;;#ASMEND 10857; GFX940-NEXT: ;;#ASMSTART 10858; GFX940-NEXT: ; def s[0:1] 10859; 
; Closes @s_shuffle_v3i16_v4i16__6_2_2: mask <6, 2, 2> selects <%vec1[2], %vec0[2], %vec0[2]> from two "=s" asm defs; the result is padded to <4 x i16> and passed to asm in s[8:9].
; Opens @s_shuffle_v3i16_v4i16__7_2_2; its expected llc output starts here.
GFX940-NEXT: ;;#ASMEND 10860; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s9 10861; GFX940-NEXT: ;;#ASMSTART 10862; GFX940-NEXT: ; use s[8:9] 10863; GFX940-NEXT: ;;#ASMEND 10864; GFX940-NEXT: s_setpc_b64 s[30:31] 10865 %vec0 = call <4 x i16> asm "; def $0", "=s"() 10866 %vec1 = call <4 x i16> asm "; def $0", "=s"() 10867 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 6, i32 2, i32 2> 10868 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 10869 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 10870 ret void 10871} 10872 10873define void @s_shuffle_v3i16_v4i16__7_2_2() { 10874; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_2_2: 10875; GFX900: ; %bb.0: 10876; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10877; GFX900-NEXT: ;;#ASMSTART 10878; GFX900-NEXT: ; def s[4:5] 10879; GFX900-NEXT: ;;#ASMEND 10880; GFX900-NEXT: ;;#ASMSTART 10881; GFX900-NEXT: ; def s[8:9] 10882; GFX900-NEXT: ;;#ASMEND 10883; GFX900-NEXT: s_lshr_b32 s4, s5, 16 10884; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s9 10885; GFX900-NEXT: ;;#ASMSTART 10886; GFX900-NEXT: ; use s[8:9] 10887; GFX900-NEXT: ;;#ASMEND 10888; GFX900-NEXT: s_setpc_b64 s[30:31] 10889; 10890; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_2_2: 10891; GFX90A: ; %bb.0: 10892; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10893; GFX90A-NEXT: ;;#ASMSTART 10894; GFX90A-NEXT: ; def s[4:5] 10895; GFX90A-NEXT: ;;#ASMEND 10896; GFX90A-NEXT: ;;#ASMSTART 10897; GFX90A-NEXT: ; def s[8:9] 10898; GFX90A-NEXT: ;;#ASMEND 10899; GFX90A-NEXT: s_lshr_b32 s4, s5, 16 10900; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s9 10901; GFX90A-NEXT: ;;#ASMSTART 10902; GFX90A-NEXT: ; use s[8:9] 10903; GFX90A-NEXT: ;;#ASMEND 10904; GFX90A-NEXT: s_setpc_b64 s[30:31] 10905; 10906; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_2_2: 10907; GFX940: ; %bb.0: 10908; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10909; GFX940-NEXT: ;;#ASMSTART 10910; GFX940-NEXT: ; def s[0:1] 10911; 
; Closes @s_shuffle_v3i16_v4i16__7_2_2: mask <7, 2, 2> selects <%vec1[3], %vec0[2], %vec0[2]> from two "=s" asm defs; the result is padded to <4 x i16> and passed to asm in s[8:9].
; Opens @s_shuffle_v3i16_v4i16__7_u_2; its expected llc output starts here.
GFX940-NEXT: ;;#ASMEND 10912; GFX940-NEXT: ;;#ASMSTART 10913; GFX940-NEXT: ; def s[8:9] 10914; GFX940-NEXT: ;;#ASMEND 10915; GFX940-NEXT: s_lshr_b32 s0, s1, 16 10916; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 10917; GFX940-NEXT: ;;#ASMSTART 10918; GFX940-NEXT: ; use s[8:9] 10919; GFX940-NEXT: ;;#ASMEND 10920; GFX940-NEXT: s_setpc_b64 s[30:31] 10921 %vec0 = call <4 x i16> asm "; def $0", "=s"() 10922 %vec1 = call <4 x i16> asm "; def $0", "=s"() 10923 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 2, i32 2> 10924 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 10925 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 10926 ret void 10927} 10928 10929define void @s_shuffle_v3i16_v4i16__7_u_2() { 10930; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_u_2: 10931; GFX900: ; %bb.0: 10932; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10933; GFX900-NEXT: ;;#ASMSTART 10934; GFX900-NEXT: ; def s[8:9] 10935; GFX900-NEXT: ;;#ASMEND 10936; GFX900-NEXT: ;;#ASMSTART 10937; GFX900-NEXT: ; def s[4:5] 10938; GFX900-NEXT: ;;#ASMEND 10939; GFX900-NEXT: s_lshr_b32 s8, s5, 16 10940; GFX900-NEXT: ;;#ASMSTART 10941; GFX900-NEXT: ; use s[8:9] 10942; GFX900-NEXT: ;;#ASMEND 10943; GFX900-NEXT: s_setpc_b64 s[30:31] 10944; 10945; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_u_2: 10946; GFX90A: ; %bb.0: 10947; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10948; GFX90A-NEXT: ;;#ASMSTART 10949; GFX90A-NEXT: ; def s[8:9] 10950; GFX90A-NEXT: ;;#ASMEND 10951; GFX90A-NEXT: ;;#ASMSTART 10952; GFX90A-NEXT: ; def s[4:5] 10953; GFX90A-NEXT: ;;#ASMEND 10954; GFX90A-NEXT: s_lshr_b32 s8, s5, 16 10955; GFX90A-NEXT: ;;#ASMSTART 10956; GFX90A-NEXT: ; use s[8:9] 10957; GFX90A-NEXT: ;;#ASMEND 10958; GFX90A-NEXT: s_setpc_b64 s[30:31] 10959; 10960; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_u_2: 10961; GFX940: ; %bb.0: 10962; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10963; GFX940-NEXT: ;;#ASMSTART 10964; 
; Closes @s_shuffle_v3i16_v4i16__7_u_2: mask <7, poison, 2> selects <%vec1[3], poison, %vec0[2]> from two "=s" asm defs; the result is padded to <4 x i16> and passed to asm in s[8:9].
; Opens @s_shuffle_v3i16_v4i16__7_0_2; its expected llc output starts here.
GFX940-NEXT: ; def s[8:9] 10965; GFX940-NEXT: ;;#ASMEND 10966; GFX940-NEXT: ;;#ASMSTART 10967; GFX940-NEXT: ; def s[0:1] 10968; GFX940-NEXT: ;;#ASMEND 10969; GFX940-NEXT: s_lshr_b32 s8, s1, 16 10970; GFX940-NEXT: ;;#ASMSTART 10971; GFX940-NEXT: ; use s[8:9] 10972; GFX940-NEXT: ;;#ASMEND 10973; GFX940-NEXT: s_setpc_b64 s[30:31] 10974 %vec0 = call <4 x i16> asm "; def $0", "=s"() 10975 %vec1 = call <4 x i16> asm "; def $0", "=s"() 10976 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 poison, i32 2> 10977 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 10978 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 10979 ret void 10980} 10981 10982define void @s_shuffle_v3i16_v4i16__7_0_2() { 10983; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_0_2: 10984; GFX900: ; %bb.0: 10985; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10986; GFX900-NEXT: ;;#ASMSTART 10987; GFX900-NEXT: ; def s[4:5] 10988; GFX900-NEXT: ;;#ASMEND 10989; GFX900-NEXT: ;;#ASMSTART 10990; GFX900-NEXT: ; def s[8:9] 10991; GFX900-NEXT: ;;#ASMEND 10992; GFX900-NEXT: s_lshr_b32 s4, s5, 16 10993; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s8 10994; GFX900-NEXT: ;;#ASMSTART 10995; GFX900-NEXT: ; use s[8:9] 10996; GFX900-NEXT: ;;#ASMEND 10997; GFX900-NEXT: s_setpc_b64 s[30:31] 10998; 10999; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_0_2: 11000; GFX90A: ; %bb.0: 11001; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11002; GFX90A-NEXT: ;;#ASMSTART 11003; GFX90A-NEXT: ; def s[4:5] 11004; GFX90A-NEXT: ;;#ASMEND 11005; GFX90A-NEXT: ;;#ASMSTART 11006; GFX90A-NEXT: ; def s[8:9] 11007; GFX90A-NEXT: ;;#ASMEND 11008; GFX90A-NEXT: s_lshr_b32 s4, s5, 16 11009; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s8 11010; GFX90A-NEXT: ;;#ASMSTART 11011; GFX90A-NEXT: ; use s[8:9] 11012; GFX90A-NEXT: ;;#ASMEND 11013; GFX90A-NEXT: s_setpc_b64 s[30:31] 11014; 11015; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_0_2: 11016; GFX940: ; %bb.0: 11017; 
; Closes @s_shuffle_v3i16_v4i16__7_0_2: mask <7, 0, 2> selects <%vec1[3], %vec0[0], %vec0[2]> from two "=s" asm defs; the result is padded to <4 x i16> and passed to asm in s[8:9].
; Opens @s_shuffle_v3i16_v4i16__7_1_2; its expected llc output starts here.
GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11018; GFX940-NEXT: ;;#ASMSTART 11019; GFX940-NEXT: ; def s[0:1] 11020; GFX940-NEXT: ;;#ASMEND 11021; GFX940-NEXT: ;;#ASMSTART 11022; GFX940-NEXT: ; def s[8:9] 11023; GFX940-NEXT: ;;#ASMEND 11024; GFX940-NEXT: s_lshr_b32 s0, s1, 16 11025; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s8 11026; GFX940-NEXT: ;;#ASMSTART 11027; GFX940-NEXT: ; use s[8:9] 11028; GFX940-NEXT: ;;#ASMEND 11029; GFX940-NEXT: s_setpc_b64 s[30:31] 11030 %vec0 = call <4 x i16> asm "; def $0", "=s"() 11031 %vec1 = call <4 x i16> asm "; def $0", "=s"() 11032 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 0, i32 2> 11033 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 11034 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 11035 ret void 11036} 11037 11038define void @s_shuffle_v3i16_v4i16__7_1_2() { 11039; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_1_2: 11040; GFX900: ; %bb.0: 11041; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11042; GFX900-NEXT: ;;#ASMSTART 11043; GFX900-NEXT: ; def s[8:9] 11044; GFX900-NEXT: ;;#ASMEND 11045; GFX900-NEXT: ;;#ASMSTART 11046; GFX900-NEXT: ; def s[4:5] 11047; GFX900-NEXT: ;;#ASMEND 11048; GFX900-NEXT: s_pack_hh_b32_b16 s8, s5, s8 11049; GFX900-NEXT: ;;#ASMSTART 11050; GFX900-NEXT: ; use s[8:9] 11051; GFX900-NEXT: ;;#ASMEND 11052; GFX900-NEXT: s_setpc_b64 s[30:31] 11053; 11054; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_1_2: 11055; GFX90A: ; %bb.0: 11056; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11057; GFX90A-NEXT: ;;#ASMSTART 11058; GFX90A-NEXT: ; def s[8:9] 11059; GFX90A-NEXT: ;;#ASMEND 11060; GFX90A-NEXT: ;;#ASMSTART 11061; GFX90A-NEXT: ; def s[4:5] 11062; GFX90A-NEXT: ;;#ASMEND 11063; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s5, s8 11064; GFX90A-NEXT: ;;#ASMSTART 11065; GFX90A-NEXT: ; use s[8:9] 11066; GFX90A-NEXT: ;;#ASMEND 11067; GFX90A-NEXT: s_setpc_b64 s[30:31] 11068; 11069; GFX940-LABEL: 
; Closes @s_shuffle_v3i16_v4i16__7_1_2: mask <7, 1, 2> selects <%vec1[3], %vec0[1], %vec0[2]> from two "=s" asm defs; the result is padded to <4 x i16> and passed to asm in s[8:9].
; Opens @s_shuffle_v3i16_v4i16__7_3_2; its expected llc output starts here.
s_shuffle_v3i16_v4i16__7_1_2: 11070; GFX940: ; %bb.0: 11071; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11072; GFX940-NEXT: ;;#ASMSTART 11073; GFX940-NEXT: ; def s[8:9] 11074; GFX940-NEXT: ;;#ASMEND 11075; GFX940-NEXT: ;;#ASMSTART 11076; GFX940-NEXT: ; def s[0:1] 11077; GFX940-NEXT: ;;#ASMEND 11078; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s8 11079; GFX940-NEXT: ;;#ASMSTART 11080; GFX940-NEXT: ; use s[8:9] 11081; GFX940-NEXT: ;;#ASMEND 11082; GFX940-NEXT: s_setpc_b64 s[30:31] 11083 %vec0 = call <4 x i16> asm "; def $0", "=s"() 11084 %vec1 = call <4 x i16> asm "; def $0", "=s"() 11085 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 1, i32 2> 11086 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 11087 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 11088 ret void 11089} 11090 11091define void @s_shuffle_v3i16_v4i16__7_3_2() { 11092; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_3_2: 11093; GFX900: ; %bb.0: 11094; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11095; GFX900-NEXT: ;;#ASMSTART 11096; GFX900-NEXT: ; def s[8:9] 11097; GFX900-NEXT: ;;#ASMEND 11098; GFX900-NEXT: ;;#ASMSTART 11099; GFX900-NEXT: ; def s[4:5] 11100; GFX900-NEXT: ;;#ASMEND 11101; GFX900-NEXT: s_pack_hh_b32_b16 s8, s5, s9 11102; GFX900-NEXT: ;;#ASMSTART 11103; GFX900-NEXT: ; use s[8:9] 11104; GFX900-NEXT: ;;#ASMEND 11105; GFX900-NEXT: s_setpc_b64 s[30:31] 11106; 11107; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_3_2: 11108; GFX90A: ; %bb.0: 11109; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11110; GFX90A-NEXT: ;;#ASMSTART 11111; GFX90A-NEXT: ; def s[8:9] 11112; GFX90A-NEXT: ;;#ASMEND 11113; GFX90A-NEXT: ;;#ASMSTART 11114; GFX90A-NEXT: ; def s[4:5] 11115; GFX90A-NEXT: ;;#ASMEND 11116; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s5, s9 11117; GFX90A-NEXT: ;;#ASMSTART 11118; GFX90A-NEXT: ; use s[8:9] 11119; GFX90A-NEXT: ;;#ASMEND 11120; GFX90A-NEXT: s_setpc_b64 s[30:31] 11121; 11122; 
; Closes @s_shuffle_v3i16_v4i16__7_3_2: mask <7, 3, 2> selects <%vec1[3], %vec0[3], %vec0[2]> from two "=s" asm defs; the result is padded to <4 x i16> and passed to asm in s[8:9].
; Opens @s_shuffle_v3i16_v4i16__7_4_2; its expected llc output starts here.
GFX940-LABEL: s_shuffle_v3i16_v4i16__7_3_2: 11123; GFX940: ; %bb.0: 11124; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11125; GFX940-NEXT: ;;#ASMSTART 11126; GFX940-NEXT: ; def s[8:9] 11127; GFX940-NEXT: ;;#ASMEND 11128; GFX940-NEXT: ;;#ASMSTART 11129; GFX940-NEXT: ; def s[0:1] 11130; GFX940-NEXT: ;;#ASMEND 11131; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s9 11132; GFX940-NEXT: ;;#ASMSTART 11133; GFX940-NEXT: ; use s[8:9] 11134; GFX940-NEXT: ;;#ASMEND 11135; GFX940-NEXT: s_setpc_b64 s[30:31] 11136 %vec0 = call <4 x i16> asm "; def $0", "=s"() 11137 %vec1 = call <4 x i16> asm "; def $0", "=s"() 11138 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 3, i32 2> 11139 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 11140 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 11141 ret void 11142} 11143 11144define void @s_shuffle_v3i16_v4i16__7_4_2() { 11145; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_4_2: 11146; GFX900: ; %bb.0: 11147; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11148; GFX900-NEXT: ;;#ASMSTART 11149; GFX900-NEXT: ; def s[4:5] 11150; GFX900-NEXT: ;;#ASMEND 11151; GFX900-NEXT: ;;#ASMSTART 11152; GFX900-NEXT: ; def s[8:9] 11153; GFX900-NEXT: ;;#ASMEND 11154; GFX900-NEXT: s_lshr_b32 s5, s5, 16 11155; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4 11156; GFX900-NEXT: ;;#ASMSTART 11157; GFX900-NEXT: ; use s[8:9] 11158; GFX900-NEXT: ;;#ASMEND 11159; GFX900-NEXT: s_setpc_b64 s[30:31] 11160; 11161; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_4_2: 11162; GFX90A: ; %bb.0: 11163; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11164; GFX90A-NEXT: ;;#ASMSTART 11165; GFX90A-NEXT: ; def s[4:5] 11166; GFX90A-NEXT: ;;#ASMEND 11167; GFX90A-NEXT: ;;#ASMSTART 11168; GFX90A-NEXT: ; def s[8:9] 11169; GFX90A-NEXT: ;;#ASMEND 11170; GFX90A-NEXT: s_lshr_b32 s5, s5, 16 11171; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4 11172; GFX90A-NEXT: ;;#ASMSTART 11173; GFX90A-NEXT: ; use 
; Closes @s_shuffle_v3i16_v4i16__7_4_2: mask <7, 4, 2> selects <%vec1[3], %vec1[0], %vec0[2]> from two "=s" asm defs; the result is padded to <4 x i16> and passed to asm in s[8:9].
; Opens @s_shuffle_v3i16_v4i16__7_5_2; its expected llc output starts here.
s[8:9] 11174; GFX90A-NEXT: ;;#ASMEND 11175; GFX90A-NEXT: s_setpc_b64 s[30:31] 11176; 11177; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_4_2: 11178; GFX940: ; %bb.0: 11179; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11180; GFX940-NEXT: ;;#ASMSTART 11181; GFX940-NEXT: ; def s[0:1] 11182; GFX940-NEXT: ;;#ASMEND 11183; GFX940-NEXT: ;;#ASMSTART 11184; GFX940-NEXT: ; def s[8:9] 11185; GFX940-NEXT: ;;#ASMEND 11186; GFX940-NEXT: s_lshr_b32 s1, s1, 16 11187; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 11188; GFX940-NEXT: ;;#ASMSTART 11189; GFX940-NEXT: ; use s[8:9] 11190; GFX940-NEXT: ;;#ASMEND 11191; GFX940-NEXT: s_setpc_b64 s[30:31] 11192 %vec0 = call <4 x i16> asm "; def $0", "=s"() 11193 %vec1 = call <4 x i16> asm "; def $0", "=s"() 11194 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 4, i32 2> 11195 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 11196 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 11197 ret void 11198} 11199 11200define void @s_shuffle_v3i16_v4i16__7_5_2() { 11201; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_5_2: 11202; GFX900: ; %bb.0: 11203; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11204; GFX900-NEXT: ;;#ASMSTART 11205; GFX900-NEXT: ; def s[8:9] 11206; GFX900-NEXT: ;;#ASMEND 11207; GFX900-NEXT: ;;#ASMSTART 11208; GFX900-NEXT: ; def s[4:5] 11209; GFX900-NEXT: ;;#ASMEND 11210; GFX900-NEXT: s_pack_hh_b32_b16 s8, s5, s4 11211; GFX900-NEXT: ;;#ASMSTART 11212; GFX900-NEXT: ; use s[8:9] 11213; GFX900-NEXT: ;;#ASMEND 11214; GFX900-NEXT: s_setpc_b64 s[30:31] 11215; 11216; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_5_2: 11217; GFX90A: ; %bb.0: 11218; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11219; GFX90A-NEXT: ;;#ASMSTART 11220; GFX90A-NEXT: ; def s[8:9] 11221; GFX90A-NEXT: ;;#ASMEND 11222; GFX90A-NEXT: ;;#ASMSTART 11223; GFX90A-NEXT: ; def s[4:5] 11224; GFX90A-NEXT: ;;#ASMEND 11225; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s5, s4 11226; 
GFX90A-NEXT: ;;#ASMSTART 11227; GFX90A-NEXT: ; use s[8:9] 11228; GFX90A-NEXT: ;;#ASMEND 11229; GFX90A-NEXT: s_setpc_b64 s[30:31] 11230; 11231; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_5_2: 11232; GFX940: ; %bb.0: 11233; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11234; GFX940-NEXT: ;;#ASMSTART 11235; GFX940-NEXT: ; def s[8:9] 11236; GFX940-NEXT: ;;#ASMEND 11237; GFX940-NEXT: ;;#ASMSTART 11238; GFX940-NEXT: ; def s[0:1] 11239; GFX940-NEXT: ;;#ASMEND 11240; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s0 11241; GFX940-NEXT: ;;#ASMSTART 11242; GFX940-NEXT: ; use s[8:9] 11243; GFX940-NEXT: ;;#ASMEND 11244; GFX940-NEXT: s_setpc_b64 s[30:31] 11245 %vec0 = call <4 x i16> asm "; def $0", "=s"() 11246 %vec1 = call <4 x i16> asm "; def $0", "=s"() 11247 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 5, i32 2> 11248 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 11249 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 11250 ret void 11251} 11252 11253define void @s_shuffle_v3i16_v4i16__7_6_2() { 11254; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_6_2: 11255; GFX900: ; %bb.0: 11256; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11257; GFX900-NEXT: ;;#ASMSTART 11258; GFX900-NEXT: ; def s[4:5] 11259; GFX900-NEXT: ;;#ASMEND 11260; GFX900-NEXT: ;;#ASMSTART 11261; GFX900-NEXT: ; def s[8:9] 11262; GFX900-NEXT: ;;#ASMEND 11263; GFX900-NEXT: s_lshr_b32 s4, s5, 16 11264; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s5 11265; GFX900-NEXT: ;;#ASMSTART 11266; GFX900-NEXT: ; use s[8:9] 11267; GFX900-NEXT: ;;#ASMEND 11268; GFX900-NEXT: s_setpc_b64 s[30:31] 11269; 11270; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_6_2: 11271; GFX90A: ; %bb.0: 11272; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11273; GFX90A-NEXT: ;;#ASMSTART 11274; GFX90A-NEXT: ; def s[4:5] 11275; GFX90A-NEXT: ;;#ASMEND 11276; GFX90A-NEXT: ;;#ASMSTART 11277; GFX90A-NEXT: ; def s[8:9] 11278; GFX90A-NEXT: ;;#ASMEND 11279; 
GFX90A-NEXT: s_lshr_b32 s4, s5, 16 11280; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s5 11281; GFX90A-NEXT: ;;#ASMSTART 11282; GFX90A-NEXT: ; use s[8:9] 11283; GFX90A-NEXT: ;;#ASMEND 11284; GFX90A-NEXT: s_setpc_b64 s[30:31] 11285; 11286; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_6_2: 11287; GFX940: ; %bb.0: 11288; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11289; GFX940-NEXT: ;;#ASMSTART 11290; GFX940-NEXT: ; def s[0:1] 11291; GFX940-NEXT: ;;#ASMEND 11292; GFX940-NEXT: ;;#ASMSTART 11293; GFX940-NEXT: ; def s[8:9] 11294; GFX940-NEXT: ;;#ASMEND 11295; GFX940-NEXT: s_lshr_b32 s0, s1, 16 11296; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 11297; GFX940-NEXT: ;;#ASMSTART 11298; GFX940-NEXT: ; use s[8:9] 11299; GFX940-NEXT: ;;#ASMEND 11300; GFX940-NEXT: s_setpc_b64 s[30:31] 11301 %vec0 = call <4 x i16> asm "; def $0", "=s"() 11302 %vec1 = call <4 x i16> asm "; def $0", "=s"() 11303 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 6, i32 2> 11304 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 11305 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 11306 ret void 11307} 11308 11309define void @s_shuffle_v3i16_v4i16__u_3_3() { 11310; GFX900-LABEL: s_shuffle_v3i16_v4i16__u_3_3: 11311; GFX900: ; %bb.0: 11312; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11313; GFX900-NEXT: ;;#ASMSTART 11314; GFX900-NEXT: ; def s[4:5] 11315; GFX900-NEXT: ;;#ASMEND 11316; GFX900-NEXT: s_lshr_b32 s9, s5, 16 11317; GFX900-NEXT: s_mov_b32 s8, s5 11318; GFX900-NEXT: ;;#ASMSTART 11319; GFX900-NEXT: ; use s[8:9] 11320; GFX900-NEXT: ;;#ASMEND 11321; GFX900-NEXT: s_setpc_b64 s[30:31] 11322; 11323; GFX90A-LABEL: s_shuffle_v3i16_v4i16__u_3_3: 11324; GFX90A: ; %bb.0: 11325; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11326; GFX90A-NEXT: ;;#ASMSTART 11327; GFX90A-NEXT: ; def s[4:5] 11328; GFX90A-NEXT: ;;#ASMEND 11329; GFX90A-NEXT: s_lshr_b32 s9, s5, 16 11330; GFX90A-NEXT: s_mov_b32 s8, 
s5 11331; GFX90A-NEXT: ;;#ASMSTART 11332; GFX90A-NEXT: ; use s[8:9] 11333; GFX90A-NEXT: ;;#ASMEND 11334; GFX90A-NEXT: s_setpc_b64 s[30:31] 11335; 11336; GFX940-LABEL: s_shuffle_v3i16_v4i16__u_3_3: 11337; GFX940: ; %bb.0: 11338; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11339; GFX940-NEXT: ;;#ASMSTART 11340; GFX940-NEXT: ; def s[0:1] 11341; GFX940-NEXT: ;;#ASMEND 11342; GFX940-NEXT: s_lshr_b32 s9, s1, 16 11343; GFX940-NEXT: s_mov_b32 s8, s1 11344; GFX940-NEXT: ;;#ASMSTART 11345; GFX940-NEXT: ; use s[8:9] 11346; GFX940-NEXT: ;;#ASMEND 11347; GFX940-NEXT: s_setpc_b64 s[30:31] 11348 %vec0 = call <4 x i16> asm "; def $0", "=s"() 11349 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 poison, i32 3, i32 3> 11350 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 11351 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 11352 ret void 11353} 11354 11355define void @s_shuffle_v3i16_v4i16__0_3_3() { 11356; GFX900-LABEL: s_shuffle_v3i16_v4i16__0_3_3: 11357; GFX900: ; %bb.0: 11358; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11359; GFX900-NEXT: ;;#ASMSTART 11360; GFX900-NEXT: ; def s[4:5] 11361; GFX900-NEXT: ;;#ASMEND 11362; GFX900-NEXT: s_pack_lh_b32_b16 s8, s4, s5 11363; GFX900-NEXT: s_lshr_b32 s9, s5, 16 11364; GFX900-NEXT: ;;#ASMSTART 11365; GFX900-NEXT: ; use s[8:9] 11366; GFX900-NEXT: ;;#ASMEND 11367; GFX900-NEXT: s_setpc_b64 s[30:31] 11368; 11369; GFX90A-LABEL: s_shuffle_v3i16_v4i16__0_3_3: 11370; GFX90A: ; %bb.0: 11371; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11372; GFX90A-NEXT: ;;#ASMSTART 11373; GFX90A-NEXT: ; def s[4:5] 11374; GFX90A-NEXT: ;;#ASMEND 11375; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s4, s5 11376; GFX90A-NEXT: s_lshr_b32 s9, s5, 16 11377; GFX90A-NEXT: ;;#ASMSTART 11378; GFX90A-NEXT: ; use s[8:9] 11379; GFX90A-NEXT: ;;#ASMEND 11380; GFX90A-NEXT: s_setpc_b64 s[30:31] 11381; 11382; GFX940-LABEL: s_shuffle_v3i16_v4i16__0_3_3: 11383; GFX940: 
; %bb.0: 11384; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11385; GFX940-NEXT: ;;#ASMSTART 11386; GFX940-NEXT: ; def s[0:1] 11387; GFX940-NEXT: ;;#ASMEND 11388; GFX940-NEXT: s_pack_lh_b32_b16 s8, s0, s1 11389; GFX940-NEXT: s_lshr_b32 s9, s1, 16 11390; GFX940-NEXT: ;;#ASMSTART 11391; GFX940-NEXT: ; use s[8:9] 11392; GFX940-NEXT: ;;#ASMEND 11393; GFX940-NEXT: s_setpc_b64 s[30:31] 11394 %vec0 = call <4 x i16> asm "; def $0", "=s"() 11395 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 3, i32 3> 11396 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 11397 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 11398 ret void 11399} 11400 11401define void @s_shuffle_v3i16_v4i16__1_3_3() { 11402; GFX900-LABEL: s_shuffle_v3i16_v4i16__1_3_3: 11403; GFX900: ; %bb.0: 11404; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11405; GFX900-NEXT: ;;#ASMSTART 11406; GFX900-NEXT: ; def s[4:5] 11407; GFX900-NEXT: ;;#ASMEND 11408; GFX900-NEXT: s_pack_hh_b32_b16 s8, s4, s5 11409; GFX900-NEXT: s_lshr_b32 s9, s5, 16 11410; GFX900-NEXT: ;;#ASMSTART 11411; GFX900-NEXT: ; use s[8:9] 11412; GFX900-NEXT: ;;#ASMEND 11413; GFX900-NEXT: s_setpc_b64 s[30:31] 11414; 11415; GFX90A-LABEL: s_shuffle_v3i16_v4i16__1_3_3: 11416; GFX90A: ; %bb.0: 11417; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11418; GFX90A-NEXT: ;;#ASMSTART 11419; GFX90A-NEXT: ; def s[4:5] 11420; GFX90A-NEXT: ;;#ASMEND 11421; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s4, s5 11422; GFX90A-NEXT: s_lshr_b32 s9, s5, 16 11423; GFX90A-NEXT: ;;#ASMSTART 11424; GFX90A-NEXT: ; use s[8:9] 11425; GFX90A-NEXT: ;;#ASMEND 11426; GFX90A-NEXT: s_setpc_b64 s[30:31] 11427; 11428; GFX940-LABEL: s_shuffle_v3i16_v4i16__1_3_3: 11429; GFX940: ; %bb.0: 11430; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11431; GFX940-NEXT: ;;#ASMSTART 11432; GFX940-NEXT: ; def s[0:1] 11433; GFX940-NEXT: ;;#ASMEND 11434; GFX940-NEXT: s_pack_hh_b32_b16 
s8, s0, s1 11435; GFX940-NEXT: s_lshr_b32 s9, s1, 16 11436; GFX940-NEXT: ;;#ASMSTART 11437; GFX940-NEXT: ; use s[8:9] 11438; GFX940-NEXT: ;;#ASMEND 11439; GFX940-NEXT: s_setpc_b64 s[30:31] 11440 %vec0 = call <4 x i16> asm "; def $0", "=s"() 11441 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 1, i32 3, i32 3> 11442 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 11443 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 11444 ret void 11445} 11446 11447define void @s_shuffle_v3i16_v4i16__2_3_3() { 11448; GFX900-LABEL: s_shuffle_v3i16_v4i16__2_3_3: 11449; GFX900: ; %bb.0: 11450; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11451; GFX900-NEXT: ;;#ASMSTART 11452; GFX900-NEXT: ; def s[4:5] 11453; GFX900-NEXT: ;;#ASMEND 11454; GFX900-NEXT: s_lshr_b32 s9, s5, 16 11455; GFX900-NEXT: s_mov_b32 s8, s5 11456; GFX900-NEXT: ;;#ASMSTART 11457; GFX900-NEXT: ; use s[8:9] 11458; GFX900-NEXT: ;;#ASMEND 11459; GFX900-NEXT: s_setpc_b64 s[30:31] 11460; 11461; GFX90A-LABEL: s_shuffle_v3i16_v4i16__2_3_3: 11462; GFX90A: ; %bb.0: 11463; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11464; GFX90A-NEXT: ;;#ASMSTART 11465; GFX90A-NEXT: ; def s[4:5] 11466; GFX90A-NEXT: ;;#ASMEND 11467; GFX90A-NEXT: s_lshr_b32 s9, s5, 16 11468; GFX90A-NEXT: s_mov_b32 s8, s5 11469; GFX90A-NEXT: ;;#ASMSTART 11470; GFX90A-NEXT: ; use s[8:9] 11471; GFX90A-NEXT: ;;#ASMEND 11472; GFX90A-NEXT: s_setpc_b64 s[30:31] 11473; 11474; GFX940-LABEL: s_shuffle_v3i16_v4i16__2_3_3: 11475; GFX940: ; %bb.0: 11476; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11477; GFX940-NEXT: ;;#ASMSTART 11478; GFX940-NEXT: ; def s[0:1] 11479; GFX940-NEXT: ;;#ASMEND 11480; GFX940-NEXT: s_lshr_b32 s9, s1, 16 11481; GFX940-NEXT: s_mov_b32 s8, s1 11482; GFX940-NEXT: ;;#ASMSTART 11483; GFX940-NEXT: ; use s[8:9] 11484; GFX940-NEXT: ;;#ASMEND 11485; GFX940-NEXT: s_setpc_b64 s[30:31] 11486 %vec0 = call <4 x i16> asm "; def $0", "=s"() 
11487 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 2, i32 3, i32 3> 11488 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 11489 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 11490 ret void 11491} 11492 11493define void @s_shuffle_v3i16_v4i16__3_3_3() { 11494; GFX900-LABEL: s_shuffle_v3i16_v4i16__3_3_3: 11495; GFX900: ; %bb.0: 11496; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11497; GFX900-NEXT: ;;#ASMSTART 11498; GFX900-NEXT: ; def s[4:5] 11499; GFX900-NEXT: ;;#ASMEND 11500; GFX900-NEXT: s_lshr_b32 s9, s5, 16 11501; GFX900-NEXT: s_pack_hh_b32_b16 s8, s5, s5 11502; GFX900-NEXT: ;;#ASMSTART 11503; GFX900-NEXT: ; use s[8:9] 11504; GFX900-NEXT: ;;#ASMEND 11505; GFX900-NEXT: s_setpc_b64 s[30:31] 11506; 11507; GFX90A-LABEL: s_shuffle_v3i16_v4i16__3_3_3: 11508; GFX90A: ; %bb.0: 11509; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11510; GFX90A-NEXT: ;;#ASMSTART 11511; GFX90A-NEXT: ; def s[4:5] 11512; GFX90A-NEXT: ;;#ASMEND 11513; GFX90A-NEXT: s_lshr_b32 s9, s5, 16 11514; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s5, s5 11515; GFX90A-NEXT: ;;#ASMSTART 11516; GFX90A-NEXT: ; use s[8:9] 11517; GFX90A-NEXT: ;;#ASMEND 11518; GFX90A-NEXT: s_setpc_b64 s[30:31] 11519; 11520; GFX940-LABEL: s_shuffle_v3i16_v4i16__3_3_3: 11521; GFX940: ; %bb.0: 11522; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11523; GFX940-NEXT: ;;#ASMSTART 11524; GFX940-NEXT: ; def s[0:1] 11525; GFX940-NEXT: ;;#ASMEND 11526; GFX940-NEXT: s_lshr_b32 s9, s1, 16 11527; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s1 11528; GFX940-NEXT: ;;#ASMSTART 11529; GFX940-NEXT: ; use s[8:9] 11530; GFX940-NEXT: ;;#ASMEND 11531; GFX940-NEXT: s_setpc_b64 s[30:31] 11532 %vec0 = call <4 x i16> asm "; def $0", "=s"() 11533 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 3, i32 3, i32 3> 11534 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 
11535 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 11536 ret void 11537} 11538 11539define void @s_shuffle_v3i16_v4i16__4_3_3() { 11540; GFX900-LABEL: s_shuffle_v3i16_v4i16__4_3_3: 11541; GFX900: ; %bb.0: 11542; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11543; GFX900-NEXT: ;;#ASMSTART 11544; GFX900-NEXT: ; def s[4:5] 11545; GFX900-NEXT: ;;#ASMEND 11546; GFX900-NEXT: s_lshr_b32 s9, s5, 16 11547; GFX900-NEXT: s_mov_b32 s8, s5 11548; GFX900-NEXT: ;;#ASMSTART 11549; GFX900-NEXT: ; use s[8:9] 11550; GFX900-NEXT: ;;#ASMEND 11551; GFX900-NEXT: s_setpc_b64 s[30:31] 11552; 11553; GFX90A-LABEL: s_shuffle_v3i16_v4i16__4_3_3: 11554; GFX90A: ; %bb.0: 11555; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11556; GFX90A-NEXT: ;;#ASMSTART 11557; GFX90A-NEXT: ; def s[4:5] 11558; GFX90A-NEXT: ;;#ASMEND 11559; GFX90A-NEXT: s_lshr_b32 s9, s5, 16 11560; GFX90A-NEXT: s_mov_b32 s8, s5 11561; GFX90A-NEXT: ;;#ASMSTART 11562; GFX90A-NEXT: ; use s[8:9] 11563; GFX90A-NEXT: ;;#ASMEND 11564; GFX90A-NEXT: s_setpc_b64 s[30:31] 11565; 11566; GFX940-LABEL: s_shuffle_v3i16_v4i16__4_3_3: 11567; GFX940: ; %bb.0: 11568; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11569; GFX940-NEXT: ;;#ASMSTART 11570; GFX940-NEXT: ; def s[0:1] 11571; GFX940-NEXT: ;;#ASMEND 11572; GFX940-NEXT: s_lshr_b32 s9, s1, 16 11573; GFX940-NEXT: s_mov_b32 s8, s1 11574; GFX940-NEXT: ;;#ASMSTART 11575; GFX940-NEXT: ; use s[8:9] 11576; GFX940-NEXT: ;;#ASMEND 11577; GFX940-NEXT: s_setpc_b64 s[30:31] 11578 %vec0 = call <4 x i16> asm "; def $0", "=s"() 11579 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 4, i32 3, i32 3> 11580 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 11581 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 11582 ret void 11583} 11584 11585define void @s_shuffle_v3i16_v4i16__5_3_3() { 11586; GFX900-LABEL: s_shuffle_v3i16_v4i16__5_3_3: 11587; GFX900: ; %bb.0: 11588; 
GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11589; GFX900-NEXT: ;;#ASMSTART 11590; GFX900-NEXT: ; def s[4:5] 11591; GFX900-NEXT: ;;#ASMEND 11592; GFX900-NEXT: ;;#ASMSTART 11593; GFX900-NEXT: ; def s[6:7] 11594; GFX900-NEXT: ;;#ASMEND 11595; GFX900-NEXT: s_pack_hh_b32_b16 s8, s6, s5 11596; GFX900-NEXT: s_lshr_b32 s9, s5, 16 11597; GFX900-NEXT: ;;#ASMSTART 11598; GFX900-NEXT: ; use s[8:9] 11599; GFX900-NEXT: ;;#ASMEND 11600; GFX900-NEXT: s_setpc_b64 s[30:31] 11601; 11602; GFX90A-LABEL: s_shuffle_v3i16_v4i16__5_3_3: 11603; GFX90A: ; %bb.0: 11604; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11605; GFX90A-NEXT: ;;#ASMSTART 11606; GFX90A-NEXT: ; def s[4:5] 11607; GFX90A-NEXT: ;;#ASMEND 11608; GFX90A-NEXT: ;;#ASMSTART 11609; GFX90A-NEXT: ; def s[6:7] 11610; GFX90A-NEXT: ;;#ASMEND 11611; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s6, s5 11612; GFX90A-NEXT: s_lshr_b32 s9, s5, 16 11613; GFX90A-NEXT: ;;#ASMSTART 11614; GFX90A-NEXT: ; use s[8:9] 11615; GFX90A-NEXT: ;;#ASMEND 11616; GFX90A-NEXT: s_setpc_b64 s[30:31] 11617; 11618; GFX940-LABEL: s_shuffle_v3i16_v4i16__5_3_3: 11619; GFX940: ; %bb.0: 11620; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11621; GFX940-NEXT: ;;#ASMSTART 11622; GFX940-NEXT: ; def s[0:1] 11623; GFX940-NEXT: ;;#ASMEND 11624; GFX940-NEXT: ;;#ASMSTART 11625; GFX940-NEXT: ; def s[2:3] 11626; GFX940-NEXT: ;;#ASMEND 11627; GFX940-NEXT: s_pack_hh_b32_b16 s8, s2, s1 11628; GFX940-NEXT: s_lshr_b32 s9, s1, 16 11629; GFX940-NEXT: ;;#ASMSTART 11630; GFX940-NEXT: ; use s[8:9] 11631; GFX940-NEXT: ;;#ASMEND 11632; GFX940-NEXT: s_setpc_b64 s[30:31] 11633 %vec0 = call <4 x i16> asm "; def $0", "=s"() 11634 %vec1 = call <4 x i16> asm "; def $0", "=s"() 11635 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 5, i32 3, i32 3> 11636 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 11637 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 11638 ret void 11639} 11640 
; Two-source SGPR shuffle, mask <6, 3, 3>: lanes = %vec1[2], %vec0[3], %vec0[3].
; The <3 x i16> result is widened with a poison lane and passed in s[8:9].
define void @s_shuffle_v3i16_v4i16__6_3_3() {
; GFX900-LABEL: s_shuffle_v3i16_v4i16__6_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[6:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_pack_lh_b32_b16 s8, s7, s5
; GFX900-NEXT: s_lshr_b32 s9, s5, 16
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v3i16_v4i16__6_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[6:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s7, s5
; GFX90A-NEXT: s_lshr_b32 s9, s5, 16
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v3i16_v4i16__6_3_3:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[0:1]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[2:3]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_pack_lh_b32_b16 s8, s3, s1
; GFX940-NEXT: s_lshr_b32 s9, s1, 16
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; use s[8:9]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=s"()
  %vec1 = call <4 x i16> asm "; def $0", "=s"()
  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 6, i32 3, i32 3>
  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
  ret void
}

; Two-source SGPR shuffle, mask <7, 3, 3>: lanes = %vec1[3], %vec0[3], %vec0[3].
; The <3 x i16> result is widened with a poison lane and passed in s[8:9].
define void @s_shuffle_v3i16_v4i16__7_3_3() {
; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[6:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_pack_hh_b32_b16 s8, s7, s5
; GFX900-NEXT: s_lshr_b32 s9, s5, 16
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[6:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s7, s5
; GFX90A-NEXT: s_lshr_b32 s9, s5, 16
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_3_3:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[0:1]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[2:3]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s1
; GFX940-NEXT: s_lshr_b32 s9, s1, 16
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; use s[8:9]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=s"()
  %vec1 = call <4 x i16> asm "; def $0", "=s"()
  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 3, i32 3>
  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
  ret void
}

; Two-source SGPR shuffle, mask <7, poison, 3>: lanes = %vec1[3], poison, %vec0[3].
; The <3 x i16> result is widened with a poison lane and passed in s[8:9].
define void @s_shuffle_v3i16_v4i16__7_u_3() {
; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_u_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[6:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_lshr_b32 s9, s5, 16
; GFX900-NEXT: s_lshr_b32 s8, s7, 16
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_u_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[6:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_lshr_b32 s9, s5, 16
; GFX90A-NEXT: s_lshr_b32 s8, s7, 16
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_u_3:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[0:1]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[2:3]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_lshr_b32 s9, s1, 16
; GFX940-NEXT: s_lshr_b32 s8, s3, 16
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; use s[8:9]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=s"()
  %vec1 = call <4 x i16> asm "; def $0", "=s"()
  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 poison, i32 3>
  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
  ret void
}

; Two-source SGPR shuffle, mask <7, 0, 3>: lanes = %vec1[3], %vec0[0], %vec0[3].
; The <3 x i16> result is widened with a poison lane and passed in s[8:9].
define void @s_shuffle_v3i16_v4i16__7_0_3() {
; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_0_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[6:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_lshr_b32 s6, s7, 16
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_pack_ll_b32_b16 s8, s6, s4
; GFX900-NEXT: s_lshr_b32 s9, s5, 16
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_0_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[6:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_lshr_b32 s6, s7, 16
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s6, s4
; GFX90A-NEXT: s_lshr_b32 s9, s5, 16
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_0_3:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[2:3]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_lshr_b32 s2, s3, 16
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[0:1]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s0
; GFX940-NEXT: s_lshr_b32 s9, s1, 16
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; use s[8:9]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=s"()
  %vec1 = call <4 x i16> asm "; def $0", "=s"()
  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 0, i32 3>
  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
  ret void
}

; Two-source SGPR shuffle, mask <7, 1, 3>: lanes = %vec1[3], %vec0[1], %vec0[3].
; The <3 x i16> result is widened with a poison lane and passed in s[8:9].
define void @s_shuffle_v3i16_v4i16__7_1_3() {
; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_1_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[6:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_pack_hh_b32_b16 s8, s7, s4
; GFX900-NEXT: s_lshr_b32 s9, s5, 16
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_1_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[6:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s7, s4
; GFX90A-NEXT: s_lshr_b32 s9, s5, 16
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_1_3:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[0:1]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[2:3]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s0
; GFX940-NEXT: s_lshr_b32 s9, s1, 16
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; use s[8:9]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=s"()
  %vec1 = call <4 x i16> asm "; def $0", "=s"()
  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 1, i32 3>
  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
  ret void
}

; Two-source SGPR shuffle, mask <7, 2, 3>: lanes = %vec1[3], %vec0[2], %vec0[3].
; The <3 x i16> result is widened with a poison lane and passed in s[8:9].
define void @s_shuffle_v3i16_v4i16__7_2_3() {
; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_2_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[6:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_lshr_b32 s4, s7, 16
; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s5
; GFX900-NEXT: s_lshr_b32 s9, s5, 16
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_2_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[6:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_lshr_b32 s4, s7, 16
; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s5
; GFX90A-NEXT: s_lshr_b32 s9, s5, 16
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_2_3:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[0:1]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[2:3]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_lshr_b32 s0, s3, 16
; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1
; GFX940-NEXT: s_lshr_b32 s9, s1, 16
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; use s[8:9]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=s"()
  %vec1 = call <4 x i16> asm "; def $0", "=s"()
  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 2, i32 3>
  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
  ret void
}

; Two-source SGPR shuffle, mask <7, 4, 3>: lanes = %vec1[3], %vec1[0], %vec0[3].
; The <3 x i16> result is widened with a poison lane and passed in s[8:9].
define void @s_shuffle_v3i16_v4i16__7_4_3() {
; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_4_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[6:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_lshr_b32 s4, s7, 16
; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s6
; GFX900-NEXT: s_lshr_b32 s9, s5, 16
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_4_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[6:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_lshr_b32 s4, s7, 16
; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s6
; GFX90A-NEXT: s_lshr_b32 s9, s5, 16
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_4_3:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[0:1]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[2:3]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_lshr_b32 s0, s3, 16
; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s2
; GFX940-NEXT: s_lshr_b32 s9, s1, 16
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; use s[8:9]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=s"()
  %vec1 = call <4 x i16> asm "; def $0", "=s"()
  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 4, i32 3>
  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
  ret void
}

; Two-source SGPR shuffle, mask <7, 5, 3>: lanes = %vec1[3], %vec1[1], %vec0[3].
; The <3 x i16> result is widened with a poison lane and passed in s[8:9].
define void @s_shuffle_v3i16_v4i16__7_5_3() {
; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_5_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[6:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_pack_hh_b32_b16 s8, s7, s6
; GFX900-NEXT: s_lshr_b32 s9, s5, 16
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_5_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[6:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s7, s6
; GFX90A-NEXT: s_lshr_b32 s9, s5, 16
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_5_3:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[0:1]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[2:3]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s2
; GFX940-NEXT: s_lshr_b32 s9, s1, 16
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; use s[8:9]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=s"()
  %vec1 = call <4 x i16> asm "; def $0", "=s"()
  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 5, i32 3>
  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
  ret void
}

; Two-source SGPR shuffle, mask <7, 6, 3>: lanes = %vec1[3], %vec1[2], %vec0[3].
; The <3 x i16> result is widened with a poison lane and passed in s[8:9].
define void @s_shuffle_v3i16_v4i16__7_6_3() {
; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_6_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[6:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_lshr_b32 s4, s7, 16
; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s7
; GFX900-NEXT: s_lshr_b32 s9, s5, 16
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_6_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[6:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_lshr_b32 s4, s7, 16
; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s7
; GFX90A-NEXT: s_lshr_b32 s9, s5, 16
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_6_3:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[0:1]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[2:3]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_lshr_b32 s0, s3, 16
; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3
; GFX940-NEXT: s_lshr_b32 s9, s1, 16
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; use s[8:9]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=s"()
  %vec1 = call <4 x i16> asm "; def $0", "=s"()
  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 6, i32 3>
  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
  ret void
}

; SGPR shuffle, mask <poison, 4, 4>. The second operand is poison, so index 4
; (lane 0 of the second operand) is a poison lane and every result lane is
; poison; the expected code contains no "def", only the use of s[8:9].
; NOTE(review): presumably a second asm-defined vector was intended here so that
; index 4 reads a real lane -- confirm, and if so change the IR and regenerate
; the assertions with the script.
define void @s_shuffle_v3i16_v4i16__u_4_4() {
; GFX9-LABEL: s_shuffle_v3i16_v4i16__u_4_4:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:9]
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=s"()
  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 poison, i32 4, i32 4>
  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
  ret void
}

; SGPR shuffle, mask <0, 4, 4>. The second operand is poison, so index 4 (lane 0
; of the second operand) is a poison lane: lanes = %vec0[0], poison, poison.
; The result is widened with a poison lane and passed in s[8:9].
; NOTE(review): presumably a second asm-defined vector was intended here so that
; index 4 reads a real lane -- confirm, and if so change the IR and regenerate
; the assertions with the script.
define void @s_shuffle_v3i16_v4i16__0_4_4() {
; GFX900-LABEL: s_shuffle_v3i16_v4i16__0_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v3i16_v4i16__0_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v3i16_v4i16__0_4_4:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[8:9]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_nop 0
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; use s[8:9]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=s"()
  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 4, i32 4>
  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
  ret void
}

define void @s_shuffle_v3i16_v4i16__1_4_4() {
; GFX900-LABEL:
s_shuffle_v3i16_v4i16__1_4_4: 12215; GFX900: ; %bb.0: 12216; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12217; GFX900-NEXT: ;;#ASMSTART 12218; GFX900-NEXT: ; def s[4:5] 12219; GFX900-NEXT: ;;#ASMEND 12220; GFX900-NEXT: s_lshr_b32 s8, s4, 16 12221; GFX900-NEXT: ;;#ASMSTART 12222; GFX900-NEXT: ; use s[8:9] 12223; GFX900-NEXT: ;;#ASMEND 12224; GFX900-NEXT: s_setpc_b64 s[30:31] 12225; 12226; GFX90A-LABEL: s_shuffle_v3i16_v4i16__1_4_4: 12227; GFX90A: ; %bb.0: 12228; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12229; GFX90A-NEXT: ;;#ASMSTART 12230; GFX90A-NEXT: ; def s[4:5] 12231; GFX90A-NEXT: ;;#ASMEND 12232; GFX90A-NEXT: s_lshr_b32 s8, s4, 16 12233; GFX90A-NEXT: ;;#ASMSTART 12234; GFX90A-NEXT: ; use s[8:9] 12235; GFX90A-NEXT: ;;#ASMEND 12236; GFX90A-NEXT: s_setpc_b64 s[30:31] 12237; 12238; GFX940-LABEL: s_shuffle_v3i16_v4i16__1_4_4: 12239; GFX940: ; %bb.0: 12240; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12241; GFX940-NEXT: ;;#ASMSTART 12242; GFX940-NEXT: ; def s[0:1] 12243; GFX940-NEXT: ;;#ASMEND 12244; GFX940-NEXT: s_lshr_b32 s8, s0, 16 12245; GFX940-NEXT: ;;#ASMSTART 12246; GFX940-NEXT: ; use s[8:9] 12247; GFX940-NEXT: ;;#ASMEND 12248; GFX940-NEXT: s_setpc_b64 s[30:31] 12249 %vec0 = call <4 x i16> asm "; def $0", "=s"() 12250 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 1, i32 4, i32 4> 12251 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 12252 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 12253 ret void 12254} 12255 12256define void @s_shuffle_v3i16_v4i16__2_4_4() { 12257; GFX900-LABEL: s_shuffle_v3i16_v4i16__2_4_4: 12258; GFX900: ; %bb.0: 12259; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12260; GFX900-NEXT: ;;#ASMSTART 12261; GFX900-NEXT: ; def s[4:5] 12262; GFX900-NEXT: ;;#ASMEND 12263; GFX900-NEXT: s_mov_b32 s8, s5 12264; GFX900-NEXT: ;;#ASMSTART 12265; GFX900-NEXT: ; use s[8:9] 12266; GFX900-NEXT: ;;#ASMEND 12267; 
GFX900-NEXT: s_setpc_b64 s[30:31] 12268; 12269; GFX90A-LABEL: s_shuffle_v3i16_v4i16__2_4_4: 12270; GFX90A: ; %bb.0: 12271; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12272; GFX90A-NEXT: ;;#ASMSTART 12273; GFX90A-NEXT: ; def s[4:5] 12274; GFX90A-NEXT: ;;#ASMEND 12275; GFX90A-NEXT: s_mov_b32 s8, s5 12276; GFX90A-NEXT: ;;#ASMSTART 12277; GFX90A-NEXT: ; use s[8:9] 12278; GFX90A-NEXT: ;;#ASMEND 12279; GFX90A-NEXT: s_setpc_b64 s[30:31] 12280; 12281; GFX940-LABEL: s_shuffle_v3i16_v4i16__2_4_4: 12282; GFX940: ; %bb.0: 12283; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12284; GFX940-NEXT: ;;#ASMSTART 12285; GFX940-NEXT: ; def s[0:1] 12286; GFX940-NEXT: ;;#ASMEND 12287; GFX940-NEXT: s_mov_b32 s8, s1 12288; GFX940-NEXT: ;;#ASMSTART 12289; GFX940-NEXT: ; use s[8:9] 12290; GFX940-NEXT: ;;#ASMEND 12291; GFX940-NEXT: s_setpc_b64 s[30:31] 12292 %vec0 = call <4 x i16> asm "; def $0", "=s"() 12293 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 2, i32 4, i32 4> 12294 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 12295 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 12296 ret void 12297} 12298 12299define void @s_shuffle_v3i16_v4i16__3_4_4() { 12300; GFX900-LABEL: s_shuffle_v3i16_v4i16__3_4_4: 12301; GFX900: ; %bb.0: 12302; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12303; GFX900-NEXT: ;;#ASMSTART 12304; GFX900-NEXT: ; def s[4:5] 12305; GFX900-NEXT: ;;#ASMEND 12306; GFX900-NEXT: s_lshr_b32 s8, s5, 16 12307; GFX900-NEXT: ;;#ASMSTART 12308; GFX900-NEXT: ; use s[8:9] 12309; GFX900-NEXT: ;;#ASMEND 12310; GFX900-NEXT: s_setpc_b64 s[30:31] 12311; 12312; GFX90A-LABEL: s_shuffle_v3i16_v4i16__3_4_4: 12313; GFX90A: ; %bb.0: 12314; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12315; GFX90A-NEXT: ;;#ASMSTART 12316; GFX90A-NEXT: ; def s[4:5] 12317; GFX90A-NEXT: ;;#ASMEND 12318; GFX90A-NEXT: s_lshr_b32 s8, s5, 16 12319; GFX90A-NEXT: ;;#ASMSTART 12320; 
GFX90A-NEXT: ; use s[8:9] 12321; GFX90A-NEXT: ;;#ASMEND 12322; GFX90A-NEXT: s_setpc_b64 s[30:31] 12323; 12324; GFX940-LABEL: s_shuffle_v3i16_v4i16__3_4_4: 12325; GFX940: ; %bb.0: 12326; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12327; GFX940-NEXT: ;;#ASMSTART 12328; GFX940-NEXT: ; def s[0:1] 12329; GFX940-NEXT: ;;#ASMEND 12330; GFX940-NEXT: s_lshr_b32 s8, s1, 16 12331; GFX940-NEXT: ;;#ASMSTART 12332; GFX940-NEXT: ; use s[8:9] 12333; GFX940-NEXT: ;;#ASMEND 12334; GFX940-NEXT: s_setpc_b64 s[30:31] 12335 %vec0 = call <4 x i16> asm "; def $0", "=s"() 12336 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 3, i32 4, i32 4> 12337 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 12338 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 12339 ret void 12340} 12341 12342define void @s_shuffle_v3i16_v4i16__4_4_4() { 12343; GFX9-LABEL: s_shuffle_v3i16_v4i16__4_4_4: 12344; GFX9: ; %bb.0: 12345; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12346; GFX9-NEXT: ;;#ASMSTART 12347; GFX9-NEXT: ; use s[8:9] 12348; GFX9-NEXT: ;;#ASMEND 12349; GFX9-NEXT: s_setpc_b64 s[30:31] 12350 %vec0 = call <4 x i16> asm "; def $0", "=s"() 12351 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 4, i32 4, i32 4> 12352 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 12353 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 12354 ret void 12355} 12356 12357define void @s_shuffle_v3i16_v4i16__5_4_4() { 12358; GFX900-LABEL: s_shuffle_v3i16_v4i16__5_4_4: 12359; GFX900: ; %bb.0: 12360; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12361; GFX900-NEXT: ;;#ASMSTART 12362; GFX900-NEXT: ; def s[4:5] 12363; GFX900-NEXT: ;;#ASMEND 12364; GFX900-NEXT: s_lshr_b32 s5, s4, 16 12365; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4 12366; GFX900-NEXT: s_mov_b32 s9, s4 12367; GFX900-NEXT: ;;#ASMSTART 12368; GFX900-NEXT: 
; use s[8:9] 12369; GFX900-NEXT: ;;#ASMEND 12370; GFX900-NEXT: s_setpc_b64 s[30:31] 12371; 12372; GFX90A-LABEL: s_shuffle_v3i16_v4i16__5_4_4: 12373; GFX90A: ; %bb.0: 12374; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12375; GFX90A-NEXT: ;;#ASMSTART 12376; GFX90A-NEXT: ; def s[4:5] 12377; GFX90A-NEXT: ;;#ASMEND 12378; GFX90A-NEXT: s_lshr_b32 s5, s4, 16 12379; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4 12380; GFX90A-NEXT: s_mov_b32 s9, s4 12381; GFX90A-NEXT: ;;#ASMSTART 12382; GFX90A-NEXT: ; use s[8:9] 12383; GFX90A-NEXT: ;;#ASMEND 12384; GFX90A-NEXT: s_setpc_b64 s[30:31] 12385; 12386; GFX940-LABEL: s_shuffle_v3i16_v4i16__5_4_4: 12387; GFX940: ; %bb.0: 12388; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12389; GFX940-NEXT: ;;#ASMSTART 12390; GFX940-NEXT: ; def s[0:1] 12391; GFX940-NEXT: ;;#ASMEND 12392; GFX940-NEXT: s_lshr_b32 s1, s0, 16 12393; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 12394; GFX940-NEXT: s_mov_b32 s9, s0 12395; GFX940-NEXT: ;;#ASMSTART 12396; GFX940-NEXT: ; use s[8:9] 12397; GFX940-NEXT: ;;#ASMEND 12398; GFX940-NEXT: s_setpc_b64 s[30:31] 12399 %vec0 = call <4 x i16> asm "; def $0", "=s"() 12400 %vec1 = call <4 x i16> asm "; def $0", "=s"() 12401 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 5, i32 4, i32 4> 12402 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 12403 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 12404 ret void 12405} 12406 12407define void @s_shuffle_v3i16_v4i16__6_4_4() { 12408; GFX900-LABEL: s_shuffle_v3i16_v4i16__6_4_4: 12409; GFX900: ; %bb.0: 12410; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12411; GFX900-NEXT: ;;#ASMSTART 12412; GFX900-NEXT: ; def s[4:5] 12413; GFX900-NEXT: ;;#ASMEND 12414; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4 12415; GFX900-NEXT: s_mov_b32 s9, s4 12416; GFX900-NEXT: ;;#ASMSTART 12417; GFX900-NEXT: ; use s[8:9] 12418; GFX900-NEXT: ;;#ASMEND 12419; GFX900-NEXT: s_setpc_b64 s[30:31] 
12420; 12421; GFX90A-LABEL: s_shuffle_v3i16_v4i16__6_4_4: 12422; GFX90A: ; %bb.0: 12423; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12424; GFX90A-NEXT: ;;#ASMSTART 12425; GFX90A-NEXT: ; def s[4:5] 12426; GFX90A-NEXT: ;;#ASMEND 12427; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4 12428; GFX90A-NEXT: s_mov_b32 s9, s4 12429; GFX90A-NEXT: ;;#ASMSTART 12430; GFX90A-NEXT: ; use s[8:9] 12431; GFX90A-NEXT: ;;#ASMEND 12432; GFX90A-NEXT: s_setpc_b64 s[30:31] 12433; 12434; GFX940-LABEL: s_shuffle_v3i16_v4i16__6_4_4: 12435; GFX940: ; %bb.0: 12436; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12437; GFX940-NEXT: ;;#ASMSTART 12438; GFX940-NEXT: ; def s[0:1] 12439; GFX940-NEXT: ;;#ASMEND 12440; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 12441; GFX940-NEXT: s_mov_b32 s9, s0 12442; GFX940-NEXT: ;;#ASMSTART 12443; GFX940-NEXT: ; use s[8:9] 12444; GFX940-NEXT: ;;#ASMEND 12445; GFX940-NEXT: s_setpc_b64 s[30:31] 12446 %vec0 = call <4 x i16> asm "; def $0", "=s"() 12447 %vec1 = call <4 x i16> asm "; def $0", "=s"() 12448 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 6, i32 4, i32 4> 12449 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 12450 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 12451 ret void 12452} 12453 12454define void @s_shuffle_v3i16_v4i16__7_4_4() { 12455; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_4_4: 12456; GFX900: ; %bb.0: 12457; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12458; GFX900-NEXT: ;;#ASMSTART 12459; GFX900-NEXT: ; def s[4:5] 12460; GFX900-NEXT: ;;#ASMEND 12461; GFX900-NEXT: s_lshr_b32 s5, s5, 16 12462; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4 12463; GFX900-NEXT: s_mov_b32 s9, s4 12464; GFX900-NEXT: ;;#ASMSTART 12465; GFX900-NEXT: ; use s[8:9] 12466; GFX900-NEXT: ;;#ASMEND 12467; GFX900-NEXT: s_setpc_b64 s[30:31] 12468; 12469; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_4_4: 12470; GFX90A: ; %bb.0: 12471; GFX90A-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) 12472; GFX90A-NEXT: ;;#ASMSTART 12473; GFX90A-NEXT: ; def s[4:5] 12474; GFX90A-NEXT: ;;#ASMEND 12475; GFX90A-NEXT: s_lshr_b32 s5, s5, 16 12476; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4 12477; GFX90A-NEXT: s_mov_b32 s9, s4 12478; GFX90A-NEXT: ;;#ASMSTART 12479; GFX90A-NEXT: ; use s[8:9] 12480; GFX90A-NEXT: ;;#ASMEND 12481; GFX90A-NEXT: s_setpc_b64 s[30:31] 12482; 12483; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_4_4: 12484; GFX940: ; %bb.0: 12485; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12486; GFX940-NEXT: ;;#ASMSTART 12487; GFX940-NEXT: ; def s[0:1] 12488; GFX940-NEXT: ;;#ASMEND 12489; GFX940-NEXT: s_lshr_b32 s1, s1, 16 12490; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 12491; GFX940-NEXT: s_mov_b32 s9, s0 12492; GFX940-NEXT: ;;#ASMSTART 12493; GFX940-NEXT: ; use s[8:9] 12494; GFX940-NEXT: ;;#ASMEND 12495; GFX940-NEXT: s_setpc_b64 s[30:31] 12496 %vec0 = call <4 x i16> asm "; def $0", "=s"() 12497 %vec1 = call <4 x i16> asm "; def $0", "=s"() 12498 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 4, i32 4> 12499 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 12500 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 12501 ret void 12502} 12503 12504define void @s_shuffle_v3i16_v4i16__7_u_4() { 12505; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_u_4: 12506; GFX900: ; %bb.0: 12507; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12508; GFX900-NEXT: ;;#ASMSTART 12509; GFX900-NEXT: ; def s[4:5] 12510; GFX900-NEXT: ;;#ASMEND 12511; GFX900-NEXT: s_lshr_b32 s8, s5, 16 12512; GFX900-NEXT: s_mov_b32 s9, s4 12513; GFX900-NEXT: ;;#ASMSTART 12514; GFX900-NEXT: ; use s[8:9] 12515; GFX900-NEXT: ;;#ASMEND 12516; GFX900-NEXT: s_setpc_b64 s[30:31] 12517; 12518; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_u_4: 12519; GFX90A: ; %bb.0: 12520; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12521; GFX90A-NEXT: ;;#ASMSTART 12522; GFX90A-NEXT: ; def s[4:5] 
12523; GFX90A-NEXT: ;;#ASMEND 12524; GFX90A-NEXT: s_lshr_b32 s8, s5, 16 12525; GFX90A-NEXT: s_mov_b32 s9, s4 12526; GFX90A-NEXT: ;;#ASMSTART 12527; GFX90A-NEXT: ; use s[8:9] 12528; GFX90A-NEXT: ;;#ASMEND 12529; GFX90A-NEXT: s_setpc_b64 s[30:31] 12530; 12531; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_u_4: 12532; GFX940: ; %bb.0: 12533; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12534; GFX940-NEXT: ;;#ASMSTART 12535; GFX940-NEXT: ; def s[0:1] 12536; GFX940-NEXT: ;;#ASMEND 12537; GFX940-NEXT: s_lshr_b32 s8, s1, 16 12538; GFX940-NEXT: s_mov_b32 s9, s0 12539; GFX940-NEXT: ;;#ASMSTART 12540; GFX940-NEXT: ; use s[8:9] 12541; GFX940-NEXT: ;;#ASMEND 12542; GFX940-NEXT: s_setpc_b64 s[30:31] 12543 %vec0 = call <4 x i16> asm "; def $0", "=s"() 12544 %vec1 = call <4 x i16> asm "; def $0", "=s"() 12545 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 poison, i32 4> 12546 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 12547 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 12548 ret void 12549} 12550 12551define void @s_shuffle_v3i16_v4i16__7_0_4() { 12552; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_0_4: 12553; GFX900: ; %bb.0: 12554; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12555; GFX900-NEXT: ;;#ASMSTART 12556; GFX900-NEXT: ; def s[4:5] 12557; GFX900-NEXT: ;;#ASMEND 12558; GFX900-NEXT: ;;#ASMSTART 12559; GFX900-NEXT: ; def s[6:7] 12560; GFX900-NEXT: ;;#ASMEND 12561; GFX900-NEXT: s_lshr_b32 s5, s7, 16 12562; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4 12563; GFX900-NEXT: s_mov_b32 s9, s6 12564; GFX900-NEXT: ;;#ASMSTART 12565; GFX900-NEXT: ; use s[8:9] 12566; GFX900-NEXT: ;;#ASMEND 12567; GFX900-NEXT: s_setpc_b64 s[30:31] 12568; 12569; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_0_4: 12570; GFX90A: ; %bb.0: 12571; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12572; GFX90A-NEXT: ;;#ASMSTART 12573; GFX90A-NEXT: ; def s[4:5] 12574; GFX90A-NEXT: ;;#ASMEND 12575; 
GFX90A-NEXT: ;;#ASMSTART 12576; GFX90A-NEXT: ; def s[6:7] 12577; GFX90A-NEXT: ;;#ASMEND 12578; GFX90A-NEXT: s_lshr_b32 s5, s7, 16 12579; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4 12580; GFX90A-NEXT: s_mov_b32 s9, s6 12581; GFX90A-NEXT: ;;#ASMSTART 12582; GFX90A-NEXT: ; use s[8:9] 12583; GFX90A-NEXT: ;;#ASMEND 12584; GFX90A-NEXT: s_setpc_b64 s[30:31] 12585; 12586; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_0_4: 12587; GFX940: ; %bb.0: 12588; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12589; GFX940-NEXT: ;;#ASMSTART 12590; GFX940-NEXT: ; def s[0:1] 12591; GFX940-NEXT: ;;#ASMEND 12592; GFX940-NEXT: ;;#ASMSTART 12593; GFX940-NEXT: ; def s[2:3] 12594; GFX940-NEXT: ;;#ASMEND 12595; GFX940-NEXT: s_lshr_b32 s1, s3, 16 12596; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 12597; GFX940-NEXT: s_mov_b32 s9, s2 12598; GFX940-NEXT: ;;#ASMSTART 12599; GFX940-NEXT: ; use s[8:9] 12600; GFX940-NEXT: ;;#ASMEND 12601; GFX940-NEXT: s_setpc_b64 s[30:31] 12602 %vec0 = call <4 x i16> asm "; def $0", "=s"() 12603 %vec1 = call <4 x i16> asm "; def $0", "=s"() 12604 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 0, i32 4> 12605 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 12606 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 12607 ret void 12608} 12609 12610define void @s_shuffle_v3i16_v4i16__7_1_4() { 12611; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_1_4: 12612; GFX900: ; %bb.0: 12613; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12614; GFX900-NEXT: ;;#ASMSTART 12615; GFX900-NEXT: ; def s[4:5] 12616; GFX900-NEXT: ;;#ASMEND 12617; GFX900-NEXT: ;;#ASMSTART 12618; GFX900-NEXT: ; def s[6:7] 12619; GFX900-NEXT: ;;#ASMEND 12620; GFX900-NEXT: s_pack_hh_b32_b16 s8, s7, s4 12621; GFX900-NEXT: s_mov_b32 s9, s6 12622; GFX900-NEXT: ;;#ASMSTART 12623; GFX900-NEXT: ; use s[8:9] 12624; GFX900-NEXT: ;;#ASMEND 12625; GFX900-NEXT: s_setpc_b64 s[30:31] 12626; 12627; GFX90A-LABEL: 
s_shuffle_v3i16_v4i16__7_1_4: 12628; GFX90A: ; %bb.0: 12629; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12630; GFX90A-NEXT: ;;#ASMSTART 12631; GFX90A-NEXT: ; def s[4:5] 12632; GFX90A-NEXT: ;;#ASMEND 12633; GFX90A-NEXT: ;;#ASMSTART 12634; GFX90A-NEXT: ; def s[6:7] 12635; GFX90A-NEXT: ;;#ASMEND 12636; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s7, s4 12637; GFX90A-NEXT: s_mov_b32 s9, s6 12638; GFX90A-NEXT: ;;#ASMSTART 12639; GFX90A-NEXT: ; use s[8:9] 12640; GFX90A-NEXT: ;;#ASMEND 12641; GFX90A-NEXT: s_setpc_b64 s[30:31] 12642; 12643; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_1_4: 12644; GFX940: ; %bb.0: 12645; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12646; GFX940-NEXT: ;;#ASMSTART 12647; GFX940-NEXT: ; def s[0:1] 12648; GFX940-NEXT: ;;#ASMEND 12649; GFX940-NEXT: ;;#ASMSTART 12650; GFX940-NEXT: ; def s[2:3] 12651; GFX940-NEXT: ;;#ASMEND 12652; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s0 12653; GFX940-NEXT: s_mov_b32 s9, s2 12654; GFX940-NEXT: ;;#ASMSTART 12655; GFX940-NEXT: ; use s[8:9] 12656; GFX940-NEXT: ;;#ASMEND 12657; GFX940-NEXT: s_setpc_b64 s[30:31] 12658 %vec0 = call <4 x i16> asm "; def $0", "=s"() 12659 %vec1 = call <4 x i16> asm "; def $0", "=s"() 12660 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 1, i32 4> 12661 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 12662 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 12663 ret void 12664} 12665 12666define void @s_shuffle_v3i16_v4i16__7_2_4() { 12667; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_2_4: 12668; GFX900: ; %bb.0: 12669; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12670; GFX900-NEXT: ;;#ASMSTART 12671; GFX900-NEXT: ; def s[4:5] 12672; GFX900-NEXT: ;;#ASMEND 12673; GFX900-NEXT: ;;#ASMSTART 12674; GFX900-NEXT: ; def s[6:7] 12675; GFX900-NEXT: ;;#ASMEND 12676; GFX900-NEXT: s_lshr_b32 s4, s7, 16 12677; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s5 12678; GFX900-NEXT: s_mov_b32 s9, s6 
12679; GFX900-NEXT: ;;#ASMSTART 12680; GFX900-NEXT: ; use s[8:9] 12681; GFX900-NEXT: ;;#ASMEND 12682; GFX900-NEXT: s_setpc_b64 s[30:31] 12683; 12684; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_2_4: 12685; GFX90A: ; %bb.0: 12686; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12687; GFX90A-NEXT: ;;#ASMSTART 12688; GFX90A-NEXT: ; def s[4:5] 12689; GFX90A-NEXT: ;;#ASMEND 12690; GFX90A-NEXT: ;;#ASMSTART 12691; GFX90A-NEXT: ; def s[6:7] 12692; GFX90A-NEXT: ;;#ASMEND 12693; GFX90A-NEXT: s_lshr_b32 s4, s7, 16 12694; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s5 12695; GFX90A-NEXT: s_mov_b32 s9, s6 12696; GFX90A-NEXT: ;;#ASMSTART 12697; GFX90A-NEXT: ; use s[8:9] 12698; GFX90A-NEXT: ;;#ASMEND 12699; GFX90A-NEXT: s_setpc_b64 s[30:31] 12700; 12701; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_2_4: 12702; GFX940: ; %bb.0: 12703; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12704; GFX940-NEXT: ;;#ASMSTART 12705; GFX940-NEXT: ; def s[0:1] 12706; GFX940-NEXT: ;;#ASMEND 12707; GFX940-NEXT: ;;#ASMSTART 12708; GFX940-NEXT: ; def s[2:3] 12709; GFX940-NEXT: ;;#ASMEND 12710; GFX940-NEXT: s_lshr_b32 s0, s3, 16 12711; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 12712; GFX940-NEXT: s_mov_b32 s9, s2 12713; GFX940-NEXT: ;;#ASMSTART 12714; GFX940-NEXT: ; use s[8:9] 12715; GFX940-NEXT: ;;#ASMEND 12716; GFX940-NEXT: s_setpc_b64 s[30:31] 12717 %vec0 = call <4 x i16> asm "; def $0", "=s"() 12718 %vec1 = call <4 x i16> asm "; def $0", "=s"() 12719 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 2, i32 4> 12720 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 12721 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 12722 ret void 12723} 12724 12725define void @s_shuffle_v3i16_v4i16__7_3_4() { 12726; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_3_4: 12727; GFX900: ; %bb.0: 12728; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12729; GFX900-NEXT: ;;#ASMSTART 12730; GFX900-NEXT: ; def s[4:5] 12731; 
GFX900-NEXT: ;;#ASMEND 12732; GFX900-NEXT: ;;#ASMSTART 12733; GFX900-NEXT: ; def s[6:7] 12734; GFX900-NEXT: ;;#ASMEND 12735; GFX900-NEXT: s_pack_hh_b32_b16 s8, s7, s5 12736; GFX900-NEXT: s_mov_b32 s9, s6 12737; GFX900-NEXT: ;;#ASMSTART 12738; GFX900-NEXT: ; use s[8:9] 12739; GFX900-NEXT: ;;#ASMEND 12740; GFX900-NEXT: s_setpc_b64 s[30:31] 12741; 12742; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_3_4: 12743; GFX90A: ; %bb.0: 12744; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12745; GFX90A-NEXT: ;;#ASMSTART 12746; GFX90A-NEXT: ; def s[4:5] 12747; GFX90A-NEXT: ;;#ASMEND 12748; GFX90A-NEXT: ;;#ASMSTART 12749; GFX90A-NEXT: ; def s[6:7] 12750; GFX90A-NEXT: ;;#ASMEND 12751; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s7, s5 12752; GFX90A-NEXT: s_mov_b32 s9, s6 12753; GFX90A-NEXT: ;;#ASMSTART 12754; GFX90A-NEXT: ; use s[8:9] 12755; GFX90A-NEXT: ;;#ASMEND 12756; GFX90A-NEXT: s_setpc_b64 s[30:31] 12757; 12758; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_3_4: 12759; GFX940: ; %bb.0: 12760; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12761; GFX940-NEXT: ;;#ASMSTART 12762; GFX940-NEXT: ; def s[0:1] 12763; GFX940-NEXT: ;;#ASMEND 12764; GFX940-NEXT: ;;#ASMSTART 12765; GFX940-NEXT: ; def s[2:3] 12766; GFX940-NEXT: ;;#ASMEND 12767; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s1 12768; GFX940-NEXT: s_mov_b32 s9, s2 12769; GFX940-NEXT: ;;#ASMSTART 12770; GFX940-NEXT: ; use s[8:9] 12771; GFX940-NEXT: ;;#ASMEND 12772; GFX940-NEXT: s_setpc_b64 s[30:31] 12773 %vec0 = call <4 x i16> asm "; def $0", "=s"() 12774 %vec1 = call <4 x i16> asm "; def $0", "=s"() 12775 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 3, i32 4> 12776 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 12777 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 12778 ret void 12779} 12780 12781define void @s_shuffle_v3i16_v4i16__7_5_4() { 12782; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_5_4: 12783; GFX900: ; %bb.0: 12784; 
GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12785; GFX900-NEXT: ;;#ASMSTART 12786; GFX900-NEXT: ; def s[4:5] 12787; GFX900-NEXT: ;;#ASMEND 12788; GFX900-NEXT: s_pack_hh_b32_b16 s8, s5, s4 12789; GFX900-NEXT: s_mov_b32 s9, s4 12790; GFX900-NEXT: ;;#ASMSTART 12791; GFX900-NEXT: ; use s[8:9] 12792; GFX900-NEXT: ;;#ASMEND 12793; GFX900-NEXT: s_setpc_b64 s[30:31] 12794; 12795; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_5_4: 12796; GFX90A: ; %bb.0: 12797; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12798; GFX90A-NEXT: ;;#ASMSTART 12799; GFX90A-NEXT: ; def s[4:5] 12800; GFX90A-NEXT: ;;#ASMEND 12801; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s5, s4 12802; GFX90A-NEXT: s_mov_b32 s9, s4 12803; GFX90A-NEXT: ;;#ASMSTART 12804; GFX90A-NEXT: ; use s[8:9] 12805; GFX90A-NEXT: ;;#ASMEND 12806; GFX90A-NEXT: s_setpc_b64 s[30:31] 12807; 12808; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_5_4: 12809; GFX940: ; %bb.0: 12810; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12811; GFX940-NEXT: ;;#ASMSTART 12812; GFX940-NEXT: ; def s[0:1] 12813; GFX940-NEXT: ;;#ASMEND 12814; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s0 12815; GFX940-NEXT: s_mov_b32 s9, s0 12816; GFX940-NEXT: ;;#ASMSTART 12817; GFX940-NEXT: ; use s[8:9] 12818; GFX940-NEXT: ;;#ASMEND 12819; GFX940-NEXT: s_setpc_b64 s[30:31] 12820 %vec0 = call <4 x i16> asm "; def $0", "=s"() 12821 %vec1 = call <4 x i16> asm "; def $0", "=s"() 12822 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 5, i32 4> 12823 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 12824 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 12825 ret void 12826} 12827 12828define void @s_shuffle_v3i16_v4i16__7_6_4() { 12829; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_6_4: 12830; GFX900: ; %bb.0: 12831; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12832; GFX900-NEXT: ;;#ASMSTART 12833; GFX900-NEXT: ; def s[4:5] 12834; GFX900-NEXT: ;;#ASMEND 12835; GFX900-NEXT: 
s_lshr_b32 s6, s5, 16 12836; GFX900-NEXT: s_pack_ll_b32_b16 s8, s6, s5 12837; GFX900-NEXT: s_mov_b32 s9, s4 12838; GFX900-NEXT: ;;#ASMSTART 12839; GFX900-NEXT: ; use s[8:9] 12840; GFX900-NEXT: ;;#ASMEND 12841; GFX900-NEXT: s_setpc_b64 s[30:31] 12842; 12843; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_6_4: 12844; GFX90A: ; %bb.0: 12845; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12846; GFX90A-NEXT: ;;#ASMSTART 12847; GFX90A-NEXT: ; def s[4:5] 12848; GFX90A-NEXT: ;;#ASMEND 12849; GFX90A-NEXT: s_lshr_b32 s6, s5, 16 12850; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s6, s5 12851; GFX90A-NEXT: s_mov_b32 s9, s4 12852; GFX90A-NEXT: ;;#ASMSTART 12853; GFX90A-NEXT: ; use s[8:9] 12854; GFX90A-NEXT: ;;#ASMEND 12855; GFX90A-NEXT: s_setpc_b64 s[30:31] 12856; 12857; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_6_4: 12858; GFX940: ; %bb.0: 12859; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12860; GFX940-NEXT: ;;#ASMSTART 12861; GFX940-NEXT: ; def s[0:1] 12862; GFX940-NEXT: ;;#ASMEND 12863; GFX940-NEXT: s_lshr_b32 s2, s1, 16 12864; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s1 12865; GFX940-NEXT: s_mov_b32 s9, s0 12866; GFX940-NEXT: ;;#ASMSTART 12867; GFX940-NEXT: ; use s[8:9] 12868; GFX940-NEXT: ;;#ASMEND 12869; GFX940-NEXT: s_setpc_b64 s[30:31] 12870 %vec0 = call <4 x i16> asm "; def $0", "=s"() 12871 %vec1 = call <4 x i16> asm "; def $0", "=s"() 12872 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 6, i32 4> 12873 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 12874 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 12875 ret void 12876} 12877 12878define void @s_shuffle_v3i16_v4i16__u_5_5() { 12879; GFX9-LABEL: s_shuffle_v3i16_v4i16__u_5_5: 12880; GFX9: ; %bb.0: 12881; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12882; GFX9-NEXT: ;;#ASMSTART 12883; GFX9-NEXT: ; def s[8:9] 12884; GFX9-NEXT: ;;#ASMEND 12885; GFX9-NEXT: s_lshr_b32 s9, s8, 16 12886; GFX9-NEXT: ;;#ASMSTART 
12887; GFX9-NEXT: ; use s[8:9] 12888; GFX9-NEXT: ;;#ASMEND 12889; GFX9-NEXT: s_setpc_b64 s[30:31] 12890 %vec0 = call <4 x i16> asm "; def $0", "=s"() 12891 %vec1 = call <4 x i16> asm "; def $0", "=s"() 12892 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 poison, i32 5, i32 5> 12893 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 12894 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 12895 ret void 12896} 12897 12898define void @s_shuffle_v3i16_v4i16__0_5_5() { 12899; GFX900-LABEL: s_shuffle_v3i16_v4i16__0_5_5: 12900; GFX900: ; %bb.0: 12901; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12902; GFX900-NEXT: ;;#ASMSTART 12903; GFX900-NEXT: ; def s[4:5] 12904; GFX900-NEXT: ;;#ASMEND 12905; GFX900-NEXT: ;;#ASMSTART 12906; GFX900-NEXT: ; def s[6:7] 12907; GFX900-NEXT: ;;#ASMEND 12908; GFX900-NEXT: s_pack_lh_b32_b16 s8, s4, s6 12909; GFX900-NEXT: s_lshr_b32 s9, s6, 16 12910; GFX900-NEXT: ;;#ASMSTART 12911; GFX900-NEXT: ; use s[8:9] 12912; GFX900-NEXT: ;;#ASMEND 12913; GFX900-NEXT: s_setpc_b64 s[30:31] 12914; 12915; GFX90A-LABEL: s_shuffle_v3i16_v4i16__0_5_5: 12916; GFX90A: ; %bb.0: 12917; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12918; GFX90A-NEXT: ;;#ASMSTART 12919; GFX90A-NEXT: ; def s[4:5] 12920; GFX90A-NEXT: ;;#ASMEND 12921; GFX90A-NEXT: ;;#ASMSTART 12922; GFX90A-NEXT: ; def s[6:7] 12923; GFX90A-NEXT: ;;#ASMEND 12924; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s4, s6 12925; GFX90A-NEXT: s_lshr_b32 s9, s6, 16 12926; GFX90A-NEXT: ;;#ASMSTART 12927; GFX90A-NEXT: ; use s[8:9] 12928; GFX90A-NEXT: ;;#ASMEND 12929; GFX90A-NEXT: s_setpc_b64 s[30:31] 12930; 12931; GFX940-LABEL: s_shuffle_v3i16_v4i16__0_5_5: 12932; GFX940: ; %bb.0: 12933; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12934; GFX940-NEXT: ;;#ASMSTART 12935; GFX940-NEXT: ; def s[0:1] 12936; GFX940-NEXT: ;;#ASMEND 12937; GFX940-NEXT: ;;#ASMSTART 12938; GFX940-NEXT: ; def s[2:3] 12939; GFX940-NEXT: 
;;#ASMEND 12940; GFX940-NEXT: s_pack_lh_b32_b16 s8, s0, s2 12941; GFX940-NEXT: s_lshr_b32 s9, s2, 16 12942; GFX940-NEXT: ;;#ASMSTART 12943; GFX940-NEXT: ; use s[8:9] 12944; GFX940-NEXT: ;;#ASMEND 12945; GFX940-NEXT: s_setpc_b64 s[30:31] 12946 %vec0 = call <4 x i16> asm "; def $0", "=s"() 12947 %vec1 = call <4 x i16> asm "; def $0", "=s"() 12948 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 0, i32 5, i32 5> 12949 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 12950 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 12951 ret void 12952} 12953 12954define void @s_shuffle_v3i16_v4i16__1_5_5() { 12955; GFX900-LABEL: s_shuffle_v3i16_v4i16__1_5_5: 12956; GFX900: ; %bb.0: 12957; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12958; GFX900-NEXT: ;;#ASMSTART 12959; GFX900-NEXT: ; def s[4:5] 12960; GFX900-NEXT: ;;#ASMEND 12961; GFX900-NEXT: ;;#ASMSTART 12962; GFX900-NEXT: ; def s[6:7] 12963; GFX900-NEXT: ;;#ASMEND 12964; GFX900-NEXT: s_pack_hh_b32_b16 s8, s4, s6 12965; GFX900-NEXT: s_lshr_b32 s9, s6, 16 12966; GFX900-NEXT: ;;#ASMSTART 12967; GFX900-NEXT: ; use s[8:9] 12968; GFX900-NEXT: ;;#ASMEND 12969; GFX900-NEXT: s_setpc_b64 s[30:31] 12970; 12971; GFX90A-LABEL: s_shuffle_v3i16_v4i16__1_5_5: 12972; GFX90A: ; %bb.0: 12973; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12974; GFX90A-NEXT: ;;#ASMSTART 12975; GFX90A-NEXT: ; def s[4:5] 12976; GFX90A-NEXT: ;;#ASMEND 12977; GFX90A-NEXT: ;;#ASMSTART 12978; GFX90A-NEXT: ; def s[6:7] 12979; GFX90A-NEXT: ;;#ASMEND 12980; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s4, s6 12981; GFX90A-NEXT: s_lshr_b32 s9, s6, 16 12982; GFX90A-NEXT: ;;#ASMSTART 12983; GFX90A-NEXT: ; use s[8:9] 12984; GFX90A-NEXT: ;;#ASMEND 12985; GFX90A-NEXT: s_setpc_b64 s[30:31] 12986; 12987; GFX940-LABEL: s_shuffle_v3i16_v4i16__1_5_5: 12988; GFX940: ; %bb.0: 12989; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12990; GFX940-NEXT: ;;#ASMSTART 12991; 
GFX940-NEXT: ; def s[0:1] 12992; GFX940-NEXT: ;;#ASMEND 12993; GFX940-NEXT: ;;#ASMSTART 12994; GFX940-NEXT: ; def s[2:3] 12995; GFX940-NEXT: ;;#ASMEND 12996; GFX940-NEXT: s_pack_hh_b32_b16 s8, s0, s2 12997; GFX940-NEXT: s_lshr_b32 s9, s2, 16 12998; GFX940-NEXT: ;;#ASMSTART 12999; GFX940-NEXT: ; use s[8:9] 13000; GFX940-NEXT: ;;#ASMEND 13001; GFX940-NEXT: s_setpc_b64 s[30:31] 13002 %vec0 = call <4 x i16> asm "; def $0", "=s"() 13003 %vec1 = call <4 x i16> asm "; def $0", "=s"() 13004 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 1, i32 5, i32 5> 13005 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 13006 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 13007 ret void 13008} 13009 13010define void @s_shuffle_v3i16_v4i16__2_5_5() { 13011; GFX900-LABEL: s_shuffle_v3i16_v4i16__2_5_5: 13012; GFX900: ; %bb.0: 13013; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13014; GFX900-NEXT: ;;#ASMSTART 13015; GFX900-NEXT: ; def s[4:5] 13016; GFX900-NEXT: ;;#ASMEND 13017; GFX900-NEXT: ;;#ASMSTART 13018; GFX900-NEXT: ; def s[6:7] 13019; GFX900-NEXT: ;;#ASMEND 13020; GFX900-NEXT: s_pack_lh_b32_b16 s8, s5, s6 13021; GFX900-NEXT: s_lshr_b32 s9, s6, 16 13022; GFX900-NEXT: ;;#ASMSTART 13023; GFX900-NEXT: ; use s[8:9] 13024; GFX900-NEXT: ;;#ASMEND 13025; GFX900-NEXT: s_setpc_b64 s[30:31] 13026; 13027; GFX90A-LABEL: s_shuffle_v3i16_v4i16__2_5_5: 13028; GFX90A: ; %bb.0: 13029; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13030; GFX90A-NEXT: ;;#ASMSTART 13031; GFX90A-NEXT: ; def s[4:5] 13032; GFX90A-NEXT: ;;#ASMEND 13033; GFX90A-NEXT: ;;#ASMSTART 13034; GFX90A-NEXT: ; def s[6:7] 13035; GFX90A-NEXT: ;;#ASMEND 13036; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s5, s6 13037; GFX90A-NEXT: s_lshr_b32 s9, s6, 16 13038; GFX90A-NEXT: ;;#ASMSTART 13039; GFX90A-NEXT: ; use s[8:9] 13040; GFX90A-NEXT: ;;#ASMEND 13041; GFX90A-NEXT: s_setpc_b64 s[30:31] 13042; 13043; GFX940-LABEL: 
s_shuffle_v3i16_v4i16__2_5_5: 13044; GFX940: ; %bb.0: 13045; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13046; GFX940-NEXT: ;;#ASMSTART 13047; GFX940-NEXT: ; def s[0:1] 13048; GFX940-NEXT: ;;#ASMEND 13049; GFX940-NEXT: ;;#ASMSTART 13050; GFX940-NEXT: ; def s[2:3] 13051; GFX940-NEXT: ;;#ASMEND 13052; GFX940-NEXT: s_pack_lh_b32_b16 s8, s1, s2 13053; GFX940-NEXT: s_lshr_b32 s9, s2, 16 13054; GFX940-NEXT: ;;#ASMSTART 13055; GFX940-NEXT: ; use s[8:9] 13056; GFX940-NEXT: ;;#ASMEND 13057; GFX940-NEXT: s_setpc_b64 s[30:31] 13058 %vec0 = call <4 x i16> asm "; def $0", "=s"() 13059 %vec1 = call <4 x i16> asm "; def $0", "=s"() 13060 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 2, i32 5, i32 5> 13061 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 13062 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 13063 ret void 13064} 13065 13066define void @s_shuffle_v3i16_v4i16__3_5_5() { 13067; GFX900-LABEL: s_shuffle_v3i16_v4i16__3_5_5: 13068; GFX900: ; %bb.0: 13069; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13070; GFX900-NEXT: ;;#ASMSTART 13071; GFX900-NEXT: ; def s[4:5] 13072; GFX900-NEXT: ;;#ASMEND 13073; GFX900-NEXT: ;;#ASMSTART 13074; GFX900-NEXT: ; def s[6:7] 13075; GFX900-NEXT: ;;#ASMEND 13076; GFX900-NEXT: s_pack_hh_b32_b16 s8, s5, s6 13077; GFX900-NEXT: s_lshr_b32 s9, s6, 16 13078; GFX900-NEXT: ;;#ASMSTART 13079; GFX900-NEXT: ; use s[8:9] 13080; GFX900-NEXT: ;;#ASMEND 13081; GFX900-NEXT: s_setpc_b64 s[30:31] 13082; 13083; GFX90A-LABEL: s_shuffle_v3i16_v4i16__3_5_5: 13084; GFX90A: ; %bb.0: 13085; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13086; GFX90A-NEXT: ;;#ASMSTART 13087; GFX90A-NEXT: ; def s[4:5] 13088; GFX90A-NEXT: ;;#ASMEND 13089; GFX90A-NEXT: ;;#ASMSTART 13090; GFX90A-NEXT: ; def s[6:7] 13091; GFX90A-NEXT: ;;#ASMEND 13092; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s5, s6 13093; GFX90A-NEXT: s_lshr_b32 s9, s6, 16 13094; GFX90A-NEXT: ;;#ASMSTART 
13095; GFX90A-NEXT: ; use s[8:9] 13096; GFX90A-NEXT: ;;#ASMEND 13097; GFX90A-NEXT: s_setpc_b64 s[30:31] 13098; 13099; GFX940-LABEL: s_shuffle_v3i16_v4i16__3_5_5: 13100; GFX940: ; %bb.0: 13101; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13102; GFX940-NEXT: ;;#ASMSTART 13103; GFX940-NEXT: ; def s[0:1] 13104; GFX940-NEXT: ;;#ASMEND 13105; GFX940-NEXT: ;;#ASMSTART 13106; GFX940-NEXT: ; def s[2:3] 13107; GFX940-NEXT: ;;#ASMEND 13108; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s2 13109; GFX940-NEXT: s_lshr_b32 s9, s2, 16 13110; GFX940-NEXT: ;;#ASMSTART 13111; GFX940-NEXT: ; use s[8:9] 13112; GFX940-NEXT: ;;#ASMEND 13113; GFX940-NEXT: s_setpc_b64 s[30:31] 13114 %vec0 = call <4 x i16> asm "; def $0", "=s"() 13115 %vec1 = call <4 x i16> asm "; def $0", "=s"() 13116 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 3, i32 5, i32 5> 13117 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 13118 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 13119 ret void 13120} 13121 13122define void @s_shuffle_v3i16_v4i16__4_5_5() { 13123; GFX9-LABEL: s_shuffle_v3i16_v4i16__4_5_5: 13124; GFX9: ; %bb.0: 13125; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13126; GFX9-NEXT: ;;#ASMSTART 13127; GFX9-NEXT: ; def s[8:9] 13128; GFX9-NEXT: ;;#ASMEND 13129; GFX9-NEXT: s_lshr_b32 s9, s8, 16 13130; GFX9-NEXT: ;;#ASMSTART 13131; GFX9-NEXT: ; use s[8:9] 13132; GFX9-NEXT: ;;#ASMEND 13133; GFX9-NEXT: s_setpc_b64 s[30:31] 13134 %vec0 = call <4 x i16> asm "; def $0", "=s"() 13135 %vec1 = call <4 x i16> asm "; def $0", "=s"() 13136 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 4, i32 5, i32 5> 13137 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 13138 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 13139 ret void 13140} 13141 13142define void @s_shuffle_v3i16_v4i16__5_5_5() { 13143; GFX900-LABEL: 
s_shuffle_v3i16_v4i16__5_5_5: 13144; GFX900: ; %bb.0: 13145; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13146; GFX900-NEXT: ;;#ASMSTART 13147; GFX900-NEXT: ; def s[4:5] 13148; GFX900-NEXT: ;;#ASMEND 13149; GFX900-NEXT: s_lshr_b32 s9, s4, 16 13150; GFX900-NEXT: s_pack_hh_b32_b16 s8, s4, s4 13151; GFX900-NEXT: ;;#ASMSTART 13152; GFX900-NEXT: ; use s[8:9] 13153; GFX900-NEXT: ;;#ASMEND 13154; GFX900-NEXT: s_setpc_b64 s[30:31] 13155; 13156; GFX90A-LABEL: s_shuffle_v3i16_v4i16__5_5_5: 13157; GFX90A: ; %bb.0: 13158; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13159; GFX90A-NEXT: ;;#ASMSTART 13160; GFX90A-NEXT: ; def s[4:5] 13161; GFX90A-NEXT: ;;#ASMEND 13162; GFX90A-NEXT: s_lshr_b32 s9, s4, 16 13163; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s4, s4 13164; GFX90A-NEXT: ;;#ASMSTART 13165; GFX90A-NEXT: ; use s[8:9] 13166; GFX90A-NEXT: ;;#ASMEND 13167; GFX90A-NEXT: s_setpc_b64 s[30:31] 13168; 13169; GFX940-LABEL: s_shuffle_v3i16_v4i16__5_5_5: 13170; GFX940: ; %bb.0: 13171; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13172; GFX940-NEXT: ;;#ASMSTART 13173; GFX940-NEXT: ; def s[0:1] 13174; GFX940-NEXT: ;;#ASMEND 13175; GFX940-NEXT: s_lshr_b32 s9, s0, 16 13176; GFX940-NEXT: s_pack_hh_b32_b16 s8, s0, s0 13177; GFX940-NEXT: ;;#ASMSTART 13178; GFX940-NEXT: ; use s[8:9] 13179; GFX940-NEXT: ;;#ASMEND 13180; GFX940-NEXT: s_setpc_b64 s[30:31] 13181 %vec0 = call <4 x i16> asm "; def $0", "=s"() 13182 %vec1 = call <4 x i16> asm "; def $0", "=s"() 13183 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 5, i32 5, i32 5> 13184 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 13185 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 13186 ret void 13187} 13188 13189define void @s_shuffle_v3i16_v4i16__6_5_5() { 13190; GFX900-LABEL: s_shuffle_v3i16_v4i16__6_5_5: 13191; GFX900: ; %bb.0: 13192; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13193; GFX900-NEXT: ;;#ASMSTART 13194; 
GFX900-NEXT: ; def s[4:5] 13195; GFX900-NEXT: ;;#ASMEND 13196; GFX900-NEXT: s_pack_lh_b32_b16 s8, s5, s4 13197; GFX900-NEXT: s_lshr_b32 s9, s4, 16 13198; GFX900-NEXT: ;;#ASMSTART 13199; GFX900-NEXT: ; use s[8:9] 13200; GFX900-NEXT: ;;#ASMEND 13201; GFX900-NEXT: s_setpc_b64 s[30:31] 13202; 13203; GFX90A-LABEL: s_shuffle_v3i16_v4i16__6_5_5: 13204; GFX90A: ; %bb.0: 13205; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13206; GFX90A-NEXT: ;;#ASMSTART 13207; GFX90A-NEXT: ; def s[4:5] 13208; GFX90A-NEXT: ;;#ASMEND 13209; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s5, s4 13210; GFX90A-NEXT: s_lshr_b32 s9, s4, 16 13211; GFX90A-NEXT: ;;#ASMSTART 13212; GFX90A-NEXT: ; use s[8:9] 13213; GFX90A-NEXT: ;;#ASMEND 13214; GFX90A-NEXT: s_setpc_b64 s[30:31] 13215; 13216; GFX940-LABEL: s_shuffle_v3i16_v4i16__6_5_5: 13217; GFX940: ; %bb.0: 13218; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13219; GFX940-NEXT: ;;#ASMSTART 13220; GFX940-NEXT: ; def s[0:1] 13221; GFX940-NEXT: ;;#ASMEND 13222; GFX940-NEXT: s_pack_lh_b32_b16 s8, s1, s0 13223; GFX940-NEXT: s_lshr_b32 s9, s0, 16 13224; GFX940-NEXT: ;;#ASMSTART 13225; GFX940-NEXT: ; use s[8:9] 13226; GFX940-NEXT: ;;#ASMEND 13227; GFX940-NEXT: s_setpc_b64 s[30:31] 13228 %vec0 = call <4 x i16> asm "; def $0", "=s"() 13229 %vec1 = call <4 x i16> asm "; def $0", "=s"() 13230 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 6, i32 5, i32 5> 13231 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 13232 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 13233 ret void 13234} 13235 13236define void @s_shuffle_v3i16_v4i16__7_5_5() { 13237; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_5_5: 13238; GFX900: ; %bb.0: 13239; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13240; GFX900-NEXT: ;;#ASMSTART 13241; GFX900-NEXT: ; def s[4:5] 13242; GFX900-NEXT: ;;#ASMEND 13243; GFX900-NEXT: s_pack_hh_b32_b16 s8, s5, s4 13244; GFX900-NEXT: s_lshr_b32 s9, s4, 16 13245; 
GFX900-NEXT: ;;#ASMSTART 13246; GFX900-NEXT: ; use s[8:9] 13247; GFX900-NEXT: ;;#ASMEND 13248; GFX900-NEXT: s_setpc_b64 s[30:31] 13249; 13250; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_5_5: 13251; GFX90A: ; %bb.0: 13252; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13253; GFX90A-NEXT: ;;#ASMSTART 13254; GFX90A-NEXT: ; def s[4:5] 13255; GFX90A-NEXT: ;;#ASMEND 13256; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s5, s4 13257; GFX90A-NEXT: s_lshr_b32 s9, s4, 16 13258; GFX90A-NEXT: ;;#ASMSTART 13259; GFX90A-NEXT: ; use s[8:9] 13260; GFX90A-NEXT: ;;#ASMEND 13261; GFX90A-NEXT: s_setpc_b64 s[30:31] 13262; 13263; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_5_5: 13264; GFX940: ; %bb.0: 13265; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13266; GFX940-NEXT: ;;#ASMSTART 13267; GFX940-NEXT: ; def s[0:1] 13268; GFX940-NEXT: ;;#ASMEND 13269; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s0 13270; GFX940-NEXT: s_lshr_b32 s9, s0, 16 13271; GFX940-NEXT: ;;#ASMSTART 13272; GFX940-NEXT: ; use s[8:9] 13273; GFX940-NEXT: ;;#ASMEND 13274; GFX940-NEXT: s_setpc_b64 s[30:31] 13275 %vec0 = call <4 x i16> asm "; def $0", "=s"() 13276 %vec1 = call <4 x i16> asm "; def $0", "=s"() 13277 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 5, i32 5> 13278 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 13279 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 13280 ret void 13281} 13282 13283define void @s_shuffle_v3i16_v4i16__7_u_5() { 13284; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_u_5: 13285; GFX900: ; %bb.0: 13286; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13287; GFX900-NEXT: ;;#ASMSTART 13288; GFX900-NEXT: ; def s[4:5] 13289; GFX900-NEXT: ;;#ASMEND 13290; GFX900-NEXT: s_lshr_b32 s9, s4, 16 13291; GFX900-NEXT: s_lshr_b32 s8, s5, 16 13292; GFX900-NEXT: ;;#ASMSTART 13293; GFX900-NEXT: ; use s[8:9] 13294; GFX900-NEXT: ;;#ASMEND 13295; GFX900-NEXT: s_setpc_b64 s[30:31] 13296; 13297; GFX90A-LABEL: 
s_shuffle_v3i16_v4i16__7_u_5: 13298; GFX90A: ; %bb.0: 13299; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13300; GFX90A-NEXT: ;;#ASMSTART 13301; GFX90A-NEXT: ; def s[4:5] 13302; GFX90A-NEXT: ;;#ASMEND 13303; GFX90A-NEXT: s_lshr_b32 s9, s4, 16 13304; GFX90A-NEXT: s_lshr_b32 s8, s5, 16 13305; GFX90A-NEXT: ;;#ASMSTART 13306; GFX90A-NEXT: ; use s[8:9] 13307; GFX90A-NEXT: ;;#ASMEND 13308; GFX90A-NEXT: s_setpc_b64 s[30:31] 13309; 13310; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_u_5: 13311; GFX940: ; %bb.0: 13312; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13313; GFX940-NEXT: ;;#ASMSTART 13314; GFX940-NEXT: ; def s[0:1] 13315; GFX940-NEXT: ;;#ASMEND 13316; GFX940-NEXT: s_lshr_b32 s9, s0, 16 13317; GFX940-NEXT: s_lshr_b32 s8, s1, 16 13318; GFX940-NEXT: ;;#ASMSTART 13319; GFX940-NEXT: ; use s[8:9] 13320; GFX940-NEXT: ;;#ASMEND 13321; GFX940-NEXT: s_setpc_b64 s[30:31] 13322 %vec0 = call <4 x i16> asm "; def $0", "=s"() 13323 %vec1 = call <4 x i16> asm "; def $0", "=s"() 13324 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 poison, i32 5> 13325 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 13326 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 13327 ret void 13328} 13329 13330define void @s_shuffle_v3i16_v4i16__7_0_5() { 13331; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_0_5: 13332; GFX900: ; %bb.0: 13333; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13334; GFX900-NEXT: ;;#ASMSTART 13335; GFX900-NEXT: ; def s[4:5] 13336; GFX900-NEXT: ;;#ASMEND 13337; GFX900-NEXT: ;;#ASMSTART 13338; GFX900-NEXT: ; def s[6:7] 13339; GFX900-NEXT: ;;#ASMEND 13340; GFX900-NEXT: s_lshr_b32 s5, s7, 16 13341; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4 13342; GFX900-NEXT: s_lshr_b32 s9, s6, 16 13343; GFX900-NEXT: ;;#ASMSTART 13344; GFX900-NEXT: ; use s[8:9] 13345; GFX900-NEXT: ;;#ASMEND 13346; GFX900-NEXT: s_setpc_b64 s[30:31] 13347; 13348; GFX90A-LABEL: 
s_shuffle_v3i16_v4i16__7_0_5: 13349; GFX90A: ; %bb.0: 13350; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13351; GFX90A-NEXT: ;;#ASMSTART 13352; GFX90A-NEXT: ; def s[4:5] 13353; GFX90A-NEXT: ;;#ASMEND 13354; GFX90A-NEXT: ;;#ASMSTART 13355; GFX90A-NEXT: ; def s[6:7] 13356; GFX90A-NEXT: ;;#ASMEND 13357; GFX90A-NEXT: s_lshr_b32 s5, s7, 16 13358; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4 13359; GFX90A-NEXT: s_lshr_b32 s9, s6, 16 13360; GFX90A-NEXT: ;;#ASMSTART 13361; GFX90A-NEXT: ; use s[8:9] 13362; GFX90A-NEXT: ;;#ASMEND 13363; GFX90A-NEXT: s_setpc_b64 s[30:31] 13364; 13365; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_0_5: 13366; GFX940: ; %bb.0: 13367; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13368; GFX940-NEXT: ;;#ASMSTART 13369; GFX940-NEXT: ; def s[0:1] 13370; GFX940-NEXT: ;;#ASMEND 13371; GFX940-NEXT: ;;#ASMSTART 13372; GFX940-NEXT: ; def s[2:3] 13373; GFX940-NEXT: ;;#ASMEND 13374; GFX940-NEXT: s_lshr_b32 s1, s3, 16 13375; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 13376; GFX940-NEXT: s_lshr_b32 s9, s2, 16 13377; GFX940-NEXT: ;;#ASMSTART 13378; GFX940-NEXT: ; use s[8:9] 13379; GFX940-NEXT: ;;#ASMEND 13380; GFX940-NEXT: s_setpc_b64 s[30:31] 13381 %vec0 = call <4 x i16> asm "; def $0", "=s"() 13382 %vec1 = call <4 x i16> asm "; def $0", "=s"() 13383 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 0, i32 5> 13384 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 13385 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 13386 ret void 13387} 13388 13389define void @s_shuffle_v3i16_v4i16__7_1_5() { 13390; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_1_5: 13391; GFX900: ; %bb.0: 13392; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13393; GFX900-NEXT: ;;#ASMSTART 13394; GFX900-NEXT: ; def s[4:5] 13395; GFX900-NEXT: ;;#ASMEND 13396; GFX900-NEXT: ;;#ASMSTART 13397; GFX900-NEXT: ; def s[6:7] 13398; GFX900-NEXT: ;;#ASMEND 13399; GFX900-NEXT: s_pack_hh_b32_b16 
s8, s7, s4 13400; GFX900-NEXT: s_lshr_b32 s9, s6, 16 13401; GFX900-NEXT: ;;#ASMSTART 13402; GFX900-NEXT: ; use s[8:9] 13403; GFX900-NEXT: ;;#ASMEND 13404; GFX900-NEXT: s_setpc_b64 s[30:31] 13405; 13406; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_1_5: 13407; GFX90A: ; %bb.0: 13408; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13409; GFX90A-NEXT: ;;#ASMSTART 13410; GFX90A-NEXT: ; def s[4:5] 13411; GFX90A-NEXT: ;;#ASMEND 13412; GFX90A-NEXT: ;;#ASMSTART 13413; GFX90A-NEXT: ; def s[6:7] 13414; GFX90A-NEXT: ;;#ASMEND 13415; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s7, s4 13416; GFX90A-NEXT: s_lshr_b32 s9, s6, 16 13417; GFX90A-NEXT: ;;#ASMSTART 13418; GFX90A-NEXT: ; use s[8:9] 13419; GFX90A-NEXT: ;;#ASMEND 13420; GFX90A-NEXT: s_setpc_b64 s[30:31] 13421; 13422; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_1_5: 13423; GFX940: ; %bb.0: 13424; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13425; GFX940-NEXT: ;;#ASMSTART 13426; GFX940-NEXT: ; def s[0:1] 13427; GFX940-NEXT: ;;#ASMEND 13428; GFX940-NEXT: ;;#ASMSTART 13429; GFX940-NEXT: ; def s[2:3] 13430; GFX940-NEXT: ;;#ASMEND 13431; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s0 13432; GFX940-NEXT: s_lshr_b32 s9, s2, 16 13433; GFX940-NEXT: ;;#ASMSTART 13434; GFX940-NEXT: ; use s[8:9] 13435; GFX940-NEXT: ;;#ASMEND 13436; GFX940-NEXT: s_setpc_b64 s[30:31] 13437 %vec0 = call <4 x i16> asm "; def $0", "=s"() 13438 %vec1 = call <4 x i16> asm "; def $0", "=s"() 13439 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 1, i32 5> 13440 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 13441 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 13442 ret void 13443} 13444 13445define void @s_shuffle_v3i16_v4i16__7_2_5() { 13446; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_2_5: 13447; GFX900: ; %bb.0: 13448; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13449; GFX900-NEXT: ;;#ASMSTART 13450; GFX900-NEXT: ; def s[4:5] 13451; GFX900-NEXT: ;;#ASMEND 
13452; GFX900-NEXT: ;;#ASMSTART 13453; GFX900-NEXT: ; def s[6:7] 13454; GFX900-NEXT: ;;#ASMEND 13455; GFX900-NEXT: s_lshr_b32 s4, s7, 16 13456; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s5 13457; GFX900-NEXT: s_lshr_b32 s9, s6, 16 13458; GFX900-NEXT: ;;#ASMSTART 13459; GFX900-NEXT: ; use s[8:9] 13460; GFX900-NEXT: ;;#ASMEND 13461; GFX900-NEXT: s_setpc_b64 s[30:31] 13462; 13463; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_2_5: 13464; GFX90A: ; %bb.0: 13465; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13466; GFX90A-NEXT: ;;#ASMSTART 13467; GFX90A-NEXT: ; def s[4:5] 13468; GFX90A-NEXT: ;;#ASMEND 13469; GFX90A-NEXT: ;;#ASMSTART 13470; GFX90A-NEXT: ; def s[6:7] 13471; GFX90A-NEXT: ;;#ASMEND 13472; GFX90A-NEXT: s_lshr_b32 s4, s7, 16 13473; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s5 13474; GFX90A-NEXT: s_lshr_b32 s9, s6, 16 13475; GFX90A-NEXT: ;;#ASMSTART 13476; GFX90A-NEXT: ; use s[8:9] 13477; GFX90A-NEXT: ;;#ASMEND 13478; GFX90A-NEXT: s_setpc_b64 s[30:31] 13479; 13480; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_2_5: 13481; GFX940: ; %bb.0: 13482; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13483; GFX940-NEXT: ;;#ASMSTART 13484; GFX940-NEXT: ; def s[0:1] 13485; GFX940-NEXT: ;;#ASMEND 13486; GFX940-NEXT: ;;#ASMSTART 13487; GFX940-NEXT: ; def s[2:3] 13488; GFX940-NEXT: ;;#ASMEND 13489; GFX940-NEXT: s_lshr_b32 s0, s3, 16 13490; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 13491; GFX940-NEXT: s_lshr_b32 s9, s2, 16 13492; GFX940-NEXT: ;;#ASMSTART 13493; GFX940-NEXT: ; use s[8:9] 13494; GFX940-NEXT: ;;#ASMEND 13495; GFX940-NEXT: s_setpc_b64 s[30:31] 13496 %vec0 = call <4 x i16> asm "; def $0", "=s"() 13497 %vec1 = call <4 x i16> asm "; def $0", "=s"() 13498 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 2, i32 5> 13499 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 13500 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 13501 ret void 13502} 13503 13504define void 
@s_shuffle_v3i16_v4i16__7_3_5() { 13505; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_3_5: 13506; GFX900: ; %bb.0: 13507; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13508; GFX900-NEXT: ;;#ASMSTART 13509; GFX900-NEXT: ; def s[4:5] 13510; GFX900-NEXT: ;;#ASMEND 13511; GFX900-NEXT: ;;#ASMSTART 13512; GFX900-NEXT: ; def s[6:7] 13513; GFX900-NEXT: ;;#ASMEND 13514; GFX900-NEXT: s_pack_hh_b32_b16 s8, s7, s5 13515; GFX900-NEXT: s_lshr_b32 s9, s6, 16 13516; GFX900-NEXT: ;;#ASMSTART 13517; GFX900-NEXT: ; use s[8:9] 13518; GFX900-NEXT: ;;#ASMEND 13519; GFX900-NEXT: s_setpc_b64 s[30:31] 13520; 13521; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_3_5: 13522; GFX90A: ; %bb.0: 13523; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13524; GFX90A-NEXT: ;;#ASMSTART 13525; GFX90A-NEXT: ; def s[4:5] 13526; GFX90A-NEXT: ;;#ASMEND 13527; GFX90A-NEXT: ;;#ASMSTART 13528; GFX90A-NEXT: ; def s[6:7] 13529; GFX90A-NEXT: ;;#ASMEND 13530; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s7, s5 13531; GFX90A-NEXT: s_lshr_b32 s9, s6, 16 13532; GFX90A-NEXT: ;;#ASMSTART 13533; GFX90A-NEXT: ; use s[8:9] 13534; GFX90A-NEXT: ;;#ASMEND 13535; GFX90A-NEXT: s_setpc_b64 s[30:31] 13536; 13537; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_3_5: 13538; GFX940: ; %bb.0: 13539; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13540; GFX940-NEXT: ;;#ASMSTART 13541; GFX940-NEXT: ; def s[0:1] 13542; GFX940-NEXT: ;;#ASMEND 13543; GFX940-NEXT: ;;#ASMSTART 13544; GFX940-NEXT: ; def s[2:3] 13545; GFX940-NEXT: ;;#ASMEND 13546; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s1 13547; GFX940-NEXT: s_lshr_b32 s9, s2, 16 13548; GFX940-NEXT: ;;#ASMSTART 13549; GFX940-NEXT: ; use s[8:9] 13550; GFX940-NEXT: ;;#ASMEND 13551; GFX940-NEXT: s_setpc_b64 s[30:31] 13552 %vec0 = call <4 x i16> asm "; def $0", "=s"() 13553 %vec1 = call <4 x i16> asm "; def $0", "=s"() 13554 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 3, i32 5> 13555 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, 
i32 poison> 13556 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 13557 ret void 13558} 13559 13560define void @s_shuffle_v3i16_v4i16__7_4_5() { 13561; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_4_5: 13562; GFX900: ; %bb.0: 13563; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13564; GFX900-NEXT: ;;#ASMSTART 13565; GFX900-NEXT: ; def s[4:5] 13566; GFX900-NEXT: ;;#ASMEND 13567; GFX900-NEXT: s_lshr_b32 s5, s5, 16 13568; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4 13569; GFX900-NEXT: s_lshr_b32 s9, s4, 16 13570; GFX900-NEXT: ;;#ASMSTART 13571; GFX900-NEXT: ; use s[8:9] 13572; GFX900-NEXT: ;;#ASMEND 13573; GFX900-NEXT: s_setpc_b64 s[30:31] 13574; 13575; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_4_5: 13576; GFX90A: ; %bb.0: 13577; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13578; GFX90A-NEXT: ;;#ASMSTART 13579; GFX90A-NEXT: ; def s[4:5] 13580; GFX90A-NEXT: ;;#ASMEND 13581; GFX90A-NEXT: s_lshr_b32 s5, s5, 16 13582; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4 13583; GFX90A-NEXT: s_lshr_b32 s9, s4, 16 13584; GFX90A-NEXT: ;;#ASMSTART 13585; GFX90A-NEXT: ; use s[8:9] 13586; GFX90A-NEXT: ;;#ASMEND 13587; GFX90A-NEXT: s_setpc_b64 s[30:31] 13588; 13589; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_4_5: 13590; GFX940: ; %bb.0: 13591; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13592; GFX940-NEXT: ;;#ASMSTART 13593; GFX940-NEXT: ; def s[0:1] 13594; GFX940-NEXT: ;;#ASMEND 13595; GFX940-NEXT: s_lshr_b32 s1, s1, 16 13596; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 13597; GFX940-NEXT: s_lshr_b32 s9, s0, 16 13598; GFX940-NEXT: ;;#ASMSTART 13599; GFX940-NEXT: ; use s[8:9] 13600; GFX940-NEXT: ;;#ASMEND 13601; GFX940-NEXT: s_setpc_b64 s[30:31] 13602 %vec0 = call <4 x i16> asm "; def $0", "=s"() 13603 %vec1 = call <4 x i16> asm "; def $0", "=s"() 13604 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 4, i32 5> 13605 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 13606 call void 
asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 13607 ret void 13608} 13609 13610define void @s_shuffle_v3i16_v4i16__7_6_5() { 13611; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_6_5: 13612; GFX900: ; %bb.0: 13613; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13614; GFX900-NEXT: ;;#ASMSTART 13615; GFX900-NEXT: ; def s[4:5] 13616; GFX900-NEXT: ;;#ASMEND 13617; GFX900-NEXT: s_lshr_b32 s6, s5, 16 13618; GFX900-NEXT: s_pack_ll_b32_b16 s8, s6, s5 13619; GFX900-NEXT: s_lshr_b32 s9, s4, 16 13620; GFX900-NEXT: ;;#ASMSTART 13621; GFX900-NEXT: ; use s[8:9] 13622; GFX900-NEXT: ;;#ASMEND 13623; GFX900-NEXT: s_setpc_b64 s[30:31] 13624; 13625; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_6_5: 13626; GFX90A: ; %bb.0: 13627; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13628; GFX90A-NEXT: ;;#ASMSTART 13629; GFX90A-NEXT: ; def s[4:5] 13630; GFX90A-NEXT: ;;#ASMEND 13631; GFX90A-NEXT: s_lshr_b32 s6, s5, 16 13632; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s6, s5 13633; GFX90A-NEXT: s_lshr_b32 s9, s4, 16 13634; GFX90A-NEXT: ;;#ASMSTART 13635; GFX90A-NEXT: ; use s[8:9] 13636; GFX90A-NEXT: ;;#ASMEND 13637; GFX90A-NEXT: s_setpc_b64 s[30:31] 13638; 13639; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_6_5: 13640; GFX940: ; %bb.0: 13641; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13642; GFX940-NEXT: ;;#ASMSTART 13643; GFX940-NEXT: ; def s[0:1] 13644; GFX940-NEXT: ;;#ASMEND 13645; GFX940-NEXT: s_lshr_b32 s2, s1, 16 13646; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s1 13647; GFX940-NEXT: s_lshr_b32 s9, s0, 16 13648; GFX940-NEXT: ;;#ASMSTART 13649; GFX940-NEXT: ; use s[8:9] 13650; GFX940-NEXT: ;;#ASMEND 13651; GFX940-NEXT: s_setpc_b64 s[30:31] 13652 %vec0 = call <4 x i16> asm "; def $0", "=s"() 13653 %vec1 = call <4 x i16> asm "; def $0", "=s"() 13654 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 6, i32 5> 13655 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 13656 call void asm sideeffect "; use $0", 
"{s[8:9]}"(<4 x i16> %extend3) 13657 ret void 13658} 13659 13660define void @s_shuffle_v3i16_v4i16__u_6_6() { 13661; GFX9-LABEL: s_shuffle_v3i16_v4i16__u_6_6: 13662; GFX9: ; %bb.0: 13663; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13664; GFX9-NEXT: ;;#ASMSTART 13665; GFX9-NEXT: ; def s[8:9] 13666; GFX9-NEXT: ;;#ASMEND 13667; GFX9-NEXT: s_lshl_b32 s8, s9, 16 13668; GFX9-NEXT: ;;#ASMSTART 13669; GFX9-NEXT: ; use s[8:9] 13670; GFX9-NEXT: ;;#ASMEND 13671; GFX9-NEXT: s_setpc_b64 s[30:31] 13672 %vec0 = call <4 x i16> asm "; def $0", "=s"() 13673 %vec1 = call <4 x i16> asm "; def $0", "=s"() 13674 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 poison, i32 6, i32 6> 13675 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 13676 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 13677 ret void 13678} 13679 13680define void @s_shuffle_v3i16_v4i16__0_6_6() { 13681; GFX900-LABEL: s_shuffle_v3i16_v4i16__0_6_6: 13682; GFX900: ; %bb.0: 13683; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13684; GFX900-NEXT: ;;#ASMSTART 13685; GFX900-NEXT: ; def s[8:9] 13686; GFX900-NEXT: ;;#ASMEND 13687; GFX900-NEXT: ;;#ASMSTART 13688; GFX900-NEXT: ; def s[4:5] 13689; GFX900-NEXT: ;;#ASMEND 13690; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s9 13691; GFX900-NEXT: ;;#ASMSTART 13692; GFX900-NEXT: ; use s[8:9] 13693; GFX900-NEXT: ;;#ASMEND 13694; GFX900-NEXT: s_setpc_b64 s[30:31] 13695; 13696; GFX90A-LABEL: s_shuffle_v3i16_v4i16__0_6_6: 13697; GFX90A: ; %bb.0: 13698; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13699; GFX90A-NEXT: ;;#ASMSTART 13700; GFX90A-NEXT: ; def s[8:9] 13701; GFX90A-NEXT: ;;#ASMEND 13702; GFX90A-NEXT: ;;#ASMSTART 13703; GFX90A-NEXT: ; def s[4:5] 13704; GFX90A-NEXT: ;;#ASMEND 13705; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s9 13706; GFX90A-NEXT: ;;#ASMSTART 13707; GFX90A-NEXT: ; use s[8:9] 13708; GFX90A-NEXT: ;;#ASMEND 13709; GFX90A-NEXT: s_setpc_b64 s[30:31] 13710; 
13711; GFX940-LABEL: s_shuffle_v3i16_v4i16__0_6_6: 13712; GFX940: ; %bb.0: 13713; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13714; GFX940-NEXT: ;;#ASMSTART 13715; GFX940-NEXT: ; def s[8:9] 13716; GFX940-NEXT: ;;#ASMEND 13717; GFX940-NEXT: ;;#ASMSTART 13718; GFX940-NEXT: ; def s[0:1] 13719; GFX940-NEXT: ;;#ASMEND 13720; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 13721; GFX940-NEXT: ;;#ASMSTART 13722; GFX940-NEXT: ; use s[8:9] 13723; GFX940-NEXT: ;;#ASMEND 13724; GFX940-NEXT: s_setpc_b64 s[30:31] 13725 %vec0 = call <4 x i16> asm "; def $0", "=s"() 13726 %vec1 = call <4 x i16> asm "; def $0", "=s"() 13727 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 0, i32 6, i32 6> 13728 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 13729 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 13730 ret void 13731} 13732 13733define void @s_shuffle_v3i16_v4i16__1_6_6() { 13734; GFX900-LABEL: s_shuffle_v3i16_v4i16__1_6_6: 13735; GFX900: ; %bb.0: 13736; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13737; GFX900-NEXT: ;;#ASMSTART 13738; GFX900-NEXT: ; def s[4:5] 13739; GFX900-NEXT: ;;#ASMEND 13740; GFX900-NEXT: ;;#ASMSTART 13741; GFX900-NEXT: ; def s[8:9] 13742; GFX900-NEXT: ;;#ASMEND 13743; GFX900-NEXT: s_lshr_b32 s4, s4, 16 13744; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s9 13745; GFX900-NEXT: ;;#ASMSTART 13746; GFX900-NEXT: ; use s[8:9] 13747; GFX900-NEXT: ;;#ASMEND 13748; GFX900-NEXT: s_setpc_b64 s[30:31] 13749; 13750; GFX90A-LABEL: s_shuffle_v3i16_v4i16__1_6_6: 13751; GFX90A: ; %bb.0: 13752; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13753; GFX90A-NEXT: ;;#ASMSTART 13754; GFX90A-NEXT: ; def s[4:5] 13755; GFX90A-NEXT: ;;#ASMEND 13756; GFX90A-NEXT: ;;#ASMSTART 13757; GFX90A-NEXT: ; def s[8:9] 13758; GFX90A-NEXT: ;;#ASMEND 13759; GFX90A-NEXT: s_lshr_b32 s4, s4, 16 13760; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s9 13761; GFX90A-NEXT: ;;#ASMSTART 13762; GFX90A-NEXT: ; 
use s[8:9] 13763; GFX90A-NEXT: ;;#ASMEND 13764; GFX90A-NEXT: s_setpc_b64 s[30:31] 13765; 13766; GFX940-LABEL: s_shuffle_v3i16_v4i16__1_6_6: 13767; GFX940: ; %bb.0: 13768; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13769; GFX940-NEXT: ;;#ASMSTART 13770; GFX940-NEXT: ; def s[0:1] 13771; GFX940-NEXT: ;;#ASMEND 13772; GFX940-NEXT: ;;#ASMSTART 13773; GFX940-NEXT: ; def s[8:9] 13774; GFX940-NEXT: ;;#ASMEND 13775; GFX940-NEXT: s_lshr_b32 s0, s0, 16 13776; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 13777; GFX940-NEXT: ;;#ASMSTART 13778; GFX940-NEXT: ; use s[8:9] 13779; GFX940-NEXT: ;;#ASMEND 13780; GFX940-NEXT: s_setpc_b64 s[30:31] 13781 %vec0 = call <4 x i16> asm "; def $0", "=s"() 13782 %vec1 = call <4 x i16> asm "; def $0", "=s"() 13783 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 1, i32 6, i32 6> 13784 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 13785 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 13786 ret void 13787} 13788 13789define void @s_shuffle_v3i16_v4i16__2_6_6() { 13790; GFX900-LABEL: s_shuffle_v3i16_v4i16__2_6_6: 13791; GFX900: ; %bb.0: 13792; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13793; GFX900-NEXT: ;;#ASMSTART 13794; GFX900-NEXT: ; def s[8:9] 13795; GFX900-NEXT: ;;#ASMEND 13796; GFX900-NEXT: ;;#ASMSTART 13797; GFX900-NEXT: ; def s[4:5] 13798; GFX900-NEXT: ;;#ASMEND 13799; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s9 13800; GFX900-NEXT: ;;#ASMSTART 13801; GFX900-NEXT: ; use s[8:9] 13802; GFX900-NEXT: ;;#ASMEND 13803; GFX900-NEXT: s_setpc_b64 s[30:31] 13804; 13805; GFX90A-LABEL: s_shuffle_v3i16_v4i16__2_6_6: 13806; GFX90A: ; %bb.0: 13807; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13808; GFX90A-NEXT: ;;#ASMSTART 13809; GFX90A-NEXT: ; def s[8:9] 13810; GFX90A-NEXT: ;;#ASMEND 13811; GFX90A-NEXT: ;;#ASMSTART 13812; GFX90A-NEXT: ; def s[4:5] 13813; GFX90A-NEXT: ;;#ASMEND 13814; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s9 13815; 
GFX90A-NEXT: ;;#ASMSTART 13816; GFX90A-NEXT: ; use s[8:9] 13817; GFX90A-NEXT: ;;#ASMEND 13818; GFX90A-NEXT: s_setpc_b64 s[30:31] 13819; 13820; GFX940-LABEL: s_shuffle_v3i16_v4i16__2_6_6: 13821; GFX940: ; %bb.0: 13822; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13823; GFX940-NEXT: ;;#ASMSTART 13824; GFX940-NEXT: ; def s[8:9] 13825; GFX940-NEXT: ;;#ASMEND 13826; GFX940-NEXT: ;;#ASMSTART 13827; GFX940-NEXT: ; def s[0:1] 13828; GFX940-NEXT: ;;#ASMEND 13829; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s9 13830; GFX940-NEXT: ;;#ASMSTART 13831; GFX940-NEXT: ; use s[8:9] 13832; GFX940-NEXT: ;;#ASMEND 13833; GFX940-NEXT: s_setpc_b64 s[30:31] 13834 %vec0 = call <4 x i16> asm "; def $0", "=s"() 13835 %vec1 = call <4 x i16> asm "; def $0", "=s"() 13836 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 2, i32 6, i32 6> 13837 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 13838 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 13839 ret void 13840} 13841 13842define void @s_shuffle_v3i16_v4i16__3_6_6() { 13843; GFX900-LABEL: s_shuffle_v3i16_v4i16__3_6_6: 13844; GFX900: ; %bb.0: 13845; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13846; GFX900-NEXT: ;;#ASMSTART 13847; GFX900-NEXT: ; def s[4:5] 13848; GFX900-NEXT: ;;#ASMEND 13849; GFX900-NEXT: ;;#ASMSTART 13850; GFX900-NEXT: ; def s[8:9] 13851; GFX900-NEXT: ;;#ASMEND 13852; GFX900-NEXT: s_lshr_b32 s4, s5, 16 13853; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s9 13854; GFX900-NEXT: ;;#ASMSTART 13855; GFX900-NEXT: ; use s[8:9] 13856; GFX900-NEXT: ;;#ASMEND 13857; GFX900-NEXT: s_setpc_b64 s[30:31] 13858; 13859; GFX90A-LABEL: s_shuffle_v3i16_v4i16__3_6_6: 13860; GFX90A: ; %bb.0: 13861; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13862; GFX90A-NEXT: ;;#ASMSTART 13863; GFX90A-NEXT: ; def s[4:5] 13864; GFX90A-NEXT: ;;#ASMEND 13865; GFX90A-NEXT: ;;#ASMSTART 13866; GFX90A-NEXT: ; def s[8:9] 13867; GFX90A-NEXT: ;;#ASMEND 13868; 
GFX90A-NEXT: s_lshr_b32 s4, s5, 16 13869; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s9 13870; GFX90A-NEXT: ;;#ASMSTART 13871; GFX90A-NEXT: ; use s[8:9] 13872; GFX90A-NEXT: ;;#ASMEND 13873; GFX90A-NEXT: s_setpc_b64 s[30:31] 13874; 13875; GFX940-LABEL: s_shuffle_v3i16_v4i16__3_6_6: 13876; GFX940: ; %bb.0: 13877; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13878; GFX940-NEXT: ;;#ASMSTART 13879; GFX940-NEXT: ; def s[0:1] 13880; GFX940-NEXT: ;;#ASMEND 13881; GFX940-NEXT: ;;#ASMSTART 13882; GFX940-NEXT: ; def s[8:9] 13883; GFX940-NEXT: ;;#ASMEND 13884; GFX940-NEXT: s_lshr_b32 s0, s1, 16 13885; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 13886; GFX940-NEXT: ;;#ASMSTART 13887; GFX940-NEXT: ; use s[8:9] 13888; GFX940-NEXT: ;;#ASMEND 13889; GFX940-NEXT: s_setpc_b64 s[30:31] 13890 %vec0 = call <4 x i16> asm "; def $0", "=s"() 13891 %vec1 = call <4 x i16> asm "; def $0", "=s"() 13892 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 3, i32 6, i32 6> 13893 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 13894 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 13895 ret void 13896} 13897 13898define void @s_shuffle_v3i16_v4i16__4_6_6() { 13899; GFX9-LABEL: s_shuffle_v3i16_v4i16__4_6_6: 13900; GFX9: ; %bb.0: 13901; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13902; GFX9-NEXT: ;;#ASMSTART 13903; GFX9-NEXT: ; def s[8:9] 13904; GFX9-NEXT: ;;#ASMEND 13905; GFX9-NEXT: s_pack_ll_b32_b16 s8, s8, s9 13906; GFX9-NEXT: ;;#ASMSTART 13907; GFX9-NEXT: ; use s[8:9] 13908; GFX9-NEXT: ;;#ASMEND 13909; GFX9-NEXT: s_setpc_b64 s[30:31] 13910 %vec0 = call <4 x i16> asm "; def $0", "=s"() 13911 %vec1 = call <4 x i16> asm "; def $0", "=s"() 13912 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 4, i32 6, i32 6> 13913 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 13914 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x 
i16> %extend3) 13915 ret void 13916} 13917 13918define void @s_shuffle_v3i16_v4i16__5_6_6() { 13919; GFX900-LABEL: s_shuffle_v3i16_v4i16__5_6_6: 13920; GFX900: ; %bb.0: 13921; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13922; GFX900-NEXT: ;;#ASMSTART 13923; GFX900-NEXT: ; def s[8:9] 13924; GFX900-NEXT: ;;#ASMEND 13925; GFX900-NEXT: s_lshr_b32 s4, s8, 16 13926; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s9 13927; GFX900-NEXT: ;;#ASMSTART 13928; GFX900-NEXT: ; use s[8:9] 13929; GFX900-NEXT: ;;#ASMEND 13930; GFX900-NEXT: s_setpc_b64 s[30:31] 13931; 13932; GFX90A-LABEL: s_shuffle_v3i16_v4i16__5_6_6: 13933; GFX90A: ; %bb.0: 13934; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13935; GFX90A-NEXT: ;;#ASMSTART 13936; GFX90A-NEXT: ; def s[8:9] 13937; GFX90A-NEXT: ;;#ASMEND 13938; GFX90A-NEXT: s_lshr_b32 s4, s8, 16 13939; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s9 13940; GFX90A-NEXT: ;;#ASMSTART 13941; GFX90A-NEXT: ; use s[8:9] 13942; GFX90A-NEXT: ;;#ASMEND 13943; GFX90A-NEXT: s_setpc_b64 s[30:31] 13944; 13945; GFX940-LABEL: s_shuffle_v3i16_v4i16__5_6_6: 13946; GFX940: ; %bb.0: 13947; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13948; GFX940-NEXT: ;;#ASMSTART 13949; GFX940-NEXT: ; def s[8:9] 13950; GFX940-NEXT: ;;#ASMEND 13951; GFX940-NEXT: s_lshr_b32 s0, s8, 16 13952; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 13953; GFX940-NEXT: ;;#ASMSTART 13954; GFX940-NEXT: ; use s[8:9] 13955; GFX940-NEXT: ;;#ASMEND 13956; GFX940-NEXT: s_setpc_b64 s[30:31] 13957 %vec0 = call <4 x i16> asm "; def $0", "=s"() 13958 %vec1 = call <4 x i16> asm "; def $0", "=s"() 13959 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 5, i32 6, i32 6> 13960 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 13961 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 13962 ret void 13963} 13964 13965define void @s_shuffle_v3i16_v4i16__6_6_6() { 13966; GFX9-LABEL: s_shuffle_v3i16_v4i16__6_6_6: 13967; 
GFX9: ; %bb.0: 13968; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13969; GFX9-NEXT: ;;#ASMSTART 13970; GFX9-NEXT: ; def s[8:9] 13971; GFX9-NEXT: ;;#ASMEND 13972; GFX9-NEXT: s_pack_ll_b32_b16 s8, s9, s9 13973; GFX9-NEXT: ;;#ASMSTART 13974; GFX9-NEXT: ; use s[8:9] 13975; GFX9-NEXT: ;;#ASMEND 13976; GFX9-NEXT: s_setpc_b64 s[30:31] 13977 %vec0 = call <4 x i16> asm "; def $0", "=s"() 13978 %vec1 = call <4 x i16> asm "; def $0", "=s"() 13979 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 6, i32 6, i32 6> 13980 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 13981 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 13982 ret void 13983} 13984 13985define void @s_shuffle_v3i16_v4i16__7_6_6() { 13986; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_6_6: 13987; GFX900: ; %bb.0: 13988; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13989; GFX900-NEXT: ;;#ASMSTART 13990; GFX900-NEXT: ; def s[8:9] 13991; GFX900-NEXT: ;;#ASMEND 13992; GFX900-NEXT: s_lshr_b32 s4, s9, 16 13993; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s9 13994; GFX900-NEXT: ;;#ASMSTART 13995; GFX900-NEXT: ; use s[8:9] 13996; GFX900-NEXT: ;;#ASMEND 13997; GFX900-NEXT: s_setpc_b64 s[30:31] 13998; 13999; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_6_6: 14000; GFX90A: ; %bb.0: 14001; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14002; GFX90A-NEXT: ;;#ASMSTART 14003; GFX90A-NEXT: ; def s[8:9] 14004; GFX90A-NEXT: ;;#ASMEND 14005; GFX90A-NEXT: s_lshr_b32 s4, s9, 16 14006; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s9 14007; GFX90A-NEXT: ;;#ASMSTART 14008; GFX90A-NEXT: ; use s[8:9] 14009; GFX90A-NEXT: ;;#ASMEND 14010; GFX90A-NEXT: s_setpc_b64 s[30:31] 14011; 14012; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_6_6: 14013; GFX940: ; %bb.0: 14014; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14015; GFX940-NEXT: ;;#ASMSTART 14016; GFX940-NEXT: ; def s[8:9] 14017; GFX940-NEXT: ;;#ASMEND 14018; GFX940-NEXT: s_lshr_b32 s0, s9, 16 
14019; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 14020; GFX940-NEXT: ;;#ASMSTART 14021; GFX940-NEXT: ; use s[8:9] 14022; GFX940-NEXT: ;;#ASMEND 14023; GFX940-NEXT: s_setpc_b64 s[30:31] 14024 %vec0 = call <4 x i16> asm "; def $0", "=s"() 14025 %vec1 = call <4 x i16> asm "; def $0", "=s"() 14026 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 6, i32 6> 14027 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 14028 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 14029 ret void 14030} 14031 14032define void @s_shuffle_v3i16_v4i16__7_u_6() { 14033; GFX9-LABEL: s_shuffle_v3i16_v4i16__7_u_6: 14034; GFX9: ; %bb.0: 14035; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14036; GFX9-NEXT: ;;#ASMSTART 14037; GFX9-NEXT: ; def s[8:9] 14038; GFX9-NEXT: ;;#ASMEND 14039; GFX9-NEXT: s_lshr_b32 s8, s9, 16 14040; GFX9-NEXT: ;;#ASMSTART 14041; GFX9-NEXT: ; use s[8:9] 14042; GFX9-NEXT: ;;#ASMEND 14043; GFX9-NEXT: s_setpc_b64 s[30:31] 14044 %vec0 = call <4 x i16> asm "; def $0", "=s"() 14045 %vec1 = call <4 x i16> asm "; def $0", "=s"() 14046 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 poison, i32 6> 14047 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 14048 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 14049 ret void 14050} 14051 14052define void @s_shuffle_v3i16_v4i16__7_0_6() { 14053; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_0_6: 14054; GFX900: ; %bb.0: 14055; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14056; GFX900-NEXT: ;;#ASMSTART 14057; GFX900-NEXT: ; def s[4:5] 14058; GFX900-NEXT: ;;#ASMEND 14059; GFX900-NEXT: ;;#ASMSTART 14060; GFX900-NEXT: ; def s[8:9] 14061; GFX900-NEXT: ;;#ASMEND 14062; GFX900-NEXT: s_lshr_b32 s5, s9, 16 14063; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4 14064; GFX900-NEXT: ;;#ASMSTART 14065; GFX900-NEXT: ; use s[8:9] 14066; GFX900-NEXT: 
;;#ASMEND 14067; GFX900-NEXT: s_setpc_b64 s[30:31] 14068; 14069; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_0_6: 14070; GFX90A: ; %bb.0: 14071; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14072; GFX90A-NEXT: ;;#ASMSTART 14073; GFX90A-NEXT: ; def s[4:5] 14074; GFX90A-NEXT: ;;#ASMEND 14075; GFX90A-NEXT: ;;#ASMSTART 14076; GFX90A-NEXT: ; def s[8:9] 14077; GFX90A-NEXT: ;;#ASMEND 14078; GFX90A-NEXT: s_lshr_b32 s5, s9, 16 14079; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4 14080; GFX90A-NEXT: ;;#ASMSTART 14081; GFX90A-NEXT: ; use s[8:9] 14082; GFX90A-NEXT: ;;#ASMEND 14083; GFX90A-NEXT: s_setpc_b64 s[30:31] 14084; 14085; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_0_6: 14086; GFX940: ; %bb.0: 14087; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14088; GFX940-NEXT: ;;#ASMSTART 14089; GFX940-NEXT: ; def s[0:1] 14090; GFX940-NEXT: ;;#ASMEND 14091; GFX940-NEXT: ;;#ASMSTART 14092; GFX940-NEXT: ; def s[8:9] 14093; GFX940-NEXT: ;;#ASMEND 14094; GFX940-NEXT: s_lshr_b32 s1, s9, 16 14095; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 14096; GFX940-NEXT: ;;#ASMSTART 14097; GFX940-NEXT: ; use s[8:9] 14098; GFX940-NEXT: ;;#ASMEND 14099; GFX940-NEXT: s_setpc_b64 s[30:31] 14100 %vec0 = call <4 x i16> asm "; def $0", "=s"() 14101 %vec1 = call <4 x i16> asm "; def $0", "=s"() 14102 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 0, i32 6> 14103 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 14104 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 14105 ret void 14106} 14107 14108define void @s_shuffle_v3i16_v4i16__7_1_6() { 14109; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_1_6: 14110; GFX900: ; %bb.0: 14111; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14112; GFX900-NEXT: ;;#ASMSTART 14113; GFX900-NEXT: ; def s[8:9] 14114; GFX900-NEXT: ;;#ASMEND 14115; GFX900-NEXT: ;;#ASMSTART 14116; GFX900-NEXT: ; def s[4:5] 14117; GFX900-NEXT: ;;#ASMEND 14118; GFX900-NEXT: s_pack_hh_b32_b16 s8, 
s9, s4 14119; GFX900-NEXT: ;;#ASMSTART 14120; GFX900-NEXT: ; use s[8:9] 14121; GFX900-NEXT: ;;#ASMEND 14122; GFX900-NEXT: s_setpc_b64 s[30:31] 14123; 14124; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_1_6: 14125; GFX90A: ; %bb.0: 14126; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14127; GFX90A-NEXT: ;;#ASMSTART 14128; GFX90A-NEXT: ; def s[8:9] 14129; GFX90A-NEXT: ;;#ASMEND 14130; GFX90A-NEXT: ;;#ASMSTART 14131; GFX90A-NEXT: ; def s[4:5] 14132; GFX90A-NEXT: ;;#ASMEND 14133; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s9, s4 14134; GFX90A-NEXT: ;;#ASMSTART 14135; GFX90A-NEXT: ; use s[8:9] 14136; GFX90A-NEXT: ;;#ASMEND 14137; GFX90A-NEXT: s_setpc_b64 s[30:31] 14138; 14139; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_1_6: 14140; GFX940: ; %bb.0: 14141; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14142; GFX940-NEXT: ;;#ASMSTART 14143; GFX940-NEXT: ; def s[8:9] 14144; GFX940-NEXT: ;;#ASMEND 14145; GFX940-NEXT: ;;#ASMSTART 14146; GFX940-NEXT: ; def s[0:1] 14147; GFX940-NEXT: ;;#ASMEND 14148; GFX940-NEXT: s_pack_hh_b32_b16 s8, s9, s0 14149; GFX940-NEXT: ;;#ASMSTART 14150; GFX940-NEXT: ; use s[8:9] 14151; GFX940-NEXT: ;;#ASMEND 14152; GFX940-NEXT: s_setpc_b64 s[30:31] 14153 %vec0 = call <4 x i16> asm "; def $0", "=s"() 14154 %vec1 = call <4 x i16> asm "; def $0", "=s"() 14155 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 1, i32 6> 14156 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 14157 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 14158 ret void 14159} 14160 14161define void @s_shuffle_v3i16_v4i16__7_2_6() { 14162; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_2_6: 14163; GFX900: ; %bb.0: 14164; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14165; GFX900-NEXT: ;;#ASMSTART 14166; GFX900-NEXT: ; def s[4:5] 14167; GFX900-NEXT: ;;#ASMEND 14168; GFX900-NEXT: ;;#ASMSTART 14169; GFX900-NEXT: ; def s[8:9] 14170; GFX900-NEXT: ;;#ASMEND 14171; GFX900-NEXT: s_lshr_b32 s4, 
s9, 16 14172; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s5 14173; GFX900-NEXT: ;;#ASMSTART 14174; GFX900-NEXT: ; use s[8:9] 14175; GFX900-NEXT: ;;#ASMEND 14176; GFX900-NEXT: s_setpc_b64 s[30:31] 14177; 14178; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_2_6: 14179; GFX90A: ; %bb.0: 14180; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14181; GFX90A-NEXT: ;;#ASMSTART 14182; GFX90A-NEXT: ; def s[4:5] 14183; GFX90A-NEXT: ;;#ASMEND 14184; GFX90A-NEXT: ;;#ASMSTART 14185; GFX90A-NEXT: ; def s[8:9] 14186; GFX90A-NEXT: ;;#ASMEND 14187; GFX90A-NEXT: s_lshr_b32 s4, s9, 16 14188; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s5 14189; GFX90A-NEXT: ;;#ASMSTART 14190; GFX90A-NEXT: ; use s[8:9] 14191; GFX90A-NEXT: ;;#ASMEND 14192; GFX90A-NEXT: s_setpc_b64 s[30:31] 14193; 14194; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_2_6: 14195; GFX940: ; %bb.0: 14196; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14197; GFX940-NEXT: ;;#ASMSTART 14198; GFX940-NEXT: ; def s[0:1] 14199; GFX940-NEXT: ;;#ASMEND 14200; GFX940-NEXT: ;;#ASMSTART 14201; GFX940-NEXT: ; def s[8:9] 14202; GFX940-NEXT: ;;#ASMEND 14203; GFX940-NEXT: s_lshr_b32 s0, s9, 16 14204; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 14205; GFX940-NEXT: ;;#ASMSTART 14206; GFX940-NEXT: ; use s[8:9] 14207; GFX940-NEXT: ;;#ASMEND 14208; GFX940-NEXT: s_setpc_b64 s[30:31] 14209 %vec0 = call <4 x i16> asm "; def $0", "=s"() 14210 %vec1 = call <4 x i16> asm "; def $0", "=s"() 14211 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 2, i32 6> 14212 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 14213 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 14214 ret void 14215} 14216 14217define void @s_shuffle_v3i16_v4i16__7_3_6() { 14218; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_3_6: 14219; GFX900: ; %bb.0: 14220; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14221; GFX900-NEXT: ;;#ASMSTART 14222; GFX900-NEXT: ; def s[8:9] 14223; GFX900-NEXT: 
;;#ASMEND 14224; GFX900-NEXT: ;;#ASMSTART 14225; GFX900-NEXT: ; def s[4:5] 14226; GFX900-NEXT: ;;#ASMEND 14227; GFX900-NEXT: s_pack_hh_b32_b16 s8, s9, s5 14228; GFX900-NEXT: ;;#ASMSTART 14229; GFX900-NEXT: ; use s[8:9] 14230; GFX900-NEXT: ;;#ASMEND 14231; GFX900-NEXT: s_setpc_b64 s[30:31] 14232; 14233; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_3_6: 14234; GFX90A: ; %bb.0: 14235; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14236; GFX90A-NEXT: ;;#ASMSTART 14237; GFX90A-NEXT: ; def s[8:9] 14238; GFX90A-NEXT: ;;#ASMEND 14239; GFX90A-NEXT: ;;#ASMSTART 14240; GFX90A-NEXT: ; def s[4:5] 14241; GFX90A-NEXT: ;;#ASMEND 14242; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s9, s5 14243; GFX90A-NEXT: ;;#ASMSTART 14244; GFX90A-NEXT: ; use s[8:9] 14245; GFX90A-NEXT: ;;#ASMEND 14246; GFX90A-NEXT: s_setpc_b64 s[30:31] 14247; 14248; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_3_6: 14249; GFX940: ; %bb.0: 14250; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14251; GFX940-NEXT: ;;#ASMSTART 14252; GFX940-NEXT: ; def s[8:9] 14253; GFX940-NEXT: ;;#ASMEND 14254; GFX940-NEXT: ;;#ASMSTART 14255; GFX940-NEXT: ; def s[0:1] 14256; GFX940-NEXT: ;;#ASMEND 14257; GFX940-NEXT: s_pack_hh_b32_b16 s8, s9, s1 14258; GFX940-NEXT: ;;#ASMSTART 14259; GFX940-NEXT: ; use s[8:9] 14260; GFX940-NEXT: ;;#ASMEND 14261; GFX940-NEXT: s_setpc_b64 s[30:31] 14262 %vec0 = call <4 x i16> asm "; def $0", "=s"() 14263 %vec1 = call <4 x i16> asm "; def $0", "=s"() 14264 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 3, i32 6> 14265 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 14266 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 14267 ret void 14268} 14269 14270define void @s_shuffle_v3i16_v4i16__7_4_6() { 14271; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_4_6: 14272; GFX900: ; %bb.0: 14273; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14274; GFX900-NEXT: ;;#ASMSTART 14275; GFX900-NEXT: ; def s[8:9] 14276; 
GFX900-NEXT: ;;#ASMEND 14277; GFX900-NEXT: s_lshr_b32 s4, s9, 16 14278; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s8 14279; GFX900-NEXT: ;;#ASMSTART 14280; GFX900-NEXT: ; use s[8:9] 14281; GFX900-NEXT: ;;#ASMEND 14282; GFX900-NEXT: s_setpc_b64 s[30:31] 14283; 14284; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_4_6: 14285; GFX90A: ; %bb.0: 14286; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14287; GFX90A-NEXT: ;;#ASMSTART 14288; GFX90A-NEXT: ; def s[8:9] 14289; GFX90A-NEXT: ;;#ASMEND 14290; GFX90A-NEXT: s_lshr_b32 s4, s9, 16 14291; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s8 14292; GFX90A-NEXT: ;;#ASMSTART 14293; GFX90A-NEXT: ; use s[8:9] 14294; GFX90A-NEXT: ;;#ASMEND 14295; GFX90A-NEXT: s_setpc_b64 s[30:31] 14296; 14297; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_4_6: 14298; GFX940: ; %bb.0: 14299; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14300; GFX940-NEXT: ;;#ASMSTART 14301; GFX940-NEXT: ; def s[8:9] 14302; GFX940-NEXT: ;;#ASMEND 14303; GFX940-NEXT: s_lshr_b32 s0, s9, 16 14304; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s8 14305; GFX940-NEXT: ;;#ASMSTART 14306; GFX940-NEXT: ; use s[8:9] 14307; GFX940-NEXT: ;;#ASMEND 14308; GFX940-NEXT: s_setpc_b64 s[30:31] 14309 %vec0 = call <4 x i16> asm "; def $0", "=s"() 14310 %vec1 = call <4 x i16> asm "; def $0", "=s"() 14311 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 4, i32 6> 14312 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 14313 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 14314 ret void 14315} 14316 14317define void @s_shuffle_v3i16_v4i16__7_5_6() { 14318; GFX9-LABEL: s_shuffle_v3i16_v4i16__7_5_6: 14319; GFX9: ; %bb.0: 14320; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14321; GFX9-NEXT: ;;#ASMSTART 14322; GFX9-NEXT: ; def s[8:9] 14323; GFX9-NEXT: ;;#ASMEND 14324; GFX9-NEXT: s_pack_hh_b32_b16 s8, s9, s8 14325; GFX9-NEXT: ;;#ASMSTART 14326; GFX9-NEXT: ; use s[8:9] 14327; GFX9-NEXT: ;;#ASMEND 14328; 
GFX9-NEXT: s_setpc_b64 s[30:31] 14329 %vec0 = call <4 x i16> asm "; def $0", "=s"() 14330 %vec1 = call <4 x i16> asm "; def $0", "=s"() 14331 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 5, i32 6> 14332 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 14333 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 14334 ret void 14335} 14336 14337define void @s_shuffle_v3i16_v4i16__u_7_7() { 14338; GFX900-LABEL: s_shuffle_v3i16_v4i16__u_7_7: 14339; GFX900: ; %bb.0: 14340; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14341; GFX900-NEXT: ;;#ASMSTART 14342; GFX900-NEXT: ; def s[4:5] 14343; GFX900-NEXT: ;;#ASMEND 14344; GFX900-NEXT: s_lshr_b32 s9, s5, 16 14345; GFX900-NEXT: s_mov_b32 s8, s5 14346; GFX900-NEXT: ;;#ASMSTART 14347; GFX900-NEXT: ; use s[8:9] 14348; GFX900-NEXT: ;;#ASMEND 14349; GFX900-NEXT: s_setpc_b64 s[30:31] 14350; 14351; GFX90A-LABEL: s_shuffle_v3i16_v4i16__u_7_7: 14352; GFX90A: ; %bb.0: 14353; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14354; GFX90A-NEXT: ;;#ASMSTART 14355; GFX90A-NEXT: ; def s[4:5] 14356; GFX90A-NEXT: ;;#ASMEND 14357; GFX90A-NEXT: s_lshr_b32 s9, s5, 16 14358; GFX90A-NEXT: s_mov_b32 s8, s5 14359; GFX90A-NEXT: ;;#ASMSTART 14360; GFX90A-NEXT: ; use s[8:9] 14361; GFX90A-NEXT: ;;#ASMEND 14362; GFX90A-NEXT: s_setpc_b64 s[30:31] 14363; 14364; GFX940-LABEL: s_shuffle_v3i16_v4i16__u_7_7: 14365; GFX940: ; %bb.0: 14366; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14367; GFX940-NEXT: ;;#ASMSTART 14368; GFX940-NEXT: ; def s[0:1] 14369; GFX940-NEXT: ;;#ASMEND 14370; GFX940-NEXT: s_lshr_b32 s9, s1, 16 14371; GFX940-NEXT: s_mov_b32 s8, s1 14372; GFX940-NEXT: ;;#ASMSTART 14373; GFX940-NEXT: ; use s[8:9] 14374; GFX940-NEXT: ;;#ASMEND 14375; GFX940-NEXT: s_setpc_b64 s[30:31] 14376 %vec0 = call <4 x i16> asm "; def $0", "=s"() 14377 %vec1 = call <4 x i16> asm "; def $0", "=s"() 14378 %shuf = shufflevector <4 x i16> %vec0, <4 x 
i16> %vec1, <3 x i32> <i32 poison, i32 7, i32 7> 14379 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 14380 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 14381 ret void 14382} 14383 14384define void @s_shuffle_v3i16_v4i16__0_7_7() { 14385; GFX900-LABEL: s_shuffle_v3i16_v4i16__0_7_7: 14386; GFX900: ; %bb.0: 14387; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14388; GFX900-NEXT: ;;#ASMSTART 14389; GFX900-NEXT: ; def s[4:5] 14390; GFX900-NEXT: ;;#ASMEND 14391; GFX900-NEXT: ;;#ASMSTART 14392; GFX900-NEXT: ; def s[6:7] 14393; GFX900-NEXT: ;;#ASMEND 14394; GFX900-NEXT: s_pack_lh_b32_b16 s8, s4, s7 14395; GFX900-NEXT: s_lshr_b32 s9, s7, 16 14396; GFX900-NEXT: ;;#ASMSTART 14397; GFX900-NEXT: ; use s[8:9] 14398; GFX900-NEXT: ;;#ASMEND 14399; GFX900-NEXT: s_setpc_b64 s[30:31] 14400; 14401; GFX90A-LABEL: s_shuffle_v3i16_v4i16__0_7_7: 14402; GFX90A: ; %bb.0: 14403; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14404; GFX90A-NEXT: ;;#ASMSTART 14405; GFX90A-NEXT: ; def s[4:5] 14406; GFX90A-NEXT: ;;#ASMEND 14407; GFX90A-NEXT: ;;#ASMSTART 14408; GFX90A-NEXT: ; def s[6:7] 14409; GFX90A-NEXT: ;;#ASMEND 14410; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s4, s7 14411; GFX90A-NEXT: s_lshr_b32 s9, s7, 16 14412; GFX90A-NEXT: ;;#ASMSTART 14413; GFX90A-NEXT: ; use s[8:9] 14414; GFX90A-NEXT: ;;#ASMEND 14415; GFX90A-NEXT: s_setpc_b64 s[30:31] 14416; 14417; GFX940-LABEL: s_shuffle_v3i16_v4i16__0_7_7: 14418; GFX940: ; %bb.0: 14419; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14420; GFX940-NEXT: ;;#ASMSTART 14421; GFX940-NEXT: ; def s[0:1] 14422; GFX940-NEXT: ;;#ASMEND 14423; GFX940-NEXT: ;;#ASMSTART 14424; GFX940-NEXT: ; def s[2:3] 14425; GFX940-NEXT: ;;#ASMEND 14426; GFX940-NEXT: s_pack_lh_b32_b16 s8, s0, s3 14427; GFX940-NEXT: s_lshr_b32 s9, s3, 16 14428; GFX940-NEXT: ;;#ASMSTART 14429; GFX940-NEXT: ; use s[8:9] 14430; GFX940-NEXT: ;;#ASMEND 14431; GFX940-NEXT: s_setpc_b64 s[30:31] 14432 %vec0 = 
call <4 x i16> asm "; def $0", "=s"() 14433 %vec1 = call <4 x i16> asm "; def $0", "=s"() 14434 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 0, i32 7, i32 7> 14435 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 14436 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 14437 ret void 14438} 14439 14440define void @s_shuffle_v3i16_v4i16__1_7_7() { 14441; GFX900-LABEL: s_shuffle_v3i16_v4i16__1_7_7: 14442; GFX900: ; %bb.0: 14443; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14444; GFX900-NEXT: ;;#ASMSTART 14445; GFX900-NEXT: ; def s[4:5] 14446; GFX900-NEXT: ;;#ASMEND 14447; GFX900-NEXT: ;;#ASMSTART 14448; GFX900-NEXT: ; def s[6:7] 14449; GFX900-NEXT: ;;#ASMEND 14450; GFX900-NEXT: s_pack_hh_b32_b16 s8, s4, s7 14451; GFX900-NEXT: s_lshr_b32 s9, s7, 16 14452; GFX900-NEXT: ;;#ASMSTART 14453; GFX900-NEXT: ; use s[8:9] 14454; GFX900-NEXT: ;;#ASMEND 14455; GFX900-NEXT: s_setpc_b64 s[30:31] 14456; 14457; GFX90A-LABEL: s_shuffle_v3i16_v4i16__1_7_7: 14458; GFX90A: ; %bb.0: 14459; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14460; GFX90A-NEXT: ;;#ASMSTART 14461; GFX90A-NEXT: ; def s[4:5] 14462; GFX90A-NEXT: ;;#ASMEND 14463; GFX90A-NEXT: ;;#ASMSTART 14464; GFX90A-NEXT: ; def s[6:7] 14465; GFX90A-NEXT: ;;#ASMEND 14466; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s4, s7 14467; GFX90A-NEXT: s_lshr_b32 s9, s7, 16 14468; GFX90A-NEXT: ;;#ASMSTART 14469; GFX90A-NEXT: ; use s[8:9] 14470; GFX90A-NEXT: ;;#ASMEND 14471; GFX90A-NEXT: s_setpc_b64 s[30:31] 14472; 14473; GFX940-LABEL: s_shuffle_v3i16_v4i16__1_7_7: 14474; GFX940: ; %bb.0: 14475; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14476; GFX940-NEXT: ;;#ASMSTART 14477; GFX940-NEXT: ; def s[0:1] 14478; GFX940-NEXT: ;;#ASMEND 14479; GFX940-NEXT: ;;#ASMSTART 14480; GFX940-NEXT: ; def s[2:3] 14481; GFX940-NEXT: ;;#ASMEND 14482; GFX940-NEXT: s_pack_hh_b32_b16 s8, s0, s3 14483; GFX940-NEXT: s_lshr_b32 s9, s3, 16 14484; 
GFX940-NEXT: ;;#ASMSTART 14485; GFX940-NEXT: ; use s[8:9] 14486; GFX940-NEXT: ;;#ASMEND 14487; GFX940-NEXT: s_setpc_b64 s[30:31] 14488 %vec0 = call <4 x i16> asm "; def $0", "=s"() 14489 %vec1 = call <4 x i16> asm "; def $0", "=s"() 14490 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 1, i32 7, i32 7> 14491 %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 14492 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3) 14493 ret void 14494} 14495 14496define void @s_shuffle_v3i16_v4i16__2_7_7() { 14497; GFX900-LABEL: s_shuffle_v3i16_v4i16__2_7_7: 14498; GFX900: ; %bb.0: 14499; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14500; GFX900-NEXT: ;;#ASMSTART 14501; GFX900-NEXT: ; def s[4:5] 14502; GFX900-NEXT: ;;#ASMEND 14503; GFX900-NEXT: ;;#ASMSTART 14504; GFX900-NEXT: ; def s[6:7] 14505; GFX900-NEXT: ;;#ASMEND 14506; GFX900-NEXT: s_pack_lh_b32_b16 s8, s5, s7 14507; GFX900-NEXT: s_lshr_b32 s9, s7, 16 14508; GFX900-NEXT: ;;#ASMSTART 14509; GFX900-NEXT: ; use s[8:9] 14510; GFX900-NEXT: ;;#ASMEND 14511; GFX900-NEXT: s_setpc_b64 s[30:31] 14512; 14513; GFX90A-LABEL: s_shuffle_v3i16_v4i16__2_7_7: 14514; GFX90A: ; %bb.0: 14515; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14516; GFX90A-NEXT: ;;#ASMSTART 14517; GFX90A-NEXT: ; def s[4:5] 14518; GFX90A-NEXT: ;;#ASMEND 14519; GFX90A-NEXT: ;;#ASMSTART 14520; GFX90A-NEXT: ; def s[6:7] 14521; GFX90A-NEXT: ;;#ASMEND 14522; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s5, s7 14523; GFX90A-NEXT: s_lshr_b32 s9, s7, 16 14524; GFX90A-NEXT: ;;#ASMSTART 14525; GFX90A-NEXT: ; use s[8:9] 14526; GFX90A-NEXT: ;;#ASMEND 14527; GFX90A-NEXT: s_setpc_b64 s[30:31] 14528; 14529; GFX940-LABEL: s_shuffle_v3i16_v4i16__2_7_7: 14530; GFX940: ; %bb.0: 14531; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14532; GFX940-NEXT: ;;#ASMSTART 14533; GFX940-NEXT: ; def s[0:1] 14534; GFX940-NEXT: ;;#ASMEND 14535; GFX940-NEXT: ;;#ASMSTART 14536; GFX940-NEXT: ; def 
s[2:3]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_pack_lh_b32_b16 s8, s1, s3
; GFX940-NEXT:    s_lshr_b32 s9, s3, 16
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; use s[8:9]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=s"()
  %vec1 = call <4 x i16> asm "; def $0", "=s"()
  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 2, i32 7, i32 7>
  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
  ret void
}

; Mask <3, 7, 7>: lane 0 takes %vec0[3]; lanes 1 and 2 both take %vec1[3]
; (mask indices 0-3 address %vec0, indices 4-7 address %vec1).
; The <3 x i16> result is widened to <4 x i16> with a poison 4th lane and
; handed to the "use" asm, whose operand is constrained to s[8:9].
define void @s_shuffle_v3i16_v4i16__3_7_7() {
; GFX900-LABEL: s_shuffle_v3i16_v4i16__3_7_7:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def s[4:5]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def s[6:7]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s5, s7
; GFX900-NEXT:    s_lshr_b32 s9, s7, 16
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; use s[8:9]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v3i16_v4i16__3_7_7:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def s[4:5]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def s[6:7]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s5, s7
; GFX90A-NEXT:    s_lshr_b32 s9, s7, 16
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; use s[8:9]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v3i16_v4i16__3_7_7:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def s[0:1]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def s[2:3]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s1, s3
; GFX940-NEXT:    s_lshr_b32 s9, s3, 16
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; use s[8:9]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=s"()
  %vec1 = call <4 x i16> asm "; def $0", "=s"()
  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 3, i32 7, i32 7>
  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
  ret void
}

; Mask <4, 7, 7>: lane 0 takes %vec1[0]; lanes 1 and 2 both take %vec1[3].
; Every index selects from %vec1, and the expected output below contains a
; single "; def" block.
define void @s_shuffle_v3i16_v4i16__4_7_7() {
; GFX900-LABEL: s_shuffle_v3i16_v4i16__4_7_7:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def s[4:5]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_pack_lh_b32_b16 s8, s4, s5
; GFX900-NEXT:    s_lshr_b32 s9, s5, 16
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; use s[8:9]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v3i16_v4i16__4_7_7:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def s[4:5]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_pack_lh_b32_b16 s8, s4, s5
; GFX90A-NEXT:    s_lshr_b32 s9, s5, 16
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; use s[8:9]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v3i16_v4i16__4_7_7:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def s[0:1]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_pack_lh_b32_b16 s8, s0, s1
; GFX940-NEXT:    s_lshr_b32 s9, s1, 16
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; use s[8:9]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=s"()
  %vec1 = call <4 x i16> asm "; def $0", "=s"()
  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 4, i32 7, i32 7>
  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
  ret void
}

; Mask <5, 7, 7>: lane 0 takes %vec1[1]; lanes 1 and 2 both take %vec1[3].
; Every index selects from %vec1, and the expected output below contains a
; single "; def" block.
define void @s_shuffle_v3i16_v4i16__5_7_7() {
; GFX900-LABEL: s_shuffle_v3i16_v4i16__5_7_7:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def s[4:5]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s4, s5
; GFX900-NEXT:    s_lshr_b32 s9, s5, 16
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; use s[8:9]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v3i16_v4i16__5_7_7:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def s[4:5]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s4, s5
; GFX90A-NEXT:    s_lshr_b32 s9, s5, 16
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; use s[8:9]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v3i16_v4i16__5_7_7:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def s[0:1]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s0, s1
; GFX940-NEXT:    s_lshr_b32 s9, s1, 16
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; use s[8:9]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=s"()
  %vec1 = call <4 x i16> asm "; def $0", "=s"()
  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 5, i32 7, i32 7>
  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
  ret void
}

; Mask <6, 7, 7>: lane 0 takes %vec1[2]; lanes 1 and 2 both take %vec1[3].
; Every index selects from %vec1, and the expected output below contains a
; single "; def" block.
define void @s_shuffle_v3i16_v4i16__6_7_7() {
; GFX900-LABEL: s_shuffle_v3i16_v4i16__6_7_7:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def s[4:5]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_lshr_b32 s9, s5, 16
; GFX900-NEXT:    s_mov_b32 s8, s5
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; use s[8:9]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v3i16_v4i16__6_7_7:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def s[4:5]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_lshr_b32 s9, s5, 16
; GFX90A-NEXT:    s_mov_b32 s8, s5
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; use s[8:9]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v3i16_v4i16__6_7_7:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def s[0:1]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_lshr_b32 s9, s1, 16
; GFX940-NEXT:    s_mov_b32 s8, s1
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; use s[8:9]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=s"()
  %vec1 = call <4 x i16> asm "; def $0", "=s"()
  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 6, i32 7, i32 7>
  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
  ret void
}

; Mask <7, poison, 7>: lanes 0 and 2 both take %vec1[3]; lane 1 has a poison
; index, so its value is unconstrained. Only %vec1 is referenced, and the
; expected output below contains a single "; def" block.
define void @s_shuffle_v3i16_v4i16__7_u_7() {
; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_u_7:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def s[4:5]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_lshr_b32 s8, s5, 16
; GFX900-NEXT:    s_mov_b32 s9, s8
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; use s[8:9]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_u_7:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def s[4:5]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_lshr_b32 s8, s5, 16
; GFX90A-NEXT:    s_mov_b32 s9, s8
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; use s[8:9]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_u_7:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def s[0:1]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_lshr_b32 s8, s1, 16
; GFX940-NEXT:    s_mov_b32 s9, s8
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; use s[8:9]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=s"()
  %vec1 = call <4 x i16> asm "; def $0", "=s"()
  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 poison, i32 7>
  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
  ret void
}

; Mask <7, 0, 7>: lanes 0 and 2 both take %vec1[3]; lane 1 takes %vec0[0].
define void @s_shuffle_v3i16_v4i16__7_0_7() {
; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_0_7:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def s[6:7]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_lshr_b32 s9, s7, 16
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def s[4:5]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s9, s4
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; use s[8:9]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_0_7:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def s[6:7]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_lshr_b32 s9, s7, 16
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def s[4:5]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s9, s4
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; use s[8:9]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_0_7:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def s[2:3]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_lshr_b32 s9, s3, 16
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def s[0:1]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s9, s0
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; use s[8:9]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=s"()
  %vec1 = call <4 x i16> asm "; def $0", "=s"()
  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 0, i32 7>
  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
  ret void
}

; Mask <7, 1, 7>: lanes 0 and 2 both take %vec1[3]; lane 1 takes %vec0[1].
define void @s_shuffle_v3i16_v4i16__7_1_7() {
; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_1_7:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def s[4:5]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def s[6:7]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s7, s4
; GFX900-NEXT:    s_lshr_b32 s9, s7, 16
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; use s[8:9]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_1_7:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def s[4:5]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def s[6:7]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s7, s4
; GFX90A-NEXT:    s_lshr_b32 s9, s7, 16
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; use s[8:9]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_1_7:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def s[0:1]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def s[2:3]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s3, s0
; GFX940-NEXT:    s_lshr_b32 s9, s3, 16
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; use s[8:9]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=s"()
  %vec1 = call <4 x i16> asm "; def $0", "=s"()
  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 1, i32 7>
  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
  ret void
}

; Mask <7, 2, 7>: lanes 0 and 2 both take %vec1[3]; lane 1 takes %vec0[2].
define void @s_shuffle_v3i16_v4i16__7_2_7() {
; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_2_7:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def s[6:7]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_lshr_b32 s9, s7, 16
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def s[4:5]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s9, s5
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; use s[8:9]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_2_7:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def s[6:7]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_lshr_b32 s9, s7, 16
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def s[4:5]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s9, s5
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; use s[8:9]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_2_7:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def s[2:3]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_lshr_b32 s9, s3, 16
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def s[0:1]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s9, s1
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; use s[8:9]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=s"()
  %vec1 = call <4 x i16> asm "; def $0", "=s"()
  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 2, i32 7>
  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
  ret void
}

; Mask <7, 3, 7>: lanes 0 and 2 both take %vec1[3]; lane 1 takes %vec0[3].
define void @s_shuffle_v3i16_v4i16__7_3_7() {
; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_3_7:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def s[4:5]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def s[6:7]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s7, s5
; GFX900-NEXT:    s_lshr_b32 s9, s7, 16
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; use s[8:9]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_3_7:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def s[4:5]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def s[6:7]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s7, s5
; GFX90A-NEXT:    s_lshr_b32 s9, s7, 16
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; use s[8:9]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_3_7:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def s[0:1]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def s[2:3]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s3, s1
; GFX940-NEXT:    s_lshr_b32 s9, s3, 16
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; use s[8:9]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=s"()
  %vec1 = call <4 x i16> asm "; def $0", "=s"()
  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 3, i32 7>
  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
  ret void
}

; Mask <7, 4, 7>: lanes 0 and 2 both take %vec1[3]; lane 1 takes %vec1[0].
; Every index selects from %vec1, and the expected output below contains a
; single "; def" block.
define void @s_shuffle_v3i16_v4i16__7_4_7() {
; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_4_7:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def s[4:5]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_lshr_b32 s9, s5, 16
; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s9, s4
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; use s[8:9]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_4_7:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def s[4:5]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_lshr_b32 s9, s5, 16
; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s9, s4
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; use s[8:9]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_4_7:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def s[0:1]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_lshr_b32 s9, s1, 16
; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s9, s0
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; use s[8:9]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=s"()
  %vec1 = call <4 x i16> asm "; def $0", "=s"()
  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 4, i32 7>
  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
  ret void
}

; Mask <7, 5, 7>: lanes 0 and 2 both take %vec1[3]; lane 1 takes %vec1[1].
; Every index selects from %vec1, and the expected output below contains a
; single "; def" block.
define void @s_shuffle_v3i16_v4i16__7_5_7() {
; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_5_7:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def s[4:5]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s5, s4
; GFX900-NEXT:    s_lshr_b32 s9, s5, 16
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; use s[8:9]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_5_7:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def s[4:5]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s5, s4
; GFX90A-NEXT:    s_lshr_b32 s9, s5, 16
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; use s[8:9]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_5_7:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def s[0:1]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s1, s0
; GFX940-NEXT:    s_lshr_b32 s9, s1, 16
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; use s[8:9]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=s"()
  %vec1 = call <4 x i16> asm "; def $0", "=s"()
  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 5, i32 7>
  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
  ret void
}

; Mask <7, 6, 7>: lanes 0 and 2 both take %vec1[3]; lane 1 takes %vec1[2].
; Every index selects from %vec1, and the expected output below contains a
; single "; def" block.
define void @s_shuffle_v3i16_v4i16__7_6_7() {
; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_6_7:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def s[4:5]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_lshr_b32 s9, s5, 16
; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s9, s5
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; use s[8:9]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_6_7:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def s[4:5]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_lshr_b32 s9, s5, 16
; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s9, s5
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; use s[8:9]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_6_7:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def s[0:1]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_lshr_b32 s9, s1, 16
; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s9, s1
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; use s[8:9]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=s"()
  %vec1 = call <4 x i16> asm "; def $0", "=s"()
  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 6, i32 7>
  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
  ret void
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GFX90APLUS: {{.*}}