; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX90A %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX940 %s

; Each function below defines its <4 x i16> input(s) with inline asm, applies a
; single shufflevector mask, and stores the result through %ptr. Mask indices
; 0-3 select from the first shuffle operand and 4-7 from the second. In the
; functions visible here the name suffix spells the mask, with "u" standing
; for a poison lane.

; All four mask lanes are poison; the expected output contains only s_waitcnt
; and s_setpc_b64 (no asm def, no store).
define void @v_shuffle_v4i16_v4i16__u_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX9-LABEL: v_shuffle_v4i16_v4i16__u_u_u_u:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> poison
  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
  ret void
}

; Lane 0 = %vec0[0], lanes 1-3 poison. The expected code stores the asm-defined
; pair v[0:1] with no permute instruction in between.
define void @v_shuffle_v4i16_v4i16__0_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i16_v4i16__0_u_u_u:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    v_mov_b32_e32 v2, 0
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:1]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i16_v4i16__0_u_u_u:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:1]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v4i16_v4i16__0_u_u_u:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    v_mov_b32_e32 v2, 0
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:1]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
  ret void
}

; Lane 0 = %vec0[1], lanes 1-3 poison. Expected: one v_alignbit_b32 by 16 that
; reads v0.
define void @v_shuffle_v4i16_v4i16__1_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i16_v4i16__1_u_u_u:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:1]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_mov_b32_e32 v2, 0
; GFX900-NEXT:    v_alignbit_b32 v0, s4, v0, 16
; GFX900-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i16_v4i16__1_u_u_u:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:1]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
; GFX90A-NEXT:    v_alignbit_b32 v0, s4, v0, 16
; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v4i16_v4i16__1_u_u_u:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:1]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    v_mov_b32_e32 v2, 0
; GFX940-NEXT:    v_alignbit_b32 v0, s0, v0, 16
; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
  ret void
}

; Lane 0 = %vec0[2], lanes 1-3 poison. Expected: v1 is copied into v0.
define void @v_shuffle_v4i16_v4i16__2_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i16_v4i16__2_u_u_u:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:1]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_mov_b32_e32 v2, 0
; GFX900-NEXT:    v_mov_b32_e32 v0, v1
; GFX900-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i16_v4i16__2_u_u_u:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:1]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
; GFX90A-NEXT:    v_mov_b32_e32 v0, v1
; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v4i16_v4i16__2_u_u_u:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:1]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    v_mov_b32_e32 v2, 0
; GFX940-NEXT:    v_mov_b32_e32 v0, v1
; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 2, i32 poison, i32 poison, i32 poison>
  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
  ret void
}

; Lane 0 = %vec0[3], lanes 1-3 poison. Expected: one v_alignbit_b32 by 16 that
; reads v1.
define void @v_shuffle_v4i16_v4i16__3_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i16_v4i16__3_u_u_u:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:1]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_mov_b32_e32 v2, 0
; GFX900-NEXT:    v_alignbit_b32 v0, s4, v1, 16
; GFX900-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i16_v4i16__3_u_u_u:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:1]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
; GFX90A-NEXT:    v_alignbit_b32 v0, s4, v1, 16
; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v4i16_v4i16__3_u_u_u:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:1]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    v_mov_b32_e32 v2, 0
; GFX940-NEXT:    v_alignbit_b32 v0, s0, v1, 16
; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 3, i32 poison, i32 poison, i32 poison>
  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
  ret void
}

; Index 4 addresses element 0 of the second operand, which is poison in this
; function; the expected output contains no asm def and no store.
define void @v_shuffle_v4i16_v4i16__4_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX9-LABEL: v_shuffle_v4i16_v4i16__4_u_u_u:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 4, i32 poison, i32 poison, i32 poison>
  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
  ret void
}

; Lane 0 = %vec1[1] (index 5), lanes 1-3 poison. %vec0 is not selected by the
; mask, and only one asm def appears in the expected output.
define void @v_shuffle_v4i16_v4i16__5_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i16_v4i16__5_u_u_u:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:1]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_mov_b32_e32 v2, 0
; GFX900-NEXT:    v_alignbit_b32 v0, s4, v0, 16
; GFX900-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i16_v4i16__5_u_u_u:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:1]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
; GFX90A-NEXT:    v_alignbit_b32 v0, s4, v0, 16
; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v4i16_v4i16__5_u_u_u:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:1]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    v_mov_b32_e32 v2, 0
; GFX940-NEXT:    v_alignbit_b32 v0, s0, v0, 16
; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 5, i32 poison, i32 poison, i32 poison>
  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
  ret void
}

; Lane 0 = %vec1[2] (index 6), lanes 1-3 poison. Only one asm def appears in
; the expected output; v1 is copied into v0.
define void @v_shuffle_v4i16_v4i16__6_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i16_v4i16__6_u_u_u:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:1]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_mov_b32_e32 v2, 0
; GFX900-NEXT:    v_mov_b32_e32 v0, v1
; GFX900-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i16_v4i16__6_u_u_u:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:1]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
; GFX90A-NEXT:    v_mov_b32_e32 v0, v1
; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v4i16_v4i16__6_u_u_u:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:1]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    v_mov_b32_e32 v2, 0
; GFX940-NEXT:    v_mov_b32_e32 v0, v1
; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 6, i32 poison, i32 poison, i32 poison>
  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
  ret void
}

; Lane 0 = %vec1[3] (index 7), lanes 1-3 poison. Only one asm def appears in
; the expected output, followed by one v_alignbit_b32 by 16 that reads v1.
define void @v_shuffle_v4i16_v4i16__7_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_u_u_u:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:1]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_mov_b32_e32 v2, 0
; GFX900-NEXT:    v_alignbit_b32 v0, s4, v1, 16
; GFX900-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_u_u_u:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:1]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
; GFX90A-NEXT:    v_alignbit_b32 v0, s4, v1, 16
; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_u_u_u:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:1]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    v_mov_b32_e32 v2, 0
; GFX940-NEXT:    v_alignbit_b32 v0, s0, v1, 16
; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 poison, i32 poison, i32 poison>
  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
  ret void
}

; Lanes 0-1 = %vec1[3], %vec0[0]; lanes 2-3 poison. Both asm defs appear in the
; expected output, combined by a single v_alignbit_b32 by 16.
define void @v_shuffle_v4i16_v4i16__7_0_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_0_u_u:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:1]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_mov_b32_e32 v3, 0
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[1:2]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_alignbit_b32 v0, v0, v2, 16
; GFX900-NEXT:    global_store_dwordx2 v3, v[0:1], s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_0_u_u:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:1]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[2:3]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_alignbit_b32 v0, v0, v3, 16
; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_0_u_u:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:1]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    v_mov_b32_e32 v4, 0
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[2:3]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_nop 0
; GFX940-NEXT:    v_alignbit_b32 v0, v0, v3, 16
; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 0, i32 poison, i32 poison>
  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
  ret void
}

; Lanes 0-1 = %vec1[3], %vec0[1]; lanes 2-3 poison. Expected: a single
; v_perm_b32 with the selector constant 0x7060302.
define void @v_shuffle_v4i16_v4i16__7_1_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_1_u_u:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:1]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
; GFX900-NEXT:    v_mov_b32_e32 v3, 0
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[1:2]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_perm_b32 v0, v0, v2, s4
; GFX900-NEXT:    global_store_dwordx2 v3, v[0:1], s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_1_u_u:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:1]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[2:3]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_perm_b32 v0, v0, v3, s4
; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_1_u_u:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:1]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
; GFX940-NEXT:    v_mov_b32_e32 v4, 0
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[2:3]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_nop 0
; GFX940-NEXT:    v_perm_b32 v0, v0, v3, s2
; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 1, i32 poison, i32 poison>
  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
  ret void
}

; Lanes 0-1 = %vec1[3], %vec0[2]; lanes 2-3 poison. Expected: a single
; v_alignbit_b32 by 16 combining v1 and v3.
define void @v_shuffle_v4i16_v4i16__7_2_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_2_u_u:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:1]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_mov_b32_e32 v4, 0
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[2:3]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_alignbit_b32 v0, v1, v3, 16
; GFX900-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_2_u_u:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:1]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[2:3]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_alignbit_b32 v0, v1, v3, 16
; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_2_u_u:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:1]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    v_mov_b32_e32 v4, 0
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[2:3]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_nop 0
; GFX940-NEXT:    v_alignbit_b32 v0, v1, v3, 16
; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 2, i32 poison, i32 poison>
  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
  ret void
}

; Lanes 0-1 = %vec1[3], %vec0[3]; lanes 2-3 poison. Expected: a single
; v_perm_b32 combining v1 and v3 with selector 0x7060302.
define void @v_shuffle_v4i16_v4i16__7_3_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_3_u_u:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:1]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
; GFX900-NEXT:    v_mov_b32_e32 v4, 0
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[2:3]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_perm_b32 v0, v1, v3, s4
; GFX900-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_3_u_u:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:1]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[2:3]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_perm_b32 v0, v1, v3, s4
; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_3_u_u:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:1]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
; GFX940-NEXT:    v_mov_b32_e32 v4, 0
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[2:3]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_nop 0
; GFX940-NEXT:    v_perm_b32 v0, v1, v3, s2
; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 3, i32 poison, i32 poison>
  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
  ret void
}

; Lanes 0-1 = %vec1[3], %vec1[0]; lanes 2-3 poison. The mask reads only %vec1,
; and only one asm def appears in the expected output.
define void @v_shuffle_v4i16_v4i16__7_4_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_4_u_u:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:1]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_mov_b32_e32 v2, 0
; GFX900-NEXT:    v_alignbit_b32 v0, v0, v1, 16
; GFX900-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_4_u_u:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:1]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
; GFX90A-NEXT:    v_alignbit_b32 v0, v0, v1, 16
; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_4_u_u:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:1]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    v_mov_b32_e32 v2, 0
; GFX940-NEXT:    v_alignbit_b32 v0, v0, v1, 16
; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 4, i32 poison, i32 poison>
  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
  ret void
}

; Lanes 0-1 = %vec1[3], %vec1[1]; lanes 2-3 poison. Expected: one asm def and a
; single v_perm_b32 with selector 0x7060302.
define void @v_shuffle_v4i16_v4i16__7_5_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_5_u_u:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:1]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
; GFX900-NEXT:    v_mov_b32_e32 v2, 0
; GFX900-NEXT:    v_perm_b32 v0, v0, v1, s4
; GFX900-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_5_u_u:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:1]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
; GFX90A-NEXT:    v_perm_b32 v0, v0, v1, s4
; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_5_u_u:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:1]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
; GFX940-NEXT:    v_mov_b32_e32 v2, 0
; GFX940-NEXT:    v_perm_b32 v0, v0, v1, s2
; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 5, i32 poison, i32 poison>
  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
  ret void
}

; Lanes 0-1 = %vec1[3], %vec1[2]; lanes 2-3 poison. Expected: one asm def and a
; v_alignbit_b32 by 16 whose two vector sources are both v1.
define void @v_shuffle_v4i16_v4i16__7_6_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_6_u_u:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:1]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_mov_b32_e32 v2, 0
; GFX900-NEXT:    v_alignbit_b32 v0, v1, v1, 16
; GFX900-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_6_u_u:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:1]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
; GFX90A-NEXT:    v_alignbit_b32 v0, v1, v1, 16
; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_6_u_u:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:1]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    v_mov_b32_e32 v2, 0
; GFX940-NEXT:    v_alignbit_b32 v0, v1, v1, 16
; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 6, i32 poison, i32 poison>
  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
  ret void
}

; Lanes 0-1 both = %vec1[3]; lanes 2-3 poison. Expected: one asm def and a
; v_perm_b32 whose two vector sources are both v1.
define void @v_shuffle_v4i16_v4i16__7_7_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_u_u:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:1]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
; GFX900-NEXT:    v_mov_b32_e32 v2, 0
; GFX900-NEXT:    v_perm_b32 v0, v1, v1, s4
; GFX900-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_u_u:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:1]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
; GFX90A-NEXT:    v_perm_b32 v0, v1, v1, s4
; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_u_u:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:1]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
; GFX940-NEXT:    v_mov_b32_e32 v2, 0
; GFX940-NEXT:    v_perm_b32 v0, v1, v1, s2
; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 poison, i32 poison>
  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
  ret void
}

; Lanes 0-1 both = %vec1[3], lane 2 = %vec0[0], lane 3 poison. Both asm defs
; appear in the expected output.
define void @v_shuffle_v4i16_v4i16__7_7_0_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_0_u:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:1]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[1:2]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
; GFX900-NEXT:    v_mov_b32_e32 v3, 0
; GFX900-NEXT:    v_perm_b32 v1, v2, v2, s4
; GFX900-NEXT:    v_mov_b32_e32 v2, v0
; GFX900-NEXT:    global_store_dwordx2 v3, v[1:2], s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_0_u:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[2:3]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:1]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_perm_b32 v2, v3, v3, s4
; GFX90A-NEXT:    v_mov_b32_e32 v3, v0
; GFX90A-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_0_u:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[2:3]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
; GFX940-NEXT:    v_mov_b32_e32 v4, 0
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:1]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    v_perm_b32 v2, v3, v3, s2
; GFX940-NEXT:    v_mov_b32_e32 v3, v0
; GFX940-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 0, i32 poison>
  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
  ret void
}

; Lanes 0-1 both = %vec1[3], lane 2 = %vec0[1], lane 3 poison. Expected: one
; v_alignbit_b32 by 16 plus one v_perm_b32.
define void @v_shuffle_v4i16_v4i16__7_7_1_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_1_u:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:1]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[1:2]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_alignbit_b32 v1, s4, v0, 16
; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
; GFX900-NEXT:    v_mov_b32_e32 v3, 0
; GFX900-NEXT:    v_perm_b32 v0, v2, v2, s4
; GFX900-NEXT:    global_store_dwordx2 v3, v[0:1], s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_1_u:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:1]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_alignbit_b32 v1, s4, v0, 16
; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[2:3]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_perm_b32 v0, v3, v3, s4
; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_1_u:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:1]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
; GFX940-NEXT:    v_mov_b32_e32 v4, 0
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[2:3]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    v_alignbit_b32 v1, s0, v0, 16
; GFX940-NEXT:    v_perm_b32 v0, v3, v3, s2
; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 1, i32 poison>
  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
  ret void
}

; Lanes 0-1 both = %vec1[3], lane 2 = %vec0[2], lane 3 poison. Expected: only
; v0 is rewritten (one v_perm_b32) before v[0:1] is stored.
define void @v_shuffle_v4i16_v4i16__7_7_2_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_2_u:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:1]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
; GFX900-NEXT:    v_mov_b32_e32 v4, 0
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[2:3]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_perm_b32 v0, v3, v3, s4
; GFX900-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_2_u:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:1]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[2:3]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_perm_b32 v0, v3, v3, s4
; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_2_u:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:1]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
; GFX940-NEXT:    v_mov_b32_e32 v4, 0
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[2:3]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_nop 0
; GFX940-NEXT:    v_perm_b32 v0, v3, v3, s2
; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 2, i32 poison>
  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
  ret void
}

; Lanes 0-1 both = %vec1[3], lane 2 = %vec0[3], lane 3 poison. Expected: one
; v_alignbit_b32 by 16 on v1 plus one v_perm_b32 producing v0.
define void @v_shuffle_v4i16_v4i16__7_7_3_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_3_u:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:1]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_alignbit_b32 v1, s4, v1, 16
; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
; GFX900-NEXT:    v_mov_b32_e32 v4, 0
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[2:3]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_perm_b32 v0, v3, v3, s4
; GFX900-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_3_u:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:1]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_alignbit_b32 v1, s4, v1, 16
; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[2:3]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_perm_b32 v0, v3, v3, s4
; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_3_u:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:1]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
; GFX940-NEXT:    v_mov_b32_e32 v4, 0
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[2:3]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    v_alignbit_b32 v1, s0, v1, 16
; GFX940-NEXT:    v_perm_b32 v0, v3, v3, s2
; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 3, i32 poison>
  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
  ret void
}

define void @v_shuffle_v4i16_v4i16__7_7_4_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_4_u:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:1]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
; GFX900-NEXT:    v_mov_b32_e32 v3, 0
; GFX900-NEXT:    v_perm_b32 v1, v1, v1, s4
; GFX900-NEXT:    v_mov_b32_e32 v2, v0
; GFX900-NEXT:    global_store_dwordx2 v3, v[1:2], s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_4_u:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:1]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_perm_b32 v2, v1, v1, s4
; GFX90A-NEXT: 
v_mov_b32_e32 v3, v0 974; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17] 975; GFX90A-NEXT: s_waitcnt vmcnt(0) 976; GFX90A-NEXT: s_setpc_b64 s[30:31] 977; 978; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_4_u: 979; GFX940: ; %bb.0: 980; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 981; GFX940-NEXT: s_mov_b32 s2, 0x7060302 982; GFX940-NEXT: v_mov_b32_e32 v4, 0 983; GFX940-NEXT: ;;#ASMSTART 984; GFX940-NEXT: ; def v[0:1] 985; GFX940-NEXT: ;;#ASMEND 986; GFX940-NEXT: s_nop 0 987; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 988; GFX940-NEXT: v_mov_b32_e32 v3, v0 989; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 990; GFX940-NEXT: s_waitcnt vmcnt(0) 991; GFX940-NEXT: s_setpc_b64 s[30:31] 992 %vec0 = call <4 x i16> asm "; def $0", "=v"() 993 %vec1 = call <4 x i16> asm "; def $0", "=v"() 994 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 4, i32 poison> 995 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 996 ret void 997} 998 999define void @v_shuffle_v4i16_v4i16__7_7_5_u(ptr addrspace(1) inreg %ptr) { 1000; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_5_u: 1001; GFX900: ; %bb.0: 1002; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1003; GFX900-NEXT: ;;#ASMSTART 1004; GFX900-NEXT: ; def v[0:1] 1005; GFX900-NEXT: ;;#ASMEND 1006; GFX900-NEXT: v_alignbit_b32 v2, s4, v0, 16 1007; GFX900-NEXT: s_mov_b32 s4, 0x7060302 1008; GFX900-NEXT: v_mov_b32_e32 v3, 0 1009; GFX900-NEXT: v_perm_b32 v1, v1, v1, s4 1010; GFX900-NEXT: global_store_dwordx2 v3, v[1:2], s[16:17] 1011; GFX900-NEXT: s_waitcnt vmcnt(0) 1012; GFX900-NEXT: s_setpc_b64 s[30:31] 1013; 1014; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_5_u: 1015; GFX90A: ; %bb.0: 1016; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1017; GFX90A-NEXT: ;;#ASMSTART 1018; GFX90A-NEXT: ; def v[0:1] 1019; GFX90A-NEXT: ;;#ASMEND 1020; GFX90A-NEXT: v_alignbit_b32 v3, s4, v0, 16 1021; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 1022; GFX90A-NEXT: v_mov_b32_e32 v4, 0 1023; GFX90A-NEXT: 
v_perm_b32 v2, v1, v1, s4 1024; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17] 1025; GFX90A-NEXT: s_waitcnt vmcnt(0) 1026; GFX90A-NEXT: s_setpc_b64 s[30:31] 1027; 1028; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_5_u: 1029; GFX940: ; %bb.0: 1030; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1031; GFX940-NEXT: s_mov_b32 s2, 0x7060302 1032; GFX940-NEXT: v_mov_b32_e32 v4, 0 1033; GFX940-NEXT: ;;#ASMSTART 1034; GFX940-NEXT: ; def v[0:1] 1035; GFX940-NEXT: ;;#ASMEND 1036; GFX940-NEXT: s_nop 0 1037; GFX940-NEXT: v_alignbit_b32 v3, s0, v0, 16 1038; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 1039; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 1040; GFX940-NEXT: s_waitcnt vmcnt(0) 1041; GFX940-NEXT: s_setpc_b64 s[30:31] 1042 %vec0 = call <4 x i16> asm "; def $0", "=v"() 1043 %vec1 = call <4 x i16> asm "; def $0", "=v"() 1044 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 5, i32 poison> 1045 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 1046 ret void 1047} 1048 1049define void @v_shuffle_v4i16_v4i16__7_7_6_u(ptr addrspace(1) inreg %ptr) { 1050; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_6_u: 1051; GFX900: ; %bb.0: 1052; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1053; GFX900-NEXT: ;;#ASMSTART 1054; GFX900-NEXT: ; def v[0:1] 1055; GFX900-NEXT: ;;#ASMEND 1056; GFX900-NEXT: s_mov_b32 s4, 0x7060302 1057; GFX900-NEXT: v_mov_b32_e32 v2, 0 1058; GFX900-NEXT: v_perm_b32 v0, v1, v1, s4 1059; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17] 1060; GFX900-NEXT: s_waitcnt vmcnt(0) 1061; GFX900-NEXT: s_setpc_b64 s[30:31] 1062; 1063; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_6_u: 1064; GFX90A: ; %bb.0: 1065; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1066; GFX90A-NEXT: ;;#ASMSTART 1067; GFX90A-NEXT: ; def v[0:1] 1068; GFX90A-NEXT: ;;#ASMEND 1069; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 1070; GFX90A-NEXT: v_mov_b32_e32 v2, 0 1071; GFX90A-NEXT: v_perm_b32 v0, v1, v1, s4 1072; GFX90A-NEXT: 
global_store_dwordx2 v2, v[0:1], s[16:17] 1073; GFX90A-NEXT: s_waitcnt vmcnt(0) 1074; GFX90A-NEXT: s_setpc_b64 s[30:31] 1075; 1076; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_6_u: 1077; GFX940: ; %bb.0: 1078; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1079; GFX940-NEXT: ;;#ASMSTART 1080; GFX940-NEXT: ; def v[0:1] 1081; GFX940-NEXT: ;;#ASMEND 1082; GFX940-NEXT: s_mov_b32 s2, 0x7060302 1083; GFX940-NEXT: v_mov_b32_e32 v2, 0 1084; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 1085; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 1086; GFX940-NEXT: s_waitcnt vmcnt(0) 1087; GFX940-NEXT: s_setpc_b64 s[30:31] 1088 %vec0 = call <4 x i16> asm "; def $0", "=v"() 1089 %vec1 = call <4 x i16> asm "; def $0", "=v"() 1090 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 poison> 1091 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 1092 ret void 1093} 1094 1095define void @v_shuffle_v4i16_v4i16__7_7_7_u(ptr addrspace(1) inreg %ptr) { 1096; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_7_u: 1097; GFX900: ; %bb.0: 1098; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1099; GFX900-NEXT: ;;#ASMSTART 1100; GFX900-NEXT: ; def v[0:1] 1101; GFX900-NEXT: ;;#ASMEND 1102; GFX900-NEXT: v_alignbit_b32 v2, s4, v1, 16 1103; GFX900-NEXT: s_mov_b32 s4, 0x7060302 1104; GFX900-NEXT: v_mov_b32_e32 v3, 0 1105; GFX900-NEXT: v_perm_b32 v1, v1, v1, s4 1106; GFX900-NEXT: global_store_dwordx2 v3, v[1:2], s[16:17] 1107; GFX900-NEXT: s_waitcnt vmcnt(0) 1108; GFX900-NEXT: s_setpc_b64 s[30:31] 1109; 1110; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_7_u: 1111; GFX90A: ; %bb.0: 1112; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1113; GFX90A-NEXT: ;;#ASMSTART 1114; GFX90A-NEXT: ; def v[0:1] 1115; GFX90A-NEXT: ;;#ASMEND 1116; GFX90A-NEXT: v_alignbit_b32 v3, s4, v1, 16 1117; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 1118; GFX90A-NEXT: v_mov_b32_e32 v4, 0 1119; GFX90A-NEXT: v_perm_b32 v2, v1, v1, s4 1120; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], 
s[16:17] 1121; GFX90A-NEXT: s_waitcnt vmcnt(0) 1122; GFX90A-NEXT: s_setpc_b64 s[30:31] 1123; 1124; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_7_u: 1125; GFX940: ; %bb.0: 1126; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1127; GFX940-NEXT: s_mov_b32 s2, 0x7060302 1128; GFX940-NEXT: v_mov_b32_e32 v4, 0 1129; GFX940-NEXT: ;;#ASMSTART 1130; GFX940-NEXT: ; def v[0:1] 1131; GFX940-NEXT: ;;#ASMEND 1132; GFX940-NEXT: s_nop 0 1133; GFX940-NEXT: v_alignbit_b32 v3, s0, v1, 16 1134; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 1135; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 1136; GFX940-NEXT: s_waitcnt vmcnt(0) 1137; GFX940-NEXT: s_setpc_b64 s[30:31] 1138 %vec0 = call <4 x i16> asm "; def $0", "=v"() 1139 %vec1 = call <4 x i16> asm "; def $0", "=v"() 1140 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 7, i32 poison> 1141 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 1142 ret void 1143} 1144 1145define void @v_shuffle_v4i16_v4i16__7_7_7_0(ptr addrspace(1) inreg %ptr) { 1146; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_7_0: 1147; GFX900: ; %bb.0: 1148; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1149; GFX900-NEXT: ;;#ASMSTART 1150; GFX900-NEXT: ; def v[0:1] 1151; GFX900-NEXT: ;;#ASMEND 1152; GFX900-NEXT: ;;#ASMSTART 1153; GFX900-NEXT: ; def v[1:2] 1154; GFX900-NEXT: ;;#ASMEND 1155; GFX900-NEXT: s_mov_b32 s4, 0x7060302 1156; GFX900-NEXT: v_mov_b32_e32 v3, 0 1157; GFX900-NEXT: v_alignbit_b32 v1, v0, v2, 16 1158; GFX900-NEXT: v_perm_b32 v0, v2, v2, s4 1159; GFX900-NEXT: global_store_dwordx2 v3, v[0:1], s[16:17] 1160; GFX900-NEXT: s_waitcnt vmcnt(0) 1161; GFX900-NEXT: s_setpc_b64 s[30:31] 1162; 1163; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_7_0: 1164; GFX90A: ; %bb.0: 1165; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1166; GFX90A-NEXT: ;;#ASMSTART 1167; GFX90A-NEXT: ; def v[0:1] 1168; GFX90A-NEXT: ;;#ASMEND 1169; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 1170; GFX90A-NEXT: v_mov_b32_e32 v4, 0 1171; 
GFX90A-NEXT: ;;#ASMSTART 1172; GFX90A-NEXT: ; def v[2:3] 1173; GFX90A-NEXT: ;;#ASMEND 1174; GFX90A-NEXT: v_alignbit_b32 v1, v0, v3, 16 1175; GFX90A-NEXT: v_perm_b32 v0, v3, v3, s4 1176; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 1177; GFX90A-NEXT: s_waitcnt vmcnt(0) 1178; GFX90A-NEXT: s_setpc_b64 s[30:31] 1179; 1180; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_7_0: 1181; GFX940: ; %bb.0: 1182; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1183; GFX940-NEXT: ;;#ASMSTART 1184; GFX940-NEXT: ; def v[0:1] 1185; GFX940-NEXT: ;;#ASMEND 1186; GFX940-NEXT: s_mov_b32 s2, 0x7060302 1187; GFX940-NEXT: v_mov_b32_e32 v4, 0 1188; GFX940-NEXT: ;;#ASMSTART 1189; GFX940-NEXT: ; def v[2:3] 1190; GFX940-NEXT: ;;#ASMEND 1191; GFX940-NEXT: s_nop 0 1192; GFX940-NEXT: v_alignbit_b32 v1, v0, v3, 16 1193; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 1194; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 1195; GFX940-NEXT: s_waitcnt vmcnt(0) 1196; GFX940-NEXT: s_setpc_b64 s[30:31] 1197 %vec0 = call <4 x i16> asm "; def $0", "=v"() 1198 %vec1 = call <4 x i16> asm "; def $0", "=v"() 1199 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 7, i32 0> 1200 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 1201 ret void 1202} 1203 1204define void @v_shuffle_v4i16_v4i16__7_7_7_1(ptr addrspace(1) inreg %ptr) { 1205; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_7_1: 1206; GFX900: ; %bb.0: 1207; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1208; GFX900-NEXT: ;;#ASMSTART 1209; GFX900-NEXT: ; def v[0:1] 1210; GFX900-NEXT: ;;#ASMEND 1211; GFX900-NEXT: ;;#ASMSTART 1212; GFX900-NEXT: ; def v[1:2] 1213; GFX900-NEXT: ;;#ASMEND 1214; GFX900-NEXT: s_mov_b32 s4, 0x7060302 1215; GFX900-NEXT: v_mov_b32_e32 v3, 0 1216; GFX900-NEXT: v_perm_b32 v1, v0, v2, s4 1217; GFX900-NEXT: v_perm_b32 v0, v2, v2, s4 1218; GFX900-NEXT: global_store_dwordx2 v3, v[0:1], s[16:17] 1219; GFX900-NEXT: s_waitcnt vmcnt(0) 1220; GFX900-NEXT: s_setpc_b64 s[30:31] 1221; 1222; 
GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_7_1: 1223; GFX90A: ; %bb.0: 1224; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1225; GFX90A-NEXT: ;;#ASMSTART 1226; GFX90A-NEXT: ; def v[0:1] 1227; GFX90A-NEXT: ;;#ASMEND 1228; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 1229; GFX90A-NEXT: v_mov_b32_e32 v4, 0 1230; GFX90A-NEXT: ;;#ASMSTART 1231; GFX90A-NEXT: ; def v[2:3] 1232; GFX90A-NEXT: ;;#ASMEND 1233; GFX90A-NEXT: v_perm_b32 v1, v0, v3, s4 1234; GFX90A-NEXT: v_perm_b32 v0, v3, v3, s4 1235; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 1236; GFX90A-NEXT: s_waitcnt vmcnt(0) 1237; GFX90A-NEXT: s_setpc_b64 s[30:31] 1238; 1239; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_7_1: 1240; GFX940: ; %bb.0: 1241; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1242; GFX940-NEXT: ;;#ASMSTART 1243; GFX940-NEXT: ; def v[0:1] 1244; GFX940-NEXT: ;;#ASMEND 1245; GFX940-NEXT: s_mov_b32 s2, 0x7060302 1246; GFX940-NEXT: v_mov_b32_e32 v4, 0 1247; GFX940-NEXT: ;;#ASMSTART 1248; GFX940-NEXT: ; def v[2:3] 1249; GFX940-NEXT: ;;#ASMEND 1250; GFX940-NEXT: s_nop 0 1251; GFX940-NEXT: v_perm_b32 v1, v0, v3, s2 1252; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 1253; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 1254; GFX940-NEXT: s_waitcnt vmcnt(0) 1255; GFX940-NEXT: s_setpc_b64 s[30:31] 1256 %vec0 = call <4 x i16> asm "; def $0", "=v"() 1257 %vec1 = call <4 x i16> asm "; def $0", "=v"() 1258 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 7, i32 1> 1259 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 1260 ret void 1261} 1262 1263define void @v_shuffle_v4i16_v4i16__7_7_7_2(ptr addrspace(1) inreg %ptr) { 1264; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_7_2: 1265; GFX900: ; %bb.0: 1266; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1267; GFX900-NEXT: ;;#ASMSTART 1268; GFX900-NEXT: ; def v[0:1] 1269; GFX900-NEXT: ;;#ASMEND 1270; GFX900-NEXT: s_mov_b32 s4, 0x7060302 1271; GFX900-NEXT: v_mov_b32_e32 v4, 0 1272; GFX900-NEXT: ;;#ASMSTART 
1273; GFX900-NEXT: ; def v[2:3] 1274; GFX900-NEXT: ;;#ASMEND 1275; GFX900-NEXT: v_perm_b32 v0, v3, v3, s4 1276; GFX900-NEXT: v_alignbit_b32 v1, v1, v3, 16 1277; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 1278; GFX900-NEXT: s_waitcnt vmcnt(0) 1279; GFX900-NEXT: s_setpc_b64 s[30:31] 1280; 1281; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_7_2: 1282; GFX90A: ; %bb.0: 1283; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1284; GFX90A-NEXT: ;;#ASMSTART 1285; GFX90A-NEXT: ; def v[0:1] 1286; GFX90A-NEXT: ;;#ASMEND 1287; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 1288; GFX90A-NEXT: v_mov_b32_e32 v4, 0 1289; GFX90A-NEXT: ;;#ASMSTART 1290; GFX90A-NEXT: ; def v[2:3] 1291; GFX90A-NEXT: ;;#ASMEND 1292; GFX90A-NEXT: v_perm_b32 v0, v3, v3, s4 1293; GFX90A-NEXT: v_alignbit_b32 v1, v1, v3, 16 1294; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 1295; GFX90A-NEXT: s_waitcnt vmcnt(0) 1296; GFX90A-NEXT: s_setpc_b64 s[30:31] 1297; 1298; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_7_2: 1299; GFX940: ; %bb.0: 1300; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1301; GFX940-NEXT: ;;#ASMSTART 1302; GFX940-NEXT: ; def v[0:1] 1303; GFX940-NEXT: ;;#ASMEND 1304; GFX940-NEXT: s_mov_b32 s2, 0x7060302 1305; GFX940-NEXT: v_mov_b32_e32 v4, 0 1306; GFX940-NEXT: ;;#ASMSTART 1307; GFX940-NEXT: ; def v[2:3] 1308; GFX940-NEXT: ;;#ASMEND 1309; GFX940-NEXT: s_nop 0 1310; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 1311; GFX940-NEXT: v_alignbit_b32 v1, v1, v3, 16 1312; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 1313; GFX940-NEXT: s_waitcnt vmcnt(0) 1314; GFX940-NEXT: s_setpc_b64 s[30:31] 1315 %vec0 = call <4 x i16> asm "; def $0", "=v"() 1316 %vec1 = call <4 x i16> asm "; def $0", "=v"() 1317 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 7, i32 2> 1318 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 1319 ret void 1320} 1321 1322define void @v_shuffle_v4i16_v4i16__7_7_7_3(ptr addrspace(1) inreg %ptr) { 1323; GFX900-LABEL: 
v_shuffle_v4i16_v4i16__7_7_7_3: 1324; GFX900: ; %bb.0: 1325; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1326; GFX900-NEXT: ;;#ASMSTART 1327; GFX900-NEXT: ; def v[0:1] 1328; GFX900-NEXT: ;;#ASMEND 1329; GFX900-NEXT: s_mov_b32 s4, 0x7060302 1330; GFX900-NEXT: v_mov_b32_e32 v4, 0 1331; GFX900-NEXT: ;;#ASMSTART 1332; GFX900-NEXT: ; def v[2:3] 1333; GFX900-NEXT: ;;#ASMEND 1334; GFX900-NEXT: v_perm_b32 v1, v1, v3, s4 1335; GFX900-NEXT: v_perm_b32 v0, v3, v3, s4 1336; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 1337; GFX900-NEXT: s_waitcnt vmcnt(0) 1338; GFX900-NEXT: s_setpc_b64 s[30:31] 1339; 1340; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_7_3: 1341; GFX90A: ; %bb.0: 1342; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1343; GFX90A-NEXT: ;;#ASMSTART 1344; GFX90A-NEXT: ; def v[0:1] 1345; GFX90A-NEXT: ;;#ASMEND 1346; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 1347; GFX90A-NEXT: v_mov_b32_e32 v4, 0 1348; GFX90A-NEXT: ;;#ASMSTART 1349; GFX90A-NEXT: ; def v[2:3] 1350; GFX90A-NEXT: ;;#ASMEND 1351; GFX90A-NEXT: v_perm_b32 v1, v1, v3, s4 1352; GFX90A-NEXT: v_perm_b32 v0, v3, v3, s4 1353; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 1354; GFX90A-NEXT: s_waitcnt vmcnt(0) 1355; GFX90A-NEXT: s_setpc_b64 s[30:31] 1356; 1357; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_7_3: 1358; GFX940: ; %bb.0: 1359; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1360; GFX940-NEXT: ;;#ASMSTART 1361; GFX940-NEXT: ; def v[0:1] 1362; GFX940-NEXT: ;;#ASMEND 1363; GFX940-NEXT: s_mov_b32 s2, 0x7060302 1364; GFX940-NEXT: v_mov_b32_e32 v4, 0 1365; GFX940-NEXT: ;;#ASMSTART 1366; GFX940-NEXT: ; def v[2:3] 1367; GFX940-NEXT: ;;#ASMEND 1368; GFX940-NEXT: s_nop 0 1369; GFX940-NEXT: v_perm_b32 v1, v1, v3, s2 1370; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 1371; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 1372; GFX940-NEXT: s_waitcnt vmcnt(0) 1373; GFX940-NEXT: s_setpc_b64 s[30:31] 1374 %vec0 = call <4 x i16> asm "; def $0", "=v"() 1375 %vec1 = call <4 x i16> asm "; 
def $0", "=v"() 1376 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 7, i32 3> 1377 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 1378 ret void 1379} 1380 1381define void @v_shuffle_v4i16_v4i16__7_7_7_4(ptr addrspace(1) inreg %ptr) { 1382; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_7_4: 1383; GFX900: ; %bb.0: 1384; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1385; GFX900-NEXT: ;;#ASMSTART 1386; GFX900-NEXT: ; def v[0:1] 1387; GFX900-NEXT: ;;#ASMEND 1388; GFX900-NEXT: s_mov_b32 s4, 0x7060302 1389; GFX900-NEXT: v_mov_b32_e32 v3, 0 1390; GFX900-NEXT: v_alignbit_b32 v2, v0, v1, 16 1391; GFX900-NEXT: v_perm_b32 v1, v1, v1, s4 1392; GFX900-NEXT: global_store_dwordx2 v3, v[1:2], s[16:17] 1393; GFX900-NEXT: s_waitcnt vmcnt(0) 1394; GFX900-NEXT: s_setpc_b64 s[30:31] 1395; 1396; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_7_4: 1397; GFX90A: ; %bb.0: 1398; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1399; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 1400; GFX90A-NEXT: v_mov_b32_e32 v4, 0 1401; GFX90A-NEXT: ;;#ASMSTART 1402; GFX90A-NEXT: ; def v[0:1] 1403; GFX90A-NEXT: ;;#ASMEND 1404; GFX90A-NEXT: v_alignbit_b32 v3, v0, v1, 16 1405; GFX90A-NEXT: v_perm_b32 v2, v1, v1, s4 1406; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17] 1407; GFX90A-NEXT: s_waitcnt vmcnt(0) 1408; GFX90A-NEXT: s_setpc_b64 s[30:31] 1409; 1410; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_7_4: 1411; GFX940: ; %bb.0: 1412; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1413; GFX940-NEXT: s_mov_b32 s2, 0x7060302 1414; GFX940-NEXT: v_mov_b32_e32 v4, 0 1415; GFX940-NEXT: ;;#ASMSTART 1416; GFX940-NEXT: ; def v[0:1] 1417; GFX940-NEXT: ;;#ASMEND 1418; GFX940-NEXT: s_nop 0 1419; GFX940-NEXT: v_alignbit_b32 v3, v0, v1, 16 1420; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 1421; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 1422; GFX940-NEXT: s_waitcnt vmcnt(0) 1423; GFX940-NEXT: s_setpc_b64 s[30:31] 1424 %vec0 = call <4 x i16> asm "; def $0", "=v"() 
1425 %vec1 = call <4 x i16> asm "; def $0", "=v"() 1426 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 7, i32 4> 1427 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 1428 ret void 1429} 1430 1431define void @v_shuffle_v4i16_v4i16__7_7_7_5(ptr addrspace(1) inreg %ptr) { 1432; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_7_5: 1433; GFX900: ; %bb.0: 1434; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1435; GFX900-NEXT: ;;#ASMSTART 1436; GFX900-NEXT: ; def v[0:1] 1437; GFX900-NEXT: ;;#ASMEND 1438; GFX900-NEXT: s_mov_b32 s4, 0x7060302 1439; GFX900-NEXT: v_mov_b32_e32 v3, 0 1440; GFX900-NEXT: v_perm_b32 v2, v0, v1, s4 1441; GFX900-NEXT: v_perm_b32 v1, v1, v1, s4 1442; GFX900-NEXT: global_store_dwordx2 v3, v[1:2], s[16:17] 1443; GFX900-NEXT: s_waitcnt vmcnt(0) 1444; GFX900-NEXT: s_setpc_b64 s[30:31] 1445; 1446; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_7_5: 1447; GFX90A: ; %bb.0: 1448; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1449; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 1450; GFX90A-NEXT: v_mov_b32_e32 v4, 0 1451; GFX90A-NEXT: ;;#ASMSTART 1452; GFX90A-NEXT: ; def v[0:1] 1453; GFX90A-NEXT: ;;#ASMEND 1454; GFX90A-NEXT: v_perm_b32 v3, v0, v1, s4 1455; GFX90A-NEXT: v_perm_b32 v2, v1, v1, s4 1456; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17] 1457; GFX90A-NEXT: s_waitcnt vmcnt(0) 1458; GFX90A-NEXT: s_setpc_b64 s[30:31] 1459; 1460; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_7_5: 1461; GFX940: ; %bb.0: 1462; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1463; GFX940-NEXT: s_mov_b32 s2, 0x7060302 1464; GFX940-NEXT: v_mov_b32_e32 v4, 0 1465; GFX940-NEXT: ;;#ASMSTART 1466; GFX940-NEXT: ; def v[0:1] 1467; GFX940-NEXT: ;;#ASMEND 1468; GFX940-NEXT: s_nop 0 1469; GFX940-NEXT: v_perm_b32 v3, v0, v1, s2 1470; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 1471; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 1472; GFX940-NEXT: s_waitcnt vmcnt(0) 1473; GFX940-NEXT: s_setpc_b64 s[30:31] 1474 %vec0 = call <4 x i16> 
asm "; def $0", "=v"() 1475 %vec1 = call <4 x i16> asm "; def $0", "=v"() 1476 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 7, i32 5> 1477 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 1478 ret void 1479} 1480 1481define void @v_shuffle_v4i16_v4i16__7_7_7_6(ptr addrspace(1) inreg %ptr) { 1482; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_7_6: 1483; GFX900: ; %bb.0: 1484; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1485; GFX900-NEXT: ;;#ASMSTART 1486; GFX900-NEXT: ; def v[0:1] 1487; GFX900-NEXT: ;;#ASMEND 1488; GFX900-NEXT: s_mov_b32 s4, 0x7060302 1489; GFX900-NEXT: v_mov_b32_e32 v2, 0 1490; GFX900-NEXT: v_perm_b32 v0, v1, v1, s4 1491; GFX900-NEXT: v_alignbit_b32 v1, v1, v1, 16 1492; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17] 1493; GFX900-NEXT: s_waitcnt vmcnt(0) 1494; GFX900-NEXT: s_setpc_b64 s[30:31] 1495; 1496; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_7_6: 1497; GFX90A: ; %bb.0: 1498; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1499; GFX90A-NEXT: ;;#ASMSTART 1500; GFX90A-NEXT: ; def v[0:1] 1501; GFX90A-NEXT: ;;#ASMEND 1502; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 1503; GFX90A-NEXT: v_mov_b32_e32 v2, 0 1504; GFX90A-NEXT: v_perm_b32 v0, v1, v1, s4 1505; GFX90A-NEXT: v_alignbit_b32 v1, v1, v1, 16 1506; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17] 1507; GFX90A-NEXT: s_waitcnt vmcnt(0) 1508; GFX90A-NEXT: s_setpc_b64 s[30:31] 1509; 1510; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_7_6: 1511; GFX940: ; %bb.0: 1512; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1513; GFX940-NEXT: ;;#ASMSTART 1514; GFX940-NEXT: ; def v[0:1] 1515; GFX940-NEXT: ;;#ASMEND 1516; GFX940-NEXT: s_mov_b32 s2, 0x7060302 1517; GFX940-NEXT: v_mov_b32_e32 v2, 0 1518; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 1519; GFX940-NEXT: v_alignbit_b32 v1, v1, v1, 16 1520; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 1521; GFX940-NEXT: s_waitcnt vmcnt(0) 1522; GFX940-NEXT: s_setpc_b64 s[30:31] 1523 %vec0 = call <4 
x i16> asm "; def $0", "=v"() 1524 %vec1 = call <4 x i16> asm "; def $0", "=v"() 1525 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 7, i32 6> 1526 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 1527 ret void 1528} 1529 1530define void @v_shuffle_v4i16_v4i16__7_7_7_7(ptr addrspace(1) inreg %ptr) { 1531; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_7_7: 1532; GFX900: ; %bb.0: 1533; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1534; GFX900-NEXT: ;;#ASMSTART 1535; GFX900-NEXT: ; def v[0:1] 1536; GFX900-NEXT: ;;#ASMEND 1537; GFX900-NEXT: s_mov_b32 s4, 0x7060302 1538; GFX900-NEXT: v_perm_b32 v0, v1, v1, s4 1539; GFX900-NEXT: v_mov_b32_e32 v2, 0 1540; GFX900-NEXT: v_mov_b32_e32 v1, v0 1541; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17] 1542; GFX900-NEXT: s_waitcnt vmcnt(0) 1543; GFX900-NEXT: s_setpc_b64 s[30:31] 1544; 1545; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_7_7: 1546; GFX90A: ; %bb.0: 1547; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1548; GFX90A-NEXT: ;;#ASMSTART 1549; GFX90A-NEXT: ; def v[0:1] 1550; GFX90A-NEXT: ;;#ASMEND 1551; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 1552; GFX90A-NEXT: v_perm_b32 v0, v1, v1, s4 1553; GFX90A-NEXT: v_mov_b32_e32 v2, 0 1554; GFX90A-NEXT: v_mov_b32_e32 v1, v0 1555; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17] 1556; GFX90A-NEXT: s_waitcnt vmcnt(0) 1557; GFX90A-NEXT: s_setpc_b64 s[30:31] 1558; 1559; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_7_7: 1560; GFX940: ; %bb.0: 1561; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1562; GFX940-NEXT: ;;#ASMSTART 1563; GFX940-NEXT: ; def v[0:1] 1564; GFX940-NEXT: ;;#ASMEND 1565; GFX940-NEXT: s_mov_b32 s2, 0x7060302 1566; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 1567; GFX940-NEXT: v_mov_b32_e32 v2, 0 1568; GFX940-NEXT: v_mov_b32_e32 v1, v0 1569; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 1570; GFX940-NEXT: s_waitcnt vmcnt(0) 1571; GFX940-NEXT: s_setpc_b64 s[30:31] 1572 %vec0 = call <4 x i16> asm "; def 
$0", "=v"() 1573 %vec1 = call <4 x i16> asm "; def $0", "=v"() 1574 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 7, i32 7> 1575 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 1576 ret void 1577} 1578 1579define void @v_shuffle_v4i16_v4i16__u_0_0_0(ptr addrspace(1) inreg %ptr) { 1580; GFX900-LABEL: v_shuffle_v4i16_v4i16__u_0_0_0: 1581; GFX900: ; %bb.0: 1582; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1583; GFX900-NEXT: ;;#ASMSTART 1584; GFX900-NEXT: ; def v[0:1] 1585; GFX900-NEXT: ;;#ASMEND 1586; GFX900-NEXT: s_mov_b32 s4, 0x5040100 1587; GFX900-NEXT: v_mov_b32_e32 v2, 0 1588; GFX900-NEXT: v_perm_b32 v1, v0, v0, s4 1589; GFX900-NEXT: v_lshlrev_b32_e32 v0, 16, v0 1590; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17] 1591; GFX900-NEXT: s_waitcnt vmcnt(0) 1592; GFX900-NEXT: s_setpc_b64 s[30:31] 1593; 1594; GFX90A-LABEL: v_shuffle_v4i16_v4i16__u_0_0_0: 1595; GFX90A: ; %bb.0: 1596; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1597; GFX90A-NEXT: ;;#ASMSTART 1598; GFX90A-NEXT: ; def v[0:1] 1599; GFX90A-NEXT: ;;#ASMEND 1600; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 1601; GFX90A-NEXT: v_mov_b32_e32 v2, 0 1602; GFX90A-NEXT: v_perm_b32 v1, v0, v0, s4 1603; GFX90A-NEXT: v_lshlrev_b32_e32 v0, 16, v0 1604; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17] 1605; GFX90A-NEXT: s_waitcnt vmcnt(0) 1606; GFX90A-NEXT: s_setpc_b64 s[30:31] 1607; 1608; GFX940-LABEL: v_shuffle_v4i16_v4i16__u_0_0_0: 1609; GFX940: ; %bb.0: 1610; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1611; GFX940-NEXT: ;;#ASMSTART 1612; GFX940-NEXT: ; def v[0:1] 1613; GFX940-NEXT: ;;#ASMEND 1614; GFX940-NEXT: s_mov_b32 s2, 0x5040100 1615; GFX940-NEXT: v_mov_b32_e32 v2, 0 1616; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 1617; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v0 1618; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 1619; GFX940-NEXT: s_waitcnt vmcnt(0) 1620; GFX940-NEXT: s_setpc_b64 s[30:31] 1621 %vec0 = call <4 x i16> asm "; 
def $0", "=v"() 1622 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 poison, i32 0, i32 0, i32 0> 1623 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 1624 ret void 1625} 1626 1627define void @v_shuffle_v4i16_v4i16__0_0_0_0(ptr addrspace(1) inreg %ptr) { 1628; GFX900-LABEL: v_shuffle_v4i16_v4i16__0_0_0_0: 1629; GFX900: ; %bb.0: 1630; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1631; GFX900-NEXT: ;;#ASMSTART 1632; GFX900-NEXT: ; def v[0:1] 1633; GFX900-NEXT: ;;#ASMEND 1634; GFX900-NEXT: s_mov_b32 s4, 0x5040100 1635; GFX900-NEXT: v_perm_b32 v0, v0, v0, s4 1636; GFX900-NEXT: v_mov_b32_e32 v2, 0 1637; GFX900-NEXT: v_mov_b32_e32 v1, v0 1638; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17] 1639; GFX900-NEXT: s_waitcnt vmcnt(0) 1640; GFX900-NEXT: s_setpc_b64 s[30:31] 1641; 1642; GFX90A-LABEL: v_shuffle_v4i16_v4i16__0_0_0_0: 1643; GFX90A: ; %bb.0: 1644; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1645; GFX90A-NEXT: ;;#ASMSTART 1646; GFX90A-NEXT: ; def v[0:1] 1647; GFX90A-NEXT: ;;#ASMEND 1648; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 1649; GFX90A-NEXT: v_perm_b32 v0, v0, v0, s4 1650; GFX90A-NEXT: v_mov_b32_e32 v2, 0 1651; GFX90A-NEXT: v_mov_b32_e32 v1, v0 1652; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17] 1653; GFX90A-NEXT: s_waitcnt vmcnt(0) 1654; GFX90A-NEXT: s_setpc_b64 s[30:31] 1655; 1656; GFX940-LABEL: v_shuffle_v4i16_v4i16__0_0_0_0: 1657; GFX940: ; %bb.0: 1658; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1659; GFX940-NEXT: ;;#ASMSTART 1660; GFX940-NEXT: ; def v[0:1] 1661; GFX940-NEXT: ;;#ASMEND 1662; GFX940-NEXT: s_mov_b32 s2, 0x5040100 1663; GFX940-NEXT: v_perm_b32 v0, v0, v0, s2 1664; GFX940-NEXT: v_mov_b32_e32 v2, 0 1665; GFX940-NEXT: v_mov_b32_e32 v1, v0 1666; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 1667; GFX940-NEXT: s_waitcnt vmcnt(0) 1668; GFX940-NEXT: s_setpc_b64 s[30:31] 1669 %vec0 = call <4 x i16> asm "; def $0", "=v"() 1670 %shuf = shufflevector <4 x i16> %vec0, <4 x 
i16> poison, <4 x i32> zeroinitializer 1671 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 1672 ret void 1673} 1674 1675define void @v_shuffle_v4i16_v4i16__1_0_0_0(ptr addrspace(1) inreg %ptr) { 1676; GFX900-LABEL: v_shuffle_v4i16_v4i16__1_0_0_0: 1677; GFX900: ; %bb.0: 1678; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1679; GFX900-NEXT: ;;#ASMSTART 1680; GFX900-NEXT: ; def v[0:1] 1681; GFX900-NEXT: ;;#ASMEND 1682; GFX900-NEXT: s_mov_b32 s4, 0x5040100 1683; GFX900-NEXT: v_mov_b32_e32 v2, 0 1684; GFX900-NEXT: v_perm_b32 v1, v0, v0, s4 1685; GFX900-NEXT: v_alignbit_b32 v0, v0, v0, 16 1686; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17] 1687; GFX900-NEXT: s_waitcnt vmcnt(0) 1688; GFX900-NEXT: s_setpc_b64 s[30:31] 1689; 1690; GFX90A-LABEL: v_shuffle_v4i16_v4i16__1_0_0_0: 1691; GFX90A: ; %bb.0: 1692; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1693; GFX90A-NEXT: ;;#ASMSTART 1694; GFX90A-NEXT: ; def v[0:1] 1695; GFX90A-NEXT: ;;#ASMEND 1696; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 1697; GFX90A-NEXT: v_mov_b32_e32 v2, 0 1698; GFX90A-NEXT: v_perm_b32 v1, v0, v0, s4 1699; GFX90A-NEXT: v_alignbit_b32 v0, v0, v0, 16 1700; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17] 1701; GFX90A-NEXT: s_waitcnt vmcnt(0) 1702; GFX90A-NEXT: s_setpc_b64 s[30:31] 1703; 1704; GFX940-LABEL: v_shuffle_v4i16_v4i16__1_0_0_0: 1705; GFX940: ; %bb.0: 1706; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1707; GFX940-NEXT: ;;#ASMSTART 1708; GFX940-NEXT: ; def v[0:1] 1709; GFX940-NEXT: ;;#ASMEND 1710; GFX940-NEXT: s_mov_b32 s2, 0x5040100 1711; GFX940-NEXT: v_mov_b32_e32 v2, 0 1712; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 1713; GFX940-NEXT: v_alignbit_b32 v0, v0, v0, 16 1714; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 1715; GFX940-NEXT: s_waitcnt vmcnt(0) 1716; GFX940-NEXT: s_setpc_b64 s[30:31] 1717 %vec0 = call <4 x i16> asm "; def $0", "=v"() 1718 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 1, i32 0, i32 0, i32 0> 1719 
store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 1720 ret void 1721} 1722 1723define void @v_shuffle_v4i16_v4i16__2_0_0_0(ptr addrspace(1) inreg %ptr) { 1724; GFX900-LABEL: v_shuffle_v4i16_v4i16__2_0_0_0: 1725; GFX900: ; %bb.0: 1726; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1727; GFX900-NEXT: ;;#ASMSTART 1728; GFX900-NEXT: ; def v[0:1] 1729; GFX900-NEXT: ;;#ASMEND 1730; GFX900-NEXT: s_mov_b32 s4, 0x5040100 1731; GFX900-NEXT: v_mov_b32_e32 v3, 0 1732; GFX900-NEXT: v_perm_b32 v1, v0, v1, s4 1733; GFX900-NEXT: v_perm_b32 v2, v0, v0, s4 1734; GFX900-NEXT: global_store_dwordx2 v3, v[1:2], s[16:17] 1735; GFX900-NEXT: s_waitcnt vmcnt(0) 1736; GFX900-NEXT: s_setpc_b64 s[30:31] 1737; 1738; GFX90A-LABEL: v_shuffle_v4i16_v4i16__2_0_0_0: 1739; GFX90A: ; %bb.0: 1740; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1741; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 1742; GFX90A-NEXT: v_mov_b32_e32 v4, 0 1743; GFX90A-NEXT: ;;#ASMSTART 1744; GFX90A-NEXT: ; def v[0:1] 1745; GFX90A-NEXT: ;;#ASMEND 1746; GFX90A-NEXT: v_perm_b32 v2, v0, v1, s4 1747; GFX90A-NEXT: v_perm_b32 v3, v0, v0, s4 1748; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17] 1749; GFX90A-NEXT: s_waitcnt vmcnt(0) 1750; GFX90A-NEXT: s_setpc_b64 s[30:31] 1751; 1752; GFX940-LABEL: v_shuffle_v4i16_v4i16__2_0_0_0: 1753; GFX940: ; %bb.0: 1754; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1755; GFX940-NEXT: s_mov_b32 s2, 0x5040100 1756; GFX940-NEXT: v_mov_b32_e32 v4, 0 1757; GFX940-NEXT: ;;#ASMSTART 1758; GFX940-NEXT: ; def v[0:1] 1759; GFX940-NEXT: ;;#ASMEND 1760; GFX940-NEXT: s_nop 0 1761; GFX940-NEXT: v_perm_b32 v2, v0, v1, s2 1762; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 1763; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 1764; GFX940-NEXT: s_waitcnt vmcnt(0) 1765; GFX940-NEXT: s_setpc_b64 s[30:31] 1766 %vec0 = call <4 x i16> asm "; def $0", "=v"() 1767 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 2, i32 0, i32 0, i32 0> 1768 store <4 x i16> %shuf, ptr 
addrspace(1) %ptr, align 8 1769 ret void 1770} 1771 1772define void @v_shuffle_v4i16_v4i16__3_0_0_0(ptr addrspace(1) inreg %ptr) { 1773; GFX900-LABEL: v_shuffle_v4i16_v4i16__3_0_0_0: 1774; GFX900: ; %bb.0: 1775; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1776; GFX900-NEXT: ;;#ASMSTART 1777; GFX900-NEXT: ; def v[0:1] 1778; GFX900-NEXT: ;;#ASMEND 1779; GFX900-NEXT: s_mov_b32 s4, 0x5040100 1780; GFX900-NEXT: v_mov_b32_e32 v3, 0 1781; GFX900-NEXT: v_perm_b32 v2, v0, v0, s4 1782; GFX900-NEXT: v_alignbit_b32 v1, v0, v1, 16 1783; GFX900-NEXT: global_store_dwordx2 v3, v[1:2], s[16:17] 1784; GFX900-NEXT: s_waitcnt vmcnt(0) 1785; GFX900-NEXT: s_setpc_b64 s[30:31] 1786; 1787; GFX90A-LABEL: v_shuffle_v4i16_v4i16__3_0_0_0: 1788; GFX90A: ; %bb.0: 1789; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1790; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 1791; GFX90A-NEXT: v_mov_b32_e32 v4, 0 1792; GFX90A-NEXT: ;;#ASMSTART 1793; GFX90A-NEXT: ; def v[0:1] 1794; GFX90A-NEXT: ;;#ASMEND 1795; GFX90A-NEXT: v_perm_b32 v3, v0, v0, s4 1796; GFX90A-NEXT: v_alignbit_b32 v2, v0, v1, 16 1797; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17] 1798; GFX90A-NEXT: s_waitcnt vmcnt(0) 1799; GFX90A-NEXT: s_setpc_b64 s[30:31] 1800; 1801; GFX940-LABEL: v_shuffle_v4i16_v4i16__3_0_0_0: 1802; GFX940: ; %bb.0: 1803; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1804; GFX940-NEXT: s_mov_b32 s2, 0x5040100 1805; GFX940-NEXT: v_mov_b32_e32 v4, 0 1806; GFX940-NEXT: ;;#ASMSTART 1807; GFX940-NEXT: ; def v[0:1] 1808; GFX940-NEXT: ;;#ASMEND 1809; GFX940-NEXT: s_nop 0 1810; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 1811; GFX940-NEXT: v_alignbit_b32 v2, v0, v1, 16 1812; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 1813; GFX940-NEXT: s_waitcnt vmcnt(0) 1814; GFX940-NEXT: s_setpc_b64 s[30:31] 1815 %vec0 = call <4 x i16> asm "; def $0", "=v"() 1816 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 3, i32 0, i32 0, i32 0> 1817 store <4 x i16> %shuf, ptr addrspace(1) 
%ptr, align 8 1818 ret void 1819} 1820 1821define void @v_shuffle_v4i16_v4i16__4_0_0_0(ptr addrspace(1) inreg %ptr) { 1822; GFX900-LABEL: v_shuffle_v4i16_v4i16__4_0_0_0: 1823; GFX900: ; %bb.0: 1824; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1825; GFX900-NEXT: ;;#ASMSTART 1826; GFX900-NEXT: ; def v[0:1] 1827; GFX900-NEXT: ;;#ASMEND 1828; GFX900-NEXT: s_mov_b32 s4, 0x5040100 1829; GFX900-NEXT: v_mov_b32_e32 v2, 0 1830; GFX900-NEXT: v_perm_b32 v1, v0, v0, s4 1831; GFX900-NEXT: v_lshlrev_b32_e32 v0, 16, v0 1832; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17] 1833; GFX900-NEXT: s_waitcnt vmcnt(0) 1834; GFX900-NEXT: s_setpc_b64 s[30:31] 1835; 1836; GFX90A-LABEL: v_shuffle_v4i16_v4i16__4_0_0_0: 1837; GFX90A: ; %bb.0: 1838; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1839; GFX90A-NEXT: ;;#ASMSTART 1840; GFX90A-NEXT: ; def v[0:1] 1841; GFX90A-NEXT: ;;#ASMEND 1842; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 1843; GFX90A-NEXT: v_mov_b32_e32 v2, 0 1844; GFX90A-NEXT: v_perm_b32 v1, v0, v0, s4 1845; GFX90A-NEXT: v_lshlrev_b32_e32 v0, 16, v0 1846; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17] 1847; GFX90A-NEXT: s_waitcnt vmcnt(0) 1848; GFX90A-NEXT: s_setpc_b64 s[30:31] 1849; 1850; GFX940-LABEL: v_shuffle_v4i16_v4i16__4_0_0_0: 1851; GFX940: ; %bb.0: 1852; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1853; GFX940-NEXT: ;;#ASMSTART 1854; GFX940-NEXT: ; def v[0:1] 1855; GFX940-NEXT: ;;#ASMEND 1856; GFX940-NEXT: s_mov_b32 s2, 0x5040100 1857; GFX940-NEXT: v_mov_b32_e32 v2, 0 1858; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 1859; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v0 1860; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 1861; GFX940-NEXT: s_waitcnt vmcnt(0) 1862; GFX940-NEXT: s_setpc_b64 s[30:31] 1863 %vec0 = call <4 x i16> asm "; def $0", "=v"() 1864 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 4, i32 0, i32 0, i32 0> 1865 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 1866 ret void 1867} 1868 
; Two-source shuffle with mask <5, 0, 0, 0>: lane 0 is element 1 of %vec1
; (mask indices 4-7 address the second shufflevector operand), and lanes 1-3
; all repeat element 0 of %vec0. Both operands are defined by inline asm with
; a "=v" constraint, and the shuffled vector is stored through the inreg %ptr
; argument. The check lines that follow are autogenerated (see the NOTE at the
; top of the file); regenerate them with utils/update_llc_test_checks.py
; instead of editing them by hand.
1869define void @v_shuffle_v4i16_v4i16__5_0_0_0(ptr addrspace(1) inreg %ptr) { 1870; GFX900-LABEL: v_shuffle_v4i16_v4i16__5_0_0_0: 1871; GFX900: ; %bb.0: 1872; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1873; GFX900-NEXT: ;;#ASMSTART 1874; GFX900-NEXT: ; def v[0:1] 1875; GFX900-NEXT: ;;#ASMEND 1876; GFX900-NEXT: ;;#ASMSTART 1877; GFX900-NEXT: ; def v[1:2] 1878; GFX900-NEXT: ;;#ASMEND 1879; GFX900-NEXT: s_mov_b32 s4, 0x5040100 1880; GFX900-NEXT: v_mov_b32_e32 v3, 0 1881; GFX900-NEXT: v_perm_b32 v2, v0, v0, s4 1882; GFX900-NEXT: v_alignbit_b32 v1, v0, v1, 16 1883; GFX900-NEXT: global_store_dwordx2 v3, v[1:2], s[16:17] 1884; GFX900-NEXT: s_waitcnt vmcnt(0) 1885; GFX900-NEXT: s_setpc_b64 s[30:31] 1886; 1887; GFX90A-LABEL: v_shuffle_v4i16_v4i16__5_0_0_0: 1888; GFX90A: ; %bb.0: 1889; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1890; GFX90A-NEXT: ;;#ASMSTART 1891; GFX90A-NEXT: ; def v[0:1] 1892; GFX90A-NEXT: ;;#ASMEND 1893; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 1894; GFX90A-NEXT: v_mov_b32_e32 v4, 0 1895; GFX90A-NEXT: ;;#ASMSTART 1896; GFX90A-NEXT: ; def v[2:3] 1897; GFX90A-NEXT: ;;#ASMEND 1898; GFX90A-NEXT: v_perm_b32 v1, v0, v0, s4 1899; GFX90A-NEXT: v_alignbit_b32 v0, v0, v2, 16 1900; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 1901; GFX90A-NEXT: s_waitcnt vmcnt(0) 1902; GFX90A-NEXT: s_setpc_b64 s[30:31] 1903; 1904; GFX940-LABEL: v_shuffle_v4i16_v4i16__5_0_0_0: 1905; GFX940: ; %bb.0: 1906; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1907; GFX940-NEXT: ;;#ASMSTART 1908; GFX940-NEXT: ; def v[0:1] 1909; GFX940-NEXT: ;;#ASMEND 1910; GFX940-NEXT: s_mov_b32 s2, 0x5040100 1911; GFX940-NEXT: v_mov_b32_e32 v4, 0 1912; GFX940-NEXT: ;;#ASMSTART 1913; GFX940-NEXT: ; def v[2:3] 1914; GFX940-NEXT: ;;#ASMEND 1915; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 1916; GFX940-NEXT: v_alignbit_b32 v0, v0, v2, 16 1917; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 1918; GFX940-NEXT: s_waitcnt vmcnt(0) 1919; GFX940-NEXT: s_setpc_b64 s[30:31] 
1920 %vec0 = call <4 x i16> asm "; def $0", "=v"() 1921 %vec1 = call <4 x i16> asm "; def $0", "=v"() 1922 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 5, i32 0, i32 0, i32 0> 1923 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 1924 ret void 1925} 1926 1927define void @v_shuffle_v4i16_v4i16__6_0_0_0(ptr addrspace(1) inreg %ptr) { 1928; GFX900-LABEL: v_shuffle_v4i16_v4i16__6_0_0_0: 1929; GFX900: ; %bb.0: 1930; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1931; GFX900-NEXT: ;;#ASMSTART 1932; GFX900-NEXT: ; def v[0:1] 1933; GFX900-NEXT: ;;#ASMEND 1934; GFX900-NEXT: ;;#ASMSTART 1935; GFX900-NEXT: ; def v[1:2] 1936; GFX900-NEXT: ;;#ASMEND 1937; GFX900-NEXT: s_mov_b32 s4, 0x5040100 1938; GFX900-NEXT: v_mov_b32_e32 v3, 0 1939; GFX900-NEXT: v_perm_b32 v1, v0, v2, s4 1940; GFX900-NEXT: v_perm_b32 v2, v0, v0, s4 1941; GFX900-NEXT: global_store_dwordx2 v3, v[1:2], s[16:17] 1942; GFX900-NEXT: s_waitcnt vmcnt(0) 1943; GFX900-NEXT: s_setpc_b64 s[30:31] 1944; 1945; GFX90A-LABEL: v_shuffle_v4i16_v4i16__6_0_0_0: 1946; GFX90A: ; %bb.0: 1947; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1948; GFX90A-NEXT: ;;#ASMSTART 1949; GFX90A-NEXT: ; def v[2:3] 1950; GFX90A-NEXT: ;;#ASMEND 1951; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 1952; GFX90A-NEXT: v_mov_b32_e32 v4, 0 1953; GFX90A-NEXT: ;;#ASMSTART 1954; GFX90A-NEXT: ; def v[0:1] 1955; GFX90A-NEXT: ;;#ASMEND 1956; GFX90A-NEXT: v_perm_b32 v2, v0, v3, s4 1957; GFX90A-NEXT: v_perm_b32 v3, v0, v0, s4 1958; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17] 1959; GFX90A-NEXT: s_waitcnt vmcnt(0) 1960; GFX90A-NEXT: s_setpc_b64 s[30:31] 1961; 1962; GFX940-LABEL: v_shuffle_v4i16_v4i16__6_0_0_0: 1963; GFX940: ; %bb.0: 1964; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1965; GFX940-NEXT: ;;#ASMSTART 1966; GFX940-NEXT: ; def v[2:3] 1967; GFX940-NEXT: ;;#ASMEND 1968; GFX940-NEXT: s_mov_b32 s2, 0x5040100 1969; GFX940-NEXT: v_mov_b32_e32 v4, 0 1970; GFX940-NEXT: ;;#ASMSTART 1971; GFX940-NEXT: ; def 
v[0:1] 1972; GFX940-NEXT: ;;#ASMEND 1973; GFX940-NEXT: s_nop 0 1974; GFX940-NEXT: v_perm_b32 v2, v0, v3, s2 1975; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 1976; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 1977; GFX940-NEXT: s_waitcnt vmcnt(0) 1978; GFX940-NEXT: s_setpc_b64 s[30:31] 1979 %vec0 = call <4 x i16> asm "; def $0", "=v"() 1980 %vec1 = call <4 x i16> asm "; def $0", "=v"() 1981 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 6, i32 0, i32 0, i32 0> 1982 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 1983 ret void 1984} 1985 1986define void @v_shuffle_v4i16_v4i16__7_0_0_0(ptr addrspace(1) inreg %ptr) { 1987; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_0_0_0: 1988; GFX900: ; %bb.0: 1989; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1990; GFX900-NEXT: ;;#ASMSTART 1991; GFX900-NEXT: ; def v[0:1] 1992; GFX900-NEXT: ;;#ASMEND 1993; GFX900-NEXT: ;;#ASMSTART 1994; GFX900-NEXT: ; def v[1:2] 1995; GFX900-NEXT: ;;#ASMEND 1996; GFX900-NEXT: s_mov_b32 s4, 0x5040100 1997; GFX900-NEXT: v_mov_b32_e32 v3, 0 1998; GFX900-NEXT: v_perm_b32 v1, v0, v0, s4 1999; GFX900-NEXT: v_alignbit_b32 v0, v0, v2, 16 2000; GFX900-NEXT: global_store_dwordx2 v3, v[0:1], s[16:17] 2001; GFX900-NEXT: s_waitcnt vmcnt(0) 2002; GFX900-NEXT: s_setpc_b64 s[30:31] 2003; 2004; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_0_0_0: 2005; GFX90A: ; %bb.0: 2006; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2007; GFX90A-NEXT: ;;#ASMSTART 2008; GFX90A-NEXT: ; def v[0:1] 2009; GFX90A-NEXT: ;;#ASMEND 2010; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 2011; GFX90A-NEXT: v_mov_b32_e32 v4, 0 2012; GFX90A-NEXT: ;;#ASMSTART 2013; GFX90A-NEXT: ; def v[2:3] 2014; GFX90A-NEXT: ;;#ASMEND 2015; GFX90A-NEXT: v_perm_b32 v1, v0, v0, s4 2016; GFX90A-NEXT: v_alignbit_b32 v0, v0, v3, 16 2017; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 2018; GFX90A-NEXT: s_waitcnt vmcnt(0) 2019; GFX90A-NEXT: s_setpc_b64 s[30:31] 2020; 2021; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_0_0_0: 2022; 
GFX940: ; %bb.0: 2023; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2024; GFX940-NEXT: ;;#ASMSTART 2025; GFX940-NEXT: ; def v[0:1] 2026; GFX940-NEXT: ;;#ASMEND 2027; GFX940-NEXT: s_mov_b32 s2, 0x5040100 2028; GFX940-NEXT: v_mov_b32_e32 v4, 0 2029; GFX940-NEXT: ;;#ASMSTART 2030; GFX940-NEXT: ; def v[2:3] 2031; GFX940-NEXT: ;;#ASMEND 2032; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 2033; GFX940-NEXT: v_alignbit_b32 v0, v0, v3, 16 2034; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 2035; GFX940-NEXT: s_waitcnt vmcnt(0) 2036; GFX940-NEXT: s_setpc_b64 s[30:31] 2037 %vec0 = call <4 x i16> asm "; def $0", "=v"() 2038 %vec1 = call <4 x i16> asm "; def $0", "=v"() 2039 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 0, i32 0, i32 0> 2040 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 2041 ret void 2042} 2043 2044define void @v_shuffle_v4i16_v4i16__7_u_0_0(ptr addrspace(1) inreg %ptr) { 2045; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_u_0_0: 2046; GFX900: ; %bb.0: 2047; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2048; GFX900-NEXT: ;;#ASMSTART 2049; GFX900-NEXT: ; def v[0:1] 2050; GFX900-NEXT: ;;#ASMEND 2051; GFX900-NEXT: ;;#ASMSTART 2052; GFX900-NEXT: ; def v[1:2] 2053; GFX900-NEXT: ;;#ASMEND 2054; GFX900-NEXT: s_mov_b32 s4, 0x5040100 2055; GFX900-NEXT: v_mov_b32_e32 v3, 0 2056; GFX900-NEXT: v_perm_b32 v1, v0, v0, s4 2057; GFX900-NEXT: v_alignbit_b32 v0, s4, v2, 16 2058; GFX900-NEXT: global_store_dwordx2 v3, v[0:1], s[16:17] 2059; GFX900-NEXT: s_waitcnt vmcnt(0) 2060; GFX900-NEXT: s_setpc_b64 s[30:31] 2061; 2062; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_u_0_0: 2063; GFX90A: ; %bb.0: 2064; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2065; GFX90A-NEXT: ;;#ASMSTART 2066; GFX90A-NEXT: ; def v[0:1] 2067; GFX90A-NEXT: ;;#ASMEND 2068; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 2069; GFX90A-NEXT: v_mov_b32_e32 v4, 0 2070; GFX90A-NEXT: ;;#ASMSTART 2071; GFX90A-NEXT: ; def v[2:3] 2072; GFX90A-NEXT: ;;#ASMEND 2073; 
GFX90A-NEXT: v_perm_b32 v1, v0, v0, s4 2074; GFX90A-NEXT: v_alignbit_b32 v0, s4, v3, 16 2075; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 2076; GFX90A-NEXT: s_waitcnt vmcnt(0) 2077; GFX90A-NEXT: s_setpc_b64 s[30:31] 2078; 2079; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_u_0_0: 2080; GFX940: ; %bb.0: 2081; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2082; GFX940-NEXT: ;;#ASMSTART 2083; GFX940-NEXT: ; def v[0:1] 2084; GFX940-NEXT: ;;#ASMEND 2085; GFX940-NEXT: s_mov_b32 s2, 0x5040100 2086; GFX940-NEXT: v_mov_b32_e32 v4, 0 2087; GFX940-NEXT: ;;#ASMSTART 2088; GFX940-NEXT: ; def v[2:3] 2089; GFX940-NEXT: ;;#ASMEND 2090; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 2091; GFX940-NEXT: v_alignbit_b32 v0, s0, v3, 16 2092; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 2093; GFX940-NEXT: s_waitcnt vmcnt(0) 2094; GFX940-NEXT: s_setpc_b64 s[30:31] 2095 %vec0 = call <4 x i16> asm "; def $0", "=v"() 2096 %vec1 = call <4 x i16> asm "; def $0", "=v"() 2097 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 poison, i32 0, i32 0> 2098 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 2099 ret void 2100} 2101 2102define void @v_shuffle_v4i16_v4i16__7_1_0_0(ptr addrspace(1) inreg %ptr) { 2103; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_1_0_0: 2104; GFX900: ; %bb.0: 2105; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2106; GFX900-NEXT: ;;#ASMSTART 2107; GFX900-NEXT: ; def v[0:1] 2108; GFX900-NEXT: ;;#ASMEND 2109; GFX900-NEXT: ;;#ASMSTART 2110; GFX900-NEXT: ; def v[1:2] 2111; GFX900-NEXT: ;;#ASMEND 2112; GFX900-NEXT: s_mov_b32 s4, 0x7060302 2113; GFX900-NEXT: v_perm_b32 v1, v0, v2, s4 2114; GFX900-NEXT: s_mov_b32 s4, 0x5040100 2115; GFX900-NEXT: v_mov_b32_e32 v3, 0 2116; GFX900-NEXT: v_perm_b32 v2, v0, v0, s4 2117; GFX900-NEXT: global_store_dwordx2 v3, v[1:2], s[16:17] 2118; GFX900-NEXT: s_waitcnt vmcnt(0) 2119; GFX900-NEXT: s_setpc_b64 s[30:31] 2120; 2121; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_1_0_0: 2122; GFX90A: ; %bb.0: 
2123; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2124; GFX90A-NEXT: ;;#ASMSTART 2125; GFX90A-NEXT: ; def v[2:3] 2126; GFX90A-NEXT: ;;#ASMEND 2127; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 2128; GFX90A-NEXT: ;;#ASMSTART 2129; GFX90A-NEXT: ; def v[0:1] 2130; GFX90A-NEXT: ;;#ASMEND 2131; GFX90A-NEXT: v_perm_b32 v2, v0, v3, s4 2132; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 2133; GFX90A-NEXT: v_mov_b32_e32 v4, 0 2134; GFX90A-NEXT: v_perm_b32 v3, v0, v0, s4 2135; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17] 2136; GFX90A-NEXT: s_waitcnt vmcnt(0) 2137; GFX90A-NEXT: s_setpc_b64 s[30:31] 2138; 2139; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_1_0_0: 2140; GFX940: ; %bb.0: 2141; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2142; GFX940-NEXT: ;;#ASMSTART 2143; GFX940-NEXT: ; def v[2:3] 2144; GFX940-NEXT: ;;#ASMEND 2145; GFX940-NEXT: s_mov_b32 s2, 0x7060302 2146; GFX940-NEXT: ;;#ASMSTART 2147; GFX940-NEXT: ; def v[0:1] 2148; GFX940-NEXT: ;;#ASMEND 2149; GFX940-NEXT: v_mov_b32_e32 v4, 0 2150; GFX940-NEXT: v_perm_b32 v2, v0, v3, s2 2151; GFX940-NEXT: s_mov_b32 s2, 0x5040100 2152; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 2153; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 2154; GFX940-NEXT: s_waitcnt vmcnt(0) 2155; GFX940-NEXT: s_setpc_b64 s[30:31] 2156 %vec0 = call <4 x i16> asm "; def $0", "=v"() 2157 %vec1 = call <4 x i16> asm "; def $0", "=v"() 2158 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 1, i32 0, i32 0> 2159 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 2160 ret void 2161} 2162 2163define void @v_shuffle_v4i16_v4i16__7_2_0_0(ptr addrspace(1) inreg %ptr) { 2164; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_2_0_0: 2165; GFX900: ; %bb.0: 2166; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2167; GFX900-NEXT: ;;#ASMSTART 2168; GFX900-NEXT: ; def v[0:1] 2169; GFX900-NEXT: ;;#ASMEND 2170; GFX900-NEXT: ;;#ASMSTART 2171; GFX900-NEXT: ; def v[2:3] 2172; GFX900-NEXT: ;;#ASMEND 2173; GFX900-NEXT: s_mov_b32 s4, 
0x5040100 2174; GFX900-NEXT: v_mov_b32_e32 v4, 0 2175; GFX900-NEXT: v_perm_b32 v2, v0, v0, s4 2176; GFX900-NEXT: v_alignbit_b32 v1, v1, v3, 16 2177; GFX900-NEXT: global_store_dwordx2 v4, v[1:2], s[16:17] 2178; GFX900-NEXT: s_waitcnt vmcnt(0) 2179; GFX900-NEXT: s_setpc_b64 s[30:31] 2180; 2181; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_2_0_0: 2182; GFX90A: ; %bb.0: 2183; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2184; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 2185; GFX90A-NEXT: v_mov_b32_e32 v6, 0 2186; GFX90A-NEXT: ;;#ASMSTART 2187; GFX90A-NEXT: ; def v[0:1] 2188; GFX90A-NEXT: ;;#ASMEND 2189; GFX90A-NEXT: ;;#ASMSTART 2190; GFX90A-NEXT: ; def v[2:3] 2191; GFX90A-NEXT: ;;#ASMEND 2192; GFX90A-NEXT: v_perm_b32 v5, v0, v0, s4 2193; GFX90A-NEXT: v_alignbit_b32 v4, v1, v3, 16 2194; GFX90A-NEXT: global_store_dwordx2 v6, v[4:5], s[16:17] 2195; GFX90A-NEXT: s_waitcnt vmcnt(0) 2196; GFX90A-NEXT: s_setpc_b64 s[30:31] 2197; 2198; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_2_0_0: 2199; GFX940: ; %bb.0: 2200; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2201; GFX940-NEXT: s_mov_b32 s2, 0x5040100 2202; GFX940-NEXT: v_mov_b32_e32 v6, 0 2203; GFX940-NEXT: ;;#ASMSTART 2204; GFX940-NEXT: ; def v[0:1] 2205; GFX940-NEXT: ;;#ASMEND 2206; GFX940-NEXT: ;;#ASMSTART 2207; GFX940-NEXT: ; def v[2:3] 2208; GFX940-NEXT: ;;#ASMEND 2209; GFX940-NEXT: s_nop 0 2210; GFX940-NEXT: v_perm_b32 v5, v0, v0, s2 2211; GFX940-NEXT: v_alignbit_b32 v4, v1, v3, 16 2212; GFX940-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] sc0 sc1 2213; GFX940-NEXT: s_waitcnt vmcnt(0) 2214; GFX940-NEXT: s_setpc_b64 s[30:31] 2215 %vec0 = call <4 x i16> asm "; def $0", "=v"() 2216 %vec1 = call <4 x i16> asm "; def $0", "=v"() 2217 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 2, i32 0, i32 0> 2218 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 2219 ret void 2220} 2221 2222define void @v_shuffle_v4i16_v4i16__7_3_0_0(ptr addrspace(1) inreg %ptr) { 2223; GFX900-LABEL: 
v_shuffle_v4i16_v4i16__7_3_0_0: 2224; GFX900: ; %bb.0: 2225; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2226; GFX900-NEXT: ;;#ASMSTART 2227; GFX900-NEXT: ; def v[0:1] 2228; GFX900-NEXT: ;;#ASMEND 2229; GFX900-NEXT: s_mov_b32 s4, 0x7060302 2230; GFX900-NEXT: ;;#ASMSTART 2231; GFX900-NEXT: ; def v[2:3] 2232; GFX900-NEXT: ;;#ASMEND 2233; GFX900-NEXT: v_perm_b32 v1, v1, v3, s4 2234; GFX900-NEXT: s_mov_b32 s4, 0x5040100 2235; GFX900-NEXT: v_mov_b32_e32 v4, 0 2236; GFX900-NEXT: v_perm_b32 v2, v0, v0, s4 2237; GFX900-NEXT: global_store_dwordx2 v4, v[1:2], s[16:17] 2238; GFX900-NEXT: s_waitcnt vmcnt(0) 2239; GFX900-NEXT: s_setpc_b64 s[30:31] 2240; 2241; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_3_0_0: 2242; GFX90A: ; %bb.0: 2243; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2244; GFX90A-NEXT: ;;#ASMSTART 2245; GFX90A-NEXT: ; def v[2:3] 2246; GFX90A-NEXT: ;;#ASMEND 2247; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 2248; GFX90A-NEXT: ;;#ASMSTART 2249; GFX90A-NEXT: ; def v[0:1] 2250; GFX90A-NEXT: ;;#ASMEND 2251; GFX90A-NEXT: v_perm_b32 v2, v1, v3, s4 2252; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 2253; GFX90A-NEXT: v_mov_b32_e32 v4, 0 2254; GFX90A-NEXT: v_perm_b32 v3, v0, v0, s4 2255; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17] 2256; GFX90A-NEXT: s_waitcnt vmcnt(0) 2257; GFX90A-NEXT: s_setpc_b64 s[30:31] 2258; 2259; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_3_0_0: 2260; GFX940: ; %bb.0: 2261; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2262; GFX940-NEXT: ;;#ASMSTART 2263; GFX940-NEXT: ; def v[2:3] 2264; GFX940-NEXT: ;;#ASMEND 2265; GFX940-NEXT: s_mov_b32 s2, 0x7060302 2266; GFX940-NEXT: ;;#ASMSTART 2267; GFX940-NEXT: ; def v[0:1] 2268; GFX940-NEXT: ;;#ASMEND 2269; GFX940-NEXT: v_mov_b32_e32 v4, 0 2270; GFX940-NEXT: v_perm_b32 v2, v1, v3, s2 2271; GFX940-NEXT: s_mov_b32 s2, 0x5040100 2272; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 2273; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 2274; GFX940-NEXT: s_waitcnt vmcnt(0) 2275; GFX940-NEXT: 
s_setpc_b64 s[30:31] 2276 %vec0 = call <4 x i16> asm "; def $0", "=v"() 2277 %vec1 = call <4 x i16> asm "; def $0", "=v"() 2278 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 3, i32 0, i32 0> 2279 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 2280 ret void 2281} 2282 2283define void @v_shuffle_v4i16_v4i16__7_4_0_0(ptr addrspace(1) inreg %ptr) { 2284; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_4_0_0: 2285; GFX900: ; %bb.0: 2286; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2287; GFX900-NEXT: ;;#ASMSTART 2288; GFX900-NEXT: ; def v[0:1] 2289; GFX900-NEXT: ;;#ASMEND 2290; GFX900-NEXT: ;;#ASMSTART 2291; GFX900-NEXT: ; def v[1:2] 2292; GFX900-NEXT: ;;#ASMEND 2293; GFX900-NEXT: s_mov_b32 s4, 0x5040100 2294; GFX900-NEXT: v_mov_b32_e32 v4, 0 2295; GFX900-NEXT: v_perm_b32 v3, v0, v0, s4 2296; GFX900-NEXT: v_alignbit_b32 v2, v1, v2, 16 2297; GFX900-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17] 2298; GFX900-NEXT: s_waitcnt vmcnt(0) 2299; GFX900-NEXT: s_setpc_b64 s[30:31] 2300; 2301; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_4_0_0: 2302; GFX90A: ; %bb.0: 2303; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2304; GFX90A-NEXT: ;;#ASMSTART 2305; GFX90A-NEXT: ; def v[0:1] 2306; GFX90A-NEXT: ;;#ASMEND 2307; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 2308; GFX90A-NEXT: v_mov_b32_e32 v4, 0 2309; GFX90A-NEXT: ;;#ASMSTART 2310; GFX90A-NEXT: ; def v[2:3] 2311; GFX90A-NEXT: ;;#ASMEND 2312; GFX90A-NEXT: v_perm_b32 v1, v0, v0, s4 2313; GFX90A-NEXT: v_alignbit_b32 v0, v2, v3, 16 2314; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 2315; GFX90A-NEXT: s_waitcnt vmcnt(0) 2316; GFX90A-NEXT: s_setpc_b64 s[30:31] 2317; 2318; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_4_0_0: 2319; GFX940: ; %bb.0: 2320; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2321; GFX940-NEXT: ;;#ASMSTART 2322; GFX940-NEXT: ; def v[0:1] 2323; GFX940-NEXT: ;;#ASMEND 2324; GFX940-NEXT: s_mov_b32 s2, 0x5040100 2325; GFX940-NEXT: v_mov_b32_e32 v4, 0 2326; GFX940-NEXT: 
;;#ASMSTART 2327; GFX940-NEXT: ; def v[2:3] 2328; GFX940-NEXT: ;;#ASMEND 2329; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 2330; GFX940-NEXT: v_alignbit_b32 v0, v2, v3, 16 2331; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 2332; GFX940-NEXT: s_waitcnt vmcnt(0) 2333; GFX940-NEXT: s_setpc_b64 s[30:31] 2334 %vec0 = call <4 x i16> asm "; def $0", "=v"() 2335 %vec1 = call <4 x i16> asm "; def $0", "=v"() 2336 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 4, i32 0, i32 0> 2337 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 2338 ret void 2339} 2340 2341define void @v_shuffle_v4i16_v4i16__7_5_0_0(ptr addrspace(1) inreg %ptr) { 2342; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_5_0_0: 2343; GFX900: ; %bb.0: 2344; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2345; GFX900-NEXT: ;;#ASMSTART 2346; GFX900-NEXT: ; def v[0:1] 2347; GFX900-NEXT: ;;#ASMEND 2348; GFX900-NEXT: ;;#ASMSTART 2349; GFX900-NEXT: ; def v[1:2] 2350; GFX900-NEXT: ;;#ASMEND 2351; GFX900-NEXT: s_mov_b32 s4, 0x7060302 2352; GFX900-NEXT: v_perm_b32 v1, v1, v2, s4 2353; GFX900-NEXT: s_mov_b32 s4, 0x5040100 2354; GFX900-NEXT: v_mov_b32_e32 v3, 0 2355; GFX900-NEXT: v_perm_b32 v2, v0, v0, s4 2356; GFX900-NEXT: global_store_dwordx2 v3, v[1:2], s[16:17] 2357; GFX900-NEXT: s_waitcnt vmcnt(0) 2358; GFX900-NEXT: s_setpc_b64 s[30:31] 2359; 2360; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_5_0_0: 2361; GFX90A: ; %bb.0: 2362; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2363; GFX90A-NEXT: ;;#ASMSTART 2364; GFX90A-NEXT: ; def v[2:3] 2365; GFX90A-NEXT: ;;#ASMEND 2366; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 2367; GFX90A-NEXT: v_perm_b32 v2, v2, v3, s4 2368; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 2369; GFX90A-NEXT: v_mov_b32_e32 v4, 0 2370; GFX90A-NEXT: ;;#ASMSTART 2371; GFX90A-NEXT: ; def v[0:1] 2372; GFX90A-NEXT: ;;#ASMEND 2373; GFX90A-NEXT: v_perm_b32 v3, v0, v0, s4 2374; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17] 2375; GFX90A-NEXT: s_waitcnt vmcnt(0) 2376; 
GFX90A-NEXT: s_setpc_b64 s[30:31] 2377; 2378; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_5_0_0: 2379; GFX940: ; %bb.0: 2380; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2381; GFX940-NEXT: ;;#ASMSTART 2382; GFX940-NEXT: ; def v[2:3] 2383; GFX940-NEXT: ;;#ASMEND 2384; GFX940-NEXT: s_mov_b32 s2, 0x7060302 2385; GFX940-NEXT: v_perm_b32 v2, v2, v3, s2 2386; GFX940-NEXT: s_mov_b32 s2, 0x5040100 2387; GFX940-NEXT: v_mov_b32_e32 v4, 0 2388; GFX940-NEXT: ;;#ASMSTART 2389; GFX940-NEXT: ; def v[0:1] 2390; GFX940-NEXT: ;;#ASMEND 2391; GFX940-NEXT: s_nop 0 2392; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 2393; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 2394; GFX940-NEXT: s_waitcnt vmcnt(0) 2395; GFX940-NEXT: s_setpc_b64 s[30:31] 2396 %vec0 = call <4 x i16> asm "; def $0", "=v"() 2397 %vec1 = call <4 x i16> asm "; def $0", "=v"() 2398 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 5, i32 0, i32 0> 2399 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 2400 ret void 2401} 2402 2403define void @v_shuffle_v4i16_v4i16__7_6_0_0(ptr addrspace(1) inreg %ptr) { 2404; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_6_0_0: 2405; GFX900: ; %bb.0: 2406; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2407; GFX900-NEXT: ;;#ASMSTART 2408; GFX900-NEXT: ; def v[0:1] 2409; GFX900-NEXT: ;;#ASMEND 2410; GFX900-NEXT: ;;#ASMSTART 2411; GFX900-NEXT: ; def v[1:2] 2412; GFX900-NEXT: ;;#ASMEND 2413; GFX900-NEXT: s_mov_b32 s4, 0x5040100 2414; GFX900-NEXT: v_mov_b32_e32 v3, 0 2415; GFX900-NEXT: v_perm_b32 v1, v0, v0, s4 2416; GFX900-NEXT: v_alignbit_b32 v0, v2, v2, 16 2417; GFX900-NEXT: global_store_dwordx2 v3, v[0:1], s[16:17] 2418; GFX900-NEXT: s_waitcnt vmcnt(0) 2419; GFX900-NEXT: s_setpc_b64 s[30:31] 2420; 2421; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_6_0_0: 2422; GFX90A: ; %bb.0: 2423; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2424; GFX90A-NEXT: ;;#ASMSTART 2425; GFX90A-NEXT: ; def v[0:1] 2426; GFX90A-NEXT: ;;#ASMEND 2427; GFX90A-NEXT: 
s_mov_b32 s4, 0x5040100 2428; GFX90A-NEXT: v_mov_b32_e32 v4, 0 2429; GFX90A-NEXT: ;;#ASMSTART 2430; GFX90A-NEXT: ; def v[2:3] 2431; GFX90A-NEXT: ;;#ASMEND 2432; GFX90A-NEXT: v_perm_b32 v1, v0, v0, s4 2433; GFX90A-NEXT: v_alignbit_b32 v0, v3, v3, 16 2434; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 2435; GFX90A-NEXT: s_waitcnt vmcnt(0) 2436; GFX90A-NEXT: s_setpc_b64 s[30:31] 2437; 2438; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_6_0_0: 2439; GFX940: ; %bb.0: 2440; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2441; GFX940-NEXT: ;;#ASMSTART 2442; GFX940-NEXT: ; def v[0:1] 2443; GFX940-NEXT: ;;#ASMEND 2444; GFX940-NEXT: s_mov_b32 s2, 0x5040100 2445; GFX940-NEXT: v_mov_b32_e32 v4, 0 2446; GFX940-NEXT: ;;#ASMSTART 2447; GFX940-NEXT: ; def v[2:3] 2448; GFX940-NEXT: ;;#ASMEND 2449; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 2450; GFX940-NEXT: v_alignbit_b32 v0, v3, v3, 16 2451; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 2452; GFX940-NEXT: s_waitcnt vmcnt(0) 2453; GFX940-NEXT: s_setpc_b64 s[30:31] 2454 %vec0 = call <4 x i16> asm "; def $0", "=v"() 2455 %vec1 = call <4 x i16> asm "; def $0", "=v"() 2456 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 6, i32 0, i32 0> 2457 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 2458 ret void 2459} 2460 2461define void @v_shuffle_v4i16_v4i16__7_7_0_0(ptr addrspace(1) inreg %ptr) { 2462; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_0_0: 2463; GFX900: ; %bb.0: 2464; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2465; GFX900-NEXT: ;;#ASMSTART 2466; GFX900-NEXT: ; def v[0:1] 2467; GFX900-NEXT: ;;#ASMEND 2468; GFX900-NEXT: ;;#ASMSTART 2469; GFX900-NEXT: ; def v[1:2] 2470; GFX900-NEXT: ;;#ASMEND 2471; GFX900-NEXT: s_mov_b32 s4, 0x5040100 2472; GFX900-NEXT: v_perm_b32 v1, v0, v0, s4 2473; GFX900-NEXT: s_mov_b32 s4, 0x7060302 2474; GFX900-NEXT: v_mov_b32_e32 v3, 0 2475; GFX900-NEXT: v_perm_b32 v0, v2, v2, s4 2476; GFX900-NEXT: global_store_dwordx2 v3, v[0:1], s[16:17] 2477; 
GFX900-NEXT: s_waitcnt vmcnt(0) 2478; GFX900-NEXT: s_setpc_b64 s[30:31] 2479; 2480; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_0_0: 2481; GFX90A: ; %bb.0: 2482; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2483; GFX90A-NEXT: ;;#ASMSTART 2484; GFX90A-NEXT: ; def v[0:1] 2485; GFX90A-NEXT: ;;#ASMEND 2486; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 2487; GFX90A-NEXT: v_perm_b32 v1, v0, v0, s4 2488; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 2489; GFX90A-NEXT: v_mov_b32_e32 v4, 0 2490; GFX90A-NEXT: ;;#ASMSTART 2491; GFX90A-NEXT: ; def v[2:3] 2492; GFX90A-NEXT: ;;#ASMEND 2493; GFX90A-NEXT: v_perm_b32 v0, v3, v3, s4 2494; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 2495; GFX90A-NEXT: s_waitcnt vmcnt(0) 2496; GFX90A-NEXT: s_setpc_b64 s[30:31] 2497; 2498; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_0_0: 2499; GFX940: ; %bb.0: 2500; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2501; GFX940-NEXT: ;;#ASMSTART 2502; GFX940-NEXT: ; def v[0:1] 2503; GFX940-NEXT: ;;#ASMEND 2504; GFX940-NEXT: s_mov_b32 s2, 0x5040100 2505; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 2506; GFX940-NEXT: s_mov_b32 s2, 0x7060302 2507; GFX940-NEXT: v_mov_b32_e32 v4, 0 2508; GFX940-NEXT: ;;#ASMSTART 2509; GFX940-NEXT: ; def v[2:3] 2510; GFX940-NEXT: ;;#ASMEND 2511; GFX940-NEXT: s_nop 0 2512; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 2513; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 2514; GFX940-NEXT: s_waitcnt vmcnt(0) 2515; GFX940-NEXT: s_setpc_b64 s[30:31] 2516 %vec0 = call <4 x i16> asm "; def $0", "=v"() 2517 %vec1 = call <4 x i16> asm "; def $0", "=v"() 2518 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 0, i32 0> 2519 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 2520 ret void 2521} 2522 2523define void @v_shuffle_v4i16_v4i16__7_7_u_0(ptr addrspace(1) inreg %ptr) { 2524; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_u_0: 2525; GFX900: ; %bb.0: 2526; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2527; GFX900-NEXT: ;;#ASMSTART 2528; 
GFX900-NEXT: ; def v[0:1] 2529; GFX900-NEXT: ;;#ASMEND 2530; GFX900-NEXT: ;;#ASMSTART 2531; GFX900-NEXT: ; def v[1:2] 2532; GFX900-NEXT: ;;#ASMEND 2533; GFX900-NEXT: s_mov_b32 s4, 0x7060302 2534; GFX900-NEXT: v_mov_b32_e32 v3, 0 2535; GFX900-NEXT: v_perm_b32 v1, v2, v2, s4 2536; GFX900-NEXT: v_lshlrev_b32_e32 v2, 16, v0 2537; GFX900-NEXT: global_store_dwordx2 v3, v[1:2], s[16:17] 2538; GFX900-NEXT: s_waitcnt vmcnt(0) 2539; GFX900-NEXT: s_setpc_b64 s[30:31] 2540; 2541; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_u_0: 2542; GFX90A: ; %bb.0: 2543; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2544; GFX90A-NEXT: ;;#ASMSTART 2545; GFX90A-NEXT: ; def v[2:3] 2546; GFX90A-NEXT: ;;#ASMEND 2547; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 2548; GFX90A-NEXT: v_mov_b32_e32 v4, 0 2549; GFX90A-NEXT: ;;#ASMSTART 2550; GFX90A-NEXT: ; def v[0:1] 2551; GFX90A-NEXT: ;;#ASMEND 2552; GFX90A-NEXT: v_perm_b32 v2, v3, v3, s4 2553; GFX90A-NEXT: v_lshlrev_b32_e32 v3, 16, v0 2554; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17] 2555; GFX90A-NEXT: s_waitcnt vmcnt(0) 2556; GFX90A-NEXT: s_setpc_b64 s[30:31] 2557; 2558; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_u_0: 2559; GFX940: ; %bb.0: 2560; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2561; GFX940-NEXT: ;;#ASMSTART 2562; GFX940-NEXT: ; def v[2:3] 2563; GFX940-NEXT: ;;#ASMEND 2564; GFX940-NEXT: s_mov_b32 s2, 0x7060302 2565; GFX940-NEXT: v_mov_b32_e32 v4, 0 2566; GFX940-NEXT: ;;#ASMSTART 2567; GFX940-NEXT: ; def v[0:1] 2568; GFX940-NEXT: ;;#ASMEND 2569; GFX940-NEXT: v_perm_b32 v2, v3, v3, s2 2570; GFX940-NEXT: v_lshlrev_b32_e32 v3, 16, v0 2571; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 2572; GFX940-NEXT: s_waitcnt vmcnt(0) 2573; GFX940-NEXT: s_setpc_b64 s[30:31] 2574 %vec0 = call <4 x i16> asm "; def $0", "=v"() 2575 %vec1 = call <4 x i16> asm "; def $0", "=v"() 2576 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 poison, i32 0> 2577 store <4 x i16> %shuf, ptr addrspace(1) 
%ptr, align 8 2578 ret void 2579} 2580 2581define void @v_shuffle_v4i16_v4i16__7_7_1_0(ptr addrspace(1) inreg %ptr) { 2582; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_1_0: 2583; GFX900: ; %bb.0: 2584; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2585; GFX900-NEXT: ;;#ASMSTART 2586; GFX900-NEXT: ; def v[0:1] 2587; GFX900-NEXT: ;;#ASMEND 2588; GFX900-NEXT: ;;#ASMSTART 2589; GFX900-NEXT: ; def v[1:2] 2590; GFX900-NEXT: ;;#ASMEND 2591; GFX900-NEXT: s_mov_b32 s4, 0x7060302 2592; GFX900-NEXT: v_mov_b32_e32 v3, 0 2593; GFX900-NEXT: v_alignbit_b32 v1, v0, v0, 16 2594; GFX900-NEXT: v_perm_b32 v0, v2, v2, s4 2595; GFX900-NEXT: global_store_dwordx2 v3, v[0:1], s[16:17] 2596; GFX900-NEXT: s_waitcnt vmcnt(0) 2597; GFX900-NEXT: s_setpc_b64 s[30:31] 2598; 2599; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_1_0: 2600; GFX90A: ; %bb.0: 2601; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2602; GFX90A-NEXT: ;;#ASMSTART 2603; GFX90A-NEXT: ; def v[0:1] 2604; GFX90A-NEXT: ;;#ASMEND 2605; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 2606; GFX90A-NEXT: v_mov_b32_e32 v4, 0 2607; GFX90A-NEXT: ;;#ASMSTART 2608; GFX90A-NEXT: ; def v[2:3] 2609; GFX90A-NEXT: ;;#ASMEND 2610; GFX90A-NEXT: v_alignbit_b32 v1, v0, v0, 16 2611; GFX90A-NEXT: v_perm_b32 v0, v3, v3, s4 2612; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 2613; GFX90A-NEXT: s_waitcnt vmcnt(0) 2614; GFX90A-NEXT: s_setpc_b64 s[30:31] 2615; 2616; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_1_0: 2617; GFX940: ; %bb.0: 2618; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2619; GFX940-NEXT: ;;#ASMSTART 2620; GFX940-NEXT: ; def v[0:1] 2621; GFX940-NEXT: ;;#ASMEND 2622; GFX940-NEXT: s_mov_b32 s2, 0x7060302 2623; GFX940-NEXT: v_mov_b32_e32 v4, 0 2624; GFX940-NEXT: ;;#ASMSTART 2625; GFX940-NEXT: ; def v[2:3] 2626; GFX940-NEXT: ;;#ASMEND 2627; GFX940-NEXT: v_alignbit_b32 v1, v0, v0, 16 2628; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 2629; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 2630; GFX940-NEXT: s_waitcnt vmcnt(0) 
2631; GFX940-NEXT: s_setpc_b64 s[30:31] 2632 %vec0 = call <4 x i16> asm "; def $0", "=v"() 2633 %vec1 = call <4 x i16> asm "; def $0", "=v"() 2634 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 1, i32 0> 2635 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 2636 ret void 2637} 2638 2639define void @v_shuffle_v4i16_v4i16__7_7_2_0(ptr addrspace(1) inreg %ptr) { 2640; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_2_0: 2641; GFX900: ; %bb.0: 2642; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2643; GFX900-NEXT: ;;#ASMSTART 2644; GFX900-NEXT: ; def v[0:1] 2645; GFX900-NEXT: ;;#ASMEND 2646; GFX900-NEXT: s_mov_b32 s4, 0x5040100 2647; GFX900-NEXT: v_perm_b32 v1, v0, v1, s4 2648; GFX900-NEXT: s_mov_b32 s4, 0x7060302 2649; GFX900-NEXT: v_mov_b32_e32 v4, 0 2650; GFX900-NEXT: ;;#ASMSTART 2651; GFX900-NEXT: ; def v[2:3] 2652; GFX900-NEXT: ;;#ASMEND 2653; GFX900-NEXT: v_perm_b32 v0, v3, v3, s4 2654; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 2655; GFX900-NEXT: s_waitcnt vmcnt(0) 2656; GFX900-NEXT: s_setpc_b64 s[30:31] 2657; 2658; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_2_0: 2659; GFX90A: ; %bb.0: 2660; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2661; GFX90A-NEXT: ;;#ASMSTART 2662; GFX90A-NEXT: ; def v[0:1] 2663; GFX90A-NEXT: ;;#ASMEND 2664; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 2665; GFX90A-NEXT: v_perm_b32 v1, v0, v1, s4 2666; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 2667; GFX90A-NEXT: v_mov_b32_e32 v4, 0 2668; GFX90A-NEXT: ;;#ASMSTART 2669; GFX90A-NEXT: ; def v[2:3] 2670; GFX90A-NEXT: ;;#ASMEND 2671; GFX90A-NEXT: v_perm_b32 v0, v3, v3, s4 2672; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 2673; GFX90A-NEXT: s_waitcnt vmcnt(0) 2674; GFX90A-NEXT: s_setpc_b64 s[30:31] 2675; 2676; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_2_0: 2677; GFX940: ; %bb.0: 2678; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2679; GFX940-NEXT: ;;#ASMSTART 2680; GFX940-NEXT: ; def v[0:1] 2681; GFX940-NEXT: ;;#ASMEND 2682; 
GFX940-NEXT: s_mov_b32 s2, 0x5040100 2683; GFX940-NEXT: v_perm_b32 v1, v0, v1, s2 2684; GFX940-NEXT: s_mov_b32 s2, 0x7060302 2685; GFX940-NEXT: v_mov_b32_e32 v4, 0 2686; GFX940-NEXT: ;;#ASMSTART 2687; GFX940-NEXT: ; def v[2:3] 2688; GFX940-NEXT: ;;#ASMEND 2689; GFX940-NEXT: s_nop 0 2690; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 2691; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 2692; GFX940-NEXT: s_waitcnt vmcnt(0) 2693; GFX940-NEXT: s_setpc_b64 s[30:31] 2694 %vec0 = call <4 x i16> asm "; def $0", "=v"() 2695 %vec1 = call <4 x i16> asm "; def $0", "=v"() 2696 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 2, i32 0> 2697 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 2698 ret void 2699} 2700 2701define void @v_shuffle_v4i16_v4i16__7_7_3_0(ptr addrspace(1) inreg %ptr) { 2702; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_3_0: 2703; GFX900: ; %bb.0: 2704; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2705; GFX900-NEXT: ;;#ASMSTART 2706; GFX900-NEXT: ; def v[0:1] 2707; GFX900-NEXT: ;;#ASMEND 2708; GFX900-NEXT: s_mov_b32 s4, 0x7060302 2709; GFX900-NEXT: v_mov_b32_e32 v4, 0 2710; GFX900-NEXT: ;;#ASMSTART 2711; GFX900-NEXT: ; def v[2:3] 2712; GFX900-NEXT: ;;#ASMEND 2713; GFX900-NEXT: v_alignbit_b32 v1, v0, v1, 16 2714; GFX900-NEXT: v_perm_b32 v0, v3, v3, s4 2715; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 2716; GFX900-NEXT: s_waitcnt vmcnt(0) 2717; GFX900-NEXT: s_setpc_b64 s[30:31] 2718; 2719; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_3_0: 2720; GFX90A: ; %bb.0: 2721; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2722; GFX90A-NEXT: ;;#ASMSTART 2723; GFX90A-NEXT: ; def v[0:1] 2724; GFX90A-NEXT: ;;#ASMEND 2725; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 2726; GFX90A-NEXT: v_mov_b32_e32 v4, 0 2727; GFX90A-NEXT: ;;#ASMSTART 2728; GFX90A-NEXT: ; def v[2:3] 2729; GFX90A-NEXT: ;;#ASMEND 2730; GFX90A-NEXT: v_alignbit_b32 v1, v0, v1, 16 2731; GFX90A-NEXT: v_perm_b32 v0, v3, v3, s4 2732; GFX90A-NEXT: 
global_store_dwordx2 v4, v[0:1], s[16:17] 2733; GFX90A-NEXT: s_waitcnt vmcnt(0) 2734; GFX90A-NEXT: s_setpc_b64 s[30:31] 2735; 2736; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_3_0: 2737; GFX940: ; %bb.0: 2738; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2739; GFX940-NEXT: ;;#ASMSTART 2740; GFX940-NEXT: ; def v[0:1] 2741; GFX940-NEXT: ;;#ASMEND 2742; GFX940-NEXT: s_mov_b32 s2, 0x7060302 2743; GFX940-NEXT: v_mov_b32_e32 v4, 0 2744; GFX940-NEXT: ;;#ASMSTART 2745; GFX940-NEXT: ; def v[2:3] 2746; GFX940-NEXT: ;;#ASMEND 2747; GFX940-NEXT: v_alignbit_b32 v1, v0, v1, 16 2748; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 2749; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 2750; GFX940-NEXT: s_waitcnt vmcnt(0) 2751; GFX940-NEXT: s_setpc_b64 s[30:31] 2752 %vec0 = call <4 x i16> asm "; def $0", "=v"() 2753 %vec1 = call <4 x i16> asm "; def $0", "=v"() 2754 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 3, i32 0> 2755 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 2756 ret void 2757} 2758 2759define void @v_shuffle_v4i16_v4i16__7_7_4_0(ptr addrspace(1) inreg %ptr) { 2760; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_4_0: 2761; GFX900: ; %bb.0: 2762; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2763; GFX900-NEXT: ;;#ASMSTART 2764; GFX900-NEXT: ; def v[0:1] 2765; GFX900-NEXT: ;;#ASMEND 2766; GFX900-NEXT: ;;#ASMSTART 2767; GFX900-NEXT: ; def v[1:2] 2768; GFX900-NEXT: ;;#ASMEND 2769; GFX900-NEXT: s_mov_b32 s4, 0x5040100 2770; GFX900-NEXT: v_perm_b32 v1, v0, v1, s4 2771; GFX900-NEXT: s_mov_b32 s4, 0x7060302 2772; GFX900-NEXT: v_mov_b32_e32 v3, 0 2773; GFX900-NEXT: v_perm_b32 v0, v2, v2, s4 2774; GFX900-NEXT: global_store_dwordx2 v3, v[0:1], s[16:17] 2775; GFX900-NEXT: s_waitcnt vmcnt(0) 2776; GFX900-NEXT: s_setpc_b64 s[30:31] 2777; 2778; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_4_0: 2779; GFX90A: ; %bb.0: 2780; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2781; GFX90A-NEXT: ;;#ASMSTART 2782; GFX90A-NEXT: ; def 
v[0:1] 2783; GFX90A-NEXT: ;;#ASMEND 2784; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 2785; GFX90A-NEXT: ;;#ASMSTART 2786; GFX90A-NEXT: ; def v[2:3] 2787; GFX90A-NEXT: ;;#ASMEND 2788; GFX90A-NEXT: v_perm_b32 v1, v0, v2, s4 2789; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 2790; GFX90A-NEXT: v_mov_b32_e32 v4, 0 2791; GFX90A-NEXT: v_perm_b32 v0, v3, v3, s4 2792; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 2793; GFX90A-NEXT: s_waitcnt vmcnt(0) 2794; GFX90A-NEXT: s_setpc_b64 s[30:31] 2795; 2796; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_4_0: 2797; GFX940: ; %bb.0: 2798; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2799; GFX940-NEXT: ;;#ASMSTART 2800; GFX940-NEXT: ; def v[0:1] 2801; GFX940-NEXT: ;;#ASMEND 2802; GFX940-NEXT: s_mov_b32 s2, 0x5040100 2803; GFX940-NEXT: ;;#ASMSTART 2804; GFX940-NEXT: ; def v[2:3] 2805; GFX940-NEXT: ;;#ASMEND 2806; GFX940-NEXT: v_mov_b32_e32 v4, 0 2807; GFX940-NEXT: v_perm_b32 v1, v0, v2, s2 2808; GFX940-NEXT: s_mov_b32 s2, 0x7060302 2809; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 2810; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 2811; GFX940-NEXT: s_waitcnt vmcnt(0) 2812; GFX940-NEXT: s_setpc_b64 s[30:31] 2813 %vec0 = call <4 x i16> asm "; def $0", "=v"() 2814 %vec1 = call <4 x i16> asm "; def $0", "=v"() 2815 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 4, i32 0> 2816 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 2817 ret void 2818} 2819 2820define void @v_shuffle_v4i16_v4i16__7_7_5_0(ptr addrspace(1) inreg %ptr) { 2821; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_5_0: 2822; GFX900: ; %bb.0: 2823; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2824; GFX900-NEXT: ;;#ASMSTART 2825; GFX900-NEXT: ; def v[0:1] 2826; GFX900-NEXT: ;;#ASMEND 2827; GFX900-NEXT: ;;#ASMSTART 2828; GFX900-NEXT: ; def v[1:2] 2829; GFX900-NEXT: ;;#ASMEND 2830; GFX900-NEXT: s_mov_b32 s4, 0x7060302 2831; GFX900-NEXT: v_mov_b32_e32 v3, 0 2832; GFX900-NEXT: v_alignbit_b32 v1, v0, v1, 16 2833; GFX900-NEXT: 
v_perm_b32 v0, v2, v2, s4 2834; GFX900-NEXT: global_store_dwordx2 v3, v[0:1], s[16:17] 2835; GFX900-NEXT: s_waitcnt vmcnt(0) 2836; GFX900-NEXT: s_setpc_b64 s[30:31] 2837; 2838; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_5_0: 2839; GFX90A: ; %bb.0: 2840; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2841; GFX90A-NEXT: ;;#ASMSTART 2842; GFX90A-NEXT: ; def v[0:1] 2843; GFX90A-NEXT: ;;#ASMEND 2844; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 2845; GFX90A-NEXT: v_mov_b32_e32 v4, 0 2846; GFX90A-NEXT: ;;#ASMSTART 2847; GFX90A-NEXT: ; def v[2:3] 2848; GFX90A-NEXT: ;;#ASMEND 2849; GFX90A-NEXT: v_alignbit_b32 v1, v0, v2, 16 2850; GFX90A-NEXT: v_perm_b32 v0, v3, v3, s4 2851; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 2852; GFX90A-NEXT: s_waitcnt vmcnt(0) 2853; GFX90A-NEXT: s_setpc_b64 s[30:31] 2854; 2855; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_5_0: 2856; GFX940: ; %bb.0: 2857; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2858; GFX940-NEXT: ;;#ASMSTART 2859; GFX940-NEXT: ; def v[0:1] 2860; GFX940-NEXT: ;;#ASMEND 2861; GFX940-NEXT: s_mov_b32 s2, 0x7060302 2862; GFX940-NEXT: v_mov_b32_e32 v4, 0 2863; GFX940-NEXT: ;;#ASMSTART 2864; GFX940-NEXT: ; def v[2:3] 2865; GFX940-NEXT: ;;#ASMEND 2866; GFX940-NEXT: s_nop 0 2867; GFX940-NEXT: v_alignbit_b32 v1, v0, v2, 16 2868; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 2869; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 2870; GFX940-NEXT: s_waitcnt vmcnt(0) 2871; GFX940-NEXT: s_setpc_b64 s[30:31] 2872 %vec0 = call <4 x i16> asm "; def $0", "=v"() 2873 %vec1 = call <4 x i16> asm "; def $0", "=v"() 2874 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 5, i32 0> 2875 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 2876 ret void 2877} 2878 2879define void @v_shuffle_v4i16_v4i16__7_7_6_0(ptr addrspace(1) inreg %ptr) { 2880; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_6_0: 2881; GFX900: ; %bb.0: 2882; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2883; GFX900-NEXT: 
;;#ASMSTART 2884; GFX900-NEXT: ; def v[0:1] 2885; GFX900-NEXT: ;;#ASMEND 2886; GFX900-NEXT: ;;#ASMSTART 2887; GFX900-NEXT: ; def v[1:2] 2888; GFX900-NEXT: ;;#ASMEND 2889; GFX900-NEXT: s_mov_b32 s4, 0x5040100 2890; GFX900-NEXT: v_perm_b32 v1, v0, v2, s4 2891; GFX900-NEXT: s_mov_b32 s4, 0x7060302 2892; GFX900-NEXT: v_mov_b32_e32 v3, 0 2893; GFX900-NEXT: v_perm_b32 v0, v2, v2, s4 2894; GFX900-NEXT: global_store_dwordx2 v3, v[0:1], s[16:17] 2895; GFX900-NEXT: s_waitcnt vmcnt(0) 2896; GFX900-NEXT: s_setpc_b64 s[30:31] 2897; 2898; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_6_0: 2899; GFX90A: ; %bb.0: 2900; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2901; GFX90A-NEXT: ;;#ASMSTART 2902; GFX90A-NEXT: ; def v[0:1] 2903; GFX90A-NEXT: ;;#ASMEND 2904; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 2905; GFX90A-NEXT: ;;#ASMSTART 2906; GFX90A-NEXT: ; def v[2:3] 2907; GFX90A-NEXT: ;;#ASMEND 2908; GFX90A-NEXT: v_perm_b32 v1, v0, v3, s4 2909; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 2910; GFX90A-NEXT: v_mov_b32_e32 v4, 0 2911; GFX90A-NEXT: v_perm_b32 v0, v3, v3, s4 2912; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 2913; GFX90A-NEXT: s_waitcnt vmcnt(0) 2914; GFX90A-NEXT: s_setpc_b64 s[30:31] 2915; 2916; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_6_0: 2917; GFX940: ; %bb.0: 2918; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2919; GFX940-NEXT: ;;#ASMSTART 2920; GFX940-NEXT: ; def v[0:1] 2921; GFX940-NEXT: ;;#ASMEND 2922; GFX940-NEXT: s_mov_b32 s2, 0x5040100 2923; GFX940-NEXT: ;;#ASMSTART 2924; GFX940-NEXT: ; def v[2:3] 2925; GFX940-NEXT: ;;#ASMEND 2926; GFX940-NEXT: v_mov_b32_e32 v4, 0 2927; GFX940-NEXT: v_perm_b32 v1, v0, v3, s2 2928; GFX940-NEXT: s_mov_b32 s2, 0x7060302 2929; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 2930; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 2931; GFX940-NEXT: s_waitcnt vmcnt(0) 2932; GFX940-NEXT: s_setpc_b64 s[30:31] 2933 %vec0 = call <4 x i16> asm "; def $0", "=v"() 2934 %vec1 = call <4 x i16> asm "; def $0", "=v"() 2935 %shuf = 
shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 0> 2936 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 2937 ret void 2938} 2939 2940define void @v_shuffle_v4i16_v4i16__u_1_1_1(ptr addrspace(1) inreg %ptr) { 2941; GFX900-LABEL: v_shuffle_v4i16_v4i16__u_1_1_1: 2942; GFX900: ; %bb.0: 2943; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2944; GFX900-NEXT: ;;#ASMSTART 2945; GFX900-NEXT: ; def v[0:1] 2946; GFX900-NEXT: ;;#ASMEND 2947; GFX900-NEXT: s_mov_b32 s4, 0x7060302 2948; GFX900-NEXT: v_mov_b32_e32 v2, 0 2949; GFX900-NEXT: v_perm_b32 v1, v0, v0, s4 2950; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17] 2951; GFX900-NEXT: s_waitcnt vmcnt(0) 2952; GFX900-NEXT: s_setpc_b64 s[30:31] 2953; 2954; GFX90A-LABEL: v_shuffle_v4i16_v4i16__u_1_1_1: 2955; GFX90A: ; %bb.0: 2956; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2957; GFX90A-NEXT: ;;#ASMSTART 2958; GFX90A-NEXT: ; def v[0:1] 2959; GFX90A-NEXT: ;;#ASMEND 2960; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 2961; GFX90A-NEXT: v_mov_b32_e32 v2, 0 2962; GFX90A-NEXT: v_perm_b32 v1, v0, v0, s4 2963; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17] 2964; GFX90A-NEXT: s_waitcnt vmcnt(0) 2965; GFX90A-NEXT: s_setpc_b64 s[30:31] 2966; 2967; GFX940-LABEL: v_shuffle_v4i16_v4i16__u_1_1_1: 2968; GFX940: ; %bb.0: 2969; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2970; GFX940-NEXT: ;;#ASMSTART 2971; GFX940-NEXT: ; def v[0:1] 2972; GFX940-NEXT: ;;#ASMEND 2973; GFX940-NEXT: s_mov_b32 s2, 0x7060302 2974; GFX940-NEXT: v_mov_b32_e32 v2, 0 2975; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 2976; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 2977; GFX940-NEXT: s_waitcnt vmcnt(0) 2978; GFX940-NEXT: s_setpc_b64 s[30:31] 2979 %vec0 = call <4 x i16> asm "; def $0", "=v"() 2980 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 poison, i32 1, i32 1, i32 1> 2981 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 2982 ret void 2983} 2984 2985define 
void @v_shuffle_v4i16_v4i16__0_1_1_1(ptr addrspace(1) inreg %ptr) { 2986; GFX900-LABEL: v_shuffle_v4i16_v4i16__0_1_1_1: 2987; GFX900: ; %bb.0: 2988; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2989; GFX900-NEXT: ;;#ASMSTART 2990; GFX900-NEXT: ; def v[0:1] 2991; GFX900-NEXT: ;;#ASMEND 2992; GFX900-NEXT: s_mov_b32 s4, 0x7060302 2993; GFX900-NEXT: v_mov_b32_e32 v2, 0 2994; GFX900-NEXT: v_perm_b32 v1, v0, v0, s4 2995; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17] 2996; GFX900-NEXT: s_waitcnt vmcnt(0) 2997; GFX900-NEXT: s_setpc_b64 s[30:31] 2998; 2999; GFX90A-LABEL: v_shuffle_v4i16_v4i16__0_1_1_1: 3000; GFX90A: ; %bb.0: 3001; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3002; GFX90A-NEXT: ;;#ASMSTART 3003; GFX90A-NEXT: ; def v[0:1] 3004; GFX90A-NEXT: ;;#ASMEND 3005; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 3006; GFX90A-NEXT: v_mov_b32_e32 v2, 0 3007; GFX90A-NEXT: v_perm_b32 v1, v0, v0, s4 3008; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17] 3009; GFX90A-NEXT: s_waitcnt vmcnt(0) 3010; GFX90A-NEXT: s_setpc_b64 s[30:31] 3011; 3012; GFX940-LABEL: v_shuffle_v4i16_v4i16__0_1_1_1: 3013; GFX940: ; %bb.0: 3014; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3015; GFX940-NEXT: ;;#ASMSTART 3016; GFX940-NEXT: ; def v[0:1] 3017; GFX940-NEXT: ;;#ASMEND 3018; GFX940-NEXT: s_mov_b32 s2, 0x7060302 3019; GFX940-NEXT: v_mov_b32_e32 v2, 0 3020; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 3021; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 3022; GFX940-NEXT: s_waitcnt vmcnt(0) 3023; GFX940-NEXT: s_setpc_b64 s[30:31] 3024 %vec0 = call <4 x i16> asm "; def $0", "=v"() 3025 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 1> 3026 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 3027 ret void 3028} 3029 3030define void @v_shuffle_v4i16_v4i16__1_1_1_1(ptr addrspace(1) inreg %ptr) { 3031; GFX900-LABEL: v_shuffle_v4i16_v4i16__1_1_1_1: 3032; GFX900: ; %bb.0: 3033; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) 3034; GFX900-NEXT: ;;#ASMSTART 3035; GFX900-NEXT: ; def v[0:1] 3036; GFX900-NEXT: ;;#ASMEND 3037; GFX900-NEXT: s_mov_b32 s4, 0x7060302 3038; GFX900-NEXT: v_perm_b32 v0, v0, v0, s4 3039; GFX900-NEXT: v_mov_b32_e32 v2, 0 3040; GFX900-NEXT: v_mov_b32_e32 v1, v0 3041; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17] 3042; GFX900-NEXT: s_waitcnt vmcnt(0) 3043; GFX900-NEXT: s_setpc_b64 s[30:31] 3044; 3045; GFX90A-LABEL: v_shuffle_v4i16_v4i16__1_1_1_1: 3046; GFX90A: ; %bb.0: 3047; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3048; GFX90A-NEXT: ;;#ASMSTART 3049; GFX90A-NEXT: ; def v[0:1] 3050; GFX90A-NEXT: ;;#ASMEND 3051; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 3052; GFX90A-NEXT: v_perm_b32 v0, v0, v0, s4 3053; GFX90A-NEXT: v_mov_b32_e32 v2, 0 3054; GFX90A-NEXT: v_mov_b32_e32 v1, v0 3055; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17] 3056; GFX90A-NEXT: s_waitcnt vmcnt(0) 3057; GFX90A-NEXT: s_setpc_b64 s[30:31] 3058; 3059; GFX940-LABEL: v_shuffle_v4i16_v4i16__1_1_1_1: 3060; GFX940: ; %bb.0: 3061; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3062; GFX940-NEXT: ;;#ASMSTART 3063; GFX940-NEXT: ; def v[0:1] 3064; GFX940-NEXT: ;;#ASMEND 3065; GFX940-NEXT: s_mov_b32 s2, 0x7060302 3066; GFX940-NEXT: v_perm_b32 v0, v0, v0, s2 3067; GFX940-NEXT: v_mov_b32_e32 v2, 0 3068; GFX940-NEXT: v_mov_b32_e32 v1, v0 3069; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 3070; GFX940-NEXT: s_waitcnt vmcnt(0) 3071; GFX940-NEXT: s_setpc_b64 s[30:31] 3072 %vec0 = call <4 x i16> asm "; def $0", "=v"() 3073 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1> 3074 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 3075 ret void 3076} 3077 3078define void @v_shuffle_v4i16_v4i16__2_1_1_1(ptr addrspace(1) inreg %ptr) { 3079; GFX900-LABEL: v_shuffle_v4i16_v4i16__2_1_1_1: 3080; GFX900: ; %bb.0: 3081; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3082; GFX900-NEXT: ;;#ASMSTART 3083; GFX900-NEXT: ; def 
v[0:1] 3084; GFX900-NEXT: ;;#ASMEND 3085; GFX900-NEXT: s_mov_b32 s4, 0xffff 3086; GFX900-NEXT: v_bfi_b32 v1, s4, v1, v0 3087; GFX900-NEXT: s_mov_b32 s4, 0x7060302 3088; GFX900-NEXT: v_mov_b32_e32 v3, 0 3089; GFX900-NEXT: v_perm_b32 v2, v0, v0, s4 3090; GFX900-NEXT: global_store_dwordx2 v3, v[1:2], s[16:17] 3091; GFX900-NEXT: s_waitcnt vmcnt(0) 3092; GFX900-NEXT: s_setpc_b64 s[30:31] 3093; 3094; GFX90A-LABEL: v_shuffle_v4i16_v4i16__2_1_1_1: 3095; GFX90A: ; %bb.0: 3096; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3097; GFX90A-NEXT: s_mov_b32 s4, 0xffff 3098; GFX90A-NEXT: ;;#ASMSTART 3099; GFX90A-NEXT: ; def v[0:1] 3100; GFX90A-NEXT: ;;#ASMEND 3101; GFX90A-NEXT: v_bfi_b32 v2, s4, v1, v0 3102; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 3103; GFX90A-NEXT: v_mov_b32_e32 v4, 0 3104; GFX90A-NEXT: v_perm_b32 v3, v0, v0, s4 3105; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17] 3106; GFX90A-NEXT: s_waitcnt vmcnt(0) 3107; GFX90A-NEXT: s_setpc_b64 s[30:31] 3108; 3109; GFX940-LABEL: v_shuffle_v4i16_v4i16__2_1_1_1: 3110; GFX940: ; %bb.0: 3111; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3112; GFX940-NEXT: s_mov_b32 s2, 0xffff 3113; GFX940-NEXT: ;;#ASMSTART 3114; GFX940-NEXT: ; def v[0:1] 3115; GFX940-NEXT: ;;#ASMEND 3116; GFX940-NEXT: v_mov_b32_e32 v4, 0 3117; GFX940-NEXT: v_bfi_b32 v2, s2, v1, v0 3118; GFX940-NEXT: s_mov_b32 s2, 0x7060302 3119; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 3120; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 3121; GFX940-NEXT: s_waitcnt vmcnt(0) 3122; GFX940-NEXT: s_setpc_b64 s[30:31] 3123 %vec0 = call <4 x i16> asm "; def $0", "=v"() 3124 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 2, i32 1, i32 1, i32 1> 3125 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 3126 ret void 3127} 3128 3129define void @v_shuffle_v4i16_v4i16__3_1_1_1(ptr addrspace(1) inreg %ptr) { 3130; GFX900-LABEL: v_shuffle_v4i16_v4i16__3_1_1_1: 3131; GFX900: ; %bb.0: 3132; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) 3133; GFX900-NEXT: ;;#ASMSTART 3134; GFX900-NEXT: ; def v[0:1] 3135; GFX900-NEXT: ;;#ASMEND 3136; GFX900-NEXT: s_mov_b32 s4, 0x7060302 3137; GFX900-NEXT: v_mov_b32_e32 v3, 0 3138; GFX900-NEXT: v_perm_b32 v1, v0, v1, s4 3139; GFX900-NEXT: v_perm_b32 v2, v0, v0, s4 3140; GFX900-NEXT: global_store_dwordx2 v3, v[1:2], s[16:17] 3141; GFX900-NEXT: s_waitcnt vmcnt(0) 3142; GFX900-NEXT: s_setpc_b64 s[30:31] 3143; 3144; GFX90A-LABEL: v_shuffle_v4i16_v4i16__3_1_1_1: 3145; GFX90A: ; %bb.0: 3146; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3147; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 3148; GFX90A-NEXT: v_mov_b32_e32 v4, 0 3149; GFX90A-NEXT: ;;#ASMSTART 3150; GFX90A-NEXT: ; def v[0:1] 3151; GFX90A-NEXT: ;;#ASMEND 3152; GFX90A-NEXT: v_perm_b32 v2, v0, v1, s4 3153; GFX90A-NEXT: v_perm_b32 v3, v0, v0, s4 3154; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17] 3155; GFX90A-NEXT: s_waitcnt vmcnt(0) 3156; GFX90A-NEXT: s_setpc_b64 s[30:31] 3157; 3158; GFX940-LABEL: v_shuffle_v4i16_v4i16__3_1_1_1: 3159; GFX940: ; %bb.0: 3160; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3161; GFX940-NEXT: s_mov_b32 s2, 0x7060302 3162; GFX940-NEXT: v_mov_b32_e32 v4, 0 3163; GFX940-NEXT: ;;#ASMSTART 3164; GFX940-NEXT: ; def v[0:1] 3165; GFX940-NEXT: ;;#ASMEND 3166; GFX940-NEXT: s_nop 0 3167; GFX940-NEXT: v_perm_b32 v2, v0, v1, s2 3168; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 3169; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 3170; GFX940-NEXT: s_waitcnt vmcnt(0) 3171; GFX940-NEXT: s_setpc_b64 s[30:31] 3172 %vec0 = call <4 x i16> asm "; def $0", "=v"() 3173 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 3, i32 1, i32 1, i32 1> 3174 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 3175 ret void 3176} 3177 3178define void @v_shuffle_v4i16_v4i16__4_1_1_1(ptr addrspace(1) inreg %ptr) { 3179; GFX900-LABEL: v_shuffle_v4i16_v4i16__4_1_1_1: 3180; GFX900: ; %bb.0: 3181; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3182; 
GFX900-NEXT: ;;#ASMSTART 3183; GFX900-NEXT: ; def v[0:1] 3184; GFX900-NEXT: ;;#ASMEND 3185; GFX900-NEXT: s_mov_b32 s4, 0x7060302 3186; GFX900-NEXT: v_mov_b32_e32 v2, 0 3187; GFX900-NEXT: v_perm_b32 v1, v0, v0, s4 3188; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17] 3189; GFX900-NEXT: s_waitcnt vmcnt(0) 3190; GFX900-NEXT: s_setpc_b64 s[30:31] 3191; 3192; GFX90A-LABEL: v_shuffle_v4i16_v4i16__4_1_1_1: 3193; GFX90A: ; %bb.0: 3194; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3195; GFX90A-NEXT: ;;#ASMSTART 3196; GFX90A-NEXT: ; def v[0:1] 3197; GFX90A-NEXT: ;;#ASMEND 3198; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 3199; GFX90A-NEXT: v_mov_b32_e32 v2, 0 3200; GFX90A-NEXT: v_perm_b32 v1, v0, v0, s4 3201; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17] 3202; GFX90A-NEXT: s_waitcnt vmcnt(0) 3203; GFX90A-NEXT: s_setpc_b64 s[30:31] 3204; 3205; GFX940-LABEL: v_shuffle_v4i16_v4i16__4_1_1_1: 3206; GFX940: ; %bb.0: 3207; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3208; GFX940-NEXT: ;;#ASMSTART 3209; GFX940-NEXT: ; def v[0:1] 3210; GFX940-NEXT: ;;#ASMEND 3211; GFX940-NEXT: s_mov_b32 s2, 0x7060302 3212; GFX940-NEXT: v_mov_b32_e32 v2, 0 3213; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 3214; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 3215; GFX940-NEXT: s_waitcnt vmcnt(0) 3216; GFX940-NEXT: s_setpc_b64 s[30:31] 3217 %vec0 = call <4 x i16> asm "; def $0", "=v"() 3218 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 4, i32 1, i32 1, i32 1> 3219 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 3220 ret void 3221} 3222 3223define void @v_shuffle_v4i16_v4i16__5_1_1_1(ptr addrspace(1) inreg %ptr) { 3224; GFX900-LABEL: v_shuffle_v4i16_v4i16__5_1_1_1: 3225; GFX900: ; %bb.0: 3226; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3227; GFX900-NEXT: ;;#ASMSTART 3228; GFX900-NEXT: ; def v[0:1] 3229; GFX900-NEXT: ;;#ASMEND 3230; GFX900-NEXT: ;;#ASMSTART 3231; GFX900-NEXT: ; def v[1:2] 3232; GFX900-NEXT: ;;#ASMEND 3233; 
GFX900-NEXT: s_mov_b32 s4, 0x7060302 3234; GFX900-NEXT: v_mov_b32_e32 v3, 0 3235; GFX900-NEXT: v_perm_b32 v1, v0, v1, s4 3236; GFX900-NEXT: v_perm_b32 v2, v0, v0, s4 3237; GFX900-NEXT: global_store_dwordx2 v3, v[1:2], s[16:17] 3238; GFX900-NEXT: s_waitcnt vmcnt(0) 3239; GFX900-NEXT: s_setpc_b64 s[30:31] 3240; 3241; GFX90A-LABEL: v_shuffle_v4i16_v4i16__5_1_1_1: 3242; GFX90A: ; %bb.0: 3243; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3244; GFX90A-NEXT: ;;#ASMSTART 3245; GFX90A-NEXT: ; def v[2:3] 3246; GFX90A-NEXT: ;;#ASMEND 3247; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 3248; GFX90A-NEXT: v_mov_b32_e32 v4, 0 3249; GFX90A-NEXT: ;;#ASMSTART 3250; GFX90A-NEXT: ; def v[0:1] 3251; GFX90A-NEXT: ;;#ASMEND 3252; GFX90A-NEXT: v_perm_b32 v2, v0, v2, s4 3253; GFX90A-NEXT: v_perm_b32 v3, v0, v0, s4 3254; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17] 3255; GFX90A-NEXT: s_waitcnt vmcnt(0) 3256; GFX90A-NEXT: s_setpc_b64 s[30:31] 3257; 3258; GFX940-LABEL: v_shuffle_v4i16_v4i16__5_1_1_1: 3259; GFX940: ; %bb.0: 3260; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3261; GFX940-NEXT: ;;#ASMSTART 3262; GFX940-NEXT: ; def v[2:3] 3263; GFX940-NEXT: ;;#ASMEND 3264; GFX940-NEXT: s_mov_b32 s2, 0x7060302 3265; GFX940-NEXT: v_mov_b32_e32 v4, 0 3266; GFX940-NEXT: ;;#ASMSTART 3267; GFX940-NEXT: ; def v[0:1] 3268; GFX940-NEXT: ;;#ASMEND 3269; GFX940-NEXT: s_nop 0 3270; GFX940-NEXT: v_perm_b32 v2, v0, v2, s2 3271; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 3272; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 3273; GFX940-NEXT: s_waitcnt vmcnt(0) 3274; GFX940-NEXT: s_setpc_b64 s[30:31] 3275 %vec0 = call <4 x i16> asm "; def $0", "=v"() 3276 %vec1 = call <4 x i16> asm "; def $0", "=v"() 3277 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 5, i32 1, i32 1, i32 1> 3278 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 3279 ret void 3280} 3281 3282define void @v_shuffle_v4i16_v4i16__6_1_1_1(ptr addrspace(1) inreg %ptr) { 3283; GFX900-LABEL: 
v_shuffle_v4i16_v4i16__6_1_1_1: 3284; GFX900: ; %bb.0: 3285; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3286; GFX900-NEXT: ;;#ASMSTART 3287; GFX900-NEXT: ; def v[0:1] 3288; GFX900-NEXT: ;;#ASMEND 3289; GFX900-NEXT: ;;#ASMSTART 3290; GFX900-NEXT: ; def v[1:2] 3291; GFX900-NEXT: ;;#ASMEND 3292; GFX900-NEXT: s_mov_b32 s4, 0xffff 3293; GFX900-NEXT: v_bfi_b32 v1, s4, v2, v0 3294; GFX900-NEXT: s_mov_b32 s4, 0x7060302 3295; GFX900-NEXT: v_mov_b32_e32 v3, 0 3296; GFX900-NEXT: v_perm_b32 v2, v0, v0, s4 3297; GFX900-NEXT: global_store_dwordx2 v3, v[1:2], s[16:17] 3298; GFX900-NEXT: s_waitcnt vmcnt(0) 3299; GFX900-NEXT: s_setpc_b64 s[30:31] 3300; 3301; GFX90A-LABEL: v_shuffle_v4i16_v4i16__6_1_1_1: 3302; GFX90A: ; %bb.0: 3303; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3304; GFX90A-NEXT: ;;#ASMSTART 3305; GFX90A-NEXT: ; def v[2:3] 3306; GFX90A-NEXT: ;;#ASMEND 3307; GFX90A-NEXT: s_mov_b32 s4, 0xffff 3308; GFX90A-NEXT: ;;#ASMSTART 3309; GFX90A-NEXT: ; def v[0:1] 3310; GFX90A-NEXT: ;;#ASMEND 3311; GFX90A-NEXT: v_bfi_b32 v2, s4, v3, v0 3312; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 3313; GFX90A-NEXT: v_mov_b32_e32 v4, 0 3314; GFX90A-NEXT: v_perm_b32 v3, v0, v0, s4 3315; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17] 3316; GFX90A-NEXT: s_waitcnt vmcnt(0) 3317; GFX90A-NEXT: s_setpc_b64 s[30:31] 3318; 3319; GFX940-LABEL: v_shuffle_v4i16_v4i16__6_1_1_1: 3320; GFX940: ; %bb.0: 3321; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3322; GFX940-NEXT: ;;#ASMSTART 3323; GFX940-NEXT: ; def v[2:3] 3324; GFX940-NEXT: ;;#ASMEND 3325; GFX940-NEXT: s_mov_b32 s2, 0xffff 3326; GFX940-NEXT: ;;#ASMSTART 3327; GFX940-NEXT: ; def v[0:1] 3328; GFX940-NEXT: ;;#ASMEND 3329; GFX940-NEXT: v_mov_b32_e32 v4, 0 3330; GFX940-NEXT: v_bfi_b32 v2, s2, v3, v0 3331; GFX940-NEXT: s_mov_b32 s2, 0x7060302 3332; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 3333; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 3334; GFX940-NEXT: s_waitcnt vmcnt(0) 3335; GFX940-NEXT: s_setpc_b64 
s[30:31] 3336 %vec0 = call <4 x i16> asm "; def $0", "=v"() 3337 %vec1 = call <4 x i16> asm "; def $0", "=v"() 3338 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 6, i32 1, i32 1, i32 1> 3339 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 3340 ret void 3341} 3342 3343define void @v_shuffle_v4i16_v4i16__7_1_1_1(ptr addrspace(1) inreg %ptr) { 3344; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_1_1_1: 3345; GFX900: ; %bb.0: 3346; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3347; GFX900-NEXT: ;;#ASMSTART 3348; GFX900-NEXT: ; def v[0:1] 3349; GFX900-NEXT: ;;#ASMEND 3350; GFX900-NEXT: ;;#ASMSTART 3351; GFX900-NEXT: ; def v[1:2] 3352; GFX900-NEXT: ;;#ASMEND 3353; GFX900-NEXT: s_mov_b32 s4, 0x7060302 3354; GFX900-NEXT: v_mov_b32_e32 v3, 0 3355; GFX900-NEXT: v_perm_b32 v1, v0, v2, s4 3356; GFX900-NEXT: v_perm_b32 v2, v0, v0, s4 3357; GFX900-NEXT: global_store_dwordx2 v3, v[1:2], s[16:17] 3358; GFX900-NEXT: s_waitcnt vmcnt(0) 3359; GFX900-NEXT: s_setpc_b64 s[30:31] 3360; 3361; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_1_1_1: 3362; GFX90A: ; %bb.0: 3363; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3364; GFX90A-NEXT: ;;#ASMSTART 3365; GFX90A-NEXT: ; def v[2:3] 3366; GFX90A-NEXT: ;;#ASMEND 3367; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 3368; GFX90A-NEXT: v_mov_b32_e32 v4, 0 3369; GFX90A-NEXT: ;;#ASMSTART 3370; GFX90A-NEXT: ; def v[0:1] 3371; GFX90A-NEXT: ;;#ASMEND 3372; GFX90A-NEXT: v_perm_b32 v2, v0, v3, s4 3373; GFX90A-NEXT: v_perm_b32 v3, v0, v0, s4 3374; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17] 3375; GFX90A-NEXT: s_waitcnt vmcnt(0) 3376; GFX90A-NEXT: s_setpc_b64 s[30:31] 3377; 3378; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_1_1_1: 3379; GFX940: ; %bb.0: 3380; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3381; GFX940-NEXT: ;;#ASMSTART 3382; GFX940-NEXT: ; def v[2:3] 3383; GFX940-NEXT: ;;#ASMEND 3384; GFX940-NEXT: s_mov_b32 s2, 0x7060302 3385; GFX940-NEXT: v_mov_b32_e32 v4, 0 3386; GFX940-NEXT: ;;#ASMSTART 3387; 
GFX940-NEXT: ; def v[0:1] 3388; GFX940-NEXT: ;;#ASMEND 3389; GFX940-NEXT: s_nop 0 3390; GFX940-NEXT: v_perm_b32 v2, v0, v3, s2 3391; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 3392; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 3393; GFX940-NEXT: s_waitcnt vmcnt(0) 3394; GFX940-NEXT: s_setpc_b64 s[30:31] 3395 %vec0 = call <4 x i16> asm "; def $0", "=v"() 3396 %vec1 = call <4 x i16> asm "; def $0", "=v"() 3397 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 1, i32 1, i32 1> 3398 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 3399 ret void 3400} 3401 3402define void @v_shuffle_v4i16_v4i16__7_u_1_1(ptr addrspace(1) inreg %ptr) { 3403; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_u_1_1: 3404; GFX900: ; %bb.0: 3405; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3406; GFX900-NEXT: ;;#ASMSTART 3407; GFX900-NEXT: ; def v[0:1] 3408; GFX900-NEXT: ;;#ASMEND 3409; GFX900-NEXT: ;;#ASMSTART 3410; GFX900-NEXT: ; def v[1:2] 3411; GFX900-NEXT: ;;#ASMEND 3412; GFX900-NEXT: s_mov_b32 s4, 0x7060302 3413; GFX900-NEXT: v_mov_b32_e32 v3, 0 3414; GFX900-NEXT: v_perm_b32 v1, v0, v0, s4 3415; GFX900-NEXT: v_alignbit_b32 v0, s4, v2, 16 3416; GFX900-NEXT: global_store_dwordx2 v3, v[0:1], s[16:17] 3417; GFX900-NEXT: s_waitcnt vmcnt(0) 3418; GFX900-NEXT: s_setpc_b64 s[30:31] 3419; 3420; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_u_1_1: 3421; GFX90A: ; %bb.0: 3422; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3423; GFX90A-NEXT: ;;#ASMSTART 3424; GFX90A-NEXT: ; def v[0:1] 3425; GFX90A-NEXT: ;;#ASMEND 3426; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 3427; GFX90A-NEXT: v_mov_b32_e32 v4, 0 3428; GFX90A-NEXT: ;;#ASMSTART 3429; GFX90A-NEXT: ; def v[2:3] 3430; GFX90A-NEXT: ;;#ASMEND 3431; GFX90A-NEXT: v_perm_b32 v1, v0, v0, s4 3432; GFX90A-NEXT: v_alignbit_b32 v0, s4, v3, 16 3433; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 3434; GFX90A-NEXT: s_waitcnt vmcnt(0) 3435; GFX90A-NEXT: s_setpc_b64 s[30:31] 3436; 3437; GFX940-LABEL: 
v_shuffle_v4i16_v4i16__7_u_1_1: 3438; GFX940: ; %bb.0: 3439; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3440; GFX940-NEXT: ;;#ASMSTART 3441; GFX940-NEXT: ; def v[0:1] 3442; GFX940-NEXT: ;;#ASMEND 3443; GFX940-NEXT: s_mov_b32 s2, 0x7060302 3444; GFX940-NEXT: v_mov_b32_e32 v4, 0 3445; GFX940-NEXT: ;;#ASMSTART 3446; GFX940-NEXT: ; def v[2:3] 3447; GFX940-NEXT: ;;#ASMEND 3448; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 3449; GFX940-NEXT: v_alignbit_b32 v0, s0, v3, 16 3450; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 3451; GFX940-NEXT: s_waitcnt vmcnt(0) 3452; GFX940-NEXT: s_setpc_b64 s[30:31] 3453 %vec0 = call <4 x i16> asm "; def $0", "=v"() 3454 %vec1 = call <4 x i16> asm "; def $0", "=v"() 3455 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 poison, i32 1, i32 1> 3456 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 3457 ret void 3458} 3459 3460define void @v_shuffle_v4i16_v4i16__7_0_1_1(ptr addrspace(1) inreg %ptr) { 3461; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_0_1_1: 3462; GFX900: ; %bb.0: 3463; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3464; GFX900-NEXT: ;;#ASMSTART 3465; GFX900-NEXT: ; def v[0:1] 3466; GFX900-NEXT: ;;#ASMEND 3467; GFX900-NEXT: ;;#ASMSTART 3468; GFX900-NEXT: ; def v[1:2] 3469; GFX900-NEXT: ;;#ASMEND 3470; GFX900-NEXT: s_mov_b32 s4, 0x7060302 3471; GFX900-NEXT: v_mov_b32_e32 v3, 0 3472; GFX900-NEXT: v_perm_b32 v1, v0, v0, s4 3473; GFX900-NEXT: v_alignbit_b32 v0, v0, v2, 16 3474; GFX900-NEXT: global_store_dwordx2 v3, v[0:1], s[16:17] 3475; GFX900-NEXT: s_waitcnt vmcnt(0) 3476; GFX900-NEXT: s_setpc_b64 s[30:31] 3477; 3478; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_0_1_1: 3479; GFX90A: ; %bb.0: 3480; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3481; GFX90A-NEXT: ;;#ASMSTART 3482; GFX90A-NEXT: ; def v[0:1] 3483; GFX90A-NEXT: ;;#ASMEND 3484; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 3485; GFX90A-NEXT: v_mov_b32_e32 v4, 0 3486; GFX90A-NEXT: ;;#ASMSTART 3487; GFX90A-NEXT: ; def v[2:3] 
3488; GFX90A-NEXT: ;;#ASMEND 3489; GFX90A-NEXT: v_perm_b32 v1, v0, v0, s4 3490; GFX90A-NEXT: v_alignbit_b32 v0, v0, v3, 16 3491; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 3492; GFX90A-NEXT: s_waitcnt vmcnt(0) 3493; GFX90A-NEXT: s_setpc_b64 s[30:31] 3494; 3495; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_0_1_1: 3496; GFX940: ; %bb.0: 3497; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3498; GFX940-NEXT: ;;#ASMSTART 3499; GFX940-NEXT: ; def v[0:1] 3500; GFX940-NEXT: ;;#ASMEND 3501; GFX940-NEXT: s_mov_b32 s2, 0x7060302 3502; GFX940-NEXT: v_mov_b32_e32 v4, 0 3503; GFX940-NEXT: ;;#ASMSTART 3504; GFX940-NEXT: ; def v[2:3] 3505; GFX940-NEXT: ;;#ASMEND 3506; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 3507; GFX940-NEXT: v_alignbit_b32 v0, v0, v3, 16 3508; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 3509; GFX940-NEXT: s_waitcnt vmcnt(0) 3510; GFX940-NEXT: s_setpc_b64 s[30:31] 3511 %vec0 = call <4 x i16> asm "; def $0", "=v"() 3512 %vec1 = call <4 x i16> asm "; def $0", "=v"() 3513 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 0, i32 1, i32 1> 3514 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 3515 ret void 3516} 3517 3518define void @v_shuffle_v4i16_v4i16__7_2_1_1(ptr addrspace(1) inreg %ptr) { 3519; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_2_1_1: 3520; GFX900: ; %bb.0: 3521; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3522; GFX900-NEXT: ;;#ASMSTART 3523; GFX900-NEXT: ; def v[0:1] 3524; GFX900-NEXT: ;;#ASMEND 3525; GFX900-NEXT: ;;#ASMSTART 3526; GFX900-NEXT: ; def v[2:3] 3527; GFX900-NEXT: ;;#ASMEND 3528; GFX900-NEXT: s_mov_b32 s4, 0x7060302 3529; GFX900-NEXT: v_mov_b32_e32 v4, 0 3530; GFX900-NEXT: v_perm_b32 v2, v0, v0, s4 3531; GFX900-NEXT: v_alignbit_b32 v1, v1, v3, 16 3532; GFX900-NEXT: global_store_dwordx2 v4, v[1:2], s[16:17] 3533; GFX900-NEXT: s_waitcnt vmcnt(0) 3534; GFX900-NEXT: s_setpc_b64 s[30:31] 3535; 3536; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_2_1_1: 3537; GFX90A: ; %bb.0: 3538; 
GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3539; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 3540; GFX90A-NEXT: v_mov_b32_e32 v6, 0 3541; GFX90A-NEXT: ;;#ASMSTART 3542; GFX90A-NEXT: ; def v[0:1] 3543; GFX90A-NEXT: ;;#ASMEND 3544; GFX90A-NEXT: ;;#ASMSTART 3545; GFX90A-NEXT: ; def v[2:3] 3546; GFX90A-NEXT: ;;#ASMEND 3547; GFX90A-NEXT: v_perm_b32 v5, v0, v0, s4 3548; GFX90A-NEXT: v_alignbit_b32 v4, v1, v3, 16 3549; GFX90A-NEXT: global_store_dwordx2 v6, v[4:5], s[16:17] 3550; GFX90A-NEXT: s_waitcnt vmcnt(0) 3551; GFX90A-NEXT: s_setpc_b64 s[30:31] 3552; 3553; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_2_1_1: 3554; GFX940: ; %bb.0: 3555; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3556; GFX940-NEXT: s_mov_b32 s2, 0x7060302 3557; GFX940-NEXT: v_mov_b32_e32 v6, 0 3558; GFX940-NEXT: ;;#ASMSTART 3559; GFX940-NEXT: ; def v[0:1] 3560; GFX940-NEXT: ;;#ASMEND 3561; GFX940-NEXT: ;;#ASMSTART 3562; GFX940-NEXT: ; def v[2:3] 3563; GFX940-NEXT: ;;#ASMEND 3564; GFX940-NEXT: s_nop 0 3565; GFX940-NEXT: v_perm_b32 v5, v0, v0, s2 3566; GFX940-NEXT: v_alignbit_b32 v4, v1, v3, 16 3567; GFX940-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] sc0 sc1 3568; GFX940-NEXT: s_waitcnt vmcnt(0) 3569; GFX940-NEXT: s_setpc_b64 s[30:31] 3570 %vec0 = call <4 x i16> asm "; def $0", "=v"() 3571 %vec1 = call <4 x i16> asm "; def $0", "=v"() 3572 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 2, i32 1, i32 1> 3573 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 3574 ret void 3575} 3576 3577define void @v_shuffle_v4i16_v4i16__7_3_1_1(ptr addrspace(1) inreg %ptr) { 3578; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_3_1_1: 3579; GFX900: ; %bb.0: 3580; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3581; GFX900-NEXT: ;;#ASMSTART 3582; GFX900-NEXT: ; def v[0:1] 3583; GFX900-NEXT: ;;#ASMEND 3584; GFX900-NEXT: ;;#ASMSTART 3585; GFX900-NEXT: ; def v[2:3] 3586; GFX900-NEXT: ;;#ASMEND 3587; GFX900-NEXT: s_mov_b32 s4, 0x7060302 3588; GFX900-NEXT: v_mov_b32_e32 v4, 0 3589; 
GFX900-NEXT: v_perm_b32 v1, v1, v3, s4 3590; GFX900-NEXT: v_perm_b32 v2, v0, v0, s4 3591; GFX900-NEXT: global_store_dwordx2 v4, v[1:2], s[16:17] 3592; GFX900-NEXT: s_waitcnt vmcnt(0) 3593; GFX900-NEXT: s_setpc_b64 s[30:31] 3594; 3595; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_3_1_1: 3596; GFX90A: ; %bb.0: 3597; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3598; GFX90A-NEXT: ;;#ASMSTART 3599; GFX90A-NEXT: ; def v[2:3] 3600; GFX90A-NEXT: ;;#ASMEND 3601; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 3602; GFX90A-NEXT: v_mov_b32_e32 v4, 0 3603; GFX90A-NEXT: ;;#ASMSTART 3604; GFX90A-NEXT: ; def v[0:1] 3605; GFX90A-NEXT: ;;#ASMEND 3606; GFX90A-NEXT: v_perm_b32 v2, v1, v3, s4 3607; GFX90A-NEXT: v_perm_b32 v3, v0, v0, s4 3608; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17] 3609; GFX90A-NEXT: s_waitcnt vmcnt(0) 3610; GFX90A-NEXT: s_setpc_b64 s[30:31] 3611; 3612; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_3_1_1: 3613; GFX940: ; %bb.0: 3614; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3615; GFX940-NEXT: ;;#ASMSTART 3616; GFX940-NEXT: ; def v[2:3] 3617; GFX940-NEXT: ;;#ASMEND 3618; GFX940-NEXT: s_mov_b32 s2, 0x7060302 3619; GFX940-NEXT: v_mov_b32_e32 v4, 0 3620; GFX940-NEXT: ;;#ASMSTART 3621; GFX940-NEXT: ; def v[0:1] 3622; GFX940-NEXT: ;;#ASMEND 3623; GFX940-NEXT: s_nop 0 3624; GFX940-NEXT: v_perm_b32 v2, v1, v3, s2 3625; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 3626; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 3627; GFX940-NEXT: s_waitcnt vmcnt(0) 3628; GFX940-NEXT: s_setpc_b64 s[30:31] 3629 %vec0 = call <4 x i16> asm "; def $0", "=v"() 3630 %vec1 = call <4 x i16> asm "; def $0", "=v"() 3631 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 3, i32 1, i32 1> 3632 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 3633 ret void 3634} 3635 3636define void @v_shuffle_v4i16_v4i16__7_4_1_1(ptr addrspace(1) inreg %ptr) { 3637; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_4_1_1: 3638; GFX900: ; %bb.0: 3639; GFX900-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) 3640; GFX900-NEXT: ;;#ASMSTART 3641; GFX900-NEXT: ; def v[0:1] 3642; GFX900-NEXT: ;;#ASMEND 3643; GFX900-NEXT: ;;#ASMSTART 3644; GFX900-NEXT: ; def v[1:2] 3645; GFX900-NEXT: ;;#ASMEND 3646; GFX900-NEXT: s_mov_b32 s4, 0x7060302 3647; GFX900-NEXT: v_mov_b32_e32 v4, 0 3648; GFX900-NEXT: v_perm_b32 v3, v0, v0, s4 3649; GFX900-NEXT: v_alignbit_b32 v2, v1, v2, 16 3650; GFX900-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17] 3651; GFX900-NEXT: s_waitcnt vmcnt(0) 3652; GFX900-NEXT: s_setpc_b64 s[30:31] 3653; 3654; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_4_1_1: 3655; GFX90A: ; %bb.0: 3656; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3657; GFX90A-NEXT: ;;#ASMSTART 3658; GFX90A-NEXT: ; def v[0:1] 3659; GFX90A-NEXT: ;;#ASMEND 3660; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 3661; GFX90A-NEXT: v_mov_b32_e32 v4, 0 3662; GFX90A-NEXT: ;;#ASMSTART 3663; GFX90A-NEXT: ; def v[2:3] 3664; GFX90A-NEXT: ;;#ASMEND 3665; GFX90A-NEXT: v_perm_b32 v1, v0, v0, s4 3666; GFX90A-NEXT: v_alignbit_b32 v0, v2, v3, 16 3667; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 3668; GFX90A-NEXT: s_waitcnt vmcnt(0) 3669; GFX90A-NEXT: s_setpc_b64 s[30:31] 3670; 3671; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_4_1_1: 3672; GFX940: ; %bb.0: 3673; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3674; GFX940-NEXT: ;;#ASMSTART 3675; GFX940-NEXT: ; def v[0:1] 3676; GFX940-NEXT: ;;#ASMEND 3677; GFX940-NEXT: s_mov_b32 s2, 0x7060302 3678; GFX940-NEXT: v_mov_b32_e32 v4, 0 3679; GFX940-NEXT: ;;#ASMSTART 3680; GFX940-NEXT: ; def v[2:3] 3681; GFX940-NEXT: ;;#ASMEND 3682; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 3683; GFX940-NEXT: v_alignbit_b32 v0, v2, v3, 16 3684; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 3685; GFX940-NEXT: s_waitcnt vmcnt(0) 3686; GFX940-NEXT: s_setpc_b64 s[30:31] 3687 %vec0 = call <4 x i16> asm "; def $0", "=v"() 3688 %vec1 = call <4 x i16> asm "; def $0", "=v"() 3689 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 
4, i32 1, i32 1> 3690 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 3691 ret void 3692} 3693 3694define void @v_shuffle_v4i16_v4i16__7_5_1_1(ptr addrspace(1) inreg %ptr) { 3695; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_5_1_1: 3696; GFX900: ; %bb.0: 3697; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3698; GFX900-NEXT: ;;#ASMSTART 3699; GFX900-NEXT: ; def v[0:1] 3700; GFX900-NEXT: ;;#ASMEND 3701; GFX900-NEXT: ;;#ASMSTART 3702; GFX900-NEXT: ; def v[1:2] 3703; GFX900-NEXT: ;;#ASMEND 3704; GFX900-NEXT: s_mov_b32 s4, 0x7060302 3705; GFX900-NEXT: v_mov_b32_e32 v3, 0 3706; GFX900-NEXT: v_perm_b32 v1, v1, v2, s4 3707; GFX900-NEXT: v_perm_b32 v2, v0, v0, s4 3708; GFX900-NEXT: global_store_dwordx2 v3, v[1:2], s[16:17] 3709; GFX900-NEXT: s_waitcnt vmcnt(0) 3710; GFX900-NEXT: s_setpc_b64 s[30:31] 3711; 3712; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_5_1_1: 3713; GFX90A: ; %bb.0: 3714; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3715; GFX90A-NEXT: ;;#ASMSTART 3716; GFX90A-NEXT: ; def v[2:3] 3717; GFX90A-NEXT: ;;#ASMEND 3718; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 3719; GFX90A-NEXT: v_mov_b32_e32 v4, 0 3720; GFX90A-NEXT: ;;#ASMSTART 3721; GFX90A-NEXT: ; def v[0:1] 3722; GFX90A-NEXT: ;;#ASMEND 3723; GFX90A-NEXT: v_perm_b32 v2, v2, v3, s4 3724; GFX90A-NEXT: v_perm_b32 v3, v0, v0, s4 3725; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17] 3726; GFX90A-NEXT: s_waitcnt vmcnt(0) 3727; GFX90A-NEXT: s_setpc_b64 s[30:31] 3728; 3729; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_5_1_1: 3730; GFX940: ; %bb.0: 3731; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3732; GFX940-NEXT: ;;#ASMSTART 3733; GFX940-NEXT: ; def v[2:3] 3734; GFX940-NEXT: ;;#ASMEND 3735; GFX940-NEXT: s_mov_b32 s2, 0x7060302 3736; GFX940-NEXT: v_mov_b32_e32 v4, 0 3737; GFX940-NEXT: ;;#ASMSTART 3738; GFX940-NEXT: ; def v[0:1] 3739; GFX940-NEXT: ;;#ASMEND 3740; GFX940-NEXT: v_perm_b32 v2, v2, v3, s2 3741; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 3742; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] 
sc0 sc1 3743; GFX940-NEXT: s_waitcnt vmcnt(0) 3744; GFX940-NEXT: s_setpc_b64 s[30:31] 3745 %vec0 = call <4 x i16> asm "; def $0", "=v"() 3746 %vec1 = call <4 x i16> asm "; def $0", "=v"() 3747 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 5, i32 1, i32 1> 3748 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 3749 ret void 3750} 3751 3752define void @v_shuffle_v4i16_v4i16__7_6_1_1(ptr addrspace(1) inreg %ptr) { 3753; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_6_1_1: 3754; GFX900: ; %bb.0: 3755; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3756; GFX900-NEXT: ;;#ASMSTART 3757; GFX900-NEXT: ; def v[0:1] 3758; GFX900-NEXT: ;;#ASMEND 3759; GFX900-NEXT: ;;#ASMSTART 3760; GFX900-NEXT: ; def v[1:2] 3761; GFX900-NEXT: ;;#ASMEND 3762; GFX900-NEXT: s_mov_b32 s4, 0x7060302 3763; GFX900-NEXT: v_mov_b32_e32 v3, 0 3764; GFX900-NEXT: v_perm_b32 v1, v0, v0, s4 3765; GFX900-NEXT: v_alignbit_b32 v0, v2, v2, 16 3766; GFX900-NEXT: global_store_dwordx2 v3, v[0:1], s[16:17] 3767; GFX900-NEXT: s_waitcnt vmcnt(0) 3768; GFX900-NEXT: s_setpc_b64 s[30:31] 3769; 3770; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_6_1_1: 3771; GFX90A: ; %bb.0: 3772; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3773; GFX90A-NEXT: ;;#ASMSTART 3774; GFX90A-NEXT: ; def v[0:1] 3775; GFX90A-NEXT: ;;#ASMEND 3776; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 3777; GFX90A-NEXT: v_mov_b32_e32 v4, 0 3778; GFX90A-NEXT: ;;#ASMSTART 3779; GFX90A-NEXT: ; def v[2:3] 3780; GFX90A-NEXT: ;;#ASMEND 3781; GFX90A-NEXT: v_perm_b32 v1, v0, v0, s4 3782; GFX90A-NEXT: v_alignbit_b32 v0, v3, v3, 16 3783; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 3784; GFX90A-NEXT: s_waitcnt vmcnt(0) 3785; GFX90A-NEXT: s_setpc_b64 s[30:31] 3786; 3787; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_6_1_1: 3788; GFX940: ; %bb.0: 3789; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3790; GFX940-NEXT: ;;#ASMSTART 3791; GFX940-NEXT: ; def v[0:1] 3792; GFX940-NEXT: ;;#ASMEND 3793; GFX940-NEXT: s_mov_b32 s2, 0x7060302 
3794; GFX940-NEXT: v_mov_b32_e32 v4, 0 3795; GFX940-NEXT: ;;#ASMSTART 3796; GFX940-NEXT: ; def v[2:3] 3797; GFX940-NEXT: ;;#ASMEND 3798; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 3799; GFX940-NEXT: v_alignbit_b32 v0, v3, v3, 16 3800; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 3801; GFX940-NEXT: s_waitcnt vmcnt(0) 3802; GFX940-NEXT: s_setpc_b64 s[30:31] 3803 %vec0 = call <4 x i16> asm "; def $0", "=v"() 3804 %vec1 = call <4 x i16> asm "; def $0", "=v"() 3805 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 6, i32 1, i32 1> 3806 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 3807 ret void 3808} 3809 3810define void @v_shuffle_v4i16_v4i16__7_7_1_1(ptr addrspace(1) inreg %ptr) { 3811; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_1_1: 3812; GFX900: ; %bb.0: 3813; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3814; GFX900-NEXT: ;;#ASMSTART 3815; GFX900-NEXT: ; def v[0:1] 3816; GFX900-NEXT: ;;#ASMEND 3817; GFX900-NEXT: ;;#ASMSTART 3818; GFX900-NEXT: ; def v[1:2] 3819; GFX900-NEXT: ;;#ASMEND 3820; GFX900-NEXT: s_mov_b32 s4, 0x7060302 3821; GFX900-NEXT: v_mov_b32_e32 v3, 0 3822; GFX900-NEXT: v_perm_b32 v1, v0, v0, s4 3823; GFX900-NEXT: v_perm_b32 v0, v2, v2, s4 3824; GFX900-NEXT: global_store_dwordx2 v3, v[0:1], s[16:17] 3825; GFX900-NEXT: s_waitcnt vmcnt(0) 3826; GFX900-NEXT: s_setpc_b64 s[30:31] 3827; 3828; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_1_1: 3829; GFX90A: ; %bb.0: 3830; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3831; GFX90A-NEXT: ;;#ASMSTART 3832; GFX90A-NEXT: ; def v[0:1] 3833; GFX90A-NEXT: ;;#ASMEND 3834; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 3835; GFX90A-NEXT: v_mov_b32_e32 v4, 0 3836; GFX90A-NEXT: ;;#ASMSTART 3837; GFX90A-NEXT: ; def v[2:3] 3838; GFX90A-NEXT: ;;#ASMEND 3839; GFX90A-NEXT: v_perm_b32 v1, v0, v0, s4 3840; GFX90A-NEXT: v_perm_b32 v0, v3, v3, s4 3841; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 3842; GFX90A-NEXT: s_waitcnt vmcnt(0) 3843; GFX90A-NEXT: s_setpc_b64 s[30:31] 
3844; 3845; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_1_1: 3846; GFX940: ; %bb.0: 3847; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3848; GFX940-NEXT: ;;#ASMSTART 3849; GFX940-NEXT: ; def v[0:1] 3850; GFX940-NEXT: ;;#ASMEND 3851; GFX940-NEXT: s_mov_b32 s2, 0x7060302 3852; GFX940-NEXT: v_mov_b32_e32 v4, 0 3853; GFX940-NEXT: ;;#ASMSTART 3854; GFX940-NEXT: ; def v[2:3] 3855; GFX940-NEXT: ;;#ASMEND 3856; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 3857; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 3858; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 3859; GFX940-NEXT: s_waitcnt vmcnt(0) 3860; GFX940-NEXT: s_setpc_b64 s[30:31] 3861 %vec0 = call <4 x i16> asm "; def $0", "=v"() 3862 %vec1 = call <4 x i16> asm "; def $0", "=v"() 3863 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 1, i32 1> 3864 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 3865 ret void 3866} 3867 3868define void @v_shuffle_v4i16_v4i16__7_7_u_1(ptr addrspace(1) inreg %ptr) { 3869; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_u_1: 3870; GFX900: ; %bb.0: 3871; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3872; GFX900-NEXT: ;;#ASMSTART 3873; GFX900-NEXT: ; def v[0:1] 3874; GFX900-NEXT: ;;#ASMEND 3875; GFX900-NEXT: ;;#ASMSTART 3876; GFX900-NEXT: ; def v[1:2] 3877; GFX900-NEXT: ;;#ASMEND 3878; GFX900-NEXT: s_mov_b32 s4, 0x7060302 3879; GFX900-NEXT: v_mov_b32_e32 v3, 0 3880; GFX900-NEXT: v_perm_b32 v1, v2, v2, s4 3881; GFX900-NEXT: v_mov_b32_e32 v2, v0 3882; GFX900-NEXT: global_store_dwordx2 v3, v[1:2], s[16:17] 3883; GFX900-NEXT: s_waitcnt vmcnt(0) 3884; GFX900-NEXT: s_setpc_b64 s[30:31] 3885; 3886; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_u_1: 3887; GFX90A: ; %bb.0: 3888; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3889; GFX90A-NEXT: ;;#ASMSTART 3890; GFX90A-NEXT: ; def v[2:3] 3891; GFX90A-NEXT: ;;#ASMEND 3892; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 3893; GFX90A-NEXT: v_mov_b32_e32 v4, 0 3894; GFX90A-NEXT: ;;#ASMSTART 3895; GFX90A-NEXT: ; def 
v[0:1] 3896; GFX90A-NEXT: ;;#ASMEND 3897; GFX90A-NEXT: v_perm_b32 v2, v3, v3, s4 3898; GFX90A-NEXT: v_mov_b32_e32 v3, v0 3899; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17] 3900; GFX90A-NEXT: s_waitcnt vmcnt(0) 3901; GFX90A-NEXT: s_setpc_b64 s[30:31] 3902; 3903; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_u_1: 3904; GFX940: ; %bb.0: 3905; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3906; GFX940-NEXT: ;;#ASMSTART 3907; GFX940-NEXT: ; def v[2:3] 3908; GFX940-NEXT: ;;#ASMEND 3909; GFX940-NEXT: s_mov_b32 s2, 0x7060302 3910; GFX940-NEXT: v_mov_b32_e32 v4, 0 3911; GFX940-NEXT: ;;#ASMSTART 3912; GFX940-NEXT: ; def v[0:1] 3913; GFX940-NEXT: ;;#ASMEND 3914; GFX940-NEXT: v_perm_b32 v2, v3, v3, s2 3915; GFX940-NEXT: v_mov_b32_e32 v3, v0 3916; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 3917; GFX940-NEXT: s_waitcnt vmcnt(0) 3918; GFX940-NEXT: s_setpc_b64 s[30:31] 3919 %vec0 = call <4 x i16> asm "; def $0", "=v"() 3920 %vec1 = call <4 x i16> asm "; def $0", "=v"() 3921 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 poison, i32 1> 3922 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 3923 ret void 3924} 3925 3926define void @v_shuffle_v4i16_v4i16__7_7_0_1(ptr addrspace(1) inreg %ptr) { 3927; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_0_1: 3928; GFX900: ; %bb.0: 3929; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3930; GFX900-NEXT: ;;#ASMSTART 3931; GFX900-NEXT: ; def v[0:1] 3932; GFX900-NEXT: ;;#ASMEND 3933; GFX900-NEXT: ;;#ASMSTART 3934; GFX900-NEXT: ; def v[1:2] 3935; GFX900-NEXT: ;;#ASMEND 3936; GFX900-NEXT: s_mov_b32 s4, 0x7060302 3937; GFX900-NEXT: v_mov_b32_e32 v3, 0 3938; GFX900-NEXT: v_perm_b32 v1, v2, v2, s4 3939; GFX900-NEXT: v_mov_b32_e32 v2, v0 3940; GFX900-NEXT: global_store_dwordx2 v3, v[1:2], s[16:17] 3941; GFX900-NEXT: s_waitcnt vmcnt(0) 3942; GFX900-NEXT: s_setpc_b64 s[30:31] 3943; 3944; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_0_1: 3945; GFX90A: ; %bb.0: 3946; GFX90A-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) 3947; GFX90A-NEXT: ;;#ASMSTART 3948; GFX90A-NEXT: ; def v[2:3] 3949; GFX90A-NEXT: ;;#ASMEND 3950; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 3951; GFX90A-NEXT: v_mov_b32_e32 v4, 0 3952; GFX90A-NEXT: ;;#ASMSTART 3953; GFX90A-NEXT: ; def v[0:1] 3954; GFX90A-NEXT: ;;#ASMEND 3955; GFX90A-NEXT: v_perm_b32 v2, v3, v3, s4 3956; GFX90A-NEXT: v_mov_b32_e32 v3, v0 3957; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17] 3958; GFX90A-NEXT: s_waitcnt vmcnt(0) 3959; GFX90A-NEXT: s_setpc_b64 s[30:31] 3960; 3961; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_0_1: 3962; GFX940: ; %bb.0: 3963; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3964; GFX940-NEXT: ;;#ASMSTART 3965; GFX940-NEXT: ; def v[2:3] 3966; GFX940-NEXT: ;;#ASMEND 3967; GFX940-NEXT: s_mov_b32 s2, 0x7060302 3968; GFX940-NEXT: v_mov_b32_e32 v4, 0 3969; GFX940-NEXT: ;;#ASMSTART 3970; GFX940-NEXT: ; def v[0:1] 3971; GFX940-NEXT: ;;#ASMEND 3972; GFX940-NEXT: v_perm_b32 v2, v3, v3, s2 3973; GFX940-NEXT: v_mov_b32_e32 v3, v0 3974; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 3975; GFX940-NEXT: s_waitcnt vmcnt(0) 3976; GFX940-NEXT: s_setpc_b64 s[30:31] 3977 %vec0 = call <4 x i16> asm "; def $0", "=v"() 3978 %vec1 = call <4 x i16> asm "; def $0", "=v"() 3979 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 0, i32 1> 3980 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 3981 ret void 3982} 3983 3984define void @v_shuffle_v4i16_v4i16__7_7_2_1(ptr addrspace(1) inreg %ptr) { 3985; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_2_1: 3986; GFX900: ; %bb.0: 3987; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3988; GFX900-NEXT: ;;#ASMSTART 3989; GFX900-NEXT: ; def v[0:1] 3990; GFX900-NEXT: ;;#ASMEND 3991; GFX900-NEXT: s_mov_b32 s4, 0xffff 3992; GFX900-NEXT: v_bfi_b32 v1, s4, v1, v0 3993; GFX900-NEXT: s_mov_b32 s4, 0x7060302 3994; GFX900-NEXT: v_mov_b32_e32 v4, 0 3995; GFX900-NEXT: ;;#ASMSTART 3996; GFX900-NEXT: ; def v[2:3] 3997; GFX900-NEXT: 
;;#ASMEND 3998; GFX900-NEXT: v_perm_b32 v0, v3, v3, s4 3999; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 4000; GFX900-NEXT: s_waitcnt vmcnt(0) 4001; GFX900-NEXT: s_setpc_b64 s[30:31] 4002; 4003; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_2_1: 4004; GFX90A: ; %bb.0: 4005; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4006; GFX90A-NEXT: ;;#ASMSTART 4007; GFX90A-NEXT: ; def v[0:1] 4008; GFX90A-NEXT: ;;#ASMEND 4009; GFX90A-NEXT: s_mov_b32 s4, 0xffff 4010; GFX90A-NEXT: v_bfi_b32 v1, s4, v1, v0 4011; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 4012; GFX90A-NEXT: v_mov_b32_e32 v4, 0 4013; GFX90A-NEXT: ;;#ASMSTART 4014; GFX90A-NEXT: ; def v[2:3] 4015; GFX90A-NEXT: ;;#ASMEND 4016; GFX90A-NEXT: v_perm_b32 v0, v3, v3, s4 4017; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 4018; GFX90A-NEXT: s_waitcnt vmcnt(0) 4019; GFX90A-NEXT: s_setpc_b64 s[30:31] 4020; 4021; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_2_1: 4022; GFX940: ; %bb.0: 4023; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4024; GFX940-NEXT: ;;#ASMSTART 4025; GFX940-NEXT: ; def v[0:1] 4026; GFX940-NEXT: ;;#ASMEND 4027; GFX940-NEXT: s_mov_b32 s2, 0xffff 4028; GFX940-NEXT: v_bfi_b32 v1, s2, v1, v0 4029; GFX940-NEXT: s_mov_b32 s2, 0x7060302 4030; GFX940-NEXT: v_mov_b32_e32 v4, 0 4031; GFX940-NEXT: ;;#ASMSTART 4032; GFX940-NEXT: ; def v[2:3] 4033; GFX940-NEXT: ;;#ASMEND 4034; GFX940-NEXT: s_nop 0 4035; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 4036; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 4037; GFX940-NEXT: s_waitcnt vmcnt(0) 4038; GFX940-NEXT: s_setpc_b64 s[30:31] 4039 %vec0 = call <4 x i16> asm "; def $0", "=v"() 4040 %vec1 = call <4 x i16> asm "; def $0", "=v"() 4041 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 2, i32 1> 4042 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 4043 ret void 4044} 4045 4046define void @v_shuffle_v4i16_v4i16__7_7_3_1(ptr addrspace(1) inreg %ptr) { 4047; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_3_1: 4048; 
GFX900: ; %bb.0: 4049; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4050; GFX900-NEXT: ;;#ASMSTART 4051; GFX900-NEXT: ; def v[0:1] 4052; GFX900-NEXT: ;;#ASMEND 4053; GFX900-NEXT: s_mov_b32 s4, 0x7060302 4054; GFX900-NEXT: v_mov_b32_e32 v4, 0 4055; GFX900-NEXT: ;;#ASMSTART 4056; GFX900-NEXT: ; def v[2:3] 4057; GFX900-NEXT: ;;#ASMEND 4058; GFX900-NEXT: v_perm_b32 v1, v0, v1, s4 4059; GFX900-NEXT: v_perm_b32 v0, v3, v3, s4 4060; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 4061; GFX900-NEXT: s_waitcnt vmcnt(0) 4062; GFX900-NEXT: s_setpc_b64 s[30:31] 4063; 4064; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_3_1: 4065; GFX90A: ; %bb.0: 4066; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4067; GFX90A-NEXT: ;;#ASMSTART 4068; GFX90A-NEXT: ; def v[0:1] 4069; GFX90A-NEXT: ;;#ASMEND 4070; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 4071; GFX90A-NEXT: v_mov_b32_e32 v4, 0 4072; GFX90A-NEXT: ;;#ASMSTART 4073; GFX90A-NEXT: ; def v[2:3] 4074; GFX90A-NEXT: ;;#ASMEND 4075; GFX90A-NEXT: v_perm_b32 v1, v0, v1, s4 4076; GFX90A-NEXT: v_perm_b32 v0, v3, v3, s4 4077; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 4078; GFX90A-NEXT: s_waitcnt vmcnt(0) 4079; GFX90A-NEXT: s_setpc_b64 s[30:31] 4080; 4081; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_3_1: 4082; GFX940: ; %bb.0: 4083; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4084; GFX940-NEXT: ;;#ASMSTART 4085; GFX940-NEXT: ; def v[0:1] 4086; GFX940-NEXT: ;;#ASMEND 4087; GFX940-NEXT: s_mov_b32 s2, 0x7060302 4088; GFX940-NEXT: v_mov_b32_e32 v4, 0 4089; GFX940-NEXT: ;;#ASMSTART 4090; GFX940-NEXT: ; def v[2:3] 4091; GFX940-NEXT: ;;#ASMEND 4092; GFX940-NEXT: v_perm_b32 v1, v0, v1, s2 4093; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 4094; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 4095; GFX940-NEXT: s_waitcnt vmcnt(0) 4096; GFX940-NEXT: s_setpc_b64 s[30:31] 4097 %vec0 = call <4 x i16> asm "; def $0", "=v"() 4098 %vec1 = call <4 x i16> asm "; def $0", "=v"() 4099 %shuf = shufflevector <4 x i16> %vec0, <4 x 
i16> %vec1, <4 x i32> <i32 7, i32 7, i32 3, i32 1> 4100 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 4101 ret void 4102} 4103 4104define void @v_shuffle_v4i16_v4i16__7_7_4_1(ptr addrspace(1) inreg %ptr) { 4105; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_4_1: 4106; GFX900: ; %bb.0: 4107; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4108; GFX900-NEXT: ;;#ASMSTART 4109; GFX900-NEXT: ; def v[0:1] 4110; GFX900-NEXT: ;;#ASMEND 4111; GFX900-NEXT: ;;#ASMSTART 4112; GFX900-NEXT: ; def v[1:2] 4113; GFX900-NEXT: ;;#ASMEND 4114; GFX900-NEXT: s_mov_b32 s4, 0xffff 4115; GFX900-NEXT: v_bfi_b32 v1, s4, v1, v0 4116; GFX900-NEXT: s_mov_b32 s4, 0x7060302 4117; GFX900-NEXT: v_mov_b32_e32 v3, 0 4118; GFX900-NEXT: v_perm_b32 v0, v2, v2, s4 4119; GFX900-NEXT: global_store_dwordx2 v3, v[0:1], s[16:17] 4120; GFX900-NEXT: s_waitcnt vmcnt(0) 4121; GFX900-NEXT: s_setpc_b64 s[30:31] 4122; 4123; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_4_1: 4124; GFX90A: ; %bb.0: 4125; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4126; GFX90A-NEXT: ;;#ASMSTART 4127; GFX90A-NEXT: ; def v[0:1] 4128; GFX90A-NEXT: ;;#ASMEND 4129; GFX90A-NEXT: s_mov_b32 s4, 0xffff 4130; GFX90A-NEXT: ;;#ASMSTART 4131; GFX90A-NEXT: ; def v[2:3] 4132; GFX90A-NEXT: ;;#ASMEND 4133; GFX90A-NEXT: v_bfi_b32 v1, s4, v2, v0 4134; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 4135; GFX90A-NEXT: v_mov_b32_e32 v4, 0 4136; GFX90A-NEXT: v_perm_b32 v0, v3, v3, s4 4137; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 4138; GFX90A-NEXT: s_waitcnt vmcnt(0) 4139; GFX90A-NEXT: s_setpc_b64 s[30:31] 4140; 4141; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_4_1: 4142; GFX940: ; %bb.0: 4143; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4144; GFX940-NEXT: ;;#ASMSTART 4145; GFX940-NEXT: ; def v[0:1] 4146; GFX940-NEXT: ;;#ASMEND 4147; GFX940-NEXT: s_mov_b32 s2, 0xffff 4148; GFX940-NEXT: ;;#ASMSTART 4149; GFX940-NEXT: ; def v[2:3] 4150; GFX940-NEXT: ;;#ASMEND 4151; GFX940-NEXT: v_mov_b32_e32 v4, 0 4152; GFX940-NEXT: v_bfi_b32 v1, s2, v2, 
v0 4153; GFX940-NEXT: s_mov_b32 s2, 0x7060302 4154; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 4155; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 4156; GFX940-NEXT: s_waitcnt vmcnt(0) 4157; GFX940-NEXT: s_setpc_b64 s[30:31] 4158 %vec0 = call <4 x i16> asm "; def $0", "=v"() 4159 %vec1 = call <4 x i16> asm "; def $0", "=v"() 4160 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 4, i32 1> 4161 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 4162 ret void 4163} 4164 4165define void @v_shuffle_v4i16_v4i16__7_7_5_1(ptr addrspace(1) inreg %ptr) { 4166; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_5_1: 4167; GFX900: ; %bb.0: 4168; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4169; GFX900-NEXT: ;;#ASMSTART 4170; GFX900-NEXT: ; def v[0:1] 4171; GFX900-NEXT: ;;#ASMEND 4172; GFX900-NEXT: ;;#ASMSTART 4173; GFX900-NEXT: ; def v[1:2] 4174; GFX900-NEXT: ;;#ASMEND 4175; GFX900-NEXT: s_mov_b32 s4, 0x7060302 4176; GFX900-NEXT: v_mov_b32_e32 v3, 0 4177; GFX900-NEXT: v_perm_b32 v1, v0, v1, s4 4178; GFX900-NEXT: v_perm_b32 v0, v2, v2, s4 4179; GFX900-NEXT: global_store_dwordx2 v3, v[0:1], s[16:17] 4180; GFX900-NEXT: s_waitcnt vmcnt(0) 4181; GFX900-NEXT: s_setpc_b64 s[30:31] 4182; 4183; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_5_1: 4184; GFX90A: ; %bb.0: 4185; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4186; GFX90A-NEXT: ;;#ASMSTART 4187; GFX90A-NEXT: ; def v[0:1] 4188; GFX90A-NEXT: ;;#ASMEND 4189; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 4190; GFX90A-NEXT: v_mov_b32_e32 v4, 0 4191; GFX90A-NEXT: ;;#ASMSTART 4192; GFX90A-NEXT: ; def v[2:3] 4193; GFX90A-NEXT: ;;#ASMEND 4194; GFX90A-NEXT: v_perm_b32 v1, v0, v2, s4 4195; GFX90A-NEXT: v_perm_b32 v0, v3, v3, s4 4196; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 4197; GFX90A-NEXT: s_waitcnt vmcnt(0) 4198; GFX90A-NEXT: s_setpc_b64 s[30:31] 4199; 4200; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_5_1: 4201; GFX940: ; %bb.0: 4202; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) 4203; GFX940-NEXT: ;;#ASMSTART 4204; GFX940-NEXT: ; def v[0:1] 4205; GFX940-NEXT: ;;#ASMEND 4206; GFX940-NEXT: s_mov_b32 s2, 0x7060302 4207; GFX940-NEXT: v_mov_b32_e32 v4, 0 4208; GFX940-NEXT: ;;#ASMSTART 4209; GFX940-NEXT: ; def v[2:3] 4210; GFX940-NEXT: ;;#ASMEND 4211; GFX940-NEXT: s_nop 0 4212; GFX940-NEXT: v_perm_b32 v1, v0, v2, s2 4213; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 4214; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 4215; GFX940-NEXT: s_waitcnt vmcnt(0) 4216; GFX940-NEXT: s_setpc_b64 s[30:31] 4217 %vec0 = call <4 x i16> asm "; def $0", "=v"() 4218 %vec1 = call <4 x i16> asm "; def $0", "=v"() 4219 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 5, i32 1> 4220 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 4221 ret void 4222} 4223 4224define void @v_shuffle_v4i16_v4i16__7_7_6_1(ptr addrspace(1) inreg %ptr) { 4225; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_6_1: 4226; GFX900: ; %bb.0: 4227; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4228; GFX900-NEXT: ;;#ASMSTART 4229; GFX900-NEXT: ; def v[0:1] 4230; GFX900-NEXT: ;;#ASMEND 4231; GFX900-NEXT: ;;#ASMSTART 4232; GFX900-NEXT: ; def v[1:2] 4233; GFX900-NEXT: ;;#ASMEND 4234; GFX900-NEXT: s_mov_b32 s4, 0xffff 4235; GFX900-NEXT: v_bfi_b32 v1, s4, v2, v0 4236; GFX900-NEXT: s_mov_b32 s4, 0x7060302 4237; GFX900-NEXT: v_mov_b32_e32 v3, 0 4238; GFX900-NEXT: v_perm_b32 v0, v2, v2, s4 4239; GFX900-NEXT: global_store_dwordx2 v3, v[0:1], s[16:17] 4240; GFX900-NEXT: s_waitcnt vmcnt(0) 4241; GFX900-NEXT: s_setpc_b64 s[30:31] 4242; 4243; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_6_1: 4244; GFX90A: ; %bb.0: 4245; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4246; GFX90A-NEXT: ;;#ASMSTART 4247; GFX90A-NEXT: ; def v[0:1] 4248; GFX90A-NEXT: ;;#ASMEND 4249; GFX90A-NEXT: s_mov_b32 s4, 0xffff 4250; GFX90A-NEXT: ;;#ASMSTART 4251; GFX90A-NEXT: ; def v[2:3] 4252; GFX90A-NEXT: ;;#ASMEND 4253; GFX90A-NEXT: v_bfi_b32 v1, s4, v3, v0 4254; GFX90A-NEXT: 
s_mov_b32 s4, 0x7060302 4255; GFX90A-NEXT: v_mov_b32_e32 v4, 0 4256; GFX90A-NEXT: v_perm_b32 v0, v3, v3, s4 4257; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 4258; GFX90A-NEXT: s_waitcnt vmcnt(0) 4259; GFX90A-NEXT: s_setpc_b64 s[30:31] 4260; 4261; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_6_1: 4262; GFX940: ; %bb.0: 4263; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4264; GFX940-NEXT: ;;#ASMSTART 4265; GFX940-NEXT: ; def v[0:1] 4266; GFX940-NEXT: ;;#ASMEND 4267; GFX940-NEXT: s_mov_b32 s2, 0xffff 4268; GFX940-NEXT: ;;#ASMSTART 4269; GFX940-NEXT: ; def v[2:3] 4270; GFX940-NEXT: ;;#ASMEND 4271; GFX940-NEXT: v_mov_b32_e32 v4, 0 4272; GFX940-NEXT: v_bfi_b32 v1, s2, v3, v0 4273; GFX940-NEXT: s_mov_b32 s2, 0x7060302 4274; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 4275; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 4276; GFX940-NEXT: s_waitcnt vmcnt(0) 4277; GFX940-NEXT: s_setpc_b64 s[30:31] 4278 %vec0 = call <4 x i16> asm "; def $0", "=v"() 4279 %vec1 = call <4 x i16> asm "; def $0", "=v"() 4280 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 1> 4281 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 4282 ret void 4283} 4284 4285define void @v_shuffle_v4i16_v4i16__u_2_2_2(ptr addrspace(1) inreg %ptr) { 4286; GFX900-LABEL: v_shuffle_v4i16_v4i16__u_2_2_2: 4287; GFX900: ; %bb.0: 4288; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4289; GFX900-NEXT: ;;#ASMSTART 4290; GFX900-NEXT: ; def v[0:1] 4291; GFX900-NEXT: ;;#ASMEND 4292; GFX900-NEXT: s_mov_b32 s4, 0x5040100 4293; GFX900-NEXT: v_mov_b32_e32 v3, 0 4294; GFX900-NEXT: v_perm_b32 v2, v1, v1, s4 4295; GFX900-NEXT: v_lshlrev_b32_e32 v1, 16, v1 4296; GFX900-NEXT: global_store_dwordx2 v3, v[1:2], s[16:17] 4297; GFX900-NEXT: s_waitcnt vmcnt(0) 4298; GFX900-NEXT: s_setpc_b64 s[30:31] 4299; 4300; GFX90A-LABEL: v_shuffle_v4i16_v4i16__u_2_2_2: 4301; GFX90A: ; %bb.0: 4302; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4303; GFX90A-NEXT: 
s_mov_b32 s4, 0x5040100 4304; GFX90A-NEXT: v_mov_b32_e32 v4, 0 4305; GFX90A-NEXT: ;;#ASMSTART 4306; GFX90A-NEXT: ; def v[0:1] 4307; GFX90A-NEXT: ;;#ASMEND 4308; GFX90A-NEXT: v_perm_b32 v3, v1, v1, s4 4309; GFX90A-NEXT: v_lshlrev_b32_e32 v2, 16, v1 4310; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17] 4311; GFX90A-NEXT: s_waitcnt vmcnt(0) 4312; GFX90A-NEXT: s_setpc_b64 s[30:31] 4313; 4314; GFX940-LABEL: v_shuffle_v4i16_v4i16__u_2_2_2: 4315; GFX940: ; %bb.0: 4316; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4317; GFX940-NEXT: s_mov_b32 s2, 0x5040100 4318; GFX940-NEXT: v_mov_b32_e32 v4, 0 4319; GFX940-NEXT: ;;#ASMSTART 4320; GFX940-NEXT: ; def v[0:1] 4321; GFX940-NEXT: ;;#ASMEND 4322; GFX940-NEXT: s_nop 0 4323; GFX940-NEXT: v_perm_b32 v3, v1, v1, s2 4324; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v1 4325; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 4326; GFX940-NEXT: s_waitcnt vmcnt(0) 4327; GFX940-NEXT: s_setpc_b64 s[30:31] 4328 %vec0 = call <4 x i16> asm "; def $0", "=v"() 4329 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 poison, i32 2, i32 2, i32 2> 4330 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 4331 ret void 4332} 4333 4334define void @v_shuffle_v4i16_v4i16__0_2_2_2(ptr addrspace(1) inreg %ptr) { 4335; GFX900-LABEL: v_shuffle_v4i16_v4i16__0_2_2_2: 4336; GFX900: ; %bb.0: 4337; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4338; GFX900-NEXT: ;;#ASMSTART 4339; GFX900-NEXT: ; def v[0:1] 4340; GFX900-NEXT: ;;#ASMEND 4341; GFX900-NEXT: s_mov_b32 s4, 0x5040100 4342; GFX900-NEXT: v_mov_b32_e32 v2, 0 4343; GFX900-NEXT: v_perm_b32 v0, v1, v0, s4 4344; GFX900-NEXT: v_perm_b32 v1, v1, v1, s4 4345; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17] 4346; GFX900-NEXT: s_waitcnt vmcnt(0) 4347; GFX900-NEXT: s_setpc_b64 s[30:31] 4348; 4349; GFX90A-LABEL: v_shuffle_v4i16_v4i16__0_2_2_2: 4350; GFX90A: ; %bb.0: 4351; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4352; GFX90A-NEXT: ;;#ASMSTART 4353; 
GFX90A-NEXT: ; def v[0:1] 4354; GFX90A-NEXT: ;;#ASMEND 4355; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 4356; GFX90A-NEXT: v_mov_b32_e32 v2, 0 4357; GFX90A-NEXT: v_perm_b32 v0, v1, v0, s4 4358; GFX90A-NEXT: v_perm_b32 v1, v1, v1, s4 4359; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17] 4360; GFX90A-NEXT: s_waitcnt vmcnt(0) 4361; GFX90A-NEXT: s_setpc_b64 s[30:31] 4362; 4363; GFX940-LABEL: v_shuffle_v4i16_v4i16__0_2_2_2: 4364; GFX940: ; %bb.0: 4365; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4366; GFX940-NEXT: ;;#ASMSTART 4367; GFX940-NEXT: ; def v[0:1] 4368; GFX940-NEXT: ;;#ASMEND 4369; GFX940-NEXT: s_mov_b32 s2, 0x5040100 4370; GFX940-NEXT: v_mov_b32_e32 v2, 0 4371; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2 4372; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 4373; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 4374; GFX940-NEXT: s_waitcnt vmcnt(0) 4375; GFX940-NEXT: s_setpc_b64 s[30:31] 4376 %vec0 = call <4 x i16> asm "; def $0", "=v"() 4377 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 0, i32 2, i32 2, i32 2> 4378 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 4379 ret void 4380} 4381 4382define void @v_shuffle_v4i16_v4i16__1_2_2_2(ptr addrspace(1) inreg %ptr) { 4383; GFX900-LABEL: v_shuffle_v4i16_v4i16__1_2_2_2: 4384; GFX900: ; %bb.0: 4385; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4386; GFX900-NEXT: ;;#ASMSTART 4387; GFX900-NEXT: ; def v[0:1] 4388; GFX900-NEXT: ;;#ASMEND 4389; GFX900-NEXT: s_mov_b32 s4, 0x5040100 4390; GFX900-NEXT: v_mov_b32_e32 v3, 0 4391; GFX900-NEXT: v_perm_b32 v2, v1, v1, s4 4392; GFX900-NEXT: v_alignbit_b32 v1, v1, v0, 16 4393; GFX900-NEXT: global_store_dwordx2 v3, v[1:2], s[16:17] 4394; GFX900-NEXT: s_waitcnt vmcnt(0) 4395; GFX900-NEXT: s_setpc_b64 s[30:31] 4396; 4397; GFX90A-LABEL: v_shuffle_v4i16_v4i16__1_2_2_2: 4398; GFX90A: ; %bb.0: 4399; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4400; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 4401; GFX90A-NEXT: v_mov_b32_e32 v4, 0 4402; 
GFX90A-NEXT: ;;#ASMSTART 4403; GFX90A-NEXT: ; def v[0:1] 4404; GFX90A-NEXT: ;;#ASMEND 4405; GFX90A-NEXT: v_perm_b32 v3, v1, v1, s4 4406; GFX90A-NEXT: v_alignbit_b32 v2, v1, v0, 16 4407; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17] 4408; GFX90A-NEXT: s_waitcnt vmcnt(0) 4409; GFX90A-NEXT: s_setpc_b64 s[30:31] 4410; 4411; GFX940-LABEL: v_shuffle_v4i16_v4i16__1_2_2_2: 4412; GFX940: ; %bb.0: 4413; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4414; GFX940-NEXT: s_mov_b32 s2, 0x5040100 4415; GFX940-NEXT: v_mov_b32_e32 v4, 0 4416; GFX940-NEXT: ;;#ASMSTART 4417; GFX940-NEXT: ; def v[0:1] 4418; GFX940-NEXT: ;;#ASMEND 4419; GFX940-NEXT: s_nop 0 4420; GFX940-NEXT: v_perm_b32 v3, v1, v1, s2 4421; GFX940-NEXT: v_alignbit_b32 v2, v1, v0, 16 4422; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 4423; GFX940-NEXT: s_waitcnt vmcnt(0) 4424; GFX940-NEXT: s_setpc_b64 s[30:31] 4425 %vec0 = call <4 x i16> asm "; def $0", "=v"() 4426 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 1, i32 2, i32 2, i32 2> 4427 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 4428 ret void 4429} 4430 4431define void @v_shuffle_v4i16_v4i16__2_2_2_2(ptr addrspace(1) inreg %ptr) { 4432; GFX900-LABEL: v_shuffle_v4i16_v4i16__2_2_2_2: 4433; GFX900: ; %bb.0: 4434; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4435; GFX900-NEXT: ;;#ASMSTART 4436; GFX900-NEXT: ; def v[0:1] 4437; GFX900-NEXT: ;;#ASMEND 4438; GFX900-NEXT: s_mov_b32 s4, 0x5040100 4439; GFX900-NEXT: v_perm_b32 v0, v1, v1, s4 4440; GFX900-NEXT: v_mov_b32_e32 v2, 0 4441; GFX900-NEXT: v_mov_b32_e32 v1, v0 4442; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17] 4443; GFX900-NEXT: s_waitcnt vmcnt(0) 4444; GFX900-NEXT: s_setpc_b64 s[30:31] 4445; 4446; GFX90A-LABEL: v_shuffle_v4i16_v4i16__2_2_2_2: 4447; GFX90A: ; %bb.0: 4448; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4449; GFX90A-NEXT: ;;#ASMSTART 4450; GFX90A-NEXT: ; def v[0:1] 4451; GFX90A-NEXT: ;;#ASMEND 4452; GFX90A-NEXT: 
s_mov_b32 s4, 0x5040100 4453; GFX90A-NEXT: v_perm_b32 v0, v1, v1, s4 4454; GFX90A-NEXT: v_mov_b32_e32 v2, 0 4455; GFX90A-NEXT: v_mov_b32_e32 v1, v0 4456; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17] 4457; GFX90A-NEXT: s_waitcnt vmcnt(0) 4458; GFX90A-NEXT: s_setpc_b64 s[30:31] 4459; 4460; GFX940-LABEL: v_shuffle_v4i16_v4i16__2_2_2_2: 4461; GFX940: ; %bb.0: 4462; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4463; GFX940-NEXT: ;;#ASMSTART 4464; GFX940-NEXT: ; def v[0:1] 4465; GFX940-NEXT: ;;#ASMEND 4466; GFX940-NEXT: s_mov_b32 s2, 0x5040100 4467; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 4468; GFX940-NEXT: v_mov_b32_e32 v2, 0 4469; GFX940-NEXT: v_mov_b32_e32 v1, v0 4470; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 4471; GFX940-NEXT: s_waitcnt vmcnt(0) 4472; GFX940-NEXT: s_setpc_b64 s[30:31] 4473 %vec0 = call <4 x i16> asm "; def $0", "=v"() 4474 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2> 4475 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 4476 ret void 4477} 4478 4479define void @v_shuffle_v4i16_v4i16__3_2_2_2(ptr addrspace(1) inreg %ptr) { 4480; GFX900-LABEL: v_shuffle_v4i16_v4i16__3_2_2_2: 4481; GFX900: ; %bb.0: 4482; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4483; GFX900-NEXT: ;;#ASMSTART 4484; GFX900-NEXT: ; def v[0:1] 4485; GFX900-NEXT: ;;#ASMEND 4486; GFX900-NEXT: s_mov_b32 s4, 0x5040100 4487; GFX900-NEXT: v_mov_b32_e32 v3, 0 4488; GFX900-NEXT: v_perm_b32 v2, v1, v1, s4 4489; GFX900-NEXT: v_alignbit_b32 v1, v1, v1, 16 4490; GFX900-NEXT: global_store_dwordx2 v3, v[1:2], s[16:17] 4491; GFX900-NEXT: s_waitcnt vmcnt(0) 4492; GFX900-NEXT: s_setpc_b64 s[30:31] 4493; 4494; GFX90A-LABEL: v_shuffle_v4i16_v4i16__3_2_2_2: 4495; GFX90A: ; %bb.0: 4496; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4497; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 4498; GFX90A-NEXT: v_mov_b32_e32 v4, 0 4499; GFX90A-NEXT: ;;#ASMSTART 4500; GFX90A-NEXT: ; def v[0:1] 4501; GFX90A-NEXT: 
;;#ASMEND 4502; GFX90A-NEXT: v_perm_b32 v3, v1, v1, s4 4503; GFX90A-NEXT: v_alignbit_b32 v2, v1, v1, 16 4504; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17] 4505; GFX90A-NEXT: s_waitcnt vmcnt(0) 4506; GFX90A-NEXT: s_setpc_b64 s[30:31] 4507; 4508; GFX940-LABEL: v_shuffle_v4i16_v4i16__3_2_2_2: 4509; GFX940: ; %bb.0: 4510; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4511; GFX940-NEXT: s_mov_b32 s2, 0x5040100 4512; GFX940-NEXT: v_mov_b32_e32 v4, 0 4513; GFX940-NEXT: ;;#ASMSTART 4514; GFX940-NEXT: ; def v[0:1] 4515; GFX940-NEXT: ;;#ASMEND 4516; GFX940-NEXT: s_nop 0 4517; GFX940-NEXT: v_perm_b32 v3, v1, v1, s2 4518; GFX940-NEXT: v_alignbit_b32 v2, v1, v1, 16 4519; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 4520; GFX940-NEXT: s_waitcnt vmcnt(0) 4521; GFX940-NEXT: s_setpc_b64 s[30:31] 4522 %vec0 = call <4 x i16> asm "; def $0", "=v"() 4523 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 3, i32 2, i32 2, i32 2> 4524 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 4525 ret void 4526} 4527 4528define void @v_shuffle_v4i16_v4i16__4_2_2_2(ptr addrspace(1) inreg %ptr) { 4529; GFX900-LABEL: v_shuffle_v4i16_v4i16__4_2_2_2: 4530; GFX900: ; %bb.0: 4531; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4532; GFX900-NEXT: ;;#ASMSTART 4533; GFX900-NEXT: ; def v[0:1] 4534; GFX900-NEXT: ;;#ASMEND 4535; GFX900-NEXT: s_mov_b32 s4, 0x5040100 4536; GFX900-NEXT: v_mov_b32_e32 v3, 0 4537; GFX900-NEXT: v_perm_b32 v2, v1, v1, s4 4538; GFX900-NEXT: v_lshlrev_b32_e32 v1, 16, v1 4539; GFX900-NEXT: global_store_dwordx2 v3, v[1:2], s[16:17] 4540; GFX900-NEXT: s_waitcnt vmcnt(0) 4541; GFX900-NEXT: s_setpc_b64 s[30:31] 4542; 4543; GFX90A-LABEL: v_shuffle_v4i16_v4i16__4_2_2_2: 4544; GFX90A: ; %bb.0: 4545; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4546; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 4547; GFX90A-NEXT: v_mov_b32_e32 v4, 0 4548; GFX90A-NEXT: ;;#ASMSTART 4549; GFX90A-NEXT: ; def v[0:1] 4550; GFX90A-NEXT: ;;#ASMEND 4551; 
GFX90A-NEXT: v_perm_b32 v3, v1, v1, s4 4552; GFX90A-NEXT: v_lshlrev_b32_e32 v2, 16, v1 4553; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17] 4554; GFX90A-NEXT: s_waitcnt vmcnt(0) 4555; GFX90A-NEXT: s_setpc_b64 s[30:31] 4556; 4557; GFX940-LABEL: v_shuffle_v4i16_v4i16__4_2_2_2: 4558; GFX940: ; %bb.0: 4559; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4560; GFX940-NEXT: s_mov_b32 s2, 0x5040100 4561; GFX940-NEXT: v_mov_b32_e32 v4, 0 4562; GFX940-NEXT: ;;#ASMSTART 4563; GFX940-NEXT: ; def v[0:1] 4564; GFX940-NEXT: ;;#ASMEND 4565; GFX940-NEXT: s_nop 0 4566; GFX940-NEXT: v_perm_b32 v3, v1, v1, s2 4567; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v1 4568; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 4569; GFX940-NEXT: s_waitcnt vmcnt(0) 4570; GFX940-NEXT: s_setpc_b64 s[30:31] 4571 %vec0 = call <4 x i16> asm "; def $0", "=v"() 4572 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 4, i32 2, i32 2, i32 2> 4573 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 4574 ret void 4575} 4576 4577define void @v_shuffle_v4i16_v4i16__5_2_2_2(ptr addrspace(1) inreg %ptr) { 4578; GFX900-LABEL: v_shuffle_v4i16_v4i16__5_2_2_2: 4579; GFX900: ; %bb.0: 4580; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4581; GFX900-NEXT: ;;#ASMSTART 4582; GFX900-NEXT: ; def v[2:3] 4583; GFX900-NEXT: ;;#ASMEND 4584; GFX900-NEXT: s_mov_b32 s4, 0x5040100 4585; GFX900-NEXT: v_mov_b32_e32 v4, 0 4586; GFX900-NEXT: ;;#ASMSTART 4587; GFX900-NEXT: ; def v[0:1] 4588; GFX900-NEXT: ;;#ASMEND 4589; GFX900-NEXT: v_perm_b32 v3, v1, v1, s4 4590; GFX900-NEXT: v_alignbit_b32 v2, v1, v2, 16 4591; GFX900-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17] 4592; GFX900-NEXT: s_waitcnt vmcnt(0) 4593; GFX900-NEXT: s_setpc_b64 s[30:31] 4594; 4595; GFX90A-LABEL: v_shuffle_v4i16_v4i16__5_2_2_2: 4596; GFX90A: ; %bb.0: 4597; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4598; GFX90A-NEXT: ;;#ASMSTART 4599; GFX90A-NEXT: ; def v[2:3] 4600; GFX90A-NEXT: ;;#ASMEND 4601; 
GFX90A-NEXT: s_mov_b32 s4, 0x5040100 4602; GFX90A-NEXT: v_mov_b32_e32 v4, 0 4603; GFX90A-NEXT: ;;#ASMSTART 4604; GFX90A-NEXT: ; def v[0:1] 4605; GFX90A-NEXT: ;;#ASMEND 4606; GFX90A-NEXT: v_perm_b32 v3, v1, v1, s4 4607; GFX90A-NEXT: v_alignbit_b32 v2, v1, v2, 16 4608; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17] 4609; GFX90A-NEXT: s_waitcnt vmcnt(0) 4610; GFX90A-NEXT: s_setpc_b64 s[30:31] 4611; 4612; GFX940-LABEL: v_shuffle_v4i16_v4i16__5_2_2_2: 4613; GFX940: ; %bb.0: 4614; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4615; GFX940-NEXT: ;;#ASMSTART 4616; GFX940-NEXT: ; def v[2:3] 4617; GFX940-NEXT: ;;#ASMEND 4618; GFX940-NEXT: s_mov_b32 s2, 0x5040100 4619; GFX940-NEXT: v_mov_b32_e32 v4, 0 4620; GFX940-NEXT: ;;#ASMSTART 4621; GFX940-NEXT: ; def v[0:1] 4622; GFX940-NEXT: ;;#ASMEND 4623; GFX940-NEXT: s_nop 0 4624; GFX940-NEXT: v_perm_b32 v3, v1, v1, s2 4625; GFX940-NEXT: v_alignbit_b32 v2, v1, v2, 16 4626; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 4627; GFX940-NEXT: s_waitcnt vmcnt(0) 4628; GFX940-NEXT: s_setpc_b64 s[30:31] 4629 %vec0 = call <4 x i16> asm "; def $0", "=v"() 4630 %vec1 = call <4 x i16> asm "; def $0", "=v"() 4631 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 5, i32 2, i32 2, i32 2> 4632 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 4633 ret void 4634} 4635 4636define void @v_shuffle_v4i16_v4i16__6_2_2_2(ptr addrspace(1) inreg %ptr) { 4637; GFX900-LABEL: v_shuffle_v4i16_v4i16__6_2_2_2: 4638; GFX900: ; %bb.0: 4639; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4640; GFX900-NEXT: ;;#ASMSTART 4641; GFX900-NEXT: ; def v[0:1] 4642; GFX900-NEXT: ;;#ASMEND 4643; GFX900-NEXT: s_mov_b32 s4, 0x5040100 4644; GFX900-NEXT: v_mov_b32_e32 v4, 0 4645; GFX900-NEXT: ;;#ASMSTART 4646; GFX900-NEXT: ; def v[2:3] 4647; GFX900-NEXT: ;;#ASMEND 4648; GFX900-NEXT: v_perm_b32 v0, v1, v3, s4 4649; GFX900-NEXT: v_perm_b32 v1, v1, v1, s4 4650; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 4651; 
GFX900-NEXT: s_waitcnt vmcnt(0) 4652; GFX900-NEXT: s_setpc_b64 s[30:31] 4653; 4654; GFX90A-LABEL: v_shuffle_v4i16_v4i16__6_2_2_2: 4655; GFX90A: ; %bb.0: 4656; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4657; GFX90A-NEXT: ;;#ASMSTART 4658; GFX90A-NEXT: ; def v[0:1] 4659; GFX90A-NEXT: ;;#ASMEND 4660; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 4661; GFX90A-NEXT: v_mov_b32_e32 v4, 0 4662; GFX90A-NEXT: ;;#ASMSTART 4663; GFX90A-NEXT: ; def v[2:3] 4664; GFX90A-NEXT: ;;#ASMEND 4665; GFX90A-NEXT: v_perm_b32 v0, v1, v3, s4 4666; GFX90A-NEXT: v_perm_b32 v1, v1, v1, s4 4667; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 4668; GFX90A-NEXT: s_waitcnt vmcnt(0) 4669; GFX90A-NEXT: s_setpc_b64 s[30:31] 4670; 4671; GFX940-LABEL: v_shuffle_v4i16_v4i16__6_2_2_2: 4672; GFX940: ; %bb.0: 4673; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4674; GFX940-NEXT: ;;#ASMSTART 4675; GFX940-NEXT: ; def v[0:1] 4676; GFX940-NEXT: ;;#ASMEND 4677; GFX940-NEXT: s_mov_b32 s2, 0x5040100 4678; GFX940-NEXT: v_mov_b32_e32 v4, 0 4679; GFX940-NEXT: ;;#ASMSTART 4680; GFX940-NEXT: ; def v[2:3] 4681; GFX940-NEXT: ;;#ASMEND 4682; GFX940-NEXT: s_nop 0 4683; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 4684; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 4685; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 4686; GFX940-NEXT: s_waitcnt vmcnt(0) 4687; GFX940-NEXT: s_setpc_b64 s[30:31] 4688 %vec0 = call <4 x i16> asm "; def $0", "=v"() 4689 %vec1 = call <4 x i16> asm "; def $0", "=v"() 4690 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 6, i32 2, i32 2, i32 2> 4691 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 4692 ret void 4693} 4694 4695define void @v_shuffle_v4i16_v4i16__7_2_2_2(ptr addrspace(1) inreg %ptr) { 4696; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_2_2_2: 4697; GFX900: ; %bb.0: 4698; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4699; GFX900-NEXT: ;;#ASMSTART 4700; GFX900-NEXT: ; def v[0:1] 4701; GFX900-NEXT: ;;#ASMEND 4702; GFX900-NEXT: ;;#ASMSTART 
4703; GFX900-NEXT: ; def v[2:3] 4704; GFX900-NEXT: ;;#ASMEND 4705; GFX900-NEXT: s_mov_b32 s4, 0x5040100 4706; GFX900-NEXT: v_mov_b32_e32 v4, 0 4707; GFX900-NEXT: v_perm_b32 v2, v1, v1, s4 4708; GFX900-NEXT: v_alignbit_b32 v1, v1, v3, 16 4709; GFX900-NEXT: global_store_dwordx2 v4, v[1:2], s[16:17] 4710; GFX900-NEXT: s_waitcnt vmcnt(0) 4711; GFX900-NEXT: s_setpc_b64 s[30:31] 4712; 4713; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_2_2_2: 4714; GFX90A: ; %bb.0: 4715; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4716; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 4717; GFX90A-NEXT: v_mov_b32_e32 v6, 0 4718; GFX90A-NEXT: ;;#ASMSTART 4719; GFX90A-NEXT: ; def v[0:1] 4720; GFX90A-NEXT: ;;#ASMEND 4721; GFX90A-NEXT: ;;#ASMSTART 4722; GFX90A-NEXT: ; def v[2:3] 4723; GFX90A-NEXT: ;;#ASMEND 4724; GFX90A-NEXT: v_perm_b32 v5, v1, v1, s4 4725; GFX90A-NEXT: v_alignbit_b32 v4, v1, v3, 16 4726; GFX90A-NEXT: global_store_dwordx2 v6, v[4:5], s[16:17] 4727; GFX90A-NEXT: s_waitcnt vmcnt(0) 4728; GFX90A-NEXT: s_setpc_b64 s[30:31] 4729; 4730; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_2_2_2: 4731; GFX940: ; %bb.0: 4732; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4733; GFX940-NEXT: s_mov_b32 s2, 0x5040100 4734; GFX940-NEXT: v_mov_b32_e32 v6, 0 4735; GFX940-NEXT: ;;#ASMSTART 4736; GFX940-NEXT: ; def v[0:1] 4737; GFX940-NEXT: ;;#ASMEND 4738; GFX940-NEXT: ;;#ASMSTART 4739; GFX940-NEXT: ; def v[2:3] 4740; GFX940-NEXT: ;;#ASMEND 4741; GFX940-NEXT: s_nop 0 4742; GFX940-NEXT: v_perm_b32 v5, v1, v1, s2 4743; GFX940-NEXT: v_alignbit_b32 v4, v1, v3, 16 4744; GFX940-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] sc0 sc1 4745; GFX940-NEXT: s_waitcnt vmcnt(0) 4746; GFX940-NEXT: s_setpc_b64 s[30:31] 4747 %vec0 = call <4 x i16> asm "; def $0", "=v"() 4748 %vec1 = call <4 x i16> asm "; def $0", "=v"() 4749 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 2, i32 2, i32 2> 4750 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 4751 ret void 4752} 4753 4754define void 
@v_shuffle_v4i16_v4i16__7_u_2_2(ptr addrspace(1) inreg %ptr) { 4755; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_u_2_2: 4756; GFX900: ; %bb.0: 4757; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4758; GFX900-NEXT: ;;#ASMSTART 4759; GFX900-NEXT: ; def v[0:1] 4760; GFX900-NEXT: ;;#ASMEND 4761; GFX900-NEXT: ;;#ASMSTART 4762; GFX900-NEXT: ; def v[2:3] 4763; GFX900-NEXT: ;;#ASMEND 4764; GFX900-NEXT: v_alignbit_b32 v0, s4, v3, 16 4765; GFX900-NEXT: s_mov_b32 s4, 0x5040100 4766; GFX900-NEXT: v_mov_b32_e32 v4, 0 4767; GFX900-NEXT: v_perm_b32 v1, v1, v1, s4 4768; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 4769; GFX900-NEXT: s_waitcnt vmcnt(0) 4770; GFX900-NEXT: s_setpc_b64 s[30:31] 4771; 4772; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_u_2_2: 4773; GFX90A: ; %bb.0: 4774; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4775; GFX90A-NEXT: ;;#ASMSTART 4776; GFX90A-NEXT: ; def v[0:1] 4777; GFX90A-NEXT: ;;#ASMEND 4778; GFX90A-NEXT: ;;#ASMSTART 4779; GFX90A-NEXT: ; def v[2:3] 4780; GFX90A-NEXT: ;;#ASMEND 4781; GFX90A-NEXT: v_alignbit_b32 v0, s4, v3, 16 4782; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 4783; GFX90A-NEXT: v_mov_b32_e32 v4, 0 4784; GFX90A-NEXT: v_perm_b32 v1, v1, v1, s4 4785; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 4786; GFX90A-NEXT: s_waitcnt vmcnt(0) 4787; GFX90A-NEXT: s_setpc_b64 s[30:31] 4788; 4789; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_u_2_2: 4790; GFX940: ; %bb.0: 4791; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4792; GFX940-NEXT: ;;#ASMSTART 4793; GFX940-NEXT: ; def v[0:1] 4794; GFX940-NEXT: ;;#ASMEND 4795; GFX940-NEXT: s_mov_b32 s2, 0x5040100 4796; GFX940-NEXT: v_mov_b32_e32 v4, 0 4797; GFX940-NEXT: ;;#ASMSTART 4798; GFX940-NEXT: ; def v[2:3] 4799; GFX940-NEXT: ;;#ASMEND 4800; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 4801; GFX940-NEXT: v_alignbit_b32 v0, s0, v3, 16 4802; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 4803; GFX940-NEXT: s_waitcnt vmcnt(0) 4804; GFX940-NEXT: s_setpc_b64 s[30:31] 4805 %vec0 = call 
<4 x i16> asm "; def $0", "=v"() 4806 %vec1 = call <4 x i16> asm "; def $0", "=v"() 4807 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 poison, i32 2, i32 2> 4808 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 4809 ret void 4810} 4811 4812define void @v_shuffle_v4i16_v4i16__7_0_2_2(ptr addrspace(1) inreg %ptr) { 4813; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_0_2_2: 4814; GFX900: ; %bb.0: 4815; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4816; GFX900-NEXT: ;;#ASMSTART 4817; GFX900-NEXT: ; def v[0:1] 4818; GFX900-NEXT: ;;#ASMEND 4819; GFX900-NEXT: s_mov_b32 s4, 0x5040100 4820; GFX900-NEXT: v_mov_b32_e32 v4, 0 4821; GFX900-NEXT: ;;#ASMSTART 4822; GFX900-NEXT: ; def v[2:3] 4823; GFX900-NEXT: ;;#ASMEND 4824; GFX900-NEXT: v_alignbit_b32 v0, v0, v3, 16 4825; GFX900-NEXT: v_perm_b32 v1, v1, v1, s4 4826; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 4827; GFX900-NEXT: s_waitcnt vmcnt(0) 4828; GFX900-NEXT: s_setpc_b64 s[30:31] 4829; 4830; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_0_2_2: 4831; GFX90A: ; %bb.0: 4832; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4833; GFX90A-NEXT: ;;#ASMSTART 4834; GFX90A-NEXT: ; def v[0:1] 4835; GFX90A-NEXT: ;;#ASMEND 4836; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 4837; GFX90A-NEXT: v_mov_b32_e32 v4, 0 4838; GFX90A-NEXT: ;;#ASMSTART 4839; GFX90A-NEXT: ; def v[2:3] 4840; GFX90A-NEXT: ;;#ASMEND 4841; GFX90A-NEXT: v_alignbit_b32 v0, v0, v3, 16 4842; GFX90A-NEXT: v_perm_b32 v1, v1, v1, s4 4843; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 4844; GFX90A-NEXT: s_waitcnt vmcnt(0) 4845; GFX90A-NEXT: s_setpc_b64 s[30:31] 4846; 4847; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_0_2_2: 4848; GFX940: ; %bb.0: 4849; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4850; GFX940-NEXT: ;;#ASMSTART 4851; GFX940-NEXT: ; def v[0:1] 4852; GFX940-NEXT: ;;#ASMEND 4853; GFX940-NEXT: s_mov_b32 s2, 0x5040100 4854; GFX940-NEXT: v_mov_b32_e32 v4, 0 4855; GFX940-NEXT: ;;#ASMSTART 4856; GFX940-NEXT: ; def 
v[2:3] 4857; GFX940-NEXT: ;;#ASMEND 4858; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 4859; GFX940-NEXT: v_alignbit_b32 v0, v0, v3, 16 4860; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 4861; GFX940-NEXT: s_waitcnt vmcnt(0) 4862; GFX940-NEXT: s_setpc_b64 s[30:31] 4863 %vec0 = call <4 x i16> asm "; def $0", "=v"() 4864 %vec1 = call <4 x i16> asm "; def $0", "=v"() 4865 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 0, i32 2, i32 2> 4866 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 4867 ret void 4868} 4869 4870define void @v_shuffle_v4i16_v4i16__7_1_2_2(ptr addrspace(1) inreg %ptr) { 4871; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_1_2_2: 4872; GFX900: ; %bb.0: 4873; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4874; GFX900-NEXT: ;;#ASMSTART 4875; GFX900-NEXT: ; def v[0:1] 4876; GFX900-NEXT: ;;#ASMEND 4877; GFX900-NEXT: s_mov_b32 s4, 0x7060302 4878; GFX900-NEXT: ;;#ASMSTART 4879; GFX900-NEXT: ; def v[2:3] 4880; GFX900-NEXT: ;;#ASMEND 4881; GFX900-NEXT: v_perm_b32 v0, v0, v3, s4 4882; GFX900-NEXT: s_mov_b32 s4, 0x5040100 4883; GFX900-NEXT: v_mov_b32_e32 v4, 0 4884; GFX900-NEXT: v_perm_b32 v1, v1, v1, s4 4885; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 4886; GFX900-NEXT: s_waitcnt vmcnt(0) 4887; GFX900-NEXT: s_setpc_b64 s[30:31] 4888; 4889; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_1_2_2: 4890; GFX90A: ; %bb.0: 4891; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4892; GFX90A-NEXT: ;;#ASMSTART 4893; GFX90A-NEXT: ; def v[0:1] 4894; GFX90A-NEXT: ;;#ASMEND 4895; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 4896; GFX90A-NEXT: ;;#ASMSTART 4897; GFX90A-NEXT: ; def v[2:3] 4898; GFX90A-NEXT: ;;#ASMEND 4899; GFX90A-NEXT: v_perm_b32 v0, v0, v3, s4 4900; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 4901; GFX90A-NEXT: v_mov_b32_e32 v4, 0 4902; GFX90A-NEXT: v_perm_b32 v1, v1, v1, s4 4903; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 4904; GFX90A-NEXT: s_waitcnt vmcnt(0) 4905; GFX90A-NEXT: s_setpc_b64 s[30:31] 4906; 
4907; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_1_2_2: 4908; GFX940: ; %bb.0: 4909; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4910; GFX940-NEXT: ;;#ASMSTART 4911; GFX940-NEXT: ; def v[0:1] 4912; GFX940-NEXT: ;;#ASMEND 4913; GFX940-NEXT: s_mov_b32 s2, 0x7060302 4914; GFX940-NEXT: ;;#ASMSTART 4915; GFX940-NEXT: ; def v[2:3] 4916; GFX940-NEXT: ;;#ASMEND 4917; GFX940-NEXT: v_mov_b32_e32 v4, 0 4918; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 4919; GFX940-NEXT: s_mov_b32 s2, 0x5040100 4920; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 4921; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 4922; GFX940-NEXT: s_waitcnt vmcnt(0) 4923; GFX940-NEXT: s_setpc_b64 s[30:31] 4924 %vec0 = call <4 x i16> asm "; def $0", "=v"() 4925 %vec1 = call <4 x i16> asm "; def $0", "=v"() 4926 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 1, i32 2, i32 2> 4927 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 4928 ret void 4929} 4930 4931define void @v_shuffle_v4i16_v4i16__7_3_2_2(ptr addrspace(1) inreg %ptr) { 4932; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_3_2_2: 4933; GFX900: ; %bb.0: 4934; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4935; GFX900-NEXT: ;;#ASMSTART 4936; GFX900-NEXT: ; def v[0:1] 4937; GFX900-NEXT: ;;#ASMEND 4938; GFX900-NEXT: s_mov_b32 s4, 0x7060302 4939; GFX900-NEXT: ;;#ASMSTART 4940; GFX900-NEXT: ; def v[2:3] 4941; GFX900-NEXT: ;;#ASMEND 4942; GFX900-NEXT: v_perm_b32 v0, v1, v3, s4 4943; GFX900-NEXT: s_mov_b32 s4, 0x5040100 4944; GFX900-NEXT: v_mov_b32_e32 v4, 0 4945; GFX900-NEXT: v_perm_b32 v1, v1, v1, s4 4946; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 4947; GFX900-NEXT: s_waitcnt vmcnt(0) 4948; GFX900-NEXT: s_setpc_b64 s[30:31] 4949; 4950; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_3_2_2: 4951; GFX90A: ; %bb.0: 4952; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4953; GFX90A-NEXT: ;;#ASMSTART 4954; GFX90A-NEXT: ; def v[0:1] 4955; GFX90A-NEXT: ;;#ASMEND 4956; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 4957; 
GFX90A-NEXT: ;;#ASMSTART 4958; GFX90A-NEXT: ; def v[2:3] 4959; GFX90A-NEXT: ;;#ASMEND 4960; GFX90A-NEXT: v_perm_b32 v0, v1, v3, s4 4961; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 4962; GFX90A-NEXT: v_mov_b32_e32 v4, 0 4963; GFX90A-NEXT: v_perm_b32 v1, v1, v1, s4 4964; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 4965; GFX90A-NEXT: s_waitcnt vmcnt(0) 4966; GFX90A-NEXT: s_setpc_b64 s[30:31] 4967; 4968; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_3_2_2: 4969; GFX940: ; %bb.0: 4970; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4971; GFX940-NEXT: ;;#ASMSTART 4972; GFX940-NEXT: ; def v[0:1] 4973; GFX940-NEXT: ;;#ASMEND 4974; GFX940-NEXT: s_mov_b32 s2, 0x7060302 4975; GFX940-NEXT: ;;#ASMSTART 4976; GFX940-NEXT: ; def v[2:3] 4977; GFX940-NEXT: ;;#ASMEND 4978; GFX940-NEXT: v_mov_b32_e32 v4, 0 4979; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 4980; GFX940-NEXT: s_mov_b32 s2, 0x5040100 4981; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 4982; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 4983; GFX940-NEXT: s_waitcnt vmcnt(0) 4984; GFX940-NEXT: s_setpc_b64 s[30:31] 4985 %vec0 = call <4 x i16> asm "; def $0", "=v"() 4986 %vec1 = call <4 x i16> asm "; def $0", "=v"() 4987 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 3, i32 2, i32 2> 4988 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 4989 ret void 4990} 4991 4992define void @v_shuffle_v4i16_v4i16__7_4_2_2(ptr addrspace(1) inreg %ptr) { 4993; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_4_2_2: 4994; GFX900: ; %bb.0: 4995; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4996; GFX900-NEXT: ;;#ASMSTART 4997; GFX900-NEXT: ; def v[0:1] 4998; GFX900-NEXT: ;;#ASMEND 4999; GFX900-NEXT: s_mov_b32 s4, 0x5040100 5000; GFX900-NEXT: v_mov_b32_e32 v4, 0 5001; GFX900-NEXT: ;;#ASMSTART 5002; GFX900-NEXT: ; def v[2:3] 5003; GFX900-NEXT: ;;#ASMEND 5004; GFX900-NEXT: v_alignbit_b32 v0, v2, v3, 16 5005; GFX900-NEXT: v_perm_b32 v1, v1, v1, s4 5006; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], 
s[16:17] 5007; GFX900-NEXT: s_waitcnt vmcnt(0) 5008; GFX900-NEXT: s_setpc_b64 s[30:31] 5009; 5010; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_4_2_2: 5011; GFX90A: ; %bb.0: 5012; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5013; GFX90A-NEXT: ;;#ASMSTART 5014; GFX90A-NEXT: ; def v[0:1] 5015; GFX90A-NEXT: ;;#ASMEND 5016; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 5017; GFX90A-NEXT: v_mov_b32_e32 v4, 0 5018; GFX90A-NEXT: ;;#ASMSTART 5019; GFX90A-NEXT: ; def v[2:3] 5020; GFX90A-NEXT: ;;#ASMEND 5021; GFX90A-NEXT: v_alignbit_b32 v0, v2, v3, 16 5022; GFX90A-NEXT: v_perm_b32 v1, v1, v1, s4 5023; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 5024; GFX90A-NEXT: s_waitcnt vmcnt(0) 5025; GFX90A-NEXT: s_setpc_b64 s[30:31] 5026; 5027; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_4_2_2: 5028; GFX940: ; %bb.0: 5029; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5030; GFX940-NEXT: ;;#ASMSTART 5031; GFX940-NEXT: ; def v[0:1] 5032; GFX940-NEXT: ;;#ASMEND 5033; GFX940-NEXT: s_mov_b32 s2, 0x5040100 5034; GFX940-NEXT: v_mov_b32_e32 v4, 0 5035; GFX940-NEXT: ;;#ASMSTART 5036; GFX940-NEXT: ; def v[2:3] 5037; GFX940-NEXT: ;;#ASMEND 5038; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 5039; GFX940-NEXT: v_alignbit_b32 v0, v2, v3, 16 5040; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 5041; GFX940-NEXT: s_waitcnt vmcnt(0) 5042; GFX940-NEXT: s_setpc_b64 s[30:31] 5043 %vec0 = call <4 x i16> asm "; def $0", "=v"() 5044 %vec1 = call <4 x i16> asm "; def $0", "=v"() 5045 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 4, i32 2, i32 2> 5046 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 5047 ret void 5048} 5049 5050define void @v_shuffle_v4i16_v4i16__7_5_2_2(ptr addrspace(1) inreg %ptr) { 5051; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_5_2_2: 5052; GFX900: ; %bb.0: 5053; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5054; GFX900-NEXT: ;;#ASMSTART 5055; GFX900-NEXT: ; def v[0:1] 5056; GFX900-NEXT: ;;#ASMEND 5057; GFX900-NEXT: s_mov_b32 s4, 
0x7060302 5058; GFX900-NEXT: ;;#ASMSTART 5059; GFX900-NEXT: ; def v[2:3] 5060; GFX900-NEXT: ;;#ASMEND 5061; GFX900-NEXT: v_perm_b32 v0, v2, v3, s4 5062; GFX900-NEXT: s_mov_b32 s4, 0x5040100 5063; GFX900-NEXT: v_mov_b32_e32 v4, 0 5064; GFX900-NEXT: v_perm_b32 v1, v1, v1, s4 5065; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 5066; GFX900-NEXT: s_waitcnt vmcnt(0) 5067; GFX900-NEXT: s_setpc_b64 s[30:31] 5068; 5069; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_5_2_2: 5070; GFX90A: ; %bb.0: 5071; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5072; GFX90A-NEXT: ;;#ASMSTART 5073; GFX90A-NEXT: ; def v[0:1] 5074; GFX90A-NEXT: ;;#ASMEND 5075; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 5076; GFX90A-NEXT: ;;#ASMSTART 5077; GFX90A-NEXT: ; def v[2:3] 5078; GFX90A-NEXT: ;;#ASMEND 5079; GFX90A-NEXT: v_perm_b32 v0, v2, v3, s4 5080; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 5081; GFX90A-NEXT: v_mov_b32_e32 v4, 0 5082; GFX90A-NEXT: v_perm_b32 v1, v1, v1, s4 5083; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 5084; GFX90A-NEXT: s_waitcnt vmcnt(0) 5085; GFX90A-NEXT: s_setpc_b64 s[30:31] 5086; 5087; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_5_2_2: 5088; GFX940: ; %bb.0: 5089; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5090; GFX940-NEXT: ;;#ASMSTART 5091; GFX940-NEXT: ; def v[0:1] 5092; GFX940-NEXT: ;;#ASMEND 5093; GFX940-NEXT: s_mov_b32 s2, 0x7060302 5094; GFX940-NEXT: ;;#ASMSTART 5095; GFX940-NEXT: ; def v[2:3] 5096; GFX940-NEXT: ;;#ASMEND 5097; GFX940-NEXT: v_mov_b32_e32 v4, 0 5098; GFX940-NEXT: v_perm_b32 v0, v2, v3, s2 5099; GFX940-NEXT: s_mov_b32 s2, 0x5040100 5100; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 5101; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 5102; GFX940-NEXT: s_waitcnt vmcnt(0) 5103; GFX940-NEXT: s_setpc_b64 s[30:31] 5104 %vec0 = call <4 x i16> asm "; def $0", "=v"() 5105 %vec1 = call <4 x i16> asm "; def $0", "=v"() 5106 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 5, i32 2, i32 2> 5107 store <4 x i16> 
%shuf, ptr addrspace(1) %ptr, align 8 5108 ret void 5109} 5110 ; Two-input shuffle, mask <7,6,2,2>: stores {vec1[3], vec1[2], vec0[2], vec0[2]} to %ptr (mask indices 4-7 select from %vec1). 5111define void @v_shuffle_v4i16_v4i16__7_6_2_2(ptr addrspace(1) inreg %ptr) { 5112; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_6_2_2: 5113; GFX900: ; %bb.0: 5114; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5115; GFX900-NEXT: ;;#ASMSTART 5116; GFX900-NEXT: ; def v[0:1] 5117; GFX900-NEXT: ;;#ASMEND 5118; GFX900-NEXT: s_mov_b32 s4, 0x5040100 5119; GFX900-NEXT: v_mov_b32_e32 v4, 0 5120; GFX900-NEXT: ;;#ASMSTART 5121; GFX900-NEXT: ; def v[2:3] 5122; GFX900-NEXT: ;;#ASMEND 5123; GFX900-NEXT: v_perm_b32 v1, v1, v1, s4 5124; GFX900-NEXT: v_alignbit_b32 v0, v3, v3, 16 5125; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 5126; GFX900-NEXT: s_waitcnt vmcnt(0) 5127; GFX900-NEXT: s_setpc_b64 s[30:31] 5128; 5129; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_6_2_2: 5130; GFX90A: ; %bb.0: 5131; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5132; GFX90A-NEXT: ;;#ASMSTART 5133; GFX90A-NEXT: ; def v[0:1] 5134; GFX90A-NEXT: ;;#ASMEND 5135; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 5136; GFX90A-NEXT: v_mov_b32_e32 v4, 0 5137; GFX90A-NEXT: ;;#ASMSTART 5138; GFX90A-NEXT: ; def v[2:3] 5139; GFX90A-NEXT: ;;#ASMEND 5140; GFX90A-NEXT: v_perm_b32 v1, v1, v1, s4 5141; GFX90A-NEXT: v_alignbit_b32 v0, v3, v3, 16 5142; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 5143; GFX90A-NEXT: s_waitcnt vmcnt(0) 5144; GFX90A-NEXT: s_setpc_b64 s[30:31] 5145; 5146; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_6_2_2: 5147; GFX940: ; %bb.0: 5148; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5149; GFX940-NEXT: ;;#ASMSTART 5150; GFX940-NEXT: ; def v[0:1] 5151; GFX940-NEXT: ;;#ASMEND 5152; GFX940-NEXT: s_mov_b32 s2, 0x5040100 5153; GFX940-NEXT: v_mov_b32_e32 v4, 0 5154; GFX940-NEXT: ;;#ASMSTART 5155; GFX940-NEXT: ; def v[2:3] 5156; GFX940-NEXT: ;;#ASMEND 5157; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 5158; GFX940-NEXT: v_alignbit_b32 v0, v3, v3, 16 5159; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 5160; GFX940-NEXT: 
s_waitcnt vmcnt(0) 5161; GFX940-NEXT: s_setpc_b64 s[30:31] 5162 %vec0 = call <4 x i16> asm "; def $0", "=v"() 5163 %vec1 = call <4 x i16> asm "; def $0", "=v"() 5164 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 6, i32 2, i32 2> 5165 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 5166 ret void 5167} 5168 ; Two-input shuffle, mask <7,7,2,2>: stores {vec1[3], vec1[3], vec0[2], vec0[2]} to %ptr (mask indices 4-7 select from %vec1). 5169define void @v_shuffle_v4i16_v4i16__7_7_2_2(ptr addrspace(1) inreg %ptr) { 5170; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_2_2: 5171; GFX900: ; %bb.0: 5172; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5173; GFX900-NEXT: ;;#ASMSTART 5174; GFX900-NEXT: ; def v[0:1] 5175; GFX900-NEXT: ;;#ASMEND 5176; GFX900-NEXT: s_mov_b32 s4, 0x7060302 5177; GFX900-NEXT: ;;#ASMSTART 5178; GFX900-NEXT: ; def v[2:3] 5179; GFX900-NEXT: ;;#ASMEND 5180; GFX900-NEXT: v_perm_b32 v0, v3, v3, s4 5181; GFX900-NEXT: s_mov_b32 s4, 0x5040100 5182; GFX900-NEXT: v_mov_b32_e32 v4, 0 5183; GFX900-NEXT: v_perm_b32 v1, v1, v1, s4 5184; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 5185; GFX900-NEXT: s_waitcnt vmcnt(0) 5186; GFX900-NEXT: s_setpc_b64 s[30:31] 5187; 5188; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_2_2: 5189; GFX90A: ; %bb.0: 5190; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5191; GFX90A-NEXT: ;;#ASMSTART 5192; GFX90A-NEXT: ; def v[0:1] 5193; GFX90A-NEXT: ;;#ASMEND 5194; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 5195; GFX90A-NEXT: ;;#ASMSTART 5196; GFX90A-NEXT: ; def v[2:3] 5197; GFX90A-NEXT: ;;#ASMEND 5198; GFX90A-NEXT: v_perm_b32 v0, v3, v3, s4 5199; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 5200; GFX90A-NEXT: v_mov_b32_e32 v4, 0 5201; GFX90A-NEXT: v_perm_b32 v1, v1, v1, s4 5202; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 5203; GFX90A-NEXT: s_waitcnt vmcnt(0) 5204; GFX90A-NEXT: s_setpc_b64 s[30:31] 5205; 5206; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_2_2: 5207; GFX940: ; %bb.0: 5208; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5209; GFX940-NEXT: ;;#ASMSTART 5210; GFX940-NEXT: ; def v[0:1] 5211; GFX940-NEXT: 
;;#ASMEND 5212; GFX940-NEXT: s_mov_b32 s2, 0x7060302 5213; GFX940-NEXT: ;;#ASMSTART 5214; GFX940-NEXT: ; def v[2:3] 5215; GFX940-NEXT: ;;#ASMEND 5216; GFX940-NEXT: v_mov_b32_e32 v4, 0 5217; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 5218; GFX940-NEXT: s_mov_b32 s2, 0x5040100 5219; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 5220; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 5221; GFX940-NEXT: s_waitcnt vmcnt(0) 5222; GFX940-NEXT: s_setpc_b64 s[30:31] 5223 %vec0 = call <4 x i16> asm "; def $0", "=v"() 5224 %vec1 = call <4 x i16> asm "; def $0", "=v"() 5225 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 2, i32 2> 5226 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 5227 ret void 5228} 5229 ; Two-input shuffle, mask <7,7,poison,2>: stores {vec1[3], vec1[3], poison, vec0[2]} to %ptr; lane 2 is unconstrained. 5230define void @v_shuffle_v4i16_v4i16__7_7_u_2(ptr addrspace(1) inreg %ptr) { 5231; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_u_2: 5232; GFX900: ; %bb.0: 5233; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5234; GFX900-NEXT: ;;#ASMSTART 5235; GFX900-NEXT: ; def v[0:1] 5236; GFX900-NEXT: ;;#ASMEND 5237; GFX900-NEXT: s_mov_b32 s4, 0x7060302 5238; GFX900-NEXT: v_mov_b32_e32 v4, 0 5239; GFX900-NEXT: ;;#ASMSTART 5240; GFX900-NEXT: ; def v[2:3] 5241; GFX900-NEXT: ;;#ASMEND 5242; GFX900-NEXT: v_perm_b32 v0, v3, v3, s4 5243; GFX900-NEXT: v_lshlrev_b32_e32 v1, 16, v1 5244; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 5245; GFX900-NEXT: s_waitcnt vmcnt(0) 5246; GFX900-NEXT: s_setpc_b64 s[30:31] 5247; 5248; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_u_2: 5249; GFX90A: ; %bb.0: 5250; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5251; GFX90A-NEXT: ;;#ASMSTART 5252; GFX90A-NEXT: ; def v[0:1] 5253; GFX90A-NEXT: ;;#ASMEND 5254; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 5255; GFX90A-NEXT: v_mov_b32_e32 v4, 0 5256; GFX90A-NEXT: ;;#ASMSTART 5257; GFX90A-NEXT: ; def v[2:3] 5258; GFX90A-NEXT: ;;#ASMEND 5259; GFX90A-NEXT: v_perm_b32 v0, v3, v3, s4 5260; GFX90A-NEXT: v_lshlrev_b32_e32 v1, 16, v1 5261; GFX90A-NEXT: global_store_dwordx2 
v4, v[0:1], s[16:17] 5262; GFX90A-NEXT: s_waitcnt vmcnt(0) 5263; GFX90A-NEXT: s_setpc_b64 s[30:31] 5264; 5265; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_u_2: 5266; GFX940: ; %bb.0: 5267; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5268; GFX940-NEXT: ;;#ASMSTART 5269; GFX940-NEXT: ; def v[0:1] 5270; GFX940-NEXT: ;;#ASMEND 5271; GFX940-NEXT: s_mov_b32 s2, 0x7060302 5272; GFX940-NEXT: v_mov_b32_e32 v4, 0 5273; GFX940-NEXT: ;;#ASMSTART 5274; GFX940-NEXT: ; def v[2:3] 5275; GFX940-NEXT: ;;#ASMEND 5276; GFX940-NEXT: v_lshlrev_b32_e32 v1, 16, v1 5277; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 5278; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 5279; GFX940-NEXT: s_waitcnt vmcnt(0) 5280; GFX940-NEXT: s_setpc_b64 s[30:31] 5281 %vec0 = call <4 x i16> asm "; def $0", "=v"() 5282 %vec1 = call <4 x i16> asm "; def $0", "=v"() 5283 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 poison, i32 2> 5284 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 5285 ret void 5286} 5287 ; Two-input shuffle, mask <7,7,0,2>: stores {vec1[3], vec1[3], vec0[0], vec0[2]} to %ptr (mask indices 4-7 select from %vec1). 5288define void @v_shuffle_v4i16_v4i16__7_7_0_2(ptr addrspace(1) inreg %ptr) { 5289; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_0_2: 5290; GFX900: ; %bb.0: 5291; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5292; GFX900-NEXT: ;;#ASMSTART 5293; GFX900-NEXT: ; def v[0:1] 5294; GFX900-NEXT: ;;#ASMEND 5295; GFX900-NEXT: s_mov_b32 s4, 0x5040100 5296; GFX900-NEXT: v_perm_b32 v1, v1, v0, s4 5297; GFX900-NEXT: s_mov_b32 s4, 0x7060302 5298; GFX900-NEXT: v_mov_b32_e32 v4, 0 5299; GFX900-NEXT: ;;#ASMSTART 5300; GFX900-NEXT: ; def v[2:3] 5301; GFX900-NEXT: ;;#ASMEND 5302; GFX900-NEXT: v_perm_b32 v0, v3, v3, s4 5303; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 5304; GFX900-NEXT: s_waitcnt vmcnt(0) 5305; GFX900-NEXT: s_setpc_b64 s[30:31] 5306; 5307; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_0_2: 5308; GFX90A: ; %bb.0: 5309; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5310; GFX90A-NEXT: ;;#ASMSTART 5311; GFX90A-NEXT: ; def v[0:1] 5312; 
GFX90A-NEXT: ;;#ASMEND 5313; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 5314; GFX90A-NEXT: v_perm_b32 v1, v1, v0, s4 5315; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 5316; GFX90A-NEXT: v_mov_b32_e32 v4, 0 5317; GFX90A-NEXT: ;;#ASMSTART 5318; GFX90A-NEXT: ; def v[2:3] 5319; GFX90A-NEXT: ;;#ASMEND 5320; GFX90A-NEXT: v_perm_b32 v0, v3, v3, s4 5321; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 5322; GFX90A-NEXT: s_waitcnt vmcnt(0) 5323; GFX90A-NEXT: s_setpc_b64 s[30:31] 5324; 5325; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_0_2: 5326; GFX940: ; %bb.0: 5327; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5328; GFX940-NEXT: ;;#ASMSTART 5329; GFX940-NEXT: ; def v[0:1] 5330; GFX940-NEXT: ;;#ASMEND 5331; GFX940-NEXT: s_mov_b32 s2, 0x5040100 5332; GFX940-NEXT: v_perm_b32 v1, v1, v0, s2 5333; GFX940-NEXT: s_mov_b32 s2, 0x7060302 5334; GFX940-NEXT: v_mov_b32_e32 v4, 0 5335; GFX940-NEXT: ;;#ASMSTART 5336; GFX940-NEXT: ; def v[2:3] 5337; GFX940-NEXT: ;;#ASMEND 5338; GFX940-NEXT: s_nop 0 5339; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 5340; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 5341; GFX940-NEXT: s_waitcnt vmcnt(0) 5342; GFX940-NEXT: s_setpc_b64 s[30:31] 5343 %vec0 = call <4 x i16> asm "; def $0", "=v"() 5344 %vec1 = call <4 x i16> asm "; def $0", "=v"() 5345 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 0, i32 2> 5346 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 5347 ret void 5348} 5349 ; Two-input shuffle, mask <7,7,1,2>: stores {vec1[3], vec1[3], vec0[1], vec0[2]} to %ptr (mask indices 4-7 select from %vec1). 5350define void @v_shuffle_v4i16_v4i16__7_7_1_2(ptr addrspace(1) inreg %ptr) { 5351; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_1_2: 5352; GFX900: ; %bb.0: 5353; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5354; GFX900-NEXT: ;;#ASMSTART 5355; GFX900-NEXT: ; def v[2:3] 5356; GFX900-NEXT: ;;#ASMEND 5357; GFX900-NEXT: s_mov_b32 s4, 0x7060302 5358; GFX900-NEXT: v_mov_b32_e32 v4, 0 5359; GFX900-NEXT: ;;#ASMSTART 5360; GFX900-NEXT: ; def v[0:1] 5361; GFX900-NEXT: ;;#ASMEND 5362; GFX900-NEXT: v_perm_b32 v2, v3, v3, s4 5363; 
GFX900-NEXT: v_alignbit_b32 v3, v1, v0, 16 5364; GFX900-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17] 5365; GFX900-NEXT: s_waitcnt vmcnt(0) 5366; GFX900-NEXT: s_setpc_b64 s[30:31] 5367; 5368; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_1_2: 5369; GFX90A: ; %bb.0: 5370; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5371; GFX90A-NEXT: ;;#ASMSTART 5372; GFX90A-NEXT: ; def v[2:3] 5373; GFX90A-NEXT: ;;#ASMEND 5374; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 5375; GFX90A-NEXT: v_mov_b32_e32 v4, 0 5376; GFX90A-NEXT: ;;#ASMSTART 5377; GFX90A-NEXT: ; def v[0:1] 5378; GFX90A-NEXT: ;;#ASMEND 5379; GFX90A-NEXT: v_perm_b32 v2, v3, v3, s4 5380; GFX90A-NEXT: v_alignbit_b32 v3, v1, v0, 16 5381; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17] 5382; GFX90A-NEXT: s_waitcnt vmcnt(0) 5383; GFX90A-NEXT: s_setpc_b64 s[30:31] 5384; 5385; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_1_2: 5386; GFX940: ; %bb.0: 5387; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5388; GFX940-NEXT: ;;#ASMSTART 5389; GFX940-NEXT: ; def v[2:3] 5390; GFX940-NEXT: ;;#ASMEND 5391; GFX940-NEXT: s_mov_b32 s2, 0x7060302 5392; GFX940-NEXT: v_mov_b32_e32 v4, 0 5393; GFX940-NEXT: ;;#ASMSTART 5394; GFX940-NEXT: ; def v[0:1] 5395; GFX940-NEXT: ;;#ASMEND 5396; GFX940-NEXT: v_perm_b32 v2, v3, v3, s2 5397; GFX940-NEXT: v_alignbit_b32 v3, v1, v0, 16 5398; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 5399; GFX940-NEXT: s_waitcnt vmcnt(0) 5400; GFX940-NEXT: s_setpc_b64 s[30:31] 5401 %vec0 = call <4 x i16> asm "; def $0", "=v"() 5402 %vec1 = call <4 x i16> asm "; def $0", "=v"() 5403 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 1, i32 2> 5404 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 5405 ret void 5406} 5407 ; Two-input shuffle, mask <7,7,3,2>: stores {vec1[3], vec1[3], vec0[3], vec0[2]} to %ptr (mask indices 4-7 select from %vec1). 5408define void @v_shuffle_v4i16_v4i16__7_7_3_2(ptr addrspace(1) inreg %ptr) { 5409; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_3_2: 5410; GFX900: ; %bb.0: 5411; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5412; GFX900-NEXT: ;;#ASMSTART 
5413; GFX900-NEXT: ; def v[0:1] 5414; GFX900-NEXT: ;;#ASMEND 5415; GFX900-NEXT: s_mov_b32 s4, 0x7060302 5416; GFX900-NEXT: v_mov_b32_e32 v4, 0 5417; GFX900-NEXT: ;;#ASMSTART 5418; GFX900-NEXT: ; def v[2:3] 5419; GFX900-NEXT: ;;#ASMEND 5420; GFX900-NEXT: v_perm_b32 v0, v3, v3, s4 5421; GFX900-NEXT: v_alignbit_b32 v1, v1, v1, 16 5422; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 5423; GFX900-NEXT: s_waitcnt vmcnt(0) 5424; GFX900-NEXT: s_setpc_b64 s[30:31] 5425; 5426; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_3_2: 5427; GFX90A: ; %bb.0: 5428; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5429; GFX90A-NEXT: ;;#ASMSTART 5430; GFX90A-NEXT: ; def v[0:1] 5431; GFX90A-NEXT: ;;#ASMEND 5432; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 5433; GFX90A-NEXT: v_mov_b32_e32 v4, 0 5434; GFX90A-NEXT: ;;#ASMSTART 5435; GFX90A-NEXT: ; def v[2:3] 5436; GFX90A-NEXT: ;;#ASMEND 5437; GFX90A-NEXT: v_perm_b32 v0, v3, v3, s4 5438; GFX90A-NEXT: v_alignbit_b32 v1, v1, v1, 16 5439; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 5440; GFX90A-NEXT: s_waitcnt vmcnt(0) 5441; GFX90A-NEXT: s_setpc_b64 s[30:31] 5442; 5443; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_3_2: 5444; GFX940: ; %bb.0: 5445; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5446; GFX940-NEXT: ;;#ASMSTART 5447; GFX940-NEXT: ; def v[0:1] 5448; GFX940-NEXT: ;;#ASMEND 5449; GFX940-NEXT: s_mov_b32 s2, 0x7060302 5450; GFX940-NEXT: v_mov_b32_e32 v4, 0 5451; GFX940-NEXT: ;;#ASMSTART 5452; GFX940-NEXT: ; def v[2:3] 5453; GFX940-NEXT: ;;#ASMEND 5454; GFX940-NEXT: v_alignbit_b32 v1, v1, v1, 16 5455; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 5456; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 5457; GFX940-NEXT: s_waitcnt vmcnt(0) 5458; GFX940-NEXT: s_setpc_b64 s[30:31] 5459 %vec0 = call <4 x i16> asm "; def $0", "=v"() 5460 %vec1 = call <4 x i16> asm "; def $0", "=v"() 5461 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 3, i32 2> 5462 store <4 x i16> %shuf, ptr addrspace(1) 
%ptr, align 8 5463 ret void 5464} 5465 ; Two-input shuffle, mask <7,7,4,2>: stores {vec1[3], vec1[3], vec1[0], vec0[2]} to %ptr (mask indices 4-7 select from %vec1). 5466define void @v_shuffle_v4i16_v4i16__7_7_4_2(ptr addrspace(1) inreg %ptr) { 5467; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_4_2: 5468; GFX900: ; %bb.0: 5469; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5470; GFX900-NEXT: ;;#ASMSTART 5471; GFX900-NEXT: ; def v[0:1] 5472; GFX900-NEXT: ;;#ASMEND 5473; GFX900-NEXT: s_mov_b32 s4, 0x5040100 5474; GFX900-NEXT: ;;#ASMSTART 5475; GFX900-NEXT: ; def v[2:3] 5476; GFX900-NEXT: ;;#ASMEND 5477; GFX900-NEXT: v_perm_b32 v1, v1, v2, s4 5478; GFX900-NEXT: s_mov_b32 s4, 0x7060302 5479; GFX900-NEXT: v_mov_b32_e32 v4, 0 5480; GFX900-NEXT: v_perm_b32 v0, v3, v3, s4 5481; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 5482; GFX900-NEXT: s_waitcnt vmcnt(0) 5483; GFX900-NEXT: s_setpc_b64 s[30:31] 5484; 5485; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_4_2: 5486; GFX90A: ; %bb.0: 5487; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5488; GFX90A-NEXT: ;;#ASMSTART 5489; GFX90A-NEXT: ; def v[0:1] 5490; GFX90A-NEXT: ;;#ASMEND 5491; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 5492; GFX90A-NEXT: ;;#ASMSTART 5493; GFX90A-NEXT: ; def v[2:3] 5494; GFX90A-NEXT: ;;#ASMEND 5495; GFX90A-NEXT: v_perm_b32 v1, v1, v2, s4 5496; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 5497; GFX90A-NEXT: v_mov_b32_e32 v4, 0 5498; GFX90A-NEXT: v_perm_b32 v0, v3, v3, s4 5499; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 5500; GFX90A-NEXT: s_waitcnt vmcnt(0) 5501; GFX90A-NEXT: s_setpc_b64 s[30:31] 5502; 5503; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_4_2: 5504; GFX940: ; %bb.0: 5505; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5506; GFX940-NEXT: ;;#ASMSTART 5507; GFX940-NEXT: ; def v[0:1] 5508; GFX940-NEXT: ;;#ASMEND 5509; GFX940-NEXT: s_mov_b32 s2, 0x5040100 5510; GFX940-NEXT: ;;#ASMSTART 5511; GFX940-NEXT: ; def v[2:3] 5512; GFX940-NEXT: ;;#ASMEND 5513; GFX940-NEXT: v_mov_b32_e32 v4, 0 5514; GFX940-NEXT: v_perm_b32 v1, v1, v2, s2 5515; GFX940-NEXT: s_mov_b32 s2, 0x7060302 5516; GFX940-NEXT: v_perm_b32 v0, v3, 
v3, s2 5517; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 5518; GFX940-NEXT: s_waitcnt vmcnt(0) 5519; GFX940-NEXT: s_setpc_b64 s[30:31] 5520 %vec0 = call <4 x i16> asm "; def $0", "=v"() 5521 %vec1 = call <4 x i16> asm "; def $0", "=v"() 5522 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 4, i32 2> 5523 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 5524 ret void 5525} 5526 ; Two-input shuffle, mask <7,7,5,2>: stores {vec1[3], vec1[3], vec1[1], vec0[2]} to %ptr (mask indices 4-7 select from %vec1). 5527define void @v_shuffle_v4i16_v4i16__7_7_5_2(ptr addrspace(1) inreg %ptr) { 5528; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_5_2: 5529; GFX900: ; %bb.0: 5530; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5531; GFX900-NEXT: ;;#ASMSTART 5532; GFX900-NEXT: ; def v[0:1] 5533; GFX900-NEXT: ;;#ASMEND 5534; GFX900-NEXT: s_mov_b32 s4, 0x7060302 5535; GFX900-NEXT: v_mov_b32_e32 v4, 0 5536; GFX900-NEXT: ;;#ASMSTART 5537; GFX900-NEXT: ; def v[2:3] 5538; GFX900-NEXT: ;;#ASMEND 5539; GFX900-NEXT: v_perm_b32 v0, v3, v3, s4 5540; GFX900-NEXT: v_alignbit_b32 v1, v1, v2, 16 5541; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 5542; GFX900-NEXT: s_waitcnt vmcnt(0) 5543; GFX900-NEXT: s_setpc_b64 s[30:31] 5544; 5545; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_5_2: 5546; GFX90A: ; %bb.0: 5547; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5548; GFX90A-NEXT: ;;#ASMSTART 5549; GFX90A-NEXT: ; def v[0:1] 5550; GFX90A-NEXT: ;;#ASMEND 5551; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 5552; GFX90A-NEXT: v_mov_b32_e32 v4, 0 5553; GFX90A-NEXT: ;;#ASMSTART 5554; GFX90A-NEXT: ; def v[2:3] 5555; GFX90A-NEXT: ;;#ASMEND 5556; GFX90A-NEXT: v_perm_b32 v0, v3, v3, s4 5557; GFX90A-NEXT: v_alignbit_b32 v1, v1, v2, 16 5558; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 5559; GFX90A-NEXT: s_waitcnt vmcnt(0) 5560; GFX90A-NEXT: s_setpc_b64 s[30:31] 5561; 5562; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_5_2: 5563; GFX940: ; %bb.0: 5564; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5565; GFX940-NEXT: ;;#ASMSTART 5566; GFX940-NEXT: ; def v[0:1] 5567; 
GFX940-NEXT: ;;#ASMEND 5568; GFX940-NEXT: s_mov_b32 s2, 0x7060302 5569; GFX940-NEXT: v_mov_b32_e32 v4, 0 5570; GFX940-NEXT: ;;#ASMSTART 5571; GFX940-NEXT: ; def v[2:3] 5572; GFX940-NEXT: ;;#ASMEND 5573; GFX940-NEXT: s_nop 0 5574; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 5575; GFX940-NEXT: v_alignbit_b32 v1, v1, v2, 16 5576; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 5577; GFX940-NEXT: s_waitcnt vmcnt(0) 5578; GFX940-NEXT: s_setpc_b64 s[30:31] 5579 %vec0 = call <4 x i16> asm "; def $0", "=v"() 5580 %vec1 = call <4 x i16> asm "; def $0", "=v"() 5581 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 5, i32 2> 5582 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 5583 ret void 5584} 5585 ; Two-input shuffle, mask <7,7,6,2>: stores {vec1[3], vec1[3], vec1[2], vec0[2]} to %ptr (mask indices 4-7 select from %vec1). 5586define void @v_shuffle_v4i16_v4i16__7_7_6_2(ptr addrspace(1) inreg %ptr) { 5587; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_6_2: 5588; GFX900: ; %bb.0: 5589; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5590; GFX900-NEXT: ;;#ASMSTART 5591; GFX900-NEXT: ; def v[0:1] 5592; GFX900-NEXT: ;;#ASMEND 5593; GFX900-NEXT: s_mov_b32 s4, 0x5040100 5594; GFX900-NEXT: ;;#ASMSTART 5595; GFX900-NEXT: ; def v[2:3] 5596; GFX900-NEXT: ;;#ASMEND 5597; GFX900-NEXT: v_perm_b32 v1, v1, v3, s4 5598; GFX900-NEXT: s_mov_b32 s4, 0x7060302 5599; GFX900-NEXT: v_mov_b32_e32 v4, 0 5600; GFX900-NEXT: v_perm_b32 v0, v3, v3, s4 5601; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 5602; GFX900-NEXT: s_waitcnt vmcnt(0) 5603; GFX900-NEXT: s_setpc_b64 s[30:31] 5604; 5605; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_6_2: 5606; GFX90A: ; %bb.0: 5607; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5608; GFX90A-NEXT: ;;#ASMSTART 5609; GFX90A-NEXT: ; def v[0:1] 5610; GFX90A-NEXT: ;;#ASMEND 5611; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 5612; GFX90A-NEXT: ;;#ASMSTART 5613; GFX90A-NEXT: ; def v[2:3] 5614; GFX90A-NEXT: ;;#ASMEND 5615; GFX90A-NEXT: v_perm_b32 v1, v1, v3, s4 5616; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 5617; GFX90A-NEXT: v_mov_b32_e32 v4, 0 5618; 
GFX90A-NEXT: v_perm_b32 v0, v3, v3, s4 5619; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 5620; GFX90A-NEXT: s_waitcnt vmcnt(0) 5621; GFX90A-NEXT: s_setpc_b64 s[30:31] 5622; 5623; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_6_2: 5624; GFX940: ; %bb.0: 5625; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5626; GFX940-NEXT: ;;#ASMSTART 5627; GFX940-NEXT: ; def v[0:1] 5628; GFX940-NEXT: ;;#ASMEND 5629; GFX940-NEXT: s_mov_b32 s2, 0x5040100 5630; GFX940-NEXT: ;;#ASMSTART 5631; GFX940-NEXT: ; def v[2:3] 5632; GFX940-NEXT: ;;#ASMEND 5633; GFX940-NEXT: v_mov_b32_e32 v4, 0 5634; GFX940-NEXT: v_perm_b32 v1, v1, v3, s2 5635; GFX940-NEXT: s_mov_b32 s2, 0x7060302 5636; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 5637; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 5638; GFX940-NEXT: s_waitcnt vmcnt(0) 5639; GFX940-NEXT: s_setpc_b64 s[30:31] 5640 %vec0 = call <4 x i16> asm "; def $0", "=v"() 5641 %vec1 = call <4 x i16> asm "; def $0", "=v"() 5642 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 2> 5643 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 5644 ret void 5645} 5646 ; Single-input shuffle (second operand is poison), mask <poison,3,3,3>: lane 0 is unconstrained, lanes 1-3 replicate vec0[3]; result is stored to %ptr. 5647define void @v_shuffle_v4i16_v4i16__u_3_3_3(ptr addrspace(1) inreg %ptr) { 5648; GFX900-LABEL: v_shuffle_v4i16_v4i16__u_3_3_3: 5649; GFX900: ; %bb.0: 5650; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5651; GFX900-NEXT: s_mov_b32 s4, 0x7060302 5652; GFX900-NEXT: v_mov_b32_e32 v3, 0 5653; GFX900-NEXT: ;;#ASMSTART 5654; GFX900-NEXT: ; def v[0:1] 5655; GFX900-NEXT: ;;#ASMEND 5656; GFX900-NEXT: v_perm_b32 v2, v1, v1, s4 5657; GFX900-NEXT: global_store_dwordx2 v3, v[1:2], s[16:17] 5658; GFX900-NEXT: s_waitcnt vmcnt(0) 5659; GFX900-NEXT: s_setpc_b64 s[30:31] 5660; 5661; GFX90A-LABEL: v_shuffle_v4i16_v4i16__u_3_3_3: 5662; GFX90A: ; %bb.0: 5663; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5664; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 5665; GFX90A-NEXT: v_mov_b32_e32 v4, 0 5666; GFX90A-NEXT: ;;#ASMSTART 5667; GFX90A-NEXT: ; def 
v[0:1] 5668; GFX90A-NEXT: ;;#ASMEND 5669; GFX90A-NEXT: v_perm_b32 v3, v1, v1, s4 5670; GFX90A-NEXT: v_mov_b32_e32 v2, v1 5671; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17] 5672; GFX90A-NEXT: s_waitcnt vmcnt(0) 5673; GFX90A-NEXT: s_setpc_b64 s[30:31] 5674; 5675; GFX940-LABEL: v_shuffle_v4i16_v4i16__u_3_3_3: 5676; GFX940: ; %bb.0: 5677; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5678; GFX940-NEXT: s_mov_b32 s2, 0x7060302 5679; GFX940-NEXT: v_mov_b32_e32 v4, 0 5680; GFX940-NEXT: ;;#ASMSTART 5681; GFX940-NEXT: ; def v[0:1] 5682; GFX940-NEXT: ;;#ASMEND 5683; GFX940-NEXT: s_nop 0 5684; GFX940-NEXT: v_perm_b32 v3, v1, v1, s2 5685; GFX940-NEXT: v_mov_b32_e32 v2, v1 5686; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 5687; GFX940-NEXT: s_waitcnt vmcnt(0) 5688; GFX940-NEXT: s_setpc_b64 s[30:31] 5689 %vec0 = call <4 x i16> asm "; def $0", "=v"() 5690 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 poison, i32 3, i32 3, i32 3> 5691 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 5692 ret void 5693} 5694 ; Single-input shuffle (second operand is poison), mask <0,3,3,3>: stores {vec0[0], vec0[3], vec0[3], vec0[3]} to %ptr. 5695define void @v_shuffle_v4i16_v4i16__0_3_3_3(ptr addrspace(1) inreg %ptr) { 5696; GFX900-LABEL: v_shuffle_v4i16_v4i16__0_3_3_3: 5697; GFX900: ; %bb.0: 5698; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5699; GFX900-NEXT: ;;#ASMSTART 5700; GFX900-NEXT: ; def v[0:1] 5701; GFX900-NEXT: ;;#ASMEND 5702; GFX900-NEXT: s_mov_b32 s4, 0xffff 5703; GFX900-NEXT: v_bfi_b32 v0, s4, v0, v1 5704; GFX900-NEXT: s_mov_b32 s4, 0x7060302 5705; GFX900-NEXT: v_mov_b32_e32 v2, 0 5706; GFX900-NEXT: v_perm_b32 v1, v1, v1, s4 5707; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17] 5708; GFX900-NEXT: s_waitcnt vmcnt(0) 5709; GFX900-NEXT: s_setpc_b64 s[30:31] 5710; 5711; GFX90A-LABEL: v_shuffle_v4i16_v4i16__0_3_3_3: 5712; GFX90A: ; %bb.0: 5713; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5714; GFX90A-NEXT: ;;#ASMSTART 5715; GFX90A-NEXT: ; def v[0:1] 5716; GFX90A-NEXT: ;;#ASMEND 5717; GFX90A-NEXT: s_mov_b32 s4, 0xffff 
5718; GFX90A-NEXT: v_bfi_b32 v0, s4, v0, v1 5719; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 5720; GFX90A-NEXT: v_mov_b32_e32 v2, 0 5721; GFX90A-NEXT: v_perm_b32 v1, v1, v1, s4 5722; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17] 5723; GFX90A-NEXT: s_waitcnt vmcnt(0) 5724; GFX90A-NEXT: s_setpc_b64 s[30:31] 5725; 5726; GFX940-LABEL: v_shuffle_v4i16_v4i16__0_3_3_3: 5727; GFX940: ; %bb.0: 5728; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5729; GFX940-NEXT: ;;#ASMSTART 5730; GFX940-NEXT: ; def v[0:1] 5731; GFX940-NEXT: ;;#ASMEND 5732; GFX940-NEXT: s_mov_b32 s2, 0xffff 5733; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v1 5734; GFX940-NEXT: s_mov_b32 s2, 0x7060302 5735; GFX940-NEXT: v_mov_b32_e32 v2, 0 5736; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 5737; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 5738; GFX940-NEXT: s_waitcnt vmcnt(0) 5739; GFX940-NEXT: s_setpc_b64 s[30:31] 5740 %vec0 = call <4 x i16> asm "; def $0", "=v"() 5741 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 0, i32 3, i32 3, i32 3> 5742 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 5743 ret void 5744} 5745 ; Single-input shuffle (second operand is poison), mask <1,3,3,3>: stores {vec0[1], vec0[3], vec0[3], vec0[3]} to %ptr. 5746define void @v_shuffle_v4i16_v4i16__1_3_3_3(ptr addrspace(1) inreg %ptr) { 5747; GFX900-LABEL: v_shuffle_v4i16_v4i16__1_3_3_3: 5748; GFX900: ; %bb.0: 5749; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5750; GFX900-NEXT: ;;#ASMSTART 5751; GFX900-NEXT: ; def v[0:1] 5752; GFX900-NEXT: ;;#ASMEND 5753; GFX900-NEXT: s_mov_b32 s4, 0x7060302 5754; GFX900-NEXT: v_mov_b32_e32 v2, 0 5755; GFX900-NEXT: v_perm_b32 v0, v1, v0, s4 5756; GFX900-NEXT: v_perm_b32 v1, v1, v1, s4 5757; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17] 5758; GFX900-NEXT: s_waitcnt vmcnt(0) 5759; GFX900-NEXT: s_setpc_b64 s[30:31] 5760; 5761; GFX90A-LABEL: v_shuffle_v4i16_v4i16__1_3_3_3: 5762; GFX90A: ; %bb.0: 5763; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5764; GFX90A-NEXT: ;;#ASMSTART 5765; GFX90A-NEXT: ; def v[0:1] 5766; GFX90A-NEXT: ;;#ASMEND 5767; 
GFX90A-NEXT: s_mov_b32 s4, 0x7060302 5768; GFX90A-NEXT: v_mov_b32_e32 v2, 0 5769; GFX90A-NEXT: v_perm_b32 v0, v1, v0, s4 5770; GFX90A-NEXT: v_perm_b32 v1, v1, v1, s4 5771; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17] 5772; GFX90A-NEXT: s_waitcnt vmcnt(0) 5773; GFX90A-NEXT: s_setpc_b64 s[30:31] 5774; 5775; GFX940-LABEL: v_shuffle_v4i16_v4i16__1_3_3_3: 5776; GFX940: ; %bb.0: 5777; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5778; GFX940-NEXT: ;;#ASMSTART 5779; GFX940-NEXT: ; def v[0:1] 5780; GFX940-NEXT: ;;#ASMEND 5781; GFX940-NEXT: s_mov_b32 s2, 0x7060302 5782; GFX940-NEXT: v_mov_b32_e32 v2, 0 5783; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2 5784; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 5785; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 5786; GFX940-NEXT: s_waitcnt vmcnt(0) 5787; GFX940-NEXT: s_setpc_b64 s[30:31] 5788 %vec0 = call <4 x i16> asm "; def $0", "=v"() 5789 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 1, i32 3, i32 3, i32 3> 5790 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 5791 ret void 5792} 5793 ; Single-input shuffle (second operand is poison), mask <2,3,3,3>: stores {vec0[2], vec0[3], vec0[3], vec0[3]} to %ptr. 5794define void @v_shuffle_v4i16_v4i16__2_3_3_3(ptr addrspace(1) inreg %ptr) { 5795; GFX900-LABEL: v_shuffle_v4i16_v4i16__2_3_3_3: 5796; GFX900: ; %bb.0: 5797; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5798; GFX900-NEXT: s_mov_b32 s4, 0x7060302 5799; GFX900-NEXT: v_mov_b32_e32 v3, 0 5800; GFX900-NEXT: ;;#ASMSTART 5801; GFX900-NEXT: ; def v[0:1] 5802; GFX900-NEXT: ;;#ASMEND 5803; GFX900-NEXT: v_perm_b32 v2, v1, v1, s4 5804; GFX900-NEXT: global_store_dwordx2 v3, v[1:2], s[16:17] 5805; GFX900-NEXT: s_waitcnt vmcnt(0) 5806; GFX900-NEXT: s_setpc_b64 s[30:31] 5807; 5808; GFX90A-LABEL: v_shuffle_v4i16_v4i16__2_3_3_3: 5809; GFX90A: ; %bb.0: 5810; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5811; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 5812; GFX90A-NEXT: v_mov_b32_e32 v4, 0 5813; GFX90A-NEXT: ;;#ASMSTART 5814; GFX90A-NEXT: ; def v[0:1] 5815; GFX90A-NEXT: ;;#ASMEND 5816; GFX90A-NEXT: 
v_perm_b32 v3, v1, v1, s4 5817; GFX90A-NEXT: v_mov_b32_e32 v2, v1 5818; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17] 5819; GFX90A-NEXT: s_waitcnt vmcnt(0) 5820; GFX90A-NEXT: s_setpc_b64 s[30:31] 5821; 5822; GFX940-LABEL: v_shuffle_v4i16_v4i16__2_3_3_3: 5823; GFX940: ; %bb.0: 5824; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5825; GFX940-NEXT: s_mov_b32 s2, 0x7060302 5826; GFX940-NEXT: v_mov_b32_e32 v4, 0 5827; GFX940-NEXT: ;;#ASMSTART 5828; GFX940-NEXT: ; def v[0:1] 5829; GFX940-NEXT: ;;#ASMEND 5830; GFX940-NEXT: s_nop 0 5831; GFX940-NEXT: v_perm_b32 v3, v1, v1, s2 5832; GFX940-NEXT: v_mov_b32_e32 v2, v1 5833; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 5834; GFX940-NEXT: s_waitcnt vmcnt(0) 5835; GFX940-NEXT: s_setpc_b64 s[30:31] 5836 %vec0 = call <4 x i16> asm "; def $0", "=v"() 5837 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 2, i32 3, i32 3, i32 3> 5838 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 5839 ret void 5840} 5841 ; Single-input shuffle (second operand is poison), mask <3,3,3,3>: stores vec0[3] replicated into all four lanes to %ptr. 5842define void @v_shuffle_v4i16_v4i16__3_3_3_3(ptr addrspace(1) inreg %ptr) { 5843; GFX900-LABEL: v_shuffle_v4i16_v4i16__3_3_3_3: 5844; GFX900: ; %bb.0: 5845; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5846; GFX900-NEXT: ;;#ASMSTART 5847; GFX900-NEXT: ; def v[0:1] 5848; GFX900-NEXT: ;;#ASMEND 5849; GFX900-NEXT: s_mov_b32 s4, 0x7060302 5850; GFX900-NEXT: v_perm_b32 v0, v1, v1, s4 5851; GFX900-NEXT: v_mov_b32_e32 v2, 0 5852; GFX900-NEXT: v_mov_b32_e32 v1, v0 5853; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17] 5854; GFX900-NEXT: s_waitcnt vmcnt(0) 5855; GFX900-NEXT: s_setpc_b64 s[30:31] 5856; 5857; GFX90A-LABEL: v_shuffle_v4i16_v4i16__3_3_3_3: 5858; GFX90A: ; %bb.0: 5859; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5860; GFX90A-NEXT: ;;#ASMSTART 5861; GFX90A-NEXT: ; def v[0:1] 5862; GFX90A-NEXT: ;;#ASMEND 5863; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 5864; GFX90A-NEXT: v_perm_b32 v0, v1, v1, s4 5865; GFX90A-NEXT: v_mov_b32_e32 v2, 0 5866; 
GFX90A-NEXT: v_mov_b32_e32 v1, v0 5867; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17] 5868; GFX90A-NEXT: s_waitcnt vmcnt(0) 5869; GFX90A-NEXT: s_setpc_b64 s[30:31] 5870; 5871; GFX940-LABEL: v_shuffle_v4i16_v4i16__3_3_3_3: 5872; GFX940: ; %bb.0: 5873; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5874; GFX940-NEXT: ;;#ASMSTART 5875; GFX940-NEXT: ; def v[0:1] 5876; GFX940-NEXT: ;;#ASMEND 5877; GFX940-NEXT: s_mov_b32 s2, 0x7060302 5878; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 5879; GFX940-NEXT: v_mov_b32_e32 v2, 0 5880; GFX940-NEXT: v_mov_b32_e32 v1, v0 5881; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 5882; GFX940-NEXT: s_waitcnt vmcnt(0) 5883; GFX940-NEXT: s_setpc_b64 s[30:31] 5884 %vec0 = call <4 x i16> asm "; def $0", "=v"() 5885 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 5886 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 5887 ret void 5888} 5889 ; Single-input shuffle, mask <4,3,3,3>: index 4 selects lane 0 of the poison second operand, so lane 0 is unconstrained and lanes 1-3 replicate vec0[3]; result is stored to %ptr. 5890define void @v_shuffle_v4i16_v4i16__4_3_3_3(ptr addrspace(1) inreg %ptr) { 5891; GFX900-LABEL: v_shuffle_v4i16_v4i16__4_3_3_3: 5892; GFX900: ; %bb.0: 5893; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5894; GFX900-NEXT: s_mov_b32 s4, 0x7060302 5895; GFX900-NEXT: v_mov_b32_e32 v3, 0 5896; GFX900-NEXT: ;;#ASMSTART 5897; GFX900-NEXT: ; def v[0:1] 5898; GFX900-NEXT: ;;#ASMEND 5899; GFX900-NEXT: v_perm_b32 v2, v1, v1, s4 5900; GFX900-NEXT: global_store_dwordx2 v3, v[1:2], s[16:17] 5901; GFX900-NEXT: s_waitcnt vmcnt(0) 5902; GFX900-NEXT: s_setpc_b64 s[30:31] 5903; 5904; GFX90A-LABEL: v_shuffle_v4i16_v4i16__4_3_3_3: 5905; GFX90A: ; %bb.0: 5906; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5907; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 5908; GFX90A-NEXT: v_mov_b32_e32 v4, 0 5909; GFX90A-NEXT: ;;#ASMSTART 5910; GFX90A-NEXT: ; def v[0:1] 5911; GFX90A-NEXT: ;;#ASMEND 5912; GFX90A-NEXT: v_perm_b32 v3, v1, v1, s4 5913; GFX90A-NEXT: v_mov_b32_e32 v2, v1 5914; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17] 5915; 
GFX90A-NEXT: s_waitcnt vmcnt(0) 5916; GFX90A-NEXT: s_setpc_b64 s[30:31] 5917; 5918; GFX940-LABEL: v_shuffle_v4i16_v4i16__4_3_3_3: 5919; GFX940: ; %bb.0: 5920; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5921; GFX940-NEXT: s_mov_b32 s2, 0x7060302 5922; GFX940-NEXT: v_mov_b32_e32 v4, 0 5923; GFX940-NEXT: ;;#ASMSTART 5924; GFX940-NEXT: ; def v[0:1] 5925; GFX940-NEXT: ;;#ASMEND 5926; GFX940-NEXT: s_nop 0 5927; GFX940-NEXT: v_perm_b32 v3, v1, v1, s2 5928; GFX940-NEXT: v_mov_b32_e32 v2, v1 5929; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 5930; GFX940-NEXT: s_waitcnt vmcnt(0) 5931; GFX940-NEXT: s_setpc_b64 s[30:31] 5932 %vec0 = call <4 x i16> asm "; def $0", "=v"() 5933 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 4, i32 3, i32 3, i32 3> 5934 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 5935 ret void 5936} 5937 5938define void @v_shuffle_v4i16_v4i16__5_3_3_3(ptr addrspace(1) inreg %ptr) { 5939; GFX900-LABEL: v_shuffle_v4i16_v4i16__5_3_3_3: 5940; GFX900: ; %bb.0: 5941; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5942; GFX900-NEXT: ;;#ASMSTART 5943; GFX900-NEXT: ; def v[0:1] 5944; GFX900-NEXT: ;;#ASMEND 5945; GFX900-NEXT: s_mov_b32 s4, 0x7060302 5946; GFX900-NEXT: v_mov_b32_e32 v4, 0 5947; GFX900-NEXT: ;;#ASMSTART 5948; GFX900-NEXT: ; def v[2:3] 5949; GFX900-NEXT: ;;#ASMEND 5950; GFX900-NEXT: v_perm_b32 v0, v1, v2, s4 5951; GFX900-NEXT: v_perm_b32 v1, v1, v1, s4 5952; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 5953; GFX900-NEXT: s_waitcnt vmcnt(0) 5954; GFX900-NEXT: s_setpc_b64 s[30:31] 5955; 5956; GFX90A-LABEL: v_shuffle_v4i16_v4i16__5_3_3_3: 5957; GFX90A: ; %bb.0: 5958; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5959; GFX90A-NEXT: ;;#ASMSTART 5960; GFX90A-NEXT: ; def v[0:1] 5961; GFX90A-NEXT: ;;#ASMEND 5962; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 5963; GFX90A-NEXT: v_mov_b32_e32 v4, 0 5964; GFX90A-NEXT: ;;#ASMSTART 5965; GFX90A-NEXT: ; def v[2:3] 5966; GFX90A-NEXT: ;;#ASMEND 5967; 
GFX90A-NEXT: v_perm_b32 v0, v1, v2, s4 5968; GFX90A-NEXT: v_perm_b32 v1, v1, v1, s4 5969; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 5970; GFX90A-NEXT: s_waitcnt vmcnt(0) 5971; GFX90A-NEXT: s_setpc_b64 s[30:31] 5972; 5973; GFX940-LABEL: v_shuffle_v4i16_v4i16__5_3_3_3: 5974; GFX940: ; %bb.0: 5975; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5976; GFX940-NEXT: ;;#ASMSTART 5977; GFX940-NEXT: ; def v[0:1] 5978; GFX940-NEXT: ;;#ASMEND 5979; GFX940-NEXT: s_mov_b32 s2, 0x7060302 5980; GFX940-NEXT: v_mov_b32_e32 v4, 0 5981; GFX940-NEXT: ;;#ASMSTART 5982; GFX940-NEXT: ; def v[2:3] 5983; GFX940-NEXT: ;;#ASMEND 5984; GFX940-NEXT: s_nop 0 5985; GFX940-NEXT: v_perm_b32 v0, v1, v2, s2 5986; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 5987; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 5988; GFX940-NEXT: s_waitcnt vmcnt(0) 5989; GFX940-NEXT: s_setpc_b64 s[30:31] 5990 %vec0 = call <4 x i16> asm "; def $0", "=v"() 5991 %vec1 = call <4 x i16> asm "; def $0", "=v"() 5992 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 5, i32 3, i32 3, i32 3> 5993 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 5994 ret void 5995} 5996 5997define void @v_shuffle_v4i16_v4i16__6_3_3_3(ptr addrspace(1) inreg %ptr) { 5998; GFX900-LABEL: v_shuffle_v4i16_v4i16__6_3_3_3: 5999; GFX900: ; %bb.0: 6000; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6001; GFX900-NEXT: ;;#ASMSTART 6002; GFX900-NEXT: ; def v[0:1] 6003; GFX900-NEXT: ;;#ASMEND 6004; GFX900-NEXT: s_mov_b32 s4, 0xffff 6005; GFX900-NEXT: ;;#ASMSTART 6006; GFX900-NEXT: ; def v[2:3] 6007; GFX900-NEXT: ;;#ASMEND 6008; GFX900-NEXT: v_bfi_b32 v0, s4, v3, v1 6009; GFX900-NEXT: s_mov_b32 s4, 0x7060302 6010; GFX900-NEXT: v_mov_b32_e32 v4, 0 6011; GFX900-NEXT: v_perm_b32 v1, v1, v1, s4 6012; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 6013; GFX900-NEXT: s_waitcnt vmcnt(0) 6014; GFX900-NEXT: s_setpc_b64 s[30:31] 6015; 6016; GFX90A-LABEL: v_shuffle_v4i16_v4i16__6_3_3_3: 6017; GFX90A: ; 
%bb.0: 6018; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6019; GFX90A-NEXT: ;;#ASMSTART 6020; GFX90A-NEXT: ; def v[0:1] 6021; GFX90A-NEXT: ;;#ASMEND 6022; GFX90A-NEXT: s_mov_b32 s4, 0xffff 6023; GFX90A-NEXT: ;;#ASMSTART 6024; GFX90A-NEXT: ; def v[2:3] 6025; GFX90A-NEXT: ;;#ASMEND 6026; GFX90A-NEXT: v_bfi_b32 v0, s4, v3, v1 6027; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 6028; GFX90A-NEXT: v_mov_b32_e32 v4, 0 6029; GFX90A-NEXT: v_perm_b32 v1, v1, v1, s4 6030; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 6031; GFX90A-NEXT: s_waitcnt vmcnt(0) 6032; GFX90A-NEXT: s_setpc_b64 s[30:31] 6033; 6034; GFX940-LABEL: v_shuffle_v4i16_v4i16__6_3_3_3: 6035; GFX940: ; %bb.0: 6036; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6037; GFX940-NEXT: ;;#ASMSTART 6038; GFX940-NEXT: ; def v[0:1] 6039; GFX940-NEXT: ;;#ASMEND 6040; GFX940-NEXT: s_mov_b32 s2, 0xffff 6041; GFX940-NEXT: ;;#ASMSTART 6042; GFX940-NEXT: ; def v[2:3] 6043; GFX940-NEXT: ;;#ASMEND 6044; GFX940-NEXT: v_mov_b32_e32 v4, 0 6045; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v1 6046; GFX940-NEXT: s_mov_b32 s2, 0x7060302 6047; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 6048; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 6049; GFX940-NEXT: s_waitcnt vmcnt(0) 6050; GFX940-NEXT: s_setpc_b64 s[30:31] 6051 %vec0 = call <4 x i16> asm "; def $0", "=v"() 6052 %vec1 = call <4 x i16> asm "; def $0", "=v"() 6053 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 6, i32 3, i32 3, i32 3> 6054 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 6055 ret void 6056} 6057 6058define void @v_shuffle_v4i16_v4i16__7_3_3_3(ptr addrspace(1) inreg %ptr) { 6059; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_3_3_3: 6060; GFX900: ; %bb.0: 6061; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6062; GFX900-NEXT: ;;#ASMSTART 6063; GFX900-NEXT: ; def v[0:1] 6064; GFX900-NEXT: ;;#ASMEND 6065; GFX900-NEXT: s_mov_b32 s4, 0x7060302 6066; GFX900-NEXT: v_mov_b32_e32 v4, 0 6067; GFX900-NEXT: ;;#ASMSTART 6068; 
GFX900-NEXT: ; def v[2:3] 6069; GFX900-NEXT: ;;#ASMEND 6070; GFX900-NEXT: v_perm_b32 v0, v1, v3, s4 6071; GFX900-NEXT: v_perm_b32 v1, v1, v1, s4 6072; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 6073; GFX900-NEXT: s_waitcnt vmcnt(0) 6074; GFX900-NEXT: s_setpc_b64 s[30:31] 6075; 6076; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_3_3_3: 6077; GFX90A: ; %bb.0: 6078; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6079; GFX90A-NEXT: ;;#ASMSTART 6080; GFX90A-NEXT: ; def v[0:1] 6081; GFX90A-NEXT: ;;#ASMEND 6082; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 6083; GFX90A-NEXT: v_mov_b32_e32 v4, 0 6084; GFX90A-NEXT: ;;#ASMSTART 6085; GFX90A-NEXT: ; def v[2:3] 6086; GFX90A-NEXT: ;;#ASMEND 6087; GFX90A-NEXT: v_perm_b32 v0, v1, v3, s4 6088; GFX90A-NEXT: v_perm_b32 v1, v1, v1, s4 6089; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 6090; GFX90A-NEXT: s_waitcnt vmcnt(0) 6091; GFX90A-NEXT: s_setpc_b64 s[30:31] 6092; 6093; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_3_3_3: 6094; GFX940: ; %bb.0: 6095; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6096; GFX940-NEXT: ;;#ASMSTART 6097; GFX940-NEXT: ; def v[0:1] 6098; GFX940-NEXT: ;;#ASMEND 6099; GFX940-NEXT: s_mov_b32 s2, 0x7060302 6100; GFX940-NEXT: v_mov_b32_e32 v4, 0 6101; GFX940-NEXT: ;;#ASMSTART 6102; GFX940-NEXT: ; def v[2:3] 6103; GFX940-NEXT: ;;#ASMEND 6104; GFX940-NEXT: s_nop 0 6105; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 6106; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 6107; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 6108; GFX940-NEXT: s_waitcnt vmcnt(0) 6109; GFX940-NEXT: s_setpc_b64 s[30:31] 6110 %vec0 = call <4 x i16> asm "; def $0", "=v"() 6111 %vec1 = call <4 x i16> asm "; def $0", "=v"() 6112 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 3, i32 3, i32 3> 6113 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 6114 ret void 6115} 6116 6117define void @v_shuffle_v4i16_v4i16__7_u_3_3(ptr addrspace(1) inreg %ptr) { 6118; GFX900-LABEL: 
v_shuffle_v4i16_v4i16__7_u_3_3: 6119; GFX900: ; %bb.0: 6120; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6121; GFX900-NEXT: ;;#ASMSTART 6122; GFX900-NEXT: ; def v[0:1] 6123; GFX900-NEXT: ;;#ASMEND 6124; GFX900-NEXT: s_mov_b32 s4, 0x7060302 6125; GFX900-NEXT: v_mov_b32_e32 v4, 0 6126; GFX900-NEXT: ;;#ASMSTART 6127; GFX900-NEXT: ; def v[2:3] 6128; GFX900-NEXT: ;;#ASMEND 6129; GFX900-NEXT: v_perm_b32 v1, v1, v1, s4 6130; GFX900-NEXT: v_alignbit_b32 v0, s4, v3, 16 6131; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 6132; GFX900-NEXT: s_waitcnt vmcnt(0) 6133; GFX900-NEXT: s_setpc_b64 s[30:31] 6134; 6135; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_u_3_3: 6136; GFX90A: ; %bb.0: 6137; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6138; GFX90A-NEXT: ;;#ASMSTART 6139; GFX90A-NEXT: ; def v[0:1] 6140; GFX90A-NEXT: ;;#ASMEND 6141; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 6142; GFX90A-NEXT: v_mov_b32_e32 v4, 0 6143; GFX90A-NEXT: ;;#ASMSTART 6144; GFX90A-NEXT: ; def v[2:3] 6145; GFX90A-NEXT: ;;#ASMEND 6146; GFX90A-NEXT: v_perm_b32 v1, v1, v1, s4 6147; GFX90A-NEXT: v_alignbit_b32 v0, s4, v3, 16 6148; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 6149; GFX90A-NEXT: s_waitcnt vmcnt(0) 6150; GFX90A-NEXT: s_setpc_b64 s[30:31] 6151; 6152; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_u_3_3: 6153; GFX940: ; %bb.0: 6154; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6155; GFX940-NEXT: ;;#ASMSTART 6156; GFX940-NEXT: ; def v[0:1] 6157; GFX940-NEXT: ;;#ASMEND 6158; GFX940-NEXT: s_mov_b32 s2, 0x7060302 6159; GFX940-NEXT: v_mov_b32_e32 v4, 0 6160; GFX940-NEXT: ;;#ASMSTART 6161; GFX940-NEXT: ; def v[2:3] 6162; GFX940-NEXT: ;;#ASMEND 6163; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 6164; GFX940-NEXT: v_alignbit_b32 v0, s0, v3, 16 6165; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 6166; GFX940-NEXT: s_waitcnt vmcnt(0) 6167; GFX940-NEXT: s_setpc_b64 s[30:31] 6168 %vec0 = call <4 x i16> asm "; def $0", "=v"() 6169 %vec1 = call <4 x i16> asm "; def $0", "=v"() 
6170 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 poison, i32 3, i32 3> 6171 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 6172 ret void 6173} 6174 6175define void @v_shuffle_v4i16_v4i16__7_0_3_3(ptr addrspace(1) inreg %ptr) { 6176; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_0_3_3: 6177; GFX900: ; %bb.0: 6178; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6179; GFX900-NEXT: ;;#ASMSTART 6180; GFX900-NEXT: ; def v[0:1] 6181; GFX900-NEXT: ;;#ASMEND 6182; GFX900-NEXT: s_mov_b32 s4, 0x7060302 6183; GFX900-NEXT: v_mov_b32_e32 v4, 0 6184; GFX900-NEXT: ;;#ASMSTART 6185; GFX900-NEXT: ; def v[2:3] 6186; GFX900-NEXT: ;;#ASMEND 6187; GFX900-NEXT: v_perm_b32 v1, v1, v1, s4 6188; GFX900-NEXT: v_alignbit_b32 v0, v0, v3, 16 6189; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 6190; GFX900-NEXT: s_waitcnt vmcnt(0) 6191; GFX900-NEXT: s_setpc_b64 s[30:31] 6192; 6193; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_0_3_3: 6194; GFX90A: ; %bb.0: 6195; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6196; GFX90A-NEXT: ;;#ASMSTART 6197; GFX90A-NEXT: ; def v[0:1] 6198; GFX90A-NEXT: ;;#ASMEND 6199; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 6200; GFX90A-NEXT: v_mov_b32_e32 v4, 0 6201; GFX90A-NEXT: ;;#ASMSTART 6202; GFX90A-NEXT: ; def v[2:3] 6203; GFX90A-NEXT: ;;#ASMEND 6204; GFX90A-NEXT: v_perm_b32 v1, v1, v1, s4 6205; GFX90A-NEXT: v_alignbit_b32 v0, v0, v3, 16 6206; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 6207; GFX90A-NEXT: s_waitcnt vmcnt(0) 6208; GFX90A-NEXT: s_setpc_b64 s[30:31] 6209; 6210; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_0_3_3: 6211; GFX940: ; %bb.0: 6212; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6213; GFX940-NEXT: ;;#ASMSTART 6214; GFX940-NEXT: ; def v[0:1] 6215; GFX940-NEXT: ;;#ASMEND 6216; GFX940-NEXT: s_mov_b32 s2, 0x7060302 6217; GFX940-NEXT: v_mov_b32_e32 v4, 0 6218; GFX940-NEXT: ;;#ASMSTART 6219; GFX940-NEXT: ; def v[2:3] 6220; GFX940-NEXT: ;;#ASMEND 6221; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 6222; 
GFX940-NEXT: v_alignbit_b32 v0, v0, v3, 16 6223; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 6224; GFX940-NEXT: s_waitcnt vmcnt(0) 6225; GFX940-NEXT: s_setpc_b64 s[30:31] 6226 %vec0 = call <4 x i16> asm "; def $0", "=v"() 6227 %vec1 = call <4 x i16> asm "; def $0", "=v"() 6228 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 0, i32 3, i32 3> 6229 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 6230 ret void 6231} 6232 6233define void @v_shuffle_v4i16_v4i16__7_1_3_3(ptr addrspace(1) inreg %ptr) { 6234; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_1_3_3: 6235; GFX900: ; %bb.0: 6236; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6237; GFX900-NEXT: ;;#ASMSTART 6238; GFX900-NEXT: ; def v[0:1] 6239; GFX900-NEXT: ;;#ASMEND 6240; GFX900-NEXT: s_mov_b32 s4, 0x7060302 6241; GFX900-NEXT: v_mov_b32_e32 v4, 0 6242; GFX900-NEXT: ;;#ASMSTART 6243; GFX900-NEXT: ; def v[2:3] 6244; GFX900-NEXT: ;;#ASMEND 6245; GFX900-NEXT: v_perm_b32 v0, v0, v3, s4 6246; GFX900-NEXT: v_perm_b32 v1, v1, v1, s4 6247; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 6248; GFX900-NEXT: s_waitcnt vmcnt(0) 6249; GFX900-NEXT: s_setpc_b64 s[30:31] 6250; 6251; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_1_3_3: 6252; GFX90A: ; %bb.0: 6253; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6254; GFX90A-NEXT: ;;#ASMSTART 6255; GFX90A-NEXT: ; def v[0:1] 6256; GFX90A-NEXT: ;;#ASMEND 6257; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 6258; GFX90A-NEXT: v_mov_b32_e32 v4, 0 6259; GFX90A-NEXT: ;;#ASMSTART 6260; GFX90A-NEXT: ; def v[2:3] 6261; GFX90A-NEXT: ;;#ASMEND 6262; GFX90A-NEXT: v_perm_b32 v0, v0, v3, s4 6263; GFX90A-NEXT: v_perm_b32 v1, v1, v1, s4 6264; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 6265; GFX90A-NEXT: s_waitcnt vmcnt(0) 6266; GFX90A-NEXT: s_setpc_b64 s[30:31] 6267; 6268; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_1_3_3: 6269; GFX940: ; %bb.0: 6270; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6271; GFX940-NEXT: ;;#ASMSTART 6272; 
GFX940-NEXT: ; def v[0:1] 6273; GFX940-NEXT: ;;#ASMEND 6274; GFX940-NEXT: s_mov_b32 s2, 0x7060302 6275; GFX940-NEXT: v_mov_b32_e32 v4, 0 6276; GFX940-NEXT: ;;#ASMSTART 6277; GFX940-NEXT: ; def v[2:3] 6278; GFX940-NEXT: ;;#ASMEND 6279; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 6280; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 6281; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 6282; GFX940-NEXT: s_waitcnt vmcnt(0) 6283; GFX940-NEXT: s_setpc_b64 s[30:31] 6284 %vec0 = call <4 x i16> asm "; def $0", "=v"() 6285 %vec1 = call <4 x i16> asm "; def $0", "=v"() 6286 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 1, i32 3, i32 3> 6287 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 6288 ret void 6289} 6290 6291define void @v_shuffle_v4i16_v4i16__7_2_3_3(ptr addrspace(1) inreg %ptr) { 6292; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_2_3_3: 6293; GFX900: ; %bb.0: 6294; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6295; GFX900-NEXT: ;;#ASMSTART 6296; GFX900-NEXT: ; def v[0:1] 6297; GFX900-NEXT: ;;#ASMEND 6298; GFX900-NEXT: ;;#ASMSTART 6299; GFX900-NEXT: ; def v[2:3] 6300; GFX900-NEXT: ;;#ASMEND 6301; GFX900-NEXT: s_mov_b32 s4, 0x7060302 6302; GFX900-NEXT: v_mov_b32_e32 v4, 0 6303; GFX900-NEXT: v_perm_b32 v2, v1, v1, s4 6304; GFX900-NEXT: v_alignbit_b32 v1, v1, v3, 16 6305; GFX900-NEXT: global_store_dwordx2 v4, v[1:2], s[16:17] 6306; GFX900-NEXT: s_waitcnt vmcnt(0) 6307; GFX900-NEXT: s_setpc_b64 s[30:31] 6308; 6309; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_2_3_3: 6310; GFX90A: ; %bb.0: 6311; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6312; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 6313; GFX90A-NEXT: v_mov_b32_e32 v6, 0 6314; GFX90A-NEXT: ;;#ASMSTART 6315; GFX90A-NEXT: ; def v[0:1] 6316; GFX90A-NEXT: ;;#ASMEND 6317; GFX90A-NEXT: ;;#ASMSTART 6318; GFX90A-NEXT: ; def v[2:3] 6319; GFX90A-NEXT: ;;#ASMEND 6320; GFX90A-NEXT: v_perm_b32 v5, v1, v1, s4 6321; GFX90A-NEXT: v_alignbit_b32 v4, v1, v3, 16 6322; GFX90A-NEXT: 
global_store_dwordx2 v6, v[4:5], s[16:17] 6323; GFX90A-NEXT: s_waitcnt vmcnt(0) 6324; GFX90A-NEXT: s_setpc_b64 s[30:31] 6325; 6326; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_2_3_3: 6327; GFX940: ; %bb.0: 6328; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6329; GFX940-NEXT: s_mov_b32 s2, 0x7060302 6330; GFX940-NEXT: v_mov_b32_e32 v6, 0 6331; GFX940-NEXT: ;;#ASMSTART 6332; GFX940-NEXT: ; def v[0:1] 6333; GFX940-NEXT: ;;#ASMEND 6334; GFX940-NEXT: ;;#ASMSTART 6335; GFX940-NEXT: ; def v[2:3] 6336; GFX940-NEXT: ;;#ASMEND 6337; GFX940-NEXT: s_nop 0 6338; GFX940-NEXT: v_perm_b32 v5, v1, v1, s2 6339; GFX940-NEXT: v_alignbit_b32 v4, v1, v3, 16 6340; GFX940-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] sc0 sc1 6341; GFX940-NEXT: s_waitcnt vmcnt(0) 6342; GFX940-NEXT: s_setpc_b64 s[30:31] 6343 %vec0 = call <4 x i16> asm "; def $0", "=v"() 6344 %vec1 = call <4 x i16> asm "; def $0", "=v"() 6345 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 2, i32 3, i32 3> 6346 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 6347 ret void 6348} 6349 6350define void @v_shuffle_v4i16_v4i16__7_4_3_3(ptr addrspace(1) inreg %ptr) { 6351; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_4_3_3: 6352; GFX900: ; %bb.0: 6353; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6354; GFX900-NEXT: ;;#ASMSTART 6355; GFX900-NEXT: ; def v[0:1] 6356; GFX900-NEXT: ;;#ASMEND 6357; GFX900-NEXT: s_mov_b32 s4, 0x7060302 6358; GFX900-NEXT: v_mov_b32_e32 v4, 0 6359; GFX900-NEXT: ;;#ASMSTART 6360; GFX900-NEXT: ; def v[2:3] 6361; GFX900-NEXT: ;;#ASMEND 6362; GFX900-NEXT: v_perm_b32 v1, v1, v1, s4 6363; GFX900-NEXT: v_alignbit_b32 v0, v2, v3, 16 6364; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 6365; GFX900-NEXT: s_waitcnt vmcnt(0) 6366; GFX900-NEXT: s_setpc_b64 s[30:31] 6367; 6368; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_4_3_3: 6369; GFX90A: ; %bb.0: 6370; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6371; GFX90A-NEXT: ;;#ASMSTART 6372; GFX90A-NEXT: ; def v[0:1] 6373; 
GFX90A-NEXT: ;;#ASMEND 6374; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 6375; GFX90A-NEXT: v_mov_b32_e32 v4, 0 6376; GFX90A-NEXT: ;;#ASMSTART 6377; GFX90A-NEXT: ; def v[2:3] 6378; GFX90A-NEXT: ;;#ASMEND 6379; GFX90A-NEXT: v_perm_b32 v1, v1, v1, s4 6380; GFX90A-NEXT: v_alignbit_b32 v0, v2, v3, 16 6381; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 6382; GFX90A-NEXT: s_waitcnt vmcnt(0) 6383; GFX90A-NEXT: s_setpc_b64 s[30:31] 6384; 6385; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_4_3_3: 6386; GFX940: ; %bb.0: 6387; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6388; GFX940-NEXT: ;;#ASMSTART 6389; GFX940-NEXT: ; def v[0:1] 6390; GFX940-NEXT: ;;#ASMEND 6391; GFX940-NEXT: s_mov_b32 s2, 0x7060302 6392; GFX940-NEXT: v_mov_b32_e32 v4, 0 6393; GFX940-NEXT: ;;#ASMSTART 6394; GFX940-NEXT: ; def v[2:3] 6395; GFX940-NEXT: ;;#ASMEND 6396; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 6397; GFX940-NEXT: v_alignbit_b32 v0, v2, v3, 16 6398; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 6399; GFX940-NEXT: s_waitcnt vmcnt(0) 6400; GFX940-NEXT: s_setpc_b64 s[30:31] 6401 %vec0 = call <4 x i16> asm "; def $0", "=v"() 6402 %vec1 = call <4 x i16> asm "; def $0", "=v"() 6403 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 4, i32 3, i32 3> 6404 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 6405 ret void 6406} 6407 6408define void @v_shuffle_v4i16_v4i16__7_5_3_3(ptr addrspace(1) inreg %ptr) { 6409; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_5_3_3: 6410; GFX900: ; %bb.0: 6411; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6412; GFX900-NEXT: ;;#ASMSTART 6413; GFX900-NEXT: ; def v[0:1] 6414; GFX900-NEXT: ;;#ASMEND 6415; GFX900-NEXT: s_mov_b32 s4, 0x7060302 6416; GFX900-NEXT: v_mov_b32_e32 v4, 0 6417; GFX900-NEXT: ;;#ASMSTART 6418; GFX900-NEXT: ; def v[2:3] 6419; GFX900-NEXT: ;;#ASMEND 6420; GFX900-NEXT: v_perm_b32 v0, v2, v3, s4 6421; GFX900-NEXT: v_perm_b32 v1, v1, v1, s4 6422; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 6423; 
GFX900-NEXT: s_waitcnt vmcnt(0) 6424; GFX900-NEXT: s_setpc_b64 s[30:31] 6425; 6426; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_5_3_3: 6427; GFX90A: ; %bb.0: 6428; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6429; GFX90A-NEXT: ;;#ASMSTART 6430; GFX90A-NEXT: ; def v[0:1] 6431; GFX90A-NEXT: ;;#ASMEND 6432; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 6433; GFX90A-NEXT: v_mov_b32_e32 v4, 0 6434; GFX90A-NEXT: ;;#ASMSTART 6435; GFX90A-NEXT: ; def v[2:3] 6436; GFX90A-NEXT: ;;#ASMEND 6437; GFX90A-NEXT: v_perm_b32 v0, v2, v3, s4 6438; GFX90A-NEXT: v_perm_b32 v1, v1, v1, s4 6439; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 6440; GFX90A-NEXT: s_waitcnt vmcnt(0) 6441; GFX90A-NEXT: s_setpc_b64 s[30:31] 6442; 6443; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_5_3_3: 6444; GFX940: ; %bb.0: 6445; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6446; GFX940-NEXT: ;;#ASMSTART 6447; GFX940-NEXT: ; def v[0:1] 6448; GFX940-NEXT: ;;#ASMEND 6449; GFX940-NEXT: s_mov_b32 s2, 0x7060302 6450; GFX940-NEXT: v_mov_b32_e32 v4, 0 6451; GFX940-NEXT: ;;#ASMSTART 6452; GFX940-NEXT: ; def v[2:3] 6453; GFX940-NEXT: ;;#ASMEND 6454; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 6455; GFX940-NEXT: v_perm_b32 v0, v2, v3, s2 6456; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 6457; GFX940-NEXT: s_waitcnt vmcnt(0) 6458; GFX940-NEXT: s_setpc_b64 s[30:31] 6459 %vec0 = call <4 x i16> asm "; def $0", "=v"() 6460 %vec1 = call <4 x i16> asm "; def $0", "=v"() 6461 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 5, i32 3, i32 3> 6462 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 6463 ret void 6464} 6465 6466define void @v_shuffle_v4i16_v4i16__7_6_3_3(ptr addrspace(1) inreg %ptr) { 6467; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_6_3_3: 6468; GFX900: ; %bb.0: 6469; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6470; GFX900-NEXT: ;;#ASMSTART 6471; GFX900-NEXT: ; def v[0:1] 6472; GFX900-NEXT: ;;#ASMEND 6473; GFX900-NEXT: s_mov_b32 s4, 0x7060302 6474; 
GFX900-NEXT: v_mov_b32_e32 v4, 0 6475; GFX900-NEXT: ;;#ASMSTART 6476; GFX900-NEXT: ; def v[2:3] 6477; GFX900-NEXT: ;;#ASMEND 6478; GFX900-NEXT: v_perm_b32 v1, v1, v1, s4 6479; GFX900-NEXT: v_alignbit_b32 v0, v3, v3, 16 6480; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 6481; GFX900-NEXT: s_waitcnt vmcnt(0) 6482; GFX900-NEXT: s_setpc_b64 s[30:31] 6483; 6484; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_6_3_3: 6485; GFX90A: ; %bb.0: 6486; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6487; GFX90A-NEXT: ;;#ASMSTART 6488; GFX90A-NEXT: ; def v[0:1] 6489; GFX90A-NEXT: ;;#ASMEND 6490; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 6491; GFX90A-NEXT: v_mov_b32_e32 v4, 0 6492; GFX90A-NEXT: ;;#ASMSTART 6493; GFX90A-NEXT: ; def v[2:3] 6494; GFX90A-NEXT: ;;#ASMEND 6495; GFX90A-NEXT: v_perm_b32 v1, v1, v1, s4 6496; GFX90A-NEXT: v_alignbit_b32 v0, v3, v3, 16 6497; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 6498; GFX90A-NEXT: s_waitcnt vmcnt(0) 6499; GFX90A-NEXT: s_setpc_b64 s[30:31] 6500; 6501; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_6_3_3: 6502; GFX940: ; %bb.0: 6503; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6504; GFX940-NEXT: ;;#ASMSTART 6505; GFX940-NEXT: ; def v[0:1] 6506; GFX940-NEXT: ;;#ASMEND 6507; GFX940-NEXT: s_mov_b32 s2, 0x7060302 6508; GFX940-NEXT: v_mov_b32_e32 v4, 0 6509; GFX940-NEXT: ;;#ASMSTART 6510; GFX940-NEXT: ; def v[2:3] 6511; GFX940-NEXT: ;;#ASMEND 6512; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 6513; GFX940-NEXT: v_alignbit_b32 v0, v3, v3, 16 6514; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 6515; GFX940-NEXT: s_waitcnt vmcnt(0) 6516; GFX940-NEXT: s_setpc_b64 s[30:31] 6517 %vec0 = call <4 x i16> asm "; def $0", "=v"() 6518 %vec1 = call <4 x i16> asm "; def $0", "=v"() 6519 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 6, i32 3, i32 3> 6520 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 6521 ret void 6522} 6523 6524define void @v_shuffle_v4i16_v4i16__7_7_3_3(ptr addrspace(1) inreg 
%ptr) { 6525; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_3_3: 6526; GFX900: ; %bb.0: 6527; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6528; GFX900-NEXT: ;;#ASMSTART 6529; GFX900-NEXT: ; def v[0:1] 6530; GFX900-NEXT: ;;#ASMEND 6531; GFX900-NEXT: s_mov_b32 s4, 0x7060302 6532; GFX900-NEXT: v_mov_b32_e32 v4, 0 6533; GFX900-NEXT: ;;#ASMSTART 6534; GFX900-NEXT: ; def v[2:3] 6535; GFX900-NEXT: ;;#ASMEND 6536; GFX900-NEXT: v_perm_b32 v1, v1, v1, s4 6537; GFX900-NEXT: v_perm_b32 v0, v3, v3, s4 6538; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 6539; GFX900-NEXT: s_waitcnt vmcnt(0) 6540; GFX900-NEXT: s_setpc_b64 s[30:31] 6541; 6542; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_3_3: 6543; GFX90A: ; %bb.0: 6544; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6545; GFX90A-NEXT: ;;#ASMSTART 6546; GFX90A-NEXT: ; def v[0:1] 6547; GFX90A-NEXT: ;;#ASMEND 6548; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 6549; GFX90A-NEXT: v_mov_b32_e32 v4, 0 6550; GFX90A-NEXT: ;;#ASMSTART 6551; GFX90A-NEXT: ; def v[2:3] 6552; GFX90A-NEXT: ;;#ASMEND 6553; GFX90A-NEXT: v_perm_b32 v1, v1, v1, s4 6554; GFX90A-NEXT: v_perm_b32 v0, v3, v3, s4 6555; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 6556; GFX90A-NEXT: s_waitcnt vmcnt(0) 6557; GFX90A-NEXT: s_setpc_b64 s[30:31] 6558; 6559; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_3_3: 6560; GFX940: ; %bb.0: 6561; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6562; GFX940-NEXT: ;;#ASMSTART 6563; GFX940-NEXT: ; def v[0:1] 6564; GFX940-NEXT: ;;#ASMEND 6565; GFX940-NEXT: s_mov_b32 s2, 0x7060302 6566; GFX940-NEXT: v_mov_b32_e32 v4, 0 6567; GFX940-NEXT: ;;#ASMSTART 6568; GFX940-NEXT: ; def v[2:3] 6569; GFX940-NEXT: ;;#ASMEND 6570; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 6571; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 6572; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 6573; GFX940-NEXT: s_waitcnt vmcnt(0) 6574; GFX940-NEXT: s_setpc_b64 s[30:31] 6575 %vec0 = call <4 x i16> asm "; def $0", "=v"() 6576 %vec1 = call <4 x i16> asm "; 
def $0", "=v"() 6577 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 3, i32 3> 6578 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 6579 ret void 6580} 6581 6582define void @v_shuffle_v4i16_v4i16__7_7_u_3(ptr addrspace(1) inreg %ptr) { 6583; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_u_3: 6584; GFX900: ; %bb.0: 6585; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6586; GFX900-NEXT: ;;#ASMSTART 6587; GFX900-NEXT: ; def v[0:1] 6588; GFX900-NEXT: ;;#ASMEND 6589; GFX900-NEXT: s_mov_b32 s4, 0x7060302 6590; GFX900-NEXT: v_mov_b32_e32 v4, 0 6591; GFX900-NEXT: ;;#ASMSTART 6592; GFX900-NEXT: ; def v[2:3] 6593; GFX900-NEXT: ;;#ASMEND 6594; GFX900-NEXT: v_perm_b32 v0, v3, v3, s4 6595; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 6596; GFX900-NEXT: s_waitcnt vmcnt(0) 6597; GFX900-NEXT: s_setpc_b64 s[30:31] 6598; 6599; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_u_3: 6600; GFX90A: ; %bb.0: 6601; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6602; GFX90A-NEXT: ;;#ASMSTART 6603; GFX90A-NEXT: ; def v[0:1] 6604; GFX90A-NEXT: ;;#ASMEND 6605; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 6606; GFX90A-NEXT: v_mov_b32_e32 v4, 0 6607; GFX90A-NEXT: ;;#ASMSTART 6608; GFX90A-NEXT: ; def v[2:3] 6609; GFX90A-NEXT: ;;#ASMEND 6610; GFX90A-NEXT: v_perm_b32 v0, v3, v3, s4 6611; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 6612; GFX90A-NEXT: s_waitcnt vmcnt(0) 6613; GFX90A-NEXT: s_setpc_b64 s[30:31] 6614; 6615; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_u_3: 6616; GFX940: ; %bb.0: 6617; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6618; GFX940-NEXT: ;;#ASMSTART 6619; GFX940-NEXT: ; def v[0:1] 6620; GFX940-NEXT: ;;#ASMEND 6621; GFX940-NEXT: s_mov_b32 s2, 0x7060302 6622; GFX940-NEXT: v_mov_b32_e32 v4, 0 6623; GFX940-NEXT: ;;#ASMSTART 6624; GFX940-NEXT: ; def v[2:3] 6625; GFX940-NEXT: ;;#ASMEND 6626; GFX940-NEXT: s_nop 0 6627; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 6628; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 
6629; GFX940-NEXT: s_waitcnt vmcnt(0) 6630; GFX940-NEXT: s_setpc_b64 s[30:31] 6631 %vec0 = call <4 x i16> asm "; def $0", "=v"() 6632 %vec1 = call <4 x i16> asm "; def $0", "=v"() 6633 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 poison, i32 3> 6634 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 6635 ret void 6636} 6637 6638define void @v_shuffle_v4i16_v4i16__7_7_0_3(ptr addrspace(1) inreg %ptr) { 6639; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_0_3: 6640; GFX900: ; %bb.0: 6641; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6642; GFX900-NEXT: ;;#ASMSTART 6643; GFX900-NEXT: ; def v[0:1] 6644; GFX900-NEXT: ;;#ASMEND 6645; GFX900-NEXT: s_mov_b32 s4, 0xffff 6646; GFX900-NEXT: v_bfi_b32 v1, s4, v0, v1 6647; GFX900-NEXT: s_mov_b32 s4, 0x7060302 6648; GFX900-NEXT: v_mov_b32_e32 v4, 0 6649; GFX900-NEXT: ;;#ASMSTART 6650; GFX900-NEXT: ; def v[2:3] 6651; GFX900-NEXT: ;;#ASMEND 6652; GFX900-NEXT: v_perm_b32 v0, v3, v3, s4 6653; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 6654; GFX900-NEXT: s_waitcnt vmcnt(0) 6655; GFX900-NEXT: s_setpc_b64 s[30:31] 6656; 6657; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_0_3: 6658; GFX90A: ; %bb.0: 6659; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6660; GFX90A-NEXT: ;;#ASMSTART 6661; GFX90A-NEXT: ; def v[0:1] 6662; GFX90A-NEXT: ;;#ASMEND 6663; GFX90A-NEXT: s_mov_b32 s4, 0xffff 6664; GFX90A-NEXT: v_bfi_b32 v1, s4, v0, v1 6665; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 6666; GFX90A-NEXT: v_mov_b32_e32 v4, 0 6667; GFX90A-NEXT: ;;#ASMSTART 6668; GFX90A-NEXT: ; def v[2:3] 6669; GFX90A-NEXT: ;;#ASMEND 6670; GFX90A-NEXT: v_perm_b32 v0, v3, v3, s4 6671; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 6672; GFX90A-NEXT: s_waitcnt vmcnt(0) 6673; GFX90A-NEXT: s_setpc_b64 s[30:31] 6674; 6675; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_0_3: 6676; GFX940: ; %bb.0: 6677; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6678; GFX940-NEXT: ;;#ASMSTART 6679; GFX940-NEXT: ; def v[0:1] 6680; 
GFX940-NEXT: ;;#ASMEND 6681; GFX940-NEXT: s_mov_b32 s2, 0xffff 6682; GFX940-NEXT: v_bfi_b32 v1, s2, v0, v1 6683; GFX940-NEXT: s_mov_b32 s2, 0x7060302 6684; GFX940-NEXT: v_mov_b32_e32 v4, 0 6685; GFX940-NEXT: ;;#ASMSTART 6686; GFX940-NEXT: ; def v[2:3] 6687; GFX940-NEXT: ;;#ASMEND 6688; GFX940-NEXT: s_nop 0 6689; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 6690; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 6691; GFX940-NEXT: s_waitcnt vmcnt(0) 6692; GFX940-NEXT: s_setpc_b64 s[30:31] 6693 %vec0 = call <4 x i16> asm "; def $0", "=v"() 6694 %vec1 = call <4 x i16> asm "; def $0", "=v"() 6695 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 0, i32 3> 6696 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 6697 ret void 6698} 6699 6700define void @v_shuffle_v4i16_v4i16__7_7_1_3(ptr addrspace(1) inreg %ptr) { 6701; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_1_3: 6702; GFX900: ; %bb.0: 6703; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6704; GFX900-NEXT: ;;#ASMSTART 6705; GFX900-NEXT: ; def v[0:1] 6706; GFX900-NEXT: ;;#ASMEND 6707; GFX900-NEXT: s_mov_b32 s4, 0x7060302 6708; GFX900-NEXT: v_mov_b32_e32 v4, 0 6709; GFX900-NEXT: ;;#ASMSTART 6710; GFX900-NEXT: ; def v[2:3] 6711; GFX900-NEXT: ;;#ASMEND 6712; GFX900-NEXT: v_perm_b32 v1, v1, v0, s4 6713; GFX900-NEXT: v_perm_b32 v0, v3, v3, s4 6714; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 6715; GFX900-NEXT: s_waitcnt vmcnt(0) 6716; GFX900-NEXT: s_setpc_b64 s[30:31] 6717; 6718; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_1_3: 6719; GFX90A: ; %bb.0: 6720; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6721; GFX90A-NEXT: ;;#ASMSTART 6722; GFX90A-NEXT: ; def v[0:1] 6723; GFX90A-NEXT: ;;#ASMEND 6724; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 6725; GFX90A-NEXT: v_mov_b32_e32 v4, 0 6726; GFX90A-NEXT: ;;#ASMSTART 6727; GFX90A-NEXT: ; def v[2:3] 6728; GFX90A-NEXT: ;;#ASMEND 6729; GFX90A-NEXT: v_perm_b32 v1, v1, v0, s4 6730; GFX90A-NEXT: v_perm_b32 v0, v3, v3, s4 6731; 
GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 6732; GFX90A-NEXT: s_waitcnt vmcnt(0) 6733; GFX90A-NEXT: s_setpc_b64 s[30:31] 6734; 6735; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_1_3: 6736; GFX940: ; %bb.0: 6737; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6738; GFX940-NEXT: ;;#ASMSTART 6739; GFX940-NEXT: ; def v[0:1] 6740; GFX940-NEXT: ;;#ASMEND 6741; GFX940-NEXT: s_mov_b32 s2, 0x7060302 6742; GFX940-NEXT: v_mov_b32_e32 v4, 0 6743; GFX940-NEXT: ;;#ASMSTART 6744; GFX940-NEXT: ; def v[2:3] 6745; GFX940-NEXT: ;;#ASMEND 6746; GFX940-NEXT: v_perm_b32 v1, v1, v0, s2 6747; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 6748; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 6749; GFX940-NEXT: s_waitcnt vmcnt(0) 6750; GFX940-NEXT: s_setpc_b64 s[30:31] 6751 %vec0 = call <4 x i16> asm "; def $0", "=v"() 6752 %vec1 = call <4 x i16> asm "; def $0", "=v"() 6753 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 1, i32 3> 6754 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 6755 ret void 6756} 6757 6758define void @v_shuffle_v4i16_v4i16__7_7_2_3(ptr addrspace(1) inreg %ptr) { 6759; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_2_3: 6760; GFX900: ; %bb.0: 6761; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6762; GFX900-NEXT: ;;#ASMSTART 6763; GFX900-NEXT: ; def v[0:1] 6764; GFX900-NEXT: ;;#ASMEND 6765; GFX900-NEXT: s_mov_b32 s4, 0x7060302 6766; GFX900-NEXT: v_mov_b32_e32 v4, 0 6767; GFX900-NEXT: ;;#ASMSTART 6768; GFX900-NEXT: ; def v[2:3] 6769; GFX900-NEXT: ;;#ASMEND 6770; GFX900-NEXT: v_perm_b32 v0, v3, v3, s4 6771; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 6772; GFX900-NEXT: s_waitcnt vmcnt(0) 6773; GFX900-NEXT: s_setpc_b64 s[30:31] 6774; 6775; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_2_3: 6776; GFX90A: ; %bb.0: 6777; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6778; GFX90A-NEXT: ;;#ASMSTART 6779; GFX90A-NEXT: ; def v[0:1] 6780; GFX90A-NEXT: ;;#ASMEND 6781; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 
; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[2:3]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_perm_b32 v0, v3, v3, s4
; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_2_3:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:1]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
; GFX940-NEXT:    v_mov_b32_e32 v4, 0
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[2:3]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_nop 0
; GFX940-NEXT:    v_perm_b32 v0, v3, v3, s2
; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 2, i32 3>
  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
  ret void
}

; Shuffle mask <7, 7, 4, 3> (values 0-3 select %vec0 lanes, 4-7 select %vec1 lanes):
; result lanes 0-1 read %vec1[3], lane 2 reads %vec1[0] and lane 3 reads %vec0[3].
define void @v_shuffle_v4i16_v4i16__7_7_4_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_4_3:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:1]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_mov_b32 s4, 0xffff
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[2:3]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_bfi_b32 v1, s4, v2, v1
; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
; GFX900-NEXT:    v_mov_b32_e32 v4, 0
; GFX900-NEXT:    v_perm_b32 v0, v3, v3, s4
; GFX900-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL:
v_shuffle_v4i16_v4i16__7_7_4_3:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:1]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_mov_b32 s4, 0xffff
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[2:3]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_bfi_b32 v1, s4, v2, v1
; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
; GFX90A-NEXT:    v_perm_b32 v0, v3, v3, s4
; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_4_3:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:1]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_mov_b32 s2, 0xffff
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[2:3]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    v_mov_b32_e32 v4, 0
; GFX940-NEXT:    v_bfi_b32 v1, s2, v2, v1
; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
; GFX940-NEXT:    v_perm_b32 v0, v3, v3, s2
; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 4, i32 3>
  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
  ret void
}

; Shuffle mask <7, 7, 5, 3> (values 0-3 select %vec0 lanes, 4-7 select %vec1 lanes):
; result lanes 0-1 read %vec1[3], lane 2 reads %vec1[1] and lane 3 reads %vec0[3].
define void @v_shuffle_v4i16_v4i16__7_7_5_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_5_3:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:1]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
; GFX900-NEXT:    v_mov_b32_e32 v4,
0
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[2:3]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_perm_b32 v1, v1, v2, s4
; GFX900-NEXT:    v_perm_b32 v0, v3, v3, s4
; GFX900-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_5_3:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:1]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[2:3]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_perm_b32 v1, v1, v2, s4
; GFX90A-NEXT:    v_perm_b32 v0, v3, v3, s4
; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_5_3:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:1]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
; GFX940-NEXT:    v_mov_b32_e32 v4, 0
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[2:3]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_nop 0
; GFX940-NEXT:    v_perm_b32 v1, v1, v2, s2
; GFX940-NEXT:    v_perm_b32 v0, v3, v3, s2
; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 5, i32 3>
  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
  ret void
}

; Shuffle mask <7, 7, 6, 3> (values 0-3 select %vec0 lanes, 4-7 select %vec1 lanes):
; result lanes 0-1 read %vec1[3], lane 2 reads %vec1[2] and lane 3 reads %vec0[3].
define void @v_shuffle_v4i16_v4i16__7_7_6_3(ptr addrspace(1) inreg %ptr) {
;
GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_6_3:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:1]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_mov_b32 s4, 0xffff
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[2:3]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_bfi_b32 v1, s4, v3, v1
; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
; GFX900-NEXT:    v_mov_b32_e32 v4, 0
; GFX900-NEXT:    v_perm_b32 v0, v3, v3, s4
; GFX900-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_6_3:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:1]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_mov_b32 s4, 0xffff
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[2:3]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_bfi_b32 v1, s4, v3, v1
; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
; GFX90A-NEXT:    v_perm_b32 v0, v3, v3, s4
; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_6_3:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:1]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_mov_b32 s2, 0xffff
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[2:3]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    v_mov_b32_e32 v4, 0
; GFX940-NEXT:    v_bfi_b32 v1, s2, v3, v1
; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
; GFX940-NEXT:    v_perm_b32 v0, v3, v3, s2
; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:
s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 3>
  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
  ret void
}

; Shuffle mask <poison, 4, 4, 4> with a poison second operand (values 4-7 select
; its lanes): no lane reads %vec0, and the expected output contains no store.
define void @v_shuffle_v4i16_v4i16__u_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX9-LABEL: v_shuffle_v4i16_v4i16__u_4_4_4:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 poison, i32 4, i32 4, i32 4>
  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
  ret void
}

; Shuffle mask <0, 4, 4, 4> with a poison second operand (values 4-7 select its
; lanes): only result lane 0 reads %vec0 (element 0); lanes 1-3 are poison.
define void @v_shuffle_v4i16_v4i16__0_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i16_v4i16__0_4_4_4:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    v_mov_b32_e32 v2, 0
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:1]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i16_v4i16__0_4_4_4:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:1]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v4i16_v4i16__0_4_4_4:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    v_mov_b32_e32 v2, 0
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:1]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    global_store_dwordx2
v2, v[0:1], s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
  ret void
}

; Shuffle mask <1, 4, 4, 4> with a poison second operand (values 4-7 select its
; lanes): only result lane 0 reads %vec0 (element 1); lanes 1-3 are poison.
define void @v_shuffle_v4i16_v4i16__1_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i16_v4i16__1_4_4_4:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:1]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_mov_b32_e32 v2, 0
; GFX900-NEXT:    v_alignbit_b32 v0, s4, v0, 16
; GFX900-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i16_v4i16__1_4_4_4:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:1]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
; GFX90A-NEXT:    v_alignbit_b32 v0, s4, v0, 16
; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v4i16_v4i16__1_4_4_4:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:1]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    v_mov_b32_e32 v2, 0
; GFX940-NEXT:    v_alignbit_b32 v0, s0, v0, 16
; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 1, i32 4, i32 4, i32 4>
  store <4 x i16> %shuf, ptr addrspace(1) %ptr,
align 8
  ret void
}

; Shuffle mask <2, 4, 4, 4> with a poison second operand (values 4-7 select its
; lanes): only result lane 0 reads %vec0 (element 2); lanes 1-3 are poison.
define void @v_shuffle_v4i16_v4i16__2_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i16_v4i16__2_4_4_4:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:1]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_mov_b32_e32 v2, 0
; GFX900-NEXT:    v_mov_b32_e32 v0, v1
; GFX900-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i16_v4i16__2_4_4_4:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:1]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
; GFX90A-NEXT:    v_mov_b32_e32 v0, v1
; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v4i16_v4i16__2_4_4_4:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:1]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    v_mov_b32_e32 v2, 0
; GFX940-NEXT:    v_mov_b32_e32 v0, v1
; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 2, i32 4, i32 4, i32 4>
  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
  ret void
}

; Shuffle mask <3, 4, 4, 4> with a poison second operand (values 4-7 select its
; lanes): only result lane 0 reads %vec0 (element 3); lanes 1-3 are poison.
define void @v_shuffle_v4i16_v4i16__3_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i16_v4i16__3_4_4_4:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:1]
; GFX900-NEXT:
;;#ASMEND
; GFX900-NEXT:    v_mov_b32_e32 v2, 0
; GFX900-NEXT:    v_alignbit_b32 v0, s4, v1, 16
; GFX900-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i16_v4i16__3_4_4_4:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:1]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
; GFX90A-NEXT:    v_alignbit_b32 v0, s4, v1, 16
; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v4i16_v4i16__3_4_4_4:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:1]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    v_mov_b32_e32 v2, 0
; GFX940-NEXT:    v_alignbit_b32 v0, s0, v1, 16
; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 3, i32 4, i32 4, i32 4>
  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
  ret void
}

; Shuffle mask <4, 4, 4, 4> with a poison second operand (values 4-7 select its
; lanes): no lane reads %vec0, and the expected output contains no store.
define void @v_shuffle_v4i16_v4i16__4_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX9-LABEL: v_shuffle_v4i16_v4i16__4_4_4_4:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 4, i32 4, i32 4, i32 4>
  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
  ret void
}

; Shuffle mask <5, 4, 4, 4> (values 4-7 select %vec1 lanes): result lane 0 reads
; %vec1[1] and lanes 1-3 read %vec1[0]; no mask value selects a %vec0 lane.
define void @v_shuffle_v4i16_v4i16__5_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL:
v_shuffle_v4i16_v4i16__5_4_4_4:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:1]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
; GFX900-NEXT:    v_mov_b32_e32 v2, 0
; GFX900-NEXT:    v_perm_b32 v1, v0, v0, s4
; GFX900-NEXT:    v_alignbit_b32 v0, v0, v0, 16
; GFX900-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i16_v4i16__5_4_4_4:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:1]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
; GFX90A-NEXT:    v_perm_b32 v1, v0, v0, s4
; GFX90A-NEXT:    v_alignbit_b32 v0, v0, v0, 16
; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v4i16_v4i16__5_4_4_4:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:1]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
; GFX940-NEXT:    v_mov_b32_e32 v2, 0
; GFX940-NEXT:    v_perm_b32 v1, v0, v0, s2
; GFX940-NEXT:    v_alignbit_b32 v0, v0, v0, 16
; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 5, i32 4, i32 4, i32 4>
  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
  ret void
}

; Shuffle mask <6, 4, 4, 4> (values 4-7 select %vec1 lanes): result lane 0 reads
; %vec1[2] and lanes 1-3 read %vec1[0]; no mask value selects a %vec0 lane.
define void @v_shuffle_v4i16_v4i16__6_4_4_4(ptr addrspace(1) inreg %ptr) {
;
GFX900-LABEL: v_shuffle_v4i16_v4i16__6_4_4_4:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:1]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
; GFX900-NEXT:    v_mov_b32_e32 v3, 0
; GFX900-NEXT:    v_perm_b32 v1, v0, v1, s4
; GFX900-NEXT:    v_perm_b32 v2, v0, v0, s4
; GFX900-NEXT:    global_store_dwordx2 v3, v[1:2], s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i16_v4i16__6_4_4_4:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:1]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_perm_b32 v2, v0, v1, s4
; GFX90A-NEXT:    v_perm_b32 v3, v0, v0, s4
; GFX90A-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v4i16_v4i16__6_4_4_4:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
; GFX940-NEXT:    v_mov_b32_e32 v4, 0
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:1]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_nop 0
; GFX940-NEXT:    v_perm_b32 v2, v0, v1, s2
; GFX940-NEXT:    v_perm_b32 v3, v0, v0, s2
; GFX940-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 6, i32 4, i32 4, i32 4>
  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
  ret void
}

; Shuffle mask <7, 4, 4, 4> (values 4-7 select %vec1 lanes): result lane 0 reads
; %vec1[3] and lanes 1-3 read %vec1[0]; no mask value selects a %vec0 lane.
define void @v_shuffle_v4i16_v4i16__7_4_4_4(ptr
addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_4_4_4:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:1]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
; GFX900-NEXT:    v_mov_b32_e32 v3, 0
; GFX900-NEXT:    v_perm_b32 v2, v0, v0, s4
; GFX900-NEXT:    v_alignbit_b32 v1, v0, v1, 16
; GFX900-NEXT:    global_store_dwordx2 v3, v[1:2], s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_4_4_4:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:1]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_perm_b32 v3, v0, v0, s4
; GFX90A-NEXT:    v_alignbit_b32 v2, v0, v1, 16
; GFX90A-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_4_4_4:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
; GFX940-NEXT:    v_mov_b32_e32 v4, 0
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:1]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_nop 0
; GFX940-NEXT:    v_perm_b32 v3, v0, v0, s2
; GFX940-NEXT:    v_alignbit_b32 v2, v0, v1, 16
; GFX940-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 4, i32 4, i32 4>
  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
  ret void
}

; Shuffle mask <7, poison, 4, 4> (values 4-7 select %vec1 lanes): result lane 0
; reads %vec1[3], lane 1 is poison and lanes 2-3 read %vec1[0].
define void
@v_shuffle_v4i16_v4i16__7_u_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_u_4_4:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:1]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
; GFX900-NEXT:    v_mov_b32_e32 v3, 0
; GFX900-NEXT:    v_perm_b32 v2, v0, v0, s4
; GFX900-NEXT:    v_alignbit_b32 v1, s4, v1, 16
; GFX900-NEXT:    global_store_dwordx2 v3, v[1:2], s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_u_4_4:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:1]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_perm_b32 v3, v0, v0, s4
; GFX90A-NEXT:    v_alignbit_b32 v2, s4, v1, 16
; GFX90A-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_u_4_4:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
; GFX940-NEXT:    v_mov_b32_e32 v4, 0
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:1]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_nop 0
; GFX940-NEXT:    v_perm_b32 v3, v0, v0, s2
; GFX940-NEXT:    v_alignbit_b32 v2, s0, v1, 16
; GFX940-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 poison, i32 4, i32 4>
  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
ret void
}

; Shuffle mask <7, 0, 4, 4> (values 0-3 select %vec0 lanes, 4-7 select %vec1 lanes):
; result lane 0 reads %vec1[3], lane 1 reads %vec0[0] and lanes 2-3 read %vec1[0].
define void @v_shuffle_v4i16_v4i16__7_0_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_0_4_4:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:1]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[1:2]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
; GFX900-NEXT:    v_mov_b32_e32 v3, 0
; GFX900-NEXT:    v_perm_b32 v1, v1, v1, s4
; GFX900-NEXT:    v_alignbit_b32 v0, v0, v2, 16
; GFX900-NEXT:    global_store_dwordx2 v3, v[0:1], s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_0_4_4:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:1]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[2:3]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_perm_b32 v1, v2, v2, s4
; GFX90A-NEXT:    v_alignbit_b32 v0, v0, v3, 16
; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_0_4_4:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:1]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
; GFX940-NEXT:    v_mov_b32_e32 v4, 0
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[2:3]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_nop 0
; GFX940-NEXT:    v_perm_b32 v1, v2, v2, s2
; GFX940-NEXT:    v_alignbit_b32 v0, v0, v3, 16
; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt
vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 0, i32 4, i32 4>
  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
  ret void
}

; Shuffle mask <7, 1, 4, 4> (values 0-3 select %vec0 lanes, 4-7 select %vec1 lanes):
; result lane 0 reads %vec1[3], lane 1 reads %vec0[1] and lanes 2-3 read %vec1[0].
define void @v_shuffle_v4i16_v4i16__7_1_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_1_4_4:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:1]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[1:2]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_perm_b32 v0, v0, v2, s4
; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
; GFX900-NEXT:    v_mov_b32_e32 v3, 0
; GFX900-NEXT:    v_perm_b32 v1, v1, v1, s4
; GFX900-NEXT:    global_store_dwordx2 v3, v[0:1], s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_1_4_4:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:1]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[2:3]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_perm_b32 v0, v0, v3, s4
; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
; GFX90A-NEXT:    v_perm_b32 v1, v2, v2, s4
; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_1_4_4:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:1]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[2:3]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    v_mov_b32_e32 v4, 0
; GFX940-NEXT:    v_perm_b32 v0, v0, v3, s2
; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
; GFX940-NEXT:    v_perm_b32 v1, v2, v2, s2
; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 1, i32 4, i32 4>
  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
  ret void
}

; Shuffle mask <7, 2, 4, 4> (values 0-3 select %vec0 lanes, 4-7 select %vec1 lanes):
; result lane 0 reads %vec1[3], lane 1 reads %vec0[2] and lanes 2-3 read %vec1[0].
define void @v_shuffle_v4i16_v4i16__7_2_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_2_4_4:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:1]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[2:3]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
; GFX900-NEXT:    v_mov_b32_e32 v4, 0
; GFX900-NEXT:    v_perm_b32 v2, v2, v2, s4
; GFX900-NEXT:    v_alignbit_b32 v1, v1, v3, 16
; GFX900-NEXT:    global_store_dwordx2 v4, v[1:2], s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_2_4_4:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:1]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[2:3]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_perm_b32 v5, v2, v2, s4
; GFX90A-NEXT:    v_alignbit_b32 v4, v1, v3, 16
; GFX90A-NEXT:    global_store_dwordx2 v6,
v[4:5], s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_2_4_4:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
; GFX940-NEXT:    v_mov_b32_e32 v6, 0
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:1]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[2:3]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_nop 0
; GFX940-NEXT:    v_perm_b32 v5, v2, v2, s2
; GFX940-NEXT:    v_alignbit_b32 v4, v1, v3, 16
; GFX940-NEXT:    global_store_dwordx2 v6, v[4:5], s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 2, i32 4, i32 4>
  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
  ret void
}

; Shuffle mask <7, 3, 4, 4> (values 0-3 select %vec0 lanes, 4-7 select %vec1 lanes):
; result lane 0 reads %vec1[3], lane 1 reads %vec0[3] and lanes 2-3 read %vec1[0].
define void @v_shuffle_v4i16_v4i16__7_3_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_3_4_4:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:1]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[2:3]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_perm_b32 v0, v1, v3, s4
; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
; GFX900-NEXT:    v_mov_b32_e32 v4, 0
; GFX900-NEXT:    v_perm_b32 v1, v2, v2, s4
; GFX900-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_3_4_4:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def
v[0:1]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[2:3]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_perm_b32 v0, v1, v3, s4
; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
; GFX90A-NEXT:    v_perm_b32 v1, v2, v2, s4
; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_3_4_4:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:1]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[2:3]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    v_mov_b32_e32 v4, 0
; GFX940-NEXT:    v_perm_b32 v0, v1, v3, s2
; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
; GFX940-NEXT:    v_perm_b32 v1, v2, v2, s2
; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=v"()
  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 3, i32 4, i32 4>
  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
  ret void
}

; Shuffle mask <7, 5, 4, 4> (values 4-7 select %vec1 lanes): result lane 0 reads
; %vec1[3], lane 1 reads %vec1[1] and lanes 2-3 read %vec1[0]; no mask value
; selects a %vec0 lane.
define void @v_shuffle_v4i16_v4i16__7_5_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_5_4_4:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:1]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
; GFX900-NEXT:    v_perm_b32 v1, v0, v1, s4
; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
; GFX900-NEXT:    v_mov_b32_e32 v3, 0
; GFX900-NEXT:    v_perm_b32 v2, v0, v0, s4
; GFX900-NEXT:
global_store_dwordx2 v3, v[1:2], s[16:17] 7634; GFX900-NEXT: s_waitcnt vmcnt(0) 7635; GFX900-NEXT: s_setpc_b64 s[30:31] 7636; 7637; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_5_4_4: 7638; GFX90A: ; %bb.0: 7639; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7640; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 7641; GFX90A-NEXT: ;;#ASMSTART 7642; GFX90A-NEXT: ; def v[0:1] 7643; GFX90A-NEXT: ;;#ASMEND 7644; GFX90A-NEXT: v_perm_b32 v2, v0, v1, s4 7645; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 7646; GFX90A-NEXT: v_mov_b32_e32 v4, 0 7647; GFX90A-NEXT: v_perm_b32 v3, v0, v0, s4 7648; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17] 7649; GFX90A-NEXT: s_waitcnt vmcnt(0) 7650; GFX90A-NEXT: s_setpc_b64 s[30:31] 7651; 7652; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_5_4_4: 7653; GFX940: ; %bb.0: 7654; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7655; GFX940-NEXT: s_mov_b32 s2, 0x7060302 7656; GFX940-NEXT: ;;#ASMSTART 7657; GFX940-NEXT: ; def v[0:1] 7658; GFX940-NEXT: ;;#ASMEND 7659; GFX940-NEXT: v_mov_b32_e32 v4, 0 7660; GFX940-NEXT: v_perm_b32 v2, v0, v1, s2 7661; GFX940-NEXT: s_mov_b32 s2, 0x5040100 7662; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 7663; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 7664; GFX940-NEXT: s_waitcnt vmcnt(0) 7665; GFX940-NEXT: s_setpc_b64 s[30:31] 7666 %vec0 = call <4 x i16> asm "; def $0", "=v"() 7667 %vec1 = call <4 x i16> asm "; def $0", "=v"() 7668 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 5, i32 4, i32 4> 7669 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 7670 ret void 7671} 7672 7673define void @v_shuffle_v4i16_v4i16__7_6_4_4(ptr addrspace(1) inreg %ptr) { 7674; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_6_4_4: 7675; GFX900: ; %bb.0: 7676; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7677; GFX900-NEXT: ;;#ASMSTART 7678; GFX900-NEXT: ; def v[0:1] 7679; GFX900-NEXT: ;;#ASMEND 7680; GFX900-NEXT: s_mov_b32 s4, 0x5040100 7681; GFX900-NEXT: v_mov_b32_e32 v3, 0 7682; GFX900-NEXT: 
v_perm_b32 v2, v0, v0, s4 7683; GFX900-NEXT: v_alignbit_b32 v1, v1, v1, 16 7684; GFX900-NEXT: global_store_dwordx2 v3, v[1:2], s[16:17] 7685; GFX900-NEXT: s_waitcnt vmcnt(0) 7686; GFX900-NEXT: s_setpc_b64 s[30:31] 7687; 7688; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_6_4_4: 7689; GFX90A: ; %bb.0: 7690; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7691; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 7692; GFX90A-NEXT: v_mov_b32_e32 v4, 0 7693; GFX90A-NEXT: ;;#ASMSTART 7694; GFX90A-NEXT: ; def v[0:1] 7695; GFX90A-NEXT: ;;#ASMEND 7696; GFX90A-NEXT: v_perm_b32 v3, v0, v0, s4 7697; GFX90A-NEXT: v_alignbit_b32 v2, v1, v1, 16 7698; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17] 7699; GFX90A-NEXT: s_waitcnt vmcnt(0) 7700; GFX90A-NEXT: s_setpc_b64 s[30:31] 7701; 7702; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_6_4_4: 7703; GFX940: ; %bb.0: 7704; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7705; GFX940-NEXT: s_mov_b32 s2, 0x5040100 7706; GFX940-NEXT: v_mov_b32_e32 v4, 0 7707; GFX940-NEXT: ;;#ASMSTART 7708; GFX940-NEXT: ; def v[0:1] 7709; GFX940-NEXT: ;;#ASMEND 7710; GFX940-NEXT: s_nop 0 7711; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 7712; GFX940-NEXT: v_alignbit_b32 v2, v1, v1, 16 7713; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 7714; GFX940-NEXT: s_waitcnt vmcnt(0) 7715; GFX940-NEXT: s_setpc_b64 s[30:31] 7716 %vec0 = call <4 x i16> asm "; def $0", "=v"() 7717 %vec1 = call <4 x i16> asm "; def $0", "=v"() 7718 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 6, i32 4, i32 4> 7719 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 7720 ret void 7721} 7722 7723define void @v_shuffle_v4i16_v4i16__7_7_4_4(ptr addrspace(1) inreg %ptr) { 7724; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_4_4: 7725; GFX900: ; %bb.0: 7726; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7727; GFX900-NEXT: s_mov_b32 s4, 0x5040100 7728; GFX900-NEXT: ;;#ASMSTART 7729; GFX900-NEXT: ; def v[0:1] 7730; GFX900-NEXT: ;;#ASMEND 7731; GFX900-NEXT: 
v_perm_b32 v2, v0, v0, s4 7732; GFX900-NEXT: s_mov_b32 s4, 0x7060302 7733; GFX900-NEXT: v_mov_b32_e32 v3, 0 7734; GFX900-NEXT: v_perm_b32 v1, v1, v1, s4 7735; GFX900-NEXT: global_store_dwordx2 v3, v[1:2], s[16:17] 7736; GFX900-NEXT: s_waitcnt vmcnt(0) 7737; GFX900-NEXT: s_setpc_b64 s[30:31] 7738; 7739; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_4_4: 7740; GFX90A: ; %bb.0: 7741; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7742; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 7743; GFX90A-NEXT: ;;#ASMSTART 7744; GFX90A-NEXT: ; def v[0:1] 7745; GFX90A-NEXT: ;;#ASMEND 7746; GFX90A-NEXT: v_perm_b32 v3, v0, v0, s4 7747; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 7748; GFX90A-NEXT: v_mov_b32_e32 v4, 0 7749; GFX90A-NEXT: v_perm_b32 v2, v1, v1, s4 7750; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17] 7751; GFX90A-NEXT: s_waitcnt vmcnt(0) 7752; GFX90A-NEXT: s_setpc_b64 s[30:31] 7753; 7754; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_4_4: 7755; GFX940: ; %bb.0: 7756; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7757; GFX940-NEXT: s_mov_b32 s2, 0x5040100 7758; GFX940-NEXT: ;;#ASMSTART 7759; GFX940-NEXT: ; def v[0:1] 7760; GFX940-NEXT: ;;#ASMEND 7761; GFX940-NEXT: v_mov_b32_e32 v4, 0 7762; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 7763; GFX940-NEXT: s_mov_b32 s2, 0x7060302 7764; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 7765; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 7766; GFX940-NEXT: s_waitcnt vmcnt(0) 7767; GFX940-NEXT: s_setpc_b64 s[30:31] 7768 %vec0 = call <4 x i16> asm "; def $0", "=v"() 7769 %vec1 = call <4 x i16> asm "; def $0", "=v"() 7770 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 4, i32 4> 7771 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 7772 ret void 7773} 7774 7775define void @v_shuffle_v4i16_v4i16__7_7_u_4(ptr addrspace(1) inreg %ptr) { 7776; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_u_4: 7777; GFX900: ; %bb.0: 7778; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7779; GFX900-NEXT: ;;#ASMSTART 
7780; GFX900-NEXT: ; def v[0:1] 7781; GFX900-NEXT: ;;#ASMEND 7782; GFX900-NEXT: s_mov_b32 s4, 0x7060302 7783; GFX900-NEXT: v_mov_b32_e32 v3, 0 7784; GFX900-NEXT: v_perm_b32 v1, v1, v1, s4 7785; GFX900-NEXT: v_lshlrev_b32_e32 v2, 16, v0 7786; GFX900-NEXT: global_store_dwordx2 v3, v[1:2], s[16:17] 7787; GFX900-NEXT: s_waitcnt vmcnt(0) 7788; GFX900-NEXT: s_setpc_b64 s[30:31] 7789; 7790; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_u_4: 7791; GFX90A: ; %bb.0: 7792; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7793; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 7794; GFX90A-NEXT: v_mov_b32_e32 v4, 0 7795; GFX90A-NEXT: ;;#ASMSTART 7796; GFX90A-NEXT: ; def v[0:1] 7797; GFX90A-NEXT: ;;#ASMEND 7798; GFX90A-NEXT: v_perm_b32 v2, v1, v1, s4 7799; GFX90A-NEXT: v_lshlrev_b32_e32 v3, 16, v0 7800; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17] 7801; GFX90A-NEXT: s_waitcnt vmcnt(0) 7802; GFX90A-NEXT: s_setpc_b64 s[30:31] 7803; 7804; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_u_4: 7805; GFX940: ; %bb.0: 7806; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7807; GFX940-NEXT: s_mov_b32 s2, 0x7060302 7808; GFX940-NEXT: v_mov_b32_e32 v4, 0 7809; GFX940-NEXT: ;;#ASMSTART 7810; GFX940-NEXT: ; def v[0:1] 7811; GFX940-NEXT: ;;#ASMEND 7812; GFX940-NEXT: s_nop 0 7813; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 7814; GFX940-NEXT: v_lshlrev_b32_e32 v3, 16, v0 7815; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 7816; GFX940-NEXT: s_waitcnt vmcnt(0) 7817; GFX940-NEXT: s_setpc_b64 s[30:31] 7818 %vec0 = call <4 x i16> asm "; def $0", "=v"() 7819 %vec1 = call <4 x i16> asm "; def $0", "=v"() 7820 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 poison, i32 4> 7821 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 7822 ret void 7823} 7824 7825define void @v_shuffle_v4i16_v4i16__7_7_0_4(ptr addrspace(1) inreg %ptr) { 7826; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_0_4: 7827; GFX900: ; %bb.0: 7828; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) 7829; GFX900-NEXT: ;;#ASMSTART 7830; GFX900-NEXT: ; def v[0:1] 7831; GFX900-NEXT: ;;#ASMEND 7832; GFX900-NEXT: ;;#ASMSTART 7833; GFX900-NEXT: ; def v[1:2] 7834; GFX900-NEXT: ;;#ASMEND 7835; GFX900-NEXT: s_mov_b32 s4, 0x5040100 7836; GFX900-NEXT: v_perm_b32 v1, v1, v0, s4 7837; GFX900-NEXT: s_mov_b32 s4, 0x7060302 7838; GFX900-NEXT: v_mov_b32_e32 v3, 0 7839; GFX900-NEXT: v_perm_b32 v0, v2, v2, s4 7840; GFX900-NEXT: global_store_dwordx2 v3, v[0:1], s[16:17] 7841; GFX900-NEXT: s_waitcnt vmcnt(0) 7842; GFX900-NEXT: s_setpc_b64 s[30:31] 7843; 7844; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_0_4: 7845; GFX90A: ; %bb.0: 7846; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7847; GFX90A-NEXT: ;;#ASMSTART 7848; GFX90A-NEXT: ; def v[0:1] 7849; GFX90A-NEXT: ;;#ASMEND 7850; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 7851; GFX90A-NEXT: ;;#ASMSTART 7852; GFX90A-NEXT: ; def v[2:3] 7853; GFX90A-NEXT: ;;#ASMEND 7854; GFX90A-NEXT: v_perm_b32 v1, v2, v0, s4 7855; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 7856; GFX90A-NEXT: v_mov_b32_e32 v4, 0 7857; GFX90A-NEXT: v_perm_b32 v0, v3, v3, s4 7858; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 7859; GFX90A-NEXT: s_waitcnt vmcnt(0) 7860; GFX90A-NEXT: s_setpc_b64 s[30:31] 7861; 7862; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_0_4: 7863; GFX940: ; %bb.0: 7864; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7865; GFX940-NEXT: ;;#ASMSTART 7866; GFX940-NEXT: ; def v[0:1] 7867; GFX940-NEXT: ;;#ASMEND 7868; GFX940-NEXT: s_mov_b32 s2, 0x5040100 7869; GFX940-NEXT: ;;#ASMSTART 7870; GFX940-NEXT: ; def v[2:3] 7871; GFX940-NEXT: ;;#ASMEND 7872; GFX940-NEXT: v_mov_b32_e32 v4, 0 7873; GFX940-NEXT: v_perm_b32 v1, v2, v0, s2 7874; GFX940-NEXT: s_mov_b32 s2, 0x7060302 7875; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 7876; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 7877; GFX940-NEXT: s_waitcnt vmcnt(0) 7878; GFX940-NEXT: s_setpc_b64 s[30:31] 7879 %vec0 = call <4 x i16> asm "; def $0", "=v"() 7880 %vec1 = call <4 x i16> asm "; 
def $0", "=v"() 7881 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 0, i32 4> 7882 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 7883 ret void 7884} 7885 7886define void @v_shuffle_v4i16_v4i16__7_7_1_4(ptr addrspace(1) inreg %ptr) { 7887; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_1_4: 7888; GFX900: ; %bb.0: 7889; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7890; GFX900-NEXT: ;;#ASMSTART 7891; GFX900-NEXT: ; def v[0:1] 7892; GFX900-NEXT: ;;#ASMEND 7893; GFX900-NEXT: ;;#ASMSTART 7894; GFX900-NEXT: ; def v[1:2] 7895; GFX900-NEXT: ;;#ASMEND 7896; GFX900-NEXT: s_mov_b32 s4, 0x7060302 7897; GFX900-NEXT: v_mov_b32_e32 v3, 0 7898; GFX900-NEXT: v_alignbit_b32 v1, v1, v0, 16 7899; GFX900-NEXT: v_perm_b32 v0, v2, v2, s4 7900; GFX900-NEXT: global_store_dwordx2 v3, v[0:1], s[16:17] 7901; GFX900-NEXT: s_waitcnt vmcnt(0) 7902; GFX900-NEXT: s_setpc_b64 s[30:31] 7903; 7904; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_1_4: 7905; GFX90A: ; %bb.0: 7906; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7907; GFX90A-NEXT: ;;#ASMSTART 7908; GFX90A-NEXT: ; def v[0:1] 7909; GFX90A-NEXT: ;;#ASMEND 7910; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 7911; GFX90A-NEXT: v_mov_b32_e32 v4, 0 7912; GFX90A-NEXT: ;;#ASMSTART 7913; GFX90A-NEXT: ; def v[2:3] 7914; GFX90A-NEXT: ;;#ASMEND 7915; GFX90A-NEXT: v_alignbit_b32 v1, v2, v0, 16 7916; GFX90A-NEXT: v_perm_b32 v0, v3, v3, s4 7917; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 7918; GFX90A-NEXT: s_waitcnt vmcnt(0) 7919; GFX90A-NEXT: s_setpc_b64 s[30:31] 7920; 7921; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_1_4: 7922; GFX940: ; %bb.0: 7923; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7924; GFX940-NEXT: ;;#ASMSTART 7925; GFX940-NEXT: ; def v[0:1] 7926; GFX940-NEXT: ;;#ASMEND 7927; GFX940-NEXT: s_mov_b32 s2, 0x7060302 7928; GFX940-NEXT: v_mov_b32_e32 v4, 0 7929; GFX940-NEXT: ;;#ASMSTART 7930; GFX940-NEXT: ; def v[2:3] 7931; GFX940-NEXT: ;;#ASMEND 7932; GFX940-NEXT: s_nop 0 7933; 
GFX940-NEXT: v_alignbit_b32 v1, v2, v0, 16 7934; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 7935; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 7936; GFX940-NEXT: s_waitcnt vmcnt(0) 7937; GFX940-NEXT: s_setpc_b64 s[30:31] 7938 %vec0 = call <4 x i16> asm "; def $0", "=v"() 7939 %vec1 = call <4 x i16> asm "; def $0", "=v"() 7940 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 1, i32 4> 7941 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 7942 ret void 7943} 7944 7945define void @v_shuffle_v4i16_v4i16__7_7_2_4(ptr addrspace(1) inreg %ptr) { 7946; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_2_4: 7947; GFX900: ; %bb.0: 7948; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7949; GFX900-NEXT: ;;#ASMSTART 7950; GFX900-NEXT: ; def v[0:1] 7951; GFX900-NEXT: ;;#ASMEND 7952; GFX900-NEXT: s_mov_b32 s4, 0x5040100 7953; GFX900-NEXT: ;;#ASMSTART 7954; GFX900-NEXT: ; def v[2:3] 7955; GFX900-NEXT: ;;#ASMEND 7956; GFX900-NEXT: v_perm_b32 v1, v2, v1, s4 7957; GFX900-NEXT: s_mov_b32 s4, 0x7060302 7958; GFX900-NEXT: v_mov_b32_e32 v4, 0 7959; GFX900-NEXT: v_perm_b32 v0, v3, v3, s4 7960; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 7961; GFX900-NEXT: s_waitcnt vmcnt(0) 7962; GFX900-NEXT: s_setpc_b64 s[30:31] 7963; 7964; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_2_4: 7965; GFX90A: ; %bb.0: 7966; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7967; GFX90A-NEXT: ;;#ASMSTART 7968; GFX90A-NEXT: ; def v[0:1] 7969; GFX90A-NEXT: ;;#ASMEND 7970; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 7971; GFX90A-NEXT: ;;#ASMSTART 7972; GFX90A-NEXT: ; def v[2:3] 7973; GFX90A-NEXT: ;;#ASMEND 7974; GFX90A-NEXT: v_perm_b32 v1, v2, v1, s4 7975; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 7976; GFX90A-NEXT: v_mov_b32_e32 v4, 0 7977; GFX90A-NEXT: v_perm_b32 v0, v3, v3, s4 7978; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 7979; GFX90A-NEXT: s_waitcnt vmcnt(0) 7980; GFX90A-NEXT: s_setpc_b64 s[30:31] 7981; 7982; GFX940-LABEL: 
v_shuffle_v4i16_v4i16__7_7_2_4: 7983; GFX940: ; %bb.0: 7984; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7985; GFX940-NEXT: ;;#ASMSTART 7986; GFX940-NEXT: ; def v[0:1] 7987; GFX940-NEXT: ;;#ASMEND 7988; GFX940-NEXT: s_mov_b32 s2, 0x5040100 7989; GFX940-NEXT: ;;#ASMSTART 7990; GFX940-NEXT: ; def v[2:3] 7991; GFX940-NEXT: ;;#ASMEND 7992; GFX940-NEXT: v_mov_b32_e32 v4, 0 7993; GFX940-NEXT: v_perm_b32 v1, v2, v1, s2 7994; GFX940-NEXT: s_mov_b32 s2, 0x7060302 7995; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 7996; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 7997; GFX940-NEXT: s_waitcnt vmcnt(0) 7998; GFX940-NEXT: s_setpc_b64 s[30:31] 7999 %vec0 = call <4 x i16> asm "; def $0", "=v"() 8000 %vec1 = call <4 x i16> asm "; def $0", "=v"() 8001 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 2, i32 4> 8002 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 8003 ret void 8004} 8005 8006define void @v_shuffle_v4i16_v4i16__7_7_3_4(ptr addrspace(1) inreg %ptr) { 8007; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_3_4: 8008; GFX900: ; %bb.0: 8009; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8010; GFX900-NEXT: ;;#ASMSTART 8011; GFX900-NEXT: ; def v[0:1] 8012; GFX900-NEXT: ;;#ASMEND 8013; GFX900-NEXT: s_mov_b32 s4, 0x7060302 8014; GFX900-NEXT: v_mov_b32_e32 v4, 0 8015; GFX900-NEXT: ;;#ASMSTART 8016; GFX900-NEXT: ; def v[2:3] 8017; GFX900-NEXT: ;;#ASMEND 8018; GFX900-NEXT: v_alignbit_b32 v1, v2, v1, 16 8019; GFX900-NEXT: v_perm_b32 v0, v3, v3, s4 8020; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 8021; GFX900-NEXT: s_waitcnt vmcnt(0) 8022; GFX900-NEXT: s_setpc_b64 s[30:31] 8023; 8024; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_3_4: 8025; GFX90A: ; %bb.0: 8026; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8027; GFX90A-NEXT: ;;#ASMSTART 8028; GFX90A-NEXT: ; def v[0:1] 8029; GFX90A-NEXT: ;;#ASMEND 8030; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 8031; GFX90A-NEXT: v_mov_b32_e32 v4, 0 8032; GFX90A-NEXT: 
;;#ASMSTART 8033; GFX90A-NEXT: ; def v[2:3] 8034; GFX90A-NEXT: ;;#ASMEND 8035; GFX90A-NEXT: v_alignbit_b32 v1, v2, v1, 16 8036; GFX90A-NEXT: v_perm_b32 v0, v3, v3, s4 8037; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 8038; GFX90A-NEXT: s_waitcnt vmcnt(0) 8039; GFX90A-NEXT: s_setpc_b64 s[30:31] 8040; 8041; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_3_4: 8042; GFX940: ; %bb.0: 8043; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8044; GFX940-NEXT: ;;#ASMSTART 8045; GFX940-NEXT: ; def v[0:1] 8046; GFX940-NEXT: ;;#ASMEND 8047; GFX940-NEXT: s_mov_b32 s2, 0x7060302 8048; GFX940-NEXT: v_mov_b32_e32 v4, 0 8049; GFX940-NEXT: ;;#ASMSTART 8050; GFX940-NEXT: ; def v[2:3] 8051; GFX940-NEXT: ;;#ASMEND 8052; GFX940-NEXT: s_nop 0 8053; GFX940-NEXT: v_alignbit_b32 v1, v2, v1, 16 8054; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 8055; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 8056; GFX940-NEXT: s_waitcnt vmcnt(0) 8057; GFX940-NEXT: s_setpc_b64 s[30:31] 8058 %vec0 = call <4 x i16> asm "; def $0", "=v"() 8059 %vec1 = call <4 x i16> asm "; def $0", "=v"() 8060 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 3, i32 4> 8061 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 8062 ret void 8063} 8064 8065define void @v_shuffle_v4i16_v4i16__7_7_5_4(ptr addrspace(1) inreg %ptr) { 8066; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_5_4: 8067; GFX900: ; %bb.0: 8068; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8069; GFX900-NEXT: ;;#ASMSTART 8070; GFX900-NEXT: ; def v[0:1] 8071; GFX900-NEXT: ;;#ASMEND 8072; GFX900-NEXT: s_mov_b32 s4, 0x7060302 8073; GFX900-NEXT: v_mov_b32_e32 v3, 0 8074; GFX900-NEXT: v_alignbit_b32 v2, v0, v0, 16 8075; GFX900-NEXT: v_perm_b32 v1, v1, v1, s4 8076; GFX900-NEXT: global_store_dwordx2 v3, v[1:2], s[16:17] 8077; GFX900-NEXT: s_waitcnt vmcnt(0) 8078; GFX900-NEXT: s_setpc_b64 s[30:31] 8079; 8080; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_5_4: 8081; GFX90A: ; %bb.0: 8082; GFX90A-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) 8083; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 8084; GFX90A-NEXT: v_mov_b32_e32 v4, 0 8085; GFX90A-NEXT: ;;#ASMSTART 8086; GFX90A-NEXT: ; def v[0:1] 8087; GFX90A-NEXT: ;;#ASMEND 8088; GFX90A-NEXT: v_alignbit_b32 v3, v0, v0, 16 8089; GFX90A-NEXT: v_perm_b32 v2, v1, v1, s4 8090; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17] 8091; GFX90A-NEXT: s_waitcnt vmcnt(0) 8092; GFX90A-NEXT: s_setpc_b64 s[30:31] 8093; 8094; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_5_4: 8095; GFX940: ; %bb.0: 8096; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8097; GFX940-NEXT: s_mov_b32 s2, 0x7060302 8098; GFX940-NEXT: v_mov_b32_e32 v4, 0 8099; GFX940-NEXT: ;;#ASMSTART 8100; GFX940-NEXT: ; def v[0:1] 8101; GFX940-NEXT: ;;#ASMEND 8102; GFX940-NEXT: s_nop 0 8103; GFX940-NEXT: v_alignbit_b32 v3, v0, v0, 16 8104; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 8105; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 8106; GFX940-NEXT: s_waitcnt vmcnt(0) 8107; GFX940-NEXT: s_setpc_b64 s[30:31] 8108 %vec0 = call <4 x i16> asm "; def $0", "=v"() 8109 %vec1 = call <4 x i16> asm "; def $0", "=v"() 8110 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 5, i32 4> 8111 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 8112 ret void 8113} 8114 8115define void @v_shuffle_v4i16_v4i16__7_7_6_4(ptr addrspace(1) inreg %ptr) { 8116; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_6_4: 8117; GFX900: ; %bb.0: 8118; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8119; GFX900-NEXT: s_mov_b32 s4, 0x5040100 8120; GFX900-NEXT: ;;#ASMSTART 8121; GFX900-NEXT: ; def v[0:1] 8122; GFX900-NEXT: ;;#ASMEND 8123; GFX900-NEXT: v_perm_b32 v2, v0, v1, s4 8124; GFX900-NEXT: s_mov_b32 s4, 0x7060302 8125; GFX900-NEXT: v_mov_b32_e32 v3, 0 8126; GFX900-NEXT: v_perm_b32 v1, v1, v1, s4 8127; GFX900-NEXT: global_store_dwordx2 v3, v[1:2], s[16:17] 8128; GFX900-NEXT: s_waitcnt vmcnt(0) 8129; GFX900-NEXT: s_setpc_b64 s[30:31] 8130; 8131; GFX90A-LABEL: 
v_shuffle_v4i16_v4i16__7_7_6_4: 8132; GFX90A: ; %bb.0: 8133; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8134; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 8135; GFX90A-NEXT: ;;#ASMSTART 8136; GFX90A-NEXT: ; def v[0:1] 8137; GFX90A-NEXT: ;;#ASMEND 8138; GFX90A-NEXT: v_perm_b32 v3, v0, v1, s4 8139; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 8140; GFX90A-NEXT: v_mov_b32_e32 v4, 0 8141; GFX90A-NEXT: v_perm_b32 v2, v1, v1, s4 8142; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17] 8143; GFX90A-NEXT: s_waitcnt vmcnt(0) 8144; GFX90A-NEXT: s_setpc_b64 s[30:31] 8145; 8146; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_6_4: 8147; GFX940: ; %bb.0: 8148; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8149; GFX940-NEXT: s_mov_b32 s2, 0x5040100 8150; GFX940-NEXT: ;;#ASMSTART 8151; GFX940-NEXT: ; def v[0:1] 8152; GFX940-NEXT: ;;#ASMEND 8153; GFX940-NEXT: v_mov_b32_e32 v4, 0 8154; GFX940-NEXT: v_perm_b32 v3, v0, v1, s2 8155; GFX940-NEXT: s_mov_b32 s2, 0x7060302 8156; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 8157; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 8158; GFX940-NEXT: s_waitcnt vmcnt(0) 8159; GFX940-NEXT: s_setpc_b64 s[30:31] 8160 %vec0 = call <4 x i16> asm "; def $0", "=v"() 8161 %vec1 = call <4 x i16> asm "; def $0", "=v"() 8162 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 4> 8163 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 8164 ret void 8165} 8166 8167define void @v_shuffle_v4i16_v4i16__u_5_5_5(ptr addrspace(1) inreg %ptr) { 8168; GFX900-LABEL: v_shuffle_v4i16_v4i16__u_5_5_5: 8169; GFX900: ; %bb.0: 8170; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8171; GFX900-NEXT: ;;#ASMSTART 8172; GFX900-NEXT: ; def v[0:1] 8173; GFX900-NEXT: ;;#ASMEND 8174; GFX900-NEXT: s_mov_b32 s4, 0x7060302 8175; GFX900-NEXT: v_mov_b32_e32 v2, 0 8176; GFX900-NEXT: v_perm_b32 v1, v0, v0, s4 8177; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17] 8178; GFX900-NEXT: s_waitcnt vmcnt(0) 8179; GFX900-NEXT: s_setpc_b64 
s[30:31] 8180; 8181; GFX90A-LABEL: v_shuffle_v4i16_v4i16__u_5_5_5: 8182; GFX90A: ; %bb.0: 8183; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8184; GFX90A-NEXT: ;;#ASMSTART 8185; GFX90A-NEXT: ; def v[0:1] 8186; GFX90A-NEXT: ;;#ASMEND 8187; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 8188; GFX90A-NEXT: v_mov_b32_e32 v2, 0 8189; GFX90A-NEXT: v_perm_b32 v1, v0, v0, s4 8190; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17] 8191; GFX90A-NEXT: s_waitcnt vmcnt(0) 8192; GFX90A-NEXT: s_setpc_b64 s[30:31] 8193; 8194; GFX940-LABEL: v_shuffle_v4i16_v4i16__u_5_5_5: 8195; GFX940: ; %bb.0: 8196; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8197; GFX940-NEXT: ;;#ASMSTART 8198; GFX940-NEXT: ; def v[0:1] 8199; GFX940-NEXT: ;;#ASMEND 8200; GFX940-NEXT: s_mov_b32 s2, 0x7060302 8201; GFX940-NEXT: v_mov_b32_e32 v2, 0 8202; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 8203; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 8204; GFX940-NEXT: s_waitcnt vmcnt(0) 8205; GFX940-NEXT: s_setpc_b64 s[30:31] 8206 %vec0 = call <4 x i16> asm "; def $0", "=v"() 8207 %vec1 = call <4 x i16> asm "; def $0", "=v"() 8208 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 poison, i32 5, i32 5, i32 5> 8209 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 8210 ret void 8211} 8212 8213define void @v_shuffle_v4i16_v4i16__0_5_5_5(ptr addrspace(1) inreg %ptr) { 8214; GFX900-LABEL: v_shuffle_v4i16_v4i16__0_5_5_5: 8215; GFX900: ; %bb.0: 8216; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8217; GFX900-NEXT: ;;#ASMSTART 8218; GFX900-NEXT: ; def v[0:1] 8219; GFX900-NEXT: ;;#ASMEND 8220; GFX900-NEXT: s_mov_b32 s4, 0xffff 8221; GFX900-NEXT: ;;#ASMSTART 8222; GFX900-NEXT: ; def v[1:2] 8223; GFX900-NEXT: ;;#ASMEND 8224; GFX900-NEXT: v_bfi_b32 v0, s4, v0, v1 8225; GFX900-NEXT: s_mov_b32 s4, 0x7060302 8226; GFX900-NEXT: v_mov_b32_e32 v3, 0 8227; GFX900-NEXT: v_perm_b32 v1, v1, v1, s4 8228; GFX900-NEXT: global_store_dwordx2 v3, v[0:1], s[16:17] 8229; GFX900-NEXT: s_waitcnt 
vmcnt(0) 8230; GFX900-NEXT: s_setpc_b64 s[30:31] 8231; 8232; GFX90A-LABEL: v_shuffle_v4i16_v4i16__0_5_5_5: 8233; GFX90A: ; %bb.0: 8234; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8235; GFX90A-NEXT: ;;#ASMSTART 8236; GFX90A-NEXT: ; def v[0:1] 8237; GFX90A-NEXT: ;;#ASMEND 8238; GFX90A-NEXT: s_mov_b32 s4, 0xffff 8239; GFX90A-NEXT: ;;#ASMSTART 8240; GFX90A-NEXT: ; def v[2:3] 8241; GFX90A-NEXT: ;;#ASMEND 8242; GFX90A-NEXT: v_bfi_b32 v0, s4, v0, v2 8243; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 8244; GFX90A-NEXT: v_mov_b32_e32 v4, 0 8245; GFX90A-NEXT: v_perm_b32 v1, v2, v2, s4 8246; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 8247; GFX90A-NEXT: s_waitcnt vmcnt(0) 8248; GFX90A-NEXT: s_setpc_b64 s[30:31] 8249; 8250; GFX940-LABEL: v_shuffle_v4i16_v4i16__0_5_5_5: 8251; GFX940: ; %bb.0: 8252; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8253; GFX940-NEXT: ;;#ASMSTART 8254; GFX940-NEXT: ; def v[0:1] 8255; GFX940-NEXT: ;;#ASMEND 8256; GFX940-NEXT: s_mov_b32 s2, 0xffff 8257; GFX940-NEXT: ;;#ASMSTART 8258; GFX940-NEXT: ; def v[2:3] 8259; GFX940-NEXT: ;;#ASMEND 8260; GFX940-NEXT: v_mov_b32_e32 v4, 0 8261; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v2 8262; GFX940-NEXT: s_mov_b32 s2, 0x7060302 8263; GFX940-NEXT: v_perm_b32 v1, v2, v2, s2 8264; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 8265; GFX940-NEXT: s_waitcnt vmcnt(0) 8266; GFX940-NEXT: s_setpc_b64 s[30:31] 8267 %vec0 = call <4 x i16> asm "; def $0", "=v"() 8268 %vec1 = call <4 x i16> asm "; def $0", "=v"() 8269 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 0, i32 5, i32 5, i32 5> 8270 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 8271 ret void 8272} 8273 8274define void @v_shuffle_v4i16_v4i16__1_5_5_5(ptr addrspace(1) inreg %ptr) { 8275; GFX900-LABEL: v_shuffle_v4i16_v4i16__1_5_5_5: 8276; GFX900: ; %bb.0: 8277; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8278; GFX900-NEXT: ;;#ASMSTART 8279; GFX900-NEXT: ; def v[0:1] 8280; GFX900-NEXT: ;;#ASMEND 8281; 
GFX900-NEXT: ;;#ASMSTART 8282; GFX900-NEXT: ; def v[1:2] 8283; GFX900-NEXT: ;;#ASMEND 8284; GFX900-NEXT: s_mov_b32 s4, 0x7060302 8285; GFX900-NEXT: v_mov_b32_e32 v3, 0 8286; GFX900-NEXT: v_perm_b32 v0, v1, v0, s4 8287; GFX900-NEXT: v_perm_b32 v1, v1, v1, s4 8288; GFX900-NEXT: global_store_dwordx2 v3, v[0:1], s[16:17] 8289; GFX900-NEXT: s_waitcnt vmcnt(0) 8290; GFX900-NEXT: s_setpc_b64 s[30:31] 8291; 8292; GFX90A-LABEL: v_shuffle_v4i16_v4i16__1_5_5_5: 8293; GFX90A: ; %bb.0: 8294; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8295; GFX90A-NEXT: ;;#ASMSTART 8296; GFX90A-NEXT: ; def v[0:1] 8297; GFX90A-NEXT: ;;#ASMEND 8298; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 8299; GFX90A-NEXT: v_mov_b32_e32 v4, 0 8300; GFX90A-NEXT: ;;#ASMSTART 8301; GFX90A-NEXT: ; def v[2:3] 8302; GFX90A-NEXT: ;;#ASMEND 8303; GFX90A-NEXT: v_perm_b32 v0, v2, v0, s4 8304; GFX90A-NEXT: v_perm_b32 v1, v2, v2, s4 8305; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 8306; GFX90A-NEXT: s_waitcnt vmcnt(0) 8307; GFX90A-NEXT: s_setpc_b64 s[30:31] 8308; 8309; GFX940-LABEL: v_shuffle_v4i16_v4i16__1_5_5_5: 8310; GFX940: ; %bb.0: 8311; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8312; GFX940-NEXT: ;;#ASMSTART 8313; GFX940-NEXT: ; def v[0:1] 8314; GFX940-NEXT: ;;#ASMEND 8315; GFX940-NEXT: s_mov_b32 s2, 0x7060302 8316; GFX940-NEXT: v_mov_b32_e32 v4, 0 8317; GFX940-NEXT: ;;#ASMSTART 8318; GFX940-NEXT: ; def v[2:3] 8319; GFX940-NEXT: ;;#ASMEND 8320; GFX940-NEXT: s_nop 0 8321; GFX940-NEXT: v_perm_b32 v0, v2, v0, s2 8322; GFX940-NEXT: v_perm_b32 v1, v2, v2, s2 8323; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 8324; GFX940-NEXT: s_waitcnt vmcnt(0) 8325; GFX940-NEXT: s_setpc_b64 s[30:31] 8326 %vec0 = call <4 x i16> asm "; def $0", "=v"() 8327 %vec1 = call <4 x i16> asm "; def $0", "=v"() 8328 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 1, i32 5, i32 5, i32 5> 8329 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 8330 ret void 8331} 8332 8333define 
void @v_shuffle_v4i16_v4i16__2_5_5_5(ptr addrspace(1) inreg %ptr) { 8334; GFX900-LABEL: v_shuffle_v4i16_v4i16__2_5_5_5: 8335; GFX900: ; %bb.0: 8336; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8337; GFX900-NEXT: ;;#ASMSTART 8338; GFX900-NEXT: ; def v[0:1] 8339; GFX900-NEXT: ;;#ASMEND 8340; GFX900-NEXT: s_mov_b32 s4, 0xffff 8341; GFX900-NEXT: ;;#ASMSTART 8342; GFX900-NEXT: ; def v[2:3] 8343; GFX900-NEXT: ;;#ASMEND 8344; GFX900-NEXT: v_bfi_b32 v0, s4, v1, v2 8345; GFX900-NEXT: s_mov_b32 s4, 0x7060302 8346; GFX900-NEXT: v_mov_b32_e32 v4, 0 8347; GFX900-NEXT: v_perm_b32 v1, v2, v2, s4 8348; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 8349; GFX900-NEXT: s_waitcnt vmcnt(0) 8350; GFX900-NEXT: s_setpc_b64 s[30:31] 8351; 8352; GFX90A-LABEL: v_shuffle_v4i16_v4i16__2_5_5_5: 8353; GFX90A: ; %bb.0: 8354; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8355; GFX90A-NEXT: ;;#ASMSTART 8356; GFX90A-NEXT: ; def v[0:1] 8357; GFX90A-NEXT: ;;#ASMEND 8358; GFX90A-NEXT: s_mov_b32 s4, 0xffff 8359; GFX90A-NEXT: ;;#ASMSTART 8360; GFX90A-NEXT: ; def v[2:3] 8361; GFX90A-NEXT: ;;#ASMEND 8362; GFX90A-NEXT: v_bfi_b32 v0, s4, v1, v2 8363; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 8364; GFX90A-NEXT: v_mov_b32_e32 v4, 0 8365; GFX90A-NEXT: v_perm_b32 v1, v2, v2, s4 8366; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 8367; GFX90A-NEXT: s_waitcnt vmcnt(0) 8368; GFX90A-NEXT: s_setpc_b64 s[30:31] 8369; 8370; GFX940-LABEL: v_shuffle_v4i16_v4i16__2_5_5_5: 8371; GFX940: ; %bb.0: 8372; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8373; GFX940-NEXT: ;;#ASMSTART 8374; GFX940-NEXT: ; def v[0:1] 8375; GFX940-NEXT: ;;#ASMEND 8376; GFX940-NEXT: s_mov_b32 s2, 0xffff 8377; GFX940-NEXT: ;;#ASMSTART 8378; GFX940-NEXT: ; def v[2:3] 8379; GFX940-NEXT: ;;#ASMEND 8380; GFX940-NEXT: v_mov_b32_e32 v4, 0 8381; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v2 8382; GFX940-NEXT: s_mov_b32 s2, 0x7060302 8383; GFX940-NEXT: v_perm_b32 v1, v2, v2, s2 8384; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], 
s[0:1] sc0 sc1 8385; GFX940-NEXT: s_waitcnt vmcnt(0) 8386; GFX940-NEXT: s_setpc_b64 s[30:31] 8387 %vec0 = call <4 x i16> asm "; def $0", "=v"() 8388 %vec1 = call <4 x i16> asm "; def $0", "=v"() 8389 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 2, i32 5, i32 5, i32 5> 8390 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 8391 ret void 8392} 8393 8394define void @v_shuffle_v4i16_v4i16__3_5_5_5(ptr addrspace(1) inreg %ptr) { 8395; GFX900-LABEL: v_shuffle_v4i16_v4i16__3_5_5_5: 8396; GFX900: ; %bb.0: 8397; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8398; GFX900-NEXT: ;;#ASMSTART 8399; GFX900-NEXT: ; def v[0:1] 8400; GFX900-NEXT: ;;#ASMEND 8401; GFX900-NEXT: s_mov_b32 s4, 0x7060302 8402; GFX900-NEXT: v_mov_b32_e32 v4, 0 8403; GFX900-NEXT: ;;#ASMSTART 8404; GFX900-NEXT: ; def v[2:3] 8405; GFX900-NEXT: ;;#ASMEND 8406; GFX900-NEXT: v_perm_b32 v0, v2, v1, s4 8407; GFX900-NEXT: v_perm_b32 v1, v2, v2, s4 8408; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 8409; GFX900-NEXT: s_waitcnt vmcnt(0) 8410; GFX900-NEXT: s_setpc_b64 s[30:31] 8411; 8412; GFX90A-LABEL: v_shuffle_v4i16_v4i16__3_5_5_5: 8413; GFX90A: ; %bb.0: 8414; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8415; GFX90A-NEXT: ;;#ASMSTART 8416; GFX90A-NEXT: ; def v[0:1] 8417; GFX90A-NEXT: ;;#ASMEND 8418; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 8419; GFX90A-NEXT: v_mov_b32_e32 v4, 0 8420; GFX90A-NEXT: ;;#ASMSTART 8421; GFX90A-NEXT: ; def v[2:3] 8422; GFX90A-NEXT: ;;#ASMEND 8423; GFX90A-NEXT: v_perm_b32 v0, v2, v1, s4 8424; GFX90A-NEXT: v_perm_b32 v1, v2, v2, s4 8425; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 8426; GFX90A-NEXT: s_waitcnt vmcnt(0) 8427; GFX90A-NEXT: s_setpc_b64 s[30:31] 8428; 8429; GFX940-LABEL: v_shuffle_v4i16_v4i16__3_5_5_5: 8430; GFX940: ; %bb.0: 8431; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8432; GFX940-NEXT: ;;#ASMSTART 8433; GFX940-NEXT: ; def v[0:1] 8434; GFX940-NEXT: ;;#ASMEND 8435; GFX940-NEXT: s_mov_b32 s2, 0x7060302 
8436; GFX940-NEXT: v_mov_b32_e32 v4, 0 8437; GFX940-NEXT: ;;#ASMSTART 8438; GFX940-NEXT: ; def v[2:3] 8439; GFX940-NEXT: ;;#ASMEND 8440; GFX940-NEXT: s_nop 0 8441; GFX940-NEXT: v_perm_b32 v0, v2, v1, s2 8442; GFX940-NEXT: v_perm_b32 v1, v2, v2, s2 8443; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 8444; GFX940-NEXT: s_waitcnt vmcnt(0) 8445; GFX940-NEXT: s_setpc_b64 s[30:31] 8446 %vec0 = call <4 x i16> asm "; def $0", "=v"() 8447 %vec1 = call <4 x i16> asm "; def $0", "=v"() 8448 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 3, i32 5, i32 5, i32 5> 8449 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 8450 ret void 8451} 8452 8453define void @v_shuffle_v4i16_v4i16__4_5_5_5(ptr addrspace(1) inreg %ptr) { 8454; GFX900-LABEL: v_shuffle_v4i16_v4i16__4_5_5_5: 8455; GFX900: ; %bb.0: 8456; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8457; GFX900-NEXT: ;;#ASMSTART 8458; GFX900-NEXT: ; def v[0:1] 8459; GFX900-NEXT: ;;#ASMEND 8460; GFX900-NEXT: s_mov_b32 s4, 0x7060302 8461; GFX900-NEXT: v_mov_b32_e32 v2, 0 8462; GFX900-NEXT: v_perm_b32 v1, v0, v0, s4 8463; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17] 8464; GFX900-NEXT: s_waitcnt vmcnt(0) 8465; GFX900-NEXT: s_setpc_b64 s[30:31] 8466; 8467; GFX90A-LABEL: v_shuffle_v4i16_v4i16__4_5_5_5: 8468; GFX90A: ; %bb.0: 8469; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8470; GFX90A-NEXT: ;;#ASMSTART 8471; GFX90A-NEXT: ; def v[0:1] 8472; GFX90A-NEXT: ;;#ASMEND 8473; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 8474; GFX90A-NEXT: v_mov_b32_e32 v2, 0 8475; GFX90A-NEXT: v_perm_b32 v1, v0, v0, s4 8476; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17] 8477; GFX90A-NEXT: s_waitcnt vmcnt(0) 8478; GFX90A-NEXT: s_setpc_b64 s[30:31] 8479; 8480; GFX940-LABEL: v_shuffle_v4i16_v4i16__4_5_5_5: 8481; GFX940: ; %bb.0: 8482; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8483; GFX940-NEXT: ;;#ASMSTART 8484; GFX940-NEXT: ; def v[0:1] 8485; GFX940-NEXT: ;;#ASMEND 8486; GFX940-NEXT: 
s_mov_b32 s2, 0x7060302 8487; GFX940-NEXT: v_mov_b32_e32 v2, 0 8488; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 8489; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 8490; GFX940-NEXT: s_waitcnt vmcnt(0) 8491; GFX940-NEXT: s_setpc_b64 s[30:31] 8492 %vec0 = call <4 x i16> asm "; def $0", "=v"() 8493 %vec1 = call <4 x i16> asm "; def $0", "=v"() 8494 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 4, i32 5, i32 5, i32 5> 8495 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 8496 ret void 8497} 8498 8499define void @v_shuffle_v4i16_v4i16__5_5_5_5(ptr addrspace(1) inreg %ptr) { 8500; GFX900-LABEL: v_shuffle_v4i16_v4i16__5_5_5_5: 8501; GFX900: ; %bb.0: 8502; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8503; GFX900-NEXT: ;;#ASMSTART 8504; GFX900-NEXT: ; def v[0:1] 8505; GFX900-NEXT: ;;#ASMEND 8506; GFX900-NEXT: s_mov_b32 s4, 0x7060302 8507; GFX900-NEXT: v_perm_b32 v0, v0, v0, s4 8508; GFX900-NEXT: v_mov_b32_e32 v2, 0 8509; GFX900-NEXT: v_mov_b32_e32 v1, v0 8510; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17] 8511; GFX900-NEXT: s_waitcnt vmcnt(0) 8512; GFX900-NEXT: s_setpc_b64 s[30:31] 8513; 8514; GFX90A-LABEL: v_shuffle_v4i16_v4i16__5_5_5_5: 8515; GFX90A: ; %bb.0: 8516; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8517; GFX90A-NEXT: ;;#ASMSTART 8518; GFX90A-NEXT: ; def v[0:1] 8519; GFX90A-NEXT: ;;#ASMEND 8520; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 8521; GFX90A-NEXT: v_perm_b32 v0, v0, v0, s4 8522; GFX90A-NEXT: v_mov_b32_e32 v2, 0 8523; GFX90A-NEXT: v_mov_b32_e32 v1, v0 8524; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17] 8525; GFX90A-NEXT: s_waitcnt vmcnt(0) 8526; GFX90A-NEXT: s_setpc_b64 s[30:31] 8527; 8528; GFX940-LABEL: v_shuffle_v4i16_v4i16__5_5_5_5: 8529; GFX940: ; %bb.0: 8530; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8531; GFX940-NEXT: ;;#ASMSTART 8532; GFX940-NEXT: ; def v[0:1] 8533; GFX940-NEXT: ;;#ASMEND 8534; GFX940-NEXT: s_mov_b32 s2, 0x7060302 8535; GFX940-NEXT: v_perm_b32 v0, 
v0, v0, s2 8536; GFX940-NEXT: v_mov_b32_e32 v2, 0 8537; GFX940-NEXT: v_mov_b32_e32 v1, v0 8538; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 8539; GFX940-NEXT: s_waitcnt vmcnt(0) 8540; GFX940-NEXT: s_setpc_b64 s[30:31] 8541 %vec0 = call <4 x i16> asm "; def $0", "=v"() 8542 %vec1 = call <4 x i16> asm "; def $0", "=v"() 8543 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 5, i32 5, i32 5, i32 5> 8544 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 8545 ret void 8546} 8547 8548define void @v_shuffle_v4i16_v4i16__6_5_5_5(ptr addrspace(1) inreg %ptr) { 8549; GFX900-LABEL: v_shuffle_v4i16_v4i16__6_5_5_5: 8550; GFX900: ; %bb.0: 8551; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8552; GFX900-NEXT: ;;#ASMSTART 8553; GFX900-NEXT: ; def v[0:1] 8554; GFX900-NEXT: ;;#ASMEND 8555; GFX900-NEXT: s_mov_b32 s4, 0xffff 8556; GFX900-NEXT: v_bfi_b32 v1, s4, v1, v0 8557; GFX900-NEXT: s_mov_b32 s4, 0x7060302 8558; GFX900-NEXT: v_mov_b32_e32 v3, 0 8559; GFX900-NEXT: v_perm_b32 v2, v0, v0, s4 8560; GFX900-NEXT: global_store_dwordx2 v3, v[1:2], s[16:17] 8561; GFX900-NEXT: s_waitcnt vmcnt(0) 8562; GFX900-NEXT: s_setpc_b64 s[30:31] 8563; 8564; GFX90A-LABEL: v_shuffle_v4i16_v4i16__6_5_5_5: 8565; GFX90A: ; %bb.0: 8566; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8567; GFX90A-NEXT: s_mov_b32 s4, 0xffff 8568; GFX90A-NEXT: ;;#ASMSTART 8569; GFX90A-NEXT: ; def v[0:1] 8570; GFX90A-NEXT: ;;#ASMEND 8571; GFX90A-NEXT: v_bfi_b32 v2, s4, v1, v0 8572; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 8573; GFX90A-NEXT: v_mov_b32_e32 v4, 0 8574; GFX90A-NEXT: v_perm_b32 v3, v0, v0, s4 8575; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17] 8576; GFX90A-NEXT: s_waitcnt vmcnt(0) 8577; GFX90A-NEXT: s_setpc_b64 s[30:31] 8578; 8579; GFX940-LABEL: v_shuffle_v4i16_v4i16__6_5_5_5: 8580; GFX940: ; %bb.0: 8581; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8582; GFX940-NEXT: s_mov_b32 s2, 0xffff 8583; GFX940-NEXT: ;;#ASMSTART 8584; GFX940-NEXT: ; def 
v[0:1] 8585; GFX940-NEXT: ;;#ASMEND 8586; GFX940-NEXT: v_mov_b32_e32 v4, 0 8587; GFX940-NEXT: v_bfi_b32 v2, s2, v1, v0 8588; GFX940-NEXT: s_mov_b32 s2, 0x7060302 8589; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 8590; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 8591; GFX940-NEXT: s_waitcnt vmcnt(0) 8592; GFX940-NEXT: s_setpc_b64 s[30:31] 8593 %vec0 = call <4 x i16> asm "; def $0", "=v"() 8594 %vec1 = call <4 x i16> asm "; def $0", "=v"() 8595 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 6, i32 5, i32 5, i32 5> 8596 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 8597 ret void 8598} 8599 8600define void @v_shuffle_v4i16_v4i16__7_5_5_5(ptr addrspace(1) inreg %ptr) { 8601; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_5_5_5: 8602; GFX900: ; %bb.0: 8603; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8604; GFX900-NEXT: ;;#ASMSTART 8605; GFX900-NEXT: ; def v[0:1] 8606; GFX900-NEXT: ;;#ASMEND 8607; GFX900-NEXT: s_mov_b32 s4, 0x7060302 8608; GFX900-NEXT: v_mov_b32_e32 v3, 0 8609; GFX900-NEXT: v_perm_b32 v1, v0, v1, s4 8610; GFX900-NEXT: v_perm_b32 v2, v0, v0, s4 8611; GFX900-NEXT: global_store_dwordx2 v3, v[1:2], s[16:17] 8612; GFX900-NEXT: s_waitcnt vmcnt(0) 8613; GFX900-NEXT: s_setpc_b64 s[30:31] 8614; 8615; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_5_5_5: 8616; GFX90A: ; %bb.0: 8617; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8618; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 8619; GFX90A-NEXT: v_mov_b32_e32 v4, 0 8620; GFX90A-NEXT: ;;#ASMSTART 8621; GFX90A-NEXT: ; def v[0:1] 8622; GFX90A-NEXT: ;;#ASMEND 8623; GFX90A-NEXT: v_perm_b32 v2, v0, v1, s4 8624; GFX90A-NEXT: v_perm_b32 v3, v0, v0, s4 8625; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17] 8626; GFX90A-NEXT: s_waitcnt vmcnt(0) 8627; GFX90A-NEXT: s_setpc_b64 s[30:31] 8628; 8629; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_5_5_5: 8630; GFX940: ; %bb.0: 8631; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8632; GFX940-NEXT: s_mov_b32 s2, 0x7060302 8633; GFX940-NEXT: 
v_mov_b32_e32 v4, 0 8634; GFX940-NEXT: ;;#ASMSTART 8635; GFX940-NEXT: ; def v[0:1] 8636; GFX940-NEXT: ;;#ASMEND 8637; GFX940-NEXT: s_nop 0 8638; GFX940-NEXT: v_perm_b32 v2, v0, v1, s2 8639; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 8640; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 8641; GFX940-NEXT: s_waitcnt vmcnt(0) 8642; GFX940-NEXT: s_setpc_b64 s[30:31] 8643 %vec0 = call <4 x i16> asm "; def $0", "=v"() 8644 %vec1 = call <4 x i16> asm "; def $0", "=v"() 8645 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 5, i32 5, i32 5> 8646 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 8647 ret void 8648} 8649 8650define void @v_shuffle_v4i16_v4i16__7_u_5_5(ptr addrspace(1) inreg %ptr) { 8651; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_u_5_5: 8652; GFX900: ; %bb.0: 8653; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8654; GFX900-NEXT: ;;#ASMSTART 8655; GFX900-NEXT: ; def v[0:1] 8656; GFX900-NEXT: ;;#ASMEND 8657; GFX900-NEXT: s_mov_b32 s4, 0x7060302 8658; GFX900-NEXT: v_mov_b32_e32 v3, 0 8659; GFX900-NEXT: v_perm_b32 v2, v0, v0, s4 8660; GFX900-NEXT: v_alignbit_b32 v1, s4, v1, 16 8661; GFX900-NEXT: global_store_dwordx2 v3, v[1:2], s[16:17] 8662; GFX900-NEXT: s_waitcnt vmcnt(0) 8663; GFX900-NEXT: s_setpc_b64 s[30:31] 8664; 8665; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_u_5_5: 8666; GFX90A: ; %bb.0: 8667; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8668; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 8669; GFX90A-NEXT: v_mov_b32_e32 v4, 0 8670; GFX90A-NEXT: ;;#ASMSTART 8671; GFX90A-NEXT: ; def v[0:1] 8672; GFX90A-NEXT: ;;#ASMEND 8673; GFX90A-NEXT: v_perm_b32 v3, v0, v0, s4 8674; GFX90A-NEXT: v_alignbit_b32 v2, s4, v1, 16 8675; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17] 8676; GFX90A-NEXT: s_waitcnt vmcnt(0) 8677; GFX90A-NEXT: s_setpc_b64 s[30:31] 8678; 8679; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_u_5_5: 8680; GFX940: ; %bb.0: 8681; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8682; GFX940-NEXT: s_mov_b32 s2, 
0x7060302 8683; GFX940-NEXT: v_mov_b32_e32 v4, 0 8684; GFX940-NEXT: ;;#ASMSTART 8685; GFX940-NEXT: ; def v[0:1] 8686; GFX940-NEXT: ;;#ASMEND 8687; GFX940-NEXT: s_nop 0 8688; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 8689; GFX940-NEXT: v_alignbit_b32 v2, s0, v1, 16 8690; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 8691; GFX940-NEXT: s_waitcnt vmcnt(0) 8692; GFX940-NEXT: s_setpc_b64 s[30:31] 8693 %vec0 = call <4 x i16> asm "; def $0", "=v"() 8694 %vec1 = call <4 x i16> asm "; def $0", "=v"() 8695 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 poison, i32 5, i32 5> 8696 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 8697 ret void 8698} 8699 8700define void @v_shuffle_v4i16_v4i16__7_0_5_5(ptr addrspace(1) inreg %ptr) { 8701; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_0_5_5: 8702; GFX900: ; %bb.0: 8703; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8704; GFX900-NEXT: ;;#ASMSTART 8705; GFX900-NEXT: ; def v[0:1] 8706; GFX900-NEXT: ;;#ASMEND 8707; GFX900-NEXT: ;;#ASMSTART 8708; GFX900-NEXT: ; def v[1:2] 8709; GFX900-NEXT: ;;#ASMEND 8710; GFX900-NEXT: s_mov_b32 s4, 0x7060302 8711; GFX900-NEXT: v_mov_b32_e32 v3, 0 8712; GFX900-NEXT: v_perm_b32 v1, v1, v1, s4 8713; GFX900-NEXT: v_alignbit_b32 v0, v0, v2, 16 8714; GFX900-NEXT: global_store_dwordx2 v3, v[0:1], s[16:17] 8715; GFX900-NEXT: s_waitcnt vmcnt(0) 8716; GFX900-NEXT: s_setpc_b64 s[30:31] 8717; 8718; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_0_5_5: 8719; GFX90A: ; %bb.0: 8720; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8721; GFX90A-NEXT: ;;#ASMSTART 8722; GFX90A-NEXT: ; def v[0:1] 8723; GFX90A-NEXT: ;;#ASMEND 8724; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 8725; GFX90A-NEXT: v_mov_b32_e32 v4, 0 8726; GFX90A-NEXT: ;;#ASMSTART 8727; GFX90A-NEXT: ; def v[2:3] 8728; GFX90A-NEXT: ;;#ASMEND 8729; GFX90A-NEXT: v_perm_b32 v1, v2, v2, s4 8730; GFX90A-NEXT: v_alignbit_b32 v0, v0, v3, 16 8731; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 8732; GFX90A-NEXT: s_waitcnt 
vmcnt(0) 8733; GFX90A-NEXT: s_setpc_b64 s[30:31] 8734; 8735; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_0_5_5: 8736; GFX940: ; %bb.0: 8737; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8738; GFX940-NEXT: ;;#ASMSTART 8739; GFX940-NEXT: ; def v[0:1] 8740; GFX940-NEXT: ;;#ASMEND 8741; GFX940-NEXT: s_mov_b32 s2, 0x7060302 8742; GFX940-NEXT: v_mov_b32_e32 v4, 0 8743; GFX940-NEXT: ;;#ASMSTART 8744; GFX940-NEXT: ; def v[2:3] 8745; GFX940-NEXT: ;;#ASMEND 8746; GFX940-NEXT: s_nop 0 8747; GFX940-NEXT: v_perm_b32 v1, v2, v2, s2 8748; GFX940-NEXT: v_alignbit_b32 v0, v0, v3, 16 8749; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 8750; GFX940-NEXT: s_waitcnt vmcnt(0) 8751; GFX940-NEXT: s_setpc_b64 s[30:31] 8752 %vec0 = call <4 x i16> asm "; def $0", "=v"() 8753 %vec1 = call <4 x i16> asm "; def $0", "=v"() 8754 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 0, i32 5, i32 5> 8755 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 8756 ret void 8757} 8758 8759define void @v_shuffle_v4i16_v4i16__7_1_5_5(ptr addrspace(1) inreg %ptr) { 8760; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_1_5_5: 8761; GFX900: ; %bb.0: 8762; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8763; GFX900-NEXT: ;;#ASMSTART 8764; GFX900-NEXT: ; def v[0:1] 8765; GFX900-NEXT: ;;#ASMEND 8766; GFX900-NEXT: ;;#ASMSTART 8767; GFX900-NEXT: ; def v[1:2] 8768; GFX900-NEXT: ;;#ASMEND 8769; GFX900-NEXT: s_mov_b32 s4, 0x7060302 8770; GFX900-NEXT: v_mov_b32_e32 v3, 0 8771; GFX900-NEXT: v_perm_b32 v0, v0, v2, s4 8772; GFX900-NEXT: v_perm_b32 v1, v1, v1, s4 8773; GFX900-NEXT: global_store_dwordx2 v3, v[0:1], s[16:17] 8774; GFX900-NEXT: s_waitcnt vmcnt(0) 8775; GFX900-NEXT: s_setpc_b64 s[30:31] 8776; 8777; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_1_5_5: 8778; GFX90A: ; %bb.0: 8779; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8780; GFX90A-NEXT: ;;#ASMSTART 8781; GFX90A-NEXT: ; def v[0:1] 8782; GFX90A-NEXT: ;;#ASMEND 8783; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 8784; 
GFX90A-NEXT: v_mov_b32_e32 v4, 0 8785; GFX90A-NEXT: ;;#ASMSTART 8786; GFX90A-NEXT: ; def v[2:3] 8787; GFX90A-NEXT: ;;#ASMEND 8788; GFX90A-NEXT: v_perm_b32 v0, v0, v3, s4 8789; GFX90A-NEXT: v_perm_b32 v1, v2, v2, s4 8790; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 8791; GFX90A-NEXT: s_waitcnt vmcnt(0) 8792; GFX90A-NEXT: s_setpc_b64 s[30:31] 8793; 8794; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_1_5_5: 8795; GFX940: ; %bb.0: 8796; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8797; GFX940-NEXT: ;;#ASMSTART 8798; GFX940-NEXT: ; def v[0:1] 8799; GFX940-NEXT: ;;#ASMEND 8800; GFX940-NEXT: s_mov_b32 s2, 0x7060302 8801; GFX940-NEXT: v_mov_b32_e32 v4, 0 8802; GFX940-NEXT: ;;#ASMSTART 8803; GFX940-NEXT: ; def v[2:3] 8804; GFX940-NEXT: ;;#ASMEND 8805; GFX940-NEXT: s_nop 0 8806; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 8807; GFX940-NEXT: v_perm_b32 v1, v2, v2, s2 8808; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 8809; GFX940-NEXT: s_waitcnt vmcnt(0) 8810; GFX940-NEXT: s_setpc_b64 s[30:31] 8811 %vec0 = call <4 x i16> asm "; def $0", "=v"() 8812 %vec1 = call <4 x i16> asm "; def $0", "=v"() 8813 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 1, i32 5, i32 5> 8814 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 8815 ret void 8816} 8817 8818define void @v_shuffle_v4i16_v4i16__7_2_5_5(ptr addrspace(1) inreg %ptr) { 8819; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_2_5_5: 8820; GFX900: ; %bb.0: 8821; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8822; GFX900-NEXT: ;;#ASMSTART 8823; GFX900-NEXT: ; def v[0:1] 8824; GFX900-NEXT: ;;#ASMEND 8825; GFX900-NEXT: ;;#ASMSTART 8826; GFX900-NEXT: ; def v[2:3] 8827; GFX900-NEXT: ;;#ASMEND 8828; GFX900-NEXT: s_mov_b32 s4, 0x7060302 8829; GFX900-NEXT: v_mov_b32_e32 v4, 0 8830; GFX900-NEXT: v_perm_b32 v2, v2, v2, s4 8831; GFX900-NEXT: v_alignbit_b32 v1, v1, v3, 16 8832; GFX900-NEXT: global_store_dwordx2 v4, v[1:2], s[16:17] 8833; GFX900-NEXT: s_waitcnt vmcnt(0) 8834; GFX900-NEXT: 
s_setpc_b64 s[30:31] 8835; 8836; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_2_5_5: 8837; GFX90A: ; %bb.0: 8838; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8839; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 8840; GFX90A-NEXT: v_mov_b32_e32 v6, 0 8841; GFX90A-NEXT: ;;#ASMSTART 8842; GFX90A-NEXT: ; def v[0:1] 8843; GFX90A-NEXT: ;;#ASMEND 8844; GFX90A-NEXT: ;;#ASMSTART 8845; GFX90A-NEXT: ; def v[2:3] 8846; GFX90A-NEXT: ;;#ASMEND 8847; GFX90A-NEXT: v_perm_b32 v5, v2, v2, s4 8848; GFX90A-NEXT: v_alignbit_b32 v4, v1, v3, 16 8849; GFX90A-NEXT: global_store_dwordx2 v6, v[4:5], s[16:17] 8850; GFX90A-NEXT: s_waitcnt vmcnt(0) 8851; GFX90A-NEXT: s_setpc_b64 s[30:31] 8852; 8853; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_2_5_5: 8854; GFX940: ; %bb.0: 8855; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8856; GFX940-NEXT: s_mov_b32 s2, 0x7060302 8857; GFX940-NEXT: v_mov_b32_e32 v6, 0 8858; GFX940-NEXT: ;;#ASMSTART 8859; GFX940-NEXT: ; def v[0:1] 8860; GFX940-NEXT: ;;#ASMEND 8861; GFX940-NEXT: ;;#ASMSTART 8862; GFX940-NEXT: ; def v[2:3] 8863; GFX940-NEXT: ;;#ASMEND 8864; GFX940-NEXT: s_nop 0 8865; GFX940-NEXT: v_perm_b32 v5, v2, v2, s2 8866; GFX940-NEXT: v_alignbit_b32 v4, v1, v3, 16 8867; GFX940-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] sc0 sc1 8868; GFX940-NEXT: s_waitcnt vmcnt(0) 8869; GFX940-NEXT: s_setpc_b64 s[30:31] 8870 %vec0 = call <4 x i16> asm "; def $0", "=v"() 8871 %vec1 = call <4 x i16> asm "; def $0", "=v"() 8872 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 2, i32 5, i32 5> 8873 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 8874 ret void 8875} 8876 8877define void @v_shuffle_v4i16_v4i16__7_3_5_5(ptr addrspace(1) inreg %ptr) { 8878; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_3_5_5: 8879; GFX900: ; %bb.0: 8880; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8881; GFX900-NEXT: ;;#ASMSTART 8882; GFX900-NEXT: ; def v[0:1] 8883; GFX900-NEXT: ;;#ASMEND 8884; GFX900-NEXT: s_mov_b32 s4, 0x7060302 8885; GFX900-NEXT: v_mov_b32_e32 
v4, 0 8886; GFX900-NEXT: ;;#ASMSTART 8887; GFX900-NEXT: ; def v[2:3] 8888; GFX900-NEXT: ;;#ASMEND 8889; GFX900-NEXT: v_perm_b32 v0, v1, v3, s4 8890; GFX900-NEXT: v_perm_b32 v1, v2, v2, s4 8891; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 8892; GFX900-NEXT: s_waitcnt vmcnt(0) 8893; GFX900-NEXT: s_setpc_b64 s[30:31] 8894; 8895; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_3_5_5: 8896; GFX90A: ; %bb.0: 8897; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8898; GFX90A-NEXT: ;;#ASMSTART 8899; GFX90A-NEXT: ; def v[0:1] 8900; GFX90A-NEXT: ;;#ASMEND 8901; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 8902; GFX90A-NEXT: v_mov_b32_e32 v4, 0 8903; GFX90A-NEXT: ;;#ASMSTART 8904; GFX90A-NEXT: ; def v[2:3] 8905; GFX90A-NEXT: ;;#ASMEND 8906; GFX90A-NEXT: v_perm_b32 v0, v1, v3, s4 8907; GFX90A-NEXT: v_perm_b32 v1, v2, v2, s4 8908; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 8909; GFX90A-NEXT: s_waitcnt vmcnt(0) 8910; GFX90A-NEXT: s_setpc_b64 s[30:31] 8911; 8912; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_3_5_5: 8913; GFX940: ; %bb.0: 8914; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8915; GFX940-NEXT: ;;#ASMSTART 8916; GFX940-NEXT: ; def v[0:1] 8917; GFX940-NEXT: ;;#ASMEND 8918; GFX940-NEXT: s_mov_b32 s2, 0x7060302 8919; GFX940-NEXT: v_mov_b32_e32 v4, 0 8920; GFX940-NEXT: ;;#ASMSTART 8921; GFX940-NEXT: ; def v[2:3] 8922; GFX940-NEXT: ;;#ASMEND 8923; GFX940-NEXT: s_nop 0 8924; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 8925; GFX940-NEXT: v_perm_b32 v1, v2, v2, s2 8926; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 8927; GFX940-NEXT: s_waitcnt vmcnt(0) 8928; GFX940-NEXT: s_setpc_b64 s[30:31] 8929 %vec0 = call <4 x i16> asm "; def $0", "=v"() 8930 %vec1 = call <4 x i16> asm "; def $0", "=v"() 8931 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 3, i32 5, i32 5> 8932 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 8933 ret void 8934} 8935 8936define void @v_shuffle_v4i16_v4i16__7_4_5_5(ptr addrspace(1) inreg %ptr) { 
8937; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_4_5_5: 8938; GFX900: ; %bb.0: 8939; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8940; GFX900-NEXT: ;;#ASMSTART 8941; GFX900-NEXT: ; def v[0:1] 8942; GFX900-NEXT: ;;#ASMEND 8943; GFX900-NEXT: s_mov_b32 s4, 0x7060302 8944; GFX900-NEXT: v_mov_b32_e32 v3, 0 8945; GFX900-NEXT: v_perm_b32 v2, v0, v0, s4 8946; GFX900-NEXT: v_alignbit_b32 v1, v0, v1, 16 8947; GFX900-NEXT: global_store_dwordx2 v3, v[1:2], s[16:17] 8948; GFX900-NEXT: s_waitcnt vmcnt(0) 8949; GFX900-NEXT: s_setpc_b64 s[30:31] 8950; 8951; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_4_5_5: 8952; GFX90A: ; %bb.0: 8953; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8954; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 8955; GFX90A-NEXT: v_mov_b32_e32 v4, 0 8956; GFX90A-NEXT: ;;#ASMSTART 8957; GFX90A-NEXT: ; def v[0:1] 8958; GFX90A-NEXT: ;;#ASMEND 8959; GFX90A-NEXT: v_perm_b32 v3, v0, v0, s4 8960; GFX90A-NEXT: v_alignbit_b32 v2, v0, v1, 16 8961; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17] 8962; GFX90A-NEXT: s_waitcnt vmcnt(0) 8963; GFX90A-NEXT: s_setpc_b64 s[30:31] 8964; 8965; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_4_5_5: 8966; GFX940: ; %bb.0: 8967; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8968; GFX940-NEXT: s_mov_b32 s2, 0x7060302 8969; GFX940-NEXT: v_mov_b32_e32 v4, 0 8970; GFX940-NEXT: ;;#ASMSTART 8971; GFX940-NEXT: ; def v[0:1] 8972; GFX940-NEXT: ;;#ASMEND 8973; GFX940-NEXT: s_nop 0 8974; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 8975; GFX940-NEXT: v_alignbit_b32 v2, v0, v1, 16 8976; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 8977; GFX940-NEXT: s_waitcnt vmcnt(0) 8978; GFX940-NEXT: s_setpc_b64 s[30:31] 8979 %vec0 = call <4 x i16> asm "; def $0", "=v"() 8980 %vec1 = call <4 x i16> asm "; def $0", "=v"() 8981 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 4, i32 5, i32 5> 8982 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 8983 ret void 8984} 8985 8986define void 
@v_shuffle_v4i16_v4i16__7_6_5_5(ptr addrspace(1) inreg %ptr) { 8987; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_6_5_5: 8988; GFX900: ; %bb.0: 8989; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8990; GFX900-NEXT: ;;#ASMSTART 8991; GFX900-NEXT: ; def v[0:1] 8992; GFX900-NEXT: ;;#ASMEND 8993; GFX900-NEXT: s_mov_b32 s4, 0x7060302 8994; GFX900-NEXT: v_mov_b32_e32 v3, 0 8995; GFX900-NEXT: v_perm_b32 v2, v0, v0, s4 8996; GFX900-NEXT: v_alignbit_b32 v1, v1, v1, 16 8997; GFX900-NEXT: global_store_dwordx2 v3, v[1:2], s[16:17] 8998; GFX900-NEXT: s_waitcnt vmcnt(0) 8999; GFX900-NEXT: s_setpc_b64 s[30:31] 9000; 9001; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_6_5_5: 9002; GFX90A: ; %bb.0: 9003; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9004; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 9005; GFX90A-NEXT: v_mov_b32_e32 v4, 0 9006; GFX90A-NEXT: ;;#ASMSTART 9007; GFX90A-NEXT: ; def v[0:1] 9008; GFX90A-NEXT: ;;#ASMEND 9009; GFX90A-NEXT: v_perm_b32 v3, v0, v0, s4 9010; GFX90A-NEXT: v_alignbit_b32 v2, v1, v1, 16 9011; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17] 9012; GFX90A-NEXT: s_waitcnt vmcnt(0) 9013; GFX90A-NEXT: s_setpc_b64 s[30:31] 9014; 9015; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_6_5_5: 9016; GFX940: ; %bb.0: 9017; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9018; GFX940-NEXT: s_mov_b32 s2, 0x7060302 9019; GFX940-NEXT: v_mov_b32_e32 v4, 0 9020; GFX940-NEXT: ;;#ASMSTART 9021; GFX940-NEXT: ; def v[0:1] 9022; GFX940-NEXT: ;;#ASMEND 9023; GFX940-NEXT: s_nop 0 9024; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 9025; GFX940-NEXT: v_alignbit_b32 v2, v1, v1, 16 9026; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 9027; GFX940-NEXT: s_waitcnt vmcnt(0) 9028; GFX940-NEXT: s_setpc_b64 s[30:31] 9029 %vec0 = call <4 x i16> asm "; def $0", "=v"() 9030 %vec1 = call <4 x i16> asm "; def $0", "=v"() 9031 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 6, i32 5, i32 5> 9032 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 9033 
ret void 9034} 9035 9036define void @v_shuffle_v4i16_v4i16__7_7_5_5(ptr addrspace(1) inreg %ptr) { 9037; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_5_5: 9038; GFX900: ; %bb.0: 9039; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9040; GFX900-NEXT: ;;#ASMSTART 9041; GFX900-NEXT: ; def v[0:1] 9042; GFX900-NEXT: ;;#ASMEND 9043; GFX900-NEXT: s_mov_b32 s4, 0x7060302 9044; GFX900-NEXT: v_mov_b32_e32 v3, 0 9045; GFX900-NEXT: v_perm_b32 v2, v0, v0, s4 9046; GFX900-NEXT: v_perm_b32 v1, v1, v1, s4 9047; GFX900-NEXT: global_store_dwordx2 v3, v[1:2], s[16:17] 9048; GFX900-NEXT: s_waitcnt vmcnt(0) 9049; GFX900-NEXT: s_setpc_b64 s[30:31] 9050; 9051; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_5_5: 9052; GFX90A: ; %bb.0: 9053; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9054; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 9055; GFX90A-NEXT: v_mov_b32_e32 v4, 0 9056; GFX90A-NEXT: ;;#ASMSTART 9057; GFX90A-NEXT: ; def v[0:1] 9058; GFX90A-NEXT: ;;#ASMEND 9059; GFX90A-NEXT: v_perm_b32 v3, v0, v0, s4 9060; GFX90A-NEXT: v_perm_b32 v2, v1, v1, s4 9061; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17] 9062; GFX90A-NEXT: s_waitcnt vmcnt(0) 9063; GFX90A-NEXT: s_setpc_b64 s[30:31] 9064; 9065; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_5_5: 9066; GFX940: ; %bb.0: 9067; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9068; GFX940-NEXT: s_mov_b32 s2, 0x7060302 9069; GFX940-NEXT: v_mov_b32_e32 v4, 0 9070; GFX940-NEXT: ;;#ASMSTART 9071; GFX940-NEXT: ; def v[0:1] 9072; GFX940-NEXT: ;;#ASMEND 9073; GFX940-NEXT: s_nop 0 9074; GFX940-NEXT: v_perm_b32 v3, v0, v0, s2 9075; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 9076; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 9077; GFX940-NEXT: s_waitcnt vmcnt(0) 9078; GFX940-NEXT: s_setpc_b64 s[30:31] 9079 %vec0 = call <4 x i16> asm "; def $0", "=v"() 9080 %vec1 = call <4 x i16> asm "; def $0", "=v"() 9081 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 5, i32 5> 9082 store <4 x i16> %shuf, ptr 
addrspace(1) %ptr, align 8 9083 ret void 9084} 9085 9086define void @v_shuffle_v4i16_v4i16__7_7_u_5(ptr addrspace(1) inreg %ptr) { 9087; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_u_5: 9088; GFX900: ; %bb.0: 9089; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9090; GFX900-NEXT: ;;#ASMSTART 9091; GFX900-NEXT: ; def v[0:1] 9092; GFX900-NEXT: ;;#ASMEND 9093; GFX900-NEXT: s_mov_b32 s4, 0x7060302 9094; GFX900-NEXT: v_mov_b32_e32 v3, 0 9095; GFX900-NEXT: v_perm_b32 v1, v1, v1, s4 9096; GFX900-NEXT: v_mov_b32_e32 v2, v0 9097; GFX900-NEXT: global_store_dwordx2 v3, v[1:2], s[16:17] 9098; GFX900-NEXT: s_waitcnt vmcnt(0) 9099; GFX900-NEXT: s_setpc_b64 s[30:31] 9100; 9101; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_u_5: 9102; GFX90A: ; %bb.0: 9103; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9104; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 9105; GFX90A-NEXT: v_mov_b32_e32 v4, 0 9106; GFX90A-NEXT: ;;#ASMSTART 9107; GFX90A-NEXT: ; def v[0:1] 9108; GFX90A-NEXT: ;;#ASMEND 9109; GFX90A-NEXT: v_perm_b32 v2, v1, v1, s4 9110; GFX90A-NEXT: v_mov_b32_e32 v3, v0 9111; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17] 9112; GFX90A-NEXT: s_waitcnt vmcnt(0) 9113; GFX90A-NEXT: s_setpc_b64 s[30:31] 9114; 9115; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_u_5: 9116; GFX940: ; %bb.0: 9117; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9118; GFX940-NEXT: s_mov_b32 s2, 0x7060302 9119; GFX940-NEXT: v_mov_b32_e32 v4, 0 9120; GFX940-NEXT: ;;#ASMSTART 9121; GFX940-NEXT: ; def v[0:1] 9122; GFX940-NEXT: ;;#ASMEND 9123; GFX940-NEXT: s_nop 0 9124; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 9125; GFX940-NEXT: v_mov_b32_e32 v3, v0 9126; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 9127; GFX940-NEXT: s_waitcnt vmcnt(0) 9128; GFX940-NEXT: s_setpc_b64 s[30:31] 9129 %vec0 = call <4 x i16> asm "; def $0", "=v"() 9130 %vec1 = call <4 x i16> asm "; def $0", "=v"() 9131 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 poison, i32 5> 9132 store <4 x i16> 
%shuf, ptr addrspace(1) %ptr, align 8 9133 ret void 9134} 9135 9136define void @v_shuffle_v4i16_v4i16__7_7_0_5(ptr addrspace(1) inreg %ptr) { 9137; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_0_5: 9138; GFX900: ; %bb.0: 9139; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9140; GFX900-NEXT: ;;#ASMSTART 9141; GFX900-NEXT: ; def v[0:1] 9142; GFX900-NEXT: ;;#ASMEND 9143; GFX900-NEXT: ;;#ASMSTART 9144; GFX900-NEXT: ; def v[1:2] 9145; GFX900-NEXT: ;;#ASMEND 9146; GFX900-NEXT: s_mov_b32 s4, 0xffff 9147; GFX900-NEXT: v_bfi_b32 v1, s4, v0, v1 9148; GFX900-NEXT: s_mov_b32 s4, 0x7060302 9149; GFX900-NEXT: v_mov_b32_e32 v3, 0 9150; GFX900-NEXT: v_perm_b32 v0, v2, v2, s4 9151; GFX900-NEXT: global_store_dwordx2 v3, v[0:1], s[16:17] 9152; GFX900-NEXT: s_waitcnt vmcnt(0) 9153; GFX900-NEXT: s_setpc_b64 s[30:31] 9154; 9155; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_0_5: 9156; GFX90A: ; %bb.0: 9157; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9158; GFX90A-NEXT: ;;#ASMSTART 9159; GFX90A-NEXT: ; def v[0:1] 9160; GFX90A-NEXT: ;;#ASMEND 9161; GFX90A-NEXT: s_mov_b32 s4, 0xffff 9162; GFX90A-NEXT: ;;#ASMSTART 9163; GFX90A-NEXT: ; def v[2:3] 9164; GFX90A-NEXT: ;;#ASMEND 9165; GFX90A-NEXT: v_bfi_b32 v1, s4, v0, v2 9166; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 9167; GFX90A-NEXT: v_mov_b32_e32 v4, 0 9168; GFX90A-NEXT: v_perm_b32 v0, v3, v3, s4 9169; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 9170; GFX90A-NEXT: s_waitcnt vmcnt(0) 9171; GFX90A-NEXT: s_setpc_b64 s[30:31] 9172; 9173; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_0_5: 9174; GFX940: ; %bb.0: 9175; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9176; GFX940-NEXT: ;;#ASMSTART 9177; GFX940-NEXT: ; def v[0:1] 9178; GFX940-NEXT: ;;#ASMEND 9179; GFX940-NEXT: s_mov_b32 s2, 0xffff 9180; GFX940-NEXT: ;;#ASMSTART 9181; GFX940-NEXT: ; def v[2:3] 9182; GFX940-NEXT: ;;#ASMEND 9183; GFX940-NEXT: v_mov_b32_e32 v4, 0 9184; GFX940-NEXT: v_bfi_b32 v1, s2, v0, v2 9185; GFX940-NEXT: s_mov_b32 s2, 0x7060302 9186; GFX940-NEXT: 
v_perm_b32 v0, v3, v3, s2 9187; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 9188; GFX940-NEXT: s_waitcnt vmcnt(0) 9189; GFX940-NEXT: s_setpc_b64 s[30:31] 9190 %vec0 = call <4 x i16> asm "; def $0", "=v"() 9191 %vec1 = call <4 x i16> asm "; def $0", "=v"() 9192 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 0, i32 5> 9193 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 9194 ret void 9195} 9196 9197define void @v_shuffle_v4i16_v4i16__7_7_1_5(ptr addrspace(1) inreg %ptr) { 9198; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_1_5: 9199; GFX900: ; %bb.0: 9200; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9201; GFX900-NEXT: ;;#ASMSTART 9202; GFX900-NEXT: ; def v[0:1] 9203; GFX900-NEXT: ;;#ASMEND 9204; GFX900-NEXT: ;;#ASMSTART 9205; GFX900-NEXT: ; def v[1:2] 9206; GFX900-NEXT: ;;#ASMEND 9207; GFX900-NEXT: s_mov_b32 s4, 0x7060302 9208; GFX900-NEXT: v_mov_b32_e32 v3, 0 9209; GFX900-NEXT: v_perm_b32 v1, v1, v0, s4 9210; GFX900-NEXT: v_perm_b32 v0, v2, v2, s4 9211; GFX900-NEXT: global_store_dwordx2 v3, v[0:1], s[16:17] 9212; GFX900-NEXT: s_waitcnt vmcnt(0) 9213; GFX900-NEXT: s_setpc_b64 s[30:31] 9214; 9215; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_1_5: 9216; GFX90A: ; %bb.0: 9217; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9218; GFX90A-NEXT: ;;#ASMSTART 9219; GFX90A-NEXT: ; def v[0:1] 9220; GFX90A-NEXT: ;;#ASMEND 9221; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 9222; GFX90A-NEXT: v_mov_b32_e32 v4, 0 9223; GFX90A-NEXT: ;;#ASMSTART 9224; GFX90A-NEXT: ; def v[2:3] 9225; GFX90A-NEXT: ;;#ASMEND 9226; GFX90A-NEXT: v_perm_b32 v1, v2, v0, s4 9227; GFX90A-NEXT: v_perm_b32 v0, v3, v3, s4 9228; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 9229; GFX90A-NEXT: s_waitcnt vmcnt(0) 9230; GFX90A-NEXT: s_setpc_b64 s[30:31] 9231; 9232; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_1_5: 9233; GFX940: ; %bb.0: 9234; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9235; GFX940-NEXT: ;;#ASMSTART 9236; GFX940-NEXT: ; def 
v[0:1] 9237; GFX940-NEXT: ;;#ASMEND 9238; GFX940-NEXT: s_mov_b32 s2, 0x7060302 9239; GFX940-NEXT: v_mov_b32_e32 v4, 0 9240; GFX940-NEXT: ;;#ASMSTART 9241; GFX940-NEXT: ; def v[2:3] 9242; GFX940-NEXT: ;;#ASMEND 9243; GFX940-NEXT: s_nop 0 9244; GFX940-NEXT: v_perm_b32 v1, v2, v0, s2 9245; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 9246; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 9247; GFX940-NEXT: s_waitcnt vmcnt(0) 9248; GFX940-NEXT: s_setpc_b64 s[30:31] 9249 %vec0 = call <4 x i16> asm "; def $0", "=v"() 9250 %vec1 = call <4 x i16> asm "; def $0", "=v"() 9251 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 1, i32 5> 9252 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 9253 ret void 9254} 9255 9256define void @v_shuffle_v4i16_v4i16__7_7_2_5(ptr addrspace(1) inreg %ptr) { 9257; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_2_5: 9258; GFX900: ; %bb.0: 9259; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9260; GFX900-NEXT: ;;#ASMSTART 9261; GFX900-NEXT: ; def v[0:1] 9262; GFX900-NEXT: ;;#ASMEND 9263; GFX900-NEXT: s_mov_b32 s4, 0xffff 9264; GFX900-NEXT: ;;#ASMSTART 9265; GFX900-NEXT: ; def v[2:3] 9266; GFX900-NEXT: ;;#ASMEND 9267; GFX900-NEXT: v_bfi_b32 v1, s4, v1, v2 9268; GFX900-NEXT: s_mov_b32 s4, 0x7060302 9269; GFX900-NEXT: v_mov_b32_e32 v4, 0 9270; GFX900-NEXT: v_perm_b32 v0, v3, v3, s4 9271; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 9272; GFX900-NEXT: s_waitcnt vmcnt(0) 9273; GFX900-NEXT: s_setpc_b64 s[30:31] 9274; 9275; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_2_5: 9276; GFX90A: ; %bb.0: 9277; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9278; GFX90A-NEXT: ;;#ASMSTART 9279; GFX90A-NEXT: ; def v[0:1] 9280; GFX90A-NEXT: ;;#ASMEND 9281; GFX90A-NEXT: s_mov_b32 s4, 0xffff 9282; GFX90A-NEXT: ;;#ASMSTART 9283; GFX90A-NEXT: ; def v[2:3] 9284; GFX90A-NEXT: ;;#ASMEND 9285; GFX90A-NEXT: v_bfi_b32 v1, s4, v1, v2 9286; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 9287; GFX90A-NEXT: v_mov_b32_e32 v4, 0 
9288; GFX90A-NEXT: v_perm_b32 v0, v3, v3, s4 9289; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 9290; GFX90A-NEXT: s_waitcnt vmcnt(0) 9291; GFX90A-NEXT: s_setpc_b64 s[30:31] 9292; 9293; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_2_5: 9294; GFX940: ; %bb.0: 9295; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9296; GFX940-NEXT: ;;#ASMSTART 9297; GFX940-NEXT: ; def v[0:1] 9298; GFX940-NEXT: ;;#ASMEND 9299; GFX940-NEXT: s_mov_b32 s2, 0xffff 9300; GFX940-NEXT: ;;#ASMSTART 9301; GFX940-NEXT: ; def v[2:3] 9302; GFX940-NEXT: ;;#ASMEND 9303; GFX940-NEXT: v_mov_b32_e32 v4, 0 9304; GFX940-NEXT: v_bfi_b32 v1, s2, v1, v2 9305; GFX940-NEXT: s_mov_b32 s2, 0x7060302 9306; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 9307; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 9308; GFX940-NEXT: s_waitcnt vmcnt(0) 9309; GFX940-NEXT: s_setpc_b64 s[30:31] 9310 %vec0 = call <4 x i16> asm "; def $0", "=v"() 9311 %vec1 = call <4 x i16> asm "; def $0", "=v"() 9312 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 2, i32 5> 9313 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 9314 ret void 9315} 9316 9317define void @v_shuffle_v4i16_v4i16__7_7_3_5(ptr addrspace(1) inreg %ptr) { 9318; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_3_5: 9319; GFX900: ; %bb.0: 9320; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9321; GFX900-NEXT: ;;#ASMSTART 9322; GFX900-NEXT: ; def v[0:1] 9323; GFX900-NEXT: ;;#ASMEND 9324; GFX900-NEXT: s_mov_b32 s4, 0x7060302 9325; GFX900-NEXT: v_mov_b32_e32 v4, 0 9326; GFX900-NEXT: ;;#ASMSTART 9327; GFX900-NEXT: ; def v[2:3] 9328; GFX900-NEXT: ;;#ASMEND 9329; GFX900-NEXT: v_perm_b32 v1, v2, v1, s4 9330; GFX900-NEXT: v_perm_b32 v0, v3, v3, s4 9331; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 9332; GFX900-NEXT: s_waitcnt vmcnt(0) 9333; GFX900-NEXT: s_setpc_b64 s[30:31] 9334; 9335; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_3_5: 9336; GFX90A: ; %bb.0: 9337; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 
9338; GFX90A-NEXT: ;;#ASMSTART 9339; GFX90A-NEXT: ; def v[0:1] 9340; GFX90A-NEXT: ;;#ASMEND 9341; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 9342; GFX90A-NEXT: v_mov_b32_e32 v4, 0 9343; GFX90A-NEXT: ;;#ASMSTART 9344; GFX90A-NEXT: ; def v[2:3] 9345; GFX90A-NEXT: ;;#ASMEND 9346; GFX90A-NEXT: v_perm_b32 v1, v2, v1, s4 9347; GFX90A-NEXT: v_perm_b32 v0, v3, v3, s4 9348; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 9349; GFX90A-NEXT: s_waitcnt vmcnt(0) 9350; GFX90A-NEXT: s_setpc_b64 s[30:31] 9351; 9352; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_3_5: 9353; GFX940: ; %bb.0: 9354; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9355; GFX940-NEXT: ;;#ASMSTART 9356; GFX940-NEXT: ; def v[0:1] 9357; GFX940-NEXT: ;;#ASMEND 9358; GFX940-NEXT: s_mov_b32 s2, 0x7060302 9359; GFX940-NEXT: v_mov_b32_e32 v4, 0 9360; GFX940-NEXT: ;;#ASMSTART 9361; GFX940-NEXT: ; def v[2:3] 9362; GFX940-NEXT: ;;#ASMEND 9363; GFX940-NEXT: s_nop 0 9364; GFX940-NEXT: v_perm_b32 v1, v2, v1, s2 9365; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 9366; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 9367; GFX940-NEXT: s_waitcnt vmcnt(0) 9368; GFX940-NEXT: s_setpc_b64 s[30:31] 9369 %vec0 = call <4 x i16> asm "; def $0", "=v"() 9370 %vec1 = call <4 x i16> asm "; def $0", "=v"() 9371 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 3, i32 5> 9372 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 9373 ret void 9374} 9375 9376define void @v_shuffle_v4i16_v4i16__7_7_4_5(ptr addrspace(1) inreg %ptr) { 9377; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_4_5: 9378; GFX900: ; %bb.0: 9379; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9380; GFX900-NEXT: ;;#ASMSTART 9381; GFX900-NEXT: ; def v[0:1] 9382; GFX900-NEXT: ;;#ASMEND 9383; GFX900-NEXT: s_mov_b32 s4, 0x7060302 9384; GFX900-NEXT: v_mov_b32_e32 v3, 0 9385; GFX900-NEXT: v_perm_b32 v1, v1, v1, s4 9386; GFX900-NEXT: v_mov_b32_e32 v2, v0 9387; GFX900-NEXT: global_store_dwordx2 v3, v[1:2], s[16:17] 9388; 
GFX900-NEXT: s_waitcnt vmcnt(0) 9389; GFX900-NEXT: s_setpc_b64 s[30:31] 9390; 9391; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_4_5: 9392; GFX90A: ; %bb.0: 9393; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9394; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 9395; GFX90A-NEXT: v_mov_b32_e32 v4, 0 9396; GFX90A-NEXT: ;;#ASMSTART 9397; GFX90A-NEXT: ; def v[0:1] 9398; GFX90A-NEXT: ;;#ASMEND 9399; GFX90A-NEXT: v_perm_b32 v2, v1, v1, s4 9400; GFX90A-NEXT: v_mov_b32_e32 v3, v0 9401; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17] 9402; GFX90A-NEXT: s_waitcnt vmcnt(0) 9403; GFX90A-NEXT: s_setpc_b64 s[30:31] 9404; 9405; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_4_5: 9406; GFX940: ; %bb.0: 9407; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9408; GFX940-NEXT: s_mov_b32 s2, 0x7060302 9409; GFX940-NEXT: v_mov_b32_e32 v4, 0 9410; GFX940-NEXT: ;;#ASMSTART 9411; GFX940-NEXT: ; def v[0:1] 9412; GFX940-NEXT: ;;#ASMEND 9413; GFX940-NEXT: s_nop 0 9414; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 9415; GFX940-NEXT: v_mov_b32_e32 v3, v0 9416; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 9417; GFX940-NEXT: s_waitcnt vmcnt(0) 9418; GFX940-NEXT: s_setpc_b64 s[30:31] 9419 %vec0 = call <4 x i16> asm "; def $0", "=v"() 9420 %vec1 = call <4 x i16> asm "; def $0", "=v"() 9421 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 4, i32 5> 9422 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 9423 ret void 9424} 9425 9426define void @v_shuffle_v4i16_v4i16__7_7_6_5(ptr addrspace(1) inreg %ptr) { 9427; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_6_5: 9428; GFX900: ; %bb.0: 9429; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9430; GFX900-NEXT: s_mov_b32 s4, 0xffff 9431; GFX900-NEXT: ;;#ASMSTART 9432; GFX900-NEXT: ; def v[0:1] 9433; GFX900-NEXT: ;;#ASMEND 9434; GFX900-NEXT: v_bfi_b32 v2, s4, v1, v0 9435; GFX900-NEXT: s_mov_b32 s4, 0x7060302 9436; GFX900-NEXT: v_mov_b32_e32 v3, 0 9437; GFX900-NEXT: v_perm_b32 v1, v1, v1, s4 9438; 
GFX900-NEXT: global_store_dwordx2 v3, v[1:2], s[16:17] 9439; GFX900-NEXT: s_waitcnt vmcnt(0) 9440; GFX900-NEXT: s_setpc_b64 s[30:31] 9441; 9442; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_6_5: 9443; GFX90A: ; %bb.0: 9444; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9445; GFX90A-NEXT: s_mov_b32 s4, 0xffff 9446; GFX90A-NEXT: ;;#ASMSTART 9447; GFX90A-NEXT: ; def v[0:1] 9448; GFX90A-NEXT: ;;#ASMEND 9449; GFX90A-NEXT: v_bfi_b32 v3, s4, v1, v0 9450; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 9451; GFX90A-NEXT: v_mov_b32_e32 v4, 0 9452; GFX90A-NEXT: v_perm_b32 v2, v1, v1, s4 9453; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17] 9454; GFX90A-NEXT: s_waitcnt vmcnt(0) 9455; GFX90A-NEXT: s_setpc_b64 s[30:31] 9456; 9457; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_6_5: 9458; GFX940: ; %bb.0: 9459; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9460; GFX940-NEXT: s_mov_b32 s2, 0xffff 9461; GFX940-NEXT: ;;#ASMSTART 9462; GFX940-NEXT: ; def v[0:1] 9463; GFX940-NEXT: ;;#ASMEND 9464; GFX940-NEXT: v_mov_b32_e32 v4, 0 9465; GFX940-NEXT: v_bfi_b32 v3, s2, v1, v0 9466; GFX940-NEXT: s_mov_b32 s2, 0x7060302 9467; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 9468; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 9469; GFX940-NEXT: s_waitcnt vmcnt(0) 9470; GFX940-NEXT: s_setpc_b64 s[30:31] 9471 %vec0 = call <4 x i16> asm "; def $0", "=v"() 9472 %vec1 = call <4 x i16> asm "; def $0", "=v"() 9473 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 5> 9474 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 9475 ret void 9476} 9477 9478define void @v_shuffle_v4i16_v4i16__u_6_6_6(ptr addrspace(1) inreg %ptr) { 9479; GFX900-LABEL: v_shuffle_v4i16_v4i16__u_6_6_6: 9480; GFX900: ; %bb.0: 9481; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9482; GFX900-NEXT: ;;#ASMSTART 9483; GFX900-NEXT: ; def v[0:1] 9484; GFX900-NEXT: ;;#ASMEND 9485; GFX900-NEXT: s_mov_b32 s4, 0x5040100 9486; GFX900-NEXT: v_mov_b32_e32 v3, 0 9487; GFX900-NEXT: 
v_perm_b32 v2, v1, v1, s4 9488; GFX900-NEXT: v_lshlrev_b32_e32 v1, 16, v1 9489; GFX900-NEXT: global_store_dwordx2 v3, v[1:2], s[16:17] 9490; GFX900-NEXT: s_waitcnt vmcnt(0) 9491; GFX900-NEXT: s_setpc_b64 s[30:31] 9492; 9493; GFX90A-LABEL: v_shuffle_v4i16_v4i16__u_6_6_6: 9494; GFX90A: ; %bb.0: 9495; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9496; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 9497; GFX90A-NEXT: v_mov_b32_e32 v4, 0 9498; GFX90A-NEXT: ;;#ASMSTART 9499; GFX90A-NEXT: ; def v[0:1] 9500; GFX90A-NEXT: ;;#ASMEND 9501; GFX90A-NEXT: v_perm_b32 v3, v1, v1, s4 9502; GFX90A-NEXT: v_lshlrev_b32_e32 v2, 16, v1 9503; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17] 9504; GFX90A-NEXT: s_waitcnt vmcnt(0) 9505; GFX90A-NEXT: s_setpc_b64 s[30:31] 9506; 9507; GFX940-LABEL: v_shuffle_v4i16_v4i16__u_6_6_6: 9508; GFX940: ; %bb.0: 9509; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9510; GFX940-NEXT: s_mov_b32 s2, 0x5040100 9511; GFX940-NEXT: v_mov_b32_e32 v4, 0 9512; GFX940-NEXT: ;;#ASMSTART 9513; GFX940-NEXT: ; def v[0:1] 9514; GFX940-NEXT: ;;#ASMEND 9515; GFX940-NEXT: s_nop 0 9516; GFX940-NEXT: v_perm_b32 v3, v1, v1, s2 9517; GFX940-NEXT: v_lshlrev_b32_e32 v2, 16, v1 9518; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 9519; GFX940-NEXT: s_waitcnt vmcnt(0) 9520; GFX940-NEXT: s_setpc_b64 s[30:31] 9521 %vec0 = call <4 x i16> asm "; def $0", "=v"() 9522 %vec1 = call <4 x i16> asm "; def $0", "=v"() 9523 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 poison, i32 6, i32 6, i32 6> 9524 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 9525 ret void 9526} 9527 9528define void @v_shuffle_v4i16_v4i16__0_6_6_6(ptr addrspace(1) inreg %ptr) { 9529; GFX900-LABEL: v_shuffle_v4i16_v4i16__0_6_6_6: 9530; GFX900: ; %bb.0: 9531; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9532; GFX900-NEXT: ;;#ASMSTART 9533; GFX900-NEXT: ; def v[0:1] 9534; GFX900-NEXT: ;;#ASMEND 9535; GFX900-NEXT: ;;#ASMSTART 9536; GFX900-NEXT: ; def v[1:2] 
9537; GFX900-NEXT: ;;#ASMEND 9538; GFX900-NEXT: s_mov_b32 s4, 0x5040100 9539; GFX900-NEXT: v_mov_b32_e32 v3, 0 9540; GFX900-NEXT: v_perm_b32 v0, v2, v0, s4 9541; GFX900-NEXT: v_perm_b32 v1, v2, v2, s4 9542; GFX900-NEXT: global_store_dwordx2 v3, v[0:1], s[16:17] 9543; GFX900-NEXT: s_waitcnt vmcnt(0) 9544; GFX900-NEXT: s_setpc_b64 s[30:31] 9545; 9546; GFX90A-LABEL: v_shuffle_v4i16_v4i16__0_6_6_6: 9547; GFX90A: ; %bb.0: 9548; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9549; GFX90A-NEXT: ;;#ASMSTART 9550; GFX90A-NEXT: ; def v[0:1] 9551; GFX90A-NEXT: ;;#ASMEND 9552; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 9553; GFX90A-NEXT: v_mov_b32_e32 v4, 0 9554; GFX90A-NEXT: ;;#ASMSTART 9555; GFX90A-NEXT: ; def v[2:3] 9556; GFX90A-NEXT: ;;#ASMEND 9557; GFX90A-NEXT: v_perm_b32 v0, v3, v0, s4 9558; GFX90A-NEXT: v_perm_b32 v1, v3, v3, s4 9559; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 9560; GFX90A-NEXT: s_waitcnt vmcnt(0) 9561; GFX90A-NEXT: s_setpc_b64 s[30:31] 9562; 9563; GFX940-LABEL: v_shuffle_v4i16_v4i16__0_6_6_6: 9564; GFX940: ; %bb.0: 9565; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9566; GFX940-NEXT: ;;#ASMSTART 9567; GFX940-NEXT: ; def v[0:1] 9568; GFX940-NEXT: ;;#ASMEND 9569; GFX940-NEXT: s_mov_b32 s2, 0x5040100 9570; GFX940-NEXT: v_mov_b32_e32 v4, 0 9571; GFX940-NEXT: ;;#ASMSTART 9572; GFX940-NEXT: ; def v[2:3] 9573; GFX940-NEXT: ;;#ASMEND 9574; GFX940-NEXT: s_nop 0 9575; GFX940-NEXT: v_perm_b32 v0, v3, v0, s2 9576; GFX940-NEXT: v_perm_b32 v1, v3, v3, s2 9577; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 9578; GFX940-NEXT: s_waitcnt vmcnt(0) 9579; GFX940-NEXT: s_setpc_b64 s[30:31] 9580 %vec0 = call <4 x i16> asm "; def $0", "=v"() 9581 %vec1 = call <4 x i16> asm "; def $0", "=v"() 9582 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 0, i32 6, i32 6, i32 6> 9583 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 9584 ret void 9585} 9586 9587define void @v_shuffle_v4i16_v4i16__1_6_6_6(ptr addrspace(1) inreg 
%ptr) { 9588; GFX900-LABEL: v_shuffle_v4i16_v4i16__1_6_6_6: 9589; GFX900: ; %bb.0: 9590; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9591; GFX900-NEXT: ;;#ASMSTART 9592; GFX900-NEXT: ; def v[0:1] 9593; GFX900-NEXT: ;;#ASMEND 9594; GFX900-NEXT: ;;#ASMSTART 9595; GFX900-NEXT: ; def v[1:2] 9596; GFX900-NEXT: ;;#ASMEND 9597; GFX900-NEXT: s_mov_b32 s4, 0x5040100 9598; GFX900-NEXT: v_mov_b32_e32 v3, 0 9599; GFX900-NEXT: v_perm_b32 v1, v2, v2, s4 9600; GFX900-NEXT: v_alignbit_b32 v0, v2, v0, 16 9601; GFX900-NEXT: global_store_dwordx2 v3, v[0:1], s[16:17] 9602; GFX900-NEXT: s_waitcnt vmcnt(0) 9603; GFX900-NEXT: s_setpc_b64 s[30:31] 9604; 9605; GFX90A-LABEL: v_shuffle_v4i16_v4i16__1_6_6_6: 9606; GFX90A: ; %bb.0: 9607; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9608; GFX90A-NEXT: ;;#ASMSTART 9609; GFX90A-NEXT: ; def v[0:1] 9610; GFX90A-NEXT: ;;#ASMEND 9611; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 9612; GFX90A-NEXT: v_mov_b32_e32 v4, 0 9613; GFX90A-NEXT: ;;#ASMSTART 9614; GFX90A-NEXT: ; def v[2:3] 9615; GFX90A-NEXT: ;;#ASMEND 9616; GFX90A-NEXT: v_perm_b32 v1, v3, v3, s4 9617; GFX90A-NEXT: v_alignbit_b32 v0, v3, v0, 16 9618; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 9619; GFX90A-NEXT: s_waitcnt vmcnt(0) 9620; GFX90A-NEXT: s_setpc_b64 s[30:31] 9621; 9622; GFX940-LABEL: v_shuffle_v4i16_v4i16__1_6_6_6: 9623; GFX940: ; %bb.0: 9624; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9625; GFX940-NEXT: ;;#ASMSTART 9626; GFX940-NEXT: ; def v[0:1] 9627; GFX940-NEXT: ;;#ASMEND 9628; GFX940-NEXT: s_mov_b32 s2, 0x5040100 9629; GFX940-NEXT: v_mov_b32_e32 v4, 0 9630; GFX940-NEXT: ;;#ASMSTART 9631; GFX940-NEXT: ; def v[2:3] 9632; GFX940-NEXT: ;;#ASMEND 9633; GFX940-NEXT: s_nop 0 9634; GFX940-NEXT: v_perm_b32 v1, v3, v3, s2 9635; GFX940-NEXT: v_alignbit_b32 v0, v3, v0, 16 9636; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 9637; GFX940-NEXT: s_waitcnt vmcnt(0) 9638; GFX940-NEXT: s_setpc_b64 s[30:31] 9639 %vec0 = call <4 x i16> asm "; def $0", 
"=v"() 9640 %vec1 = call <4 x i16> asm "; def $0", "=v"() 9641 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 1, i32 6, i32 6, i32 6> 9642 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 9643 ret void 9644} 9645 9646define void @v_shuffle_v4i16_v4i16__2_6_6_6(ptr addrspace(1) inreg %ptr) { 9647; GFX900-LABEL: v_shuffle_v4i16_v4i16__2_6_6_6: 9648; GFX900: ; %bb.0: 9649; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9650; GFX900-NEXT: ;;#ASMSTART 9651; GFX900-NEXT: ; def v[0:1] 9652; GFX900-NEXT: ;;#ASMEND 9653; GFX900-NEXT: s_mov_b32 s4, 0x5040100 9654; GFX900-NEXT: v_mov_b32_e32 v4, 0 9655; GFX900-NEXT: ;;#ASMSTART 9656; GFX900-NEXT: ; def v[2:3] 9657; GFX900-NEXT: ;;#ASMEND 9658; GFX900-NEXT: v_perm_b32 v0, v3, v1, s4 9659; GFX900-NEXT: v_perm_b32 v1, v3, v3, s4 9660; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 9661; GFX900-NEXT: s_waitcnt vmcnt(0) 9662; GFX900-NEXT: s_setpc_b64 s[30:31] 9663; 9664; GFX90A-LABEL: v_shuffle_v4i16_v4i16__2_6_6_6: 9665; GFX90A: ; %bb.0: 9666; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9667; GFX90A-NEXT: ;;#ASMSTART 9668; GFX90A-NEXT: ; def v[0:1] 9669; GFX90A-NEXT: ;;#ASMEND 9670; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 9671; GFX90A-NEXT: v_mov_b32_e32 v4, 0 9672; GFX90A-NEXT: ;;#ASMSTART 9673; GFX90A-NEXT: ; def v[2:3] 9674; GFX90A-NEXT: ;;#ASMEND 9675; GFX90A-NEXT: v_perm_b32 v0, v3, v1, s4 9676; GFX90A-NEXT: v_perm_b32 v1, v3, v3, s4 9677; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 9678; GFX90A-NEXT: s_waitcnt vmcnt(0) 9679; GFX90A-NEXT: s_setpc_b64 s[30:31] 9680; 9681; GFX940-LABEL: v_shuffle_v4i16_v4i16__2_6_6_6: 9682; GFX940: ; %bb.0: 9683; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9684; GFX940-NEXT: ;;#ASMSTART 9685; GFX940-NEXT: ; def v[0:1] 9686; GFX940-NEXT: ;;#ASMEND 9687; GFX940-NEXT: s_mov_b32 s2, 0x5040100 9688; GFX940-NEXT: v_mov_b32_e32 v4, 0 9689; GFX940-NEXT: ;;#ASMSTART 9690; GFX940-NEXT: ; def v[2:3] 9691; GFX940-NEXT: ;;#ASMEND 9692; 
GFX940-NEXT: s_nop 0 9693; GFX940-NEXT: v_perm_b32 v0, v3, v1, s2 9694; GFX940-NEXT: v_perm_b32 v1, v3, v3, s2 9695; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 9696; GFX940-NEXT: s_waitcnt vmcnt(0) 9697; GFX940-NEXT: s_setpc_b64 s[30:31] 9698 %vec0 = call <4 x i16> asm "; def $0", "=v"() 9699 %vec1 = call <4 x i16> asm "; def $0", "=v"() 9700 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 2, i32 6, i32 6, i32 6> 9701 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 9702 ret void 9703} 9704 9705define void @v_shuffle_v4i16_v4i16__3_6_6_6(ptr addrspace(1) inreg %ptr) { 9706; GFX900-LABEL: v_shuffle_v4i16_v4i16__3_6_6_6: 9707; GFX900: ; %bb.0: 9708; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9709; GFX900-NEXT: ;;#ASMSTART 9710; GFX900-NEXT: ; def v[0:1] 9711; GFX900-NEXT: ;;#ASMEND 9712; GFX900-NEXT: ;;#ASMSTART 9713; GFX900-NEXT: ; def v[2:3] 9714; GFX900-NEXT: ;;#ASMEND 9715; GFX900-NEXT: s_mov_b32 s4, 0x5040100 9716; GFX900-NEXT: v_mov_b32_e32 v4, 0 9717; GFX900-NEXT: v_perm_b32 v2, v3, v3, s4 9718; GFX900-NEXT: v_alignbit_b32 v1, v3, v1, 16 9719; GFX900-NEXT: global_store_dwordx2 v4, v[1:2], s[16:17] 9720; GFX900-NEXT: s_waitcnt vmcnt(0) 9721; GFX900-NEXT: s_setpc_b64 s[30:31] 9722; 9723; GFX90A-LABEL: v_shuffle_v4i16_v4i16__3_6_6_6: 9724; GFX90A: ; %bb.0: 9725; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9726; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 9727; GFX90A-NEXT: v_mov_b32_e32 v6, 0 9728; GFX90A-NEXT: ;;#ASMSTART 9729; GFX90A-NEXT: ; def v[0:1] 9730; GFX90A-NEXT: ;;#ASMEND 9731; GFX90A-NEXT: ;;#ASMSTART 9732; GFX90A-NEXT: ; def v[2:3] 9733; GFX90A-NEXT: ;;#ASMEND 9734; GFX90A-NEXT: v_perm_b32 v5, v3, v3, s4 9735; GFX90A-NEXT: v_alignbit_b32 v4, v3, v1, 16 9736; GFX90A-NEXT: global_store_dwordx2 v6, v[4:5], s[16:17] 9737; GFX90A-NEXT: s_waitcnt vmcnt(0) 9738; GFX90A-NEXT: s_setpc_b64 s[30:31] 9739; 9740; GFX940-LABEL: v_shuffle_v4i16_v4i16__3_6_6_6: 9741; GFX940: ; %bb.0: 9742; GFX940-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9743; GFX940-NEXT: s_mov_b32 s2, 0x5040100 9744; GFX940-NEXT: v_mov_b32_e32 v6, 0 9745; GFX940-NEXT: ;;#ASMSTART 9746; GFX940-NEXT: ; def v[0:1] 9747; GFX940-NEXT: ;;#ASMEND 9748; GFX940-NEXT: ;;#ASMSTART 9749; GFX940-NEXT: ; def v[2:3] 9750; GFX940-NEXT: ;;#ASMEND 9751; GFX940-NEXT: s_nop 0 9752; GFX940-NEXT: v_perm_b32 v5, v3, v3, s2 9753; GFX940-NEXT: v_alignbit_b32 v4, v3, v1, 16 9754; GFX940-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] sc0 sc1 9755; GFX940-NEXT: s_waitcnt vmcnt(0) 9756; GFX940-NEXT: s_setpc_b64 s[30:31] 9757 %vec0 = call <4 x i16> asm "; def $0", "=v"() 9758 %vec1 = call <4 x i16> asm "; def $0", "=v"() 9759 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 3, i32 6, i32 6, i32 6> 9760 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 9761 ret void 9762} 9763 9764define void @v_shuffle_v4i16_v4i16__4_6_6_6(ptr addrspace(1) inreg %ptr) { 9765; GFX900-LABEL: v_shuffle_v4i16_v4i16__4_6_6_6: 9766; GFX900: ; %bb.0: 9767; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9768; GFX900-NEXT: ;;#ASMSTART 9769; GFX900-NEXT: ; def v[0:1] 9770; GFX900-NEXT: ;;#ASMEND 9771; GFX900-NEXT: s_mov_b32 s4, 0x5040100 9772; GFX900-NEXT: v_mov_b32_e32 v2, 0 9773; GFX900-NEXT: v_perm_b32 v0, v1, v0, s4 9774; GFX900-NEXT: v_perm_b32 v1, v1, v1, s4 9775; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17] 9776; GFX900-NEXT: s_waitcnt vmcnt(0) 9777; GFX900-NEXT: s_setpc_b64 s[30:31] 9778; 9779; GFX90A-LABEL: v_shuffle_v4i16_v4i16__4_6_6_6: 9780; GFX90A: ; %bb.0: 9781; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9782; GFX90A-NEXT: ;;#ASMSTART 9783; GFX90A-NEXT: ; def v[0:1] 9784; GFX90A-NEXT: ;;#ASMEND 9785; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 9786; GFX90A-NEXT: v_mov_b32_e32 v2, 0 9787; GFX90A-NEXT: v_perm_b32 v0, v1, v0, s4 9788; GFX90A-NEXT: v_perm_b32 v1, v1, v1, s4 9789; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17] 9790; GFX90A-NEXT: s_waitcnt vmcnt(0) 9791; GFX90A-NEXT: 
s_setpc_b64 s[30:31] 9792; 9793; GFX940-LABEL: v_shuffle_v4i16_v4i16__4_6_6_6: 9794; GFX940: ; %bb.0: 9795; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9796; GFX940-NEXT: ;;#ASMSTART 9797; GFX940-NEXT: ; def v[0:1] 9798; GFX940-NEXT: ;;#ASMEND 9799; GFX940-NEXT: s_mov_b32 s2, 0x5040100 9800; GFX940-NEXT: v_mov_b32_e32 v2, 0 9801; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2 9802; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 9803; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 9804; GFX940-NEXT: s_waitcnt vmcnt(0) 9805; GFX940-NEXT: s_setpc_b64 s[30:31] 9806 %vec0 = call <4 x i16> asm "; def $0", "=v"() 9807 %vec1 = call <4 x i16> asm "; def $0", "=v"() 9808 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 4, i32 6, i32 6, i32 6> 9809 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 9810 ret void 9811} 9812 9813define void @v_shuffle_v4i16_v4i16__5_6_6_6(ptr addrspace(1) inreg %ptr) { 9814; GFX900-LABEL: v_shuffle_v4i16_v4i16__5_6_6_6: 9815; GFX900: ; %bb.0: 9816; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9817; GFX900-NEXT: ;;#ASMSTART 9818; GFX900-NEXT: ; def v[0:1] 9819; GFX900-NEXT: ;;#ASMEND 9820; GFX900-NEXT: s_mov_b32 s4, 0x5040100 9821; GFX900-NEXT: v_mov_b32_e32 v3, 0 9822; GFX900-NEXT: v_perm_b32 v2, v1, v1, s4 9823; GFX900-NEXT: v_alignbit_b32 v1, v1, v0, 16 9824; GFX900-NEXT: global_store_dwordx2 v3, v[1:2], s[16:17] 9825; GFX900-NEXT: s_waitcnt vmcnt(0) 9826; GFX900-NEXT: s_setpc_b64 s[30:31] 9827; 9828; GFX90A-LABEL: v_shuffle_v4i16_v4i16__5_6_6_6: 9829; GFX90A: ; %bb.0: 9830; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9831; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 9832; GFX90A-NEXT: v_mov_b32_e32 v4, 0 9833; GFX90A-NEXT: ;;#ASMSTART 9834; GFX90A-NEXT: ; def v[0:1] 9835; GFX90A-NEXT: ;;#ASMEND 9836; GFX90A-NEXT: v_perm_b32 v3, v1, v1, s4 9837; GFX90A-NEXT: v_alignbit_b32 v2, v1, v0, 16 9838; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17] 9839; GFX90A-NEXT: s_waitcnt vmcnt(0) 9840; 
GFX90A-NEXT: s_setpc_b64 s[30:31] 9841; 9842; GFX940-LABEL: v_shuffle_v4i16_v4i16__5_6_6_6: 9843; GFX940: ; %bb.0: 9844; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9845; GFX940-NEXT: s_mov_b32 s2, 0x5040100 9846; GFX940-NEXT: v_mov_b32_e32 v4, 0 9847; GFX940-NEXT: ;;#ASMSTART 9848; GFX940-NEXT: ; def v[0:1] 9849; GFX940-NEXT: ;;#ASMEND 9850; GFX940-NEXT: s_nop 0 9851; GFX940-NEXT: v_perm_b32 v3, v1, v1, s2 9852; GFX940-NEXT: v_alignbit_b32 v2, v1, v0, 16 9853; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 9854; GFX940-NEXT: s_waitcnt vmcnt(0) 9855; GFX940-NEXT: s_setpc_b64 s[30:31] 9856 %vec0 = call <4 x i16> asm "; def $0", "=v"() 9857 %vec1 = call <4 x i16> asm "; def $0", "=v"() 9858 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 5, i32 6, i32 6, i32 6> 9859 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 9860 ret void 9861} 9862 9863define void @v_shuffle_v4i16_v4i16__6_6_6_6(ptr addrspace(1) inreg %ptr) { 9864; GFX900-LABEL: v_shuffle_v4i16_v4i16__6_6_6_6: 9865; GFX900: ; %bb.0: 9866; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9867; GFX900-NEXT: ;;#ASMSTART 9868; GFX900-NEXT: ; def v[0:1] 9869; GFX900-NEXT: ;;#ASMEND 9870; GFX900-NEXT: s_mov_b32 s4, 0x5040100 9871; GFX900-NEXT: v_perm_b32 v0, v1, v1, s4 9872; GFX900-NEXT: v_mov_b32_e32 v2, 0 9873; GFX900-NEXT: v_mov_b32_e32 v1, v0 9874; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17] 9875; GFX900-NEXT: s_waitcnt vmcnt(0) 9876; GFX900-NEXT: s_setpc_b64 s[30:31] 9877; 9878; GFX90A-LABEL: v_shuffle_v4i16_v4i16__6_6_6_6: 9879; GFX90A: ; %bb.0: 9880; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9881; GFX90A-NEXT: ;;#ASMSTART 9882; GFX90A-NEXT: ; def v[0:1] 9883; GFX90A-NEXT: ;;#ASMEND 9884; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 9885; GFX90A-NEXT: v_perm_b32 v0, v1, v1, s4 9886; GFX90A-NEXT: v_mov_b32_e32 v2, 0 9887; GFX90A-NEXT: v_mov_b32_e32 v1, v0 9888; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17] 9889; GFX90A-NEXT: s_waitcnt 
vmcnt(0) 9890; GFX90A-NEXT: s_setpc_b64 s[30:31] 9891; 9892; GFX940-LABEL: v_shuffle_v4i16_v4i16__6_6_6_6: 9893; GFX940: ; %bb.0: 9894; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9895; GFX940-NEXT: ;;#ASMSTART 9896; GFX940-NEXT: ; def v[0:1] 9897; GFX940-NEXT: ;;#ASMEND 9898; GFX940-NEXT: s_mov_b32 s2, 0x5040100 9899; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 9900; GFX940-NEXT: v_mov_b32_e32 v2, 0 9901; GFX940-NEXT: v_mov_b32_e32 v1, v0 9902; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 9903; GFX940-NEXT: s_waitcnt vmcnt(0) 9904; GFX940-NEXT: s_setpc_b64 s[30:31] 9905 %vec0 = call <4 x i16> asm "; def $0", "=v"() 9906 %vec1 = call <4 x i16> asm "; def $0", "=v"() 9907 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 6, i32 6, i32 6, i32 6> 9908 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 9909 ret void 9910} 9911 9912define void @v_shuffle_v4i16_v4i16__7_6_6_6(ptr addrspace(1) inreg %ptr) { 9913; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_6_6_6: 9914; GFX900: ; %bb.0: 9915; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9916; GFX900-NEXT: ;;#ASMSTART 9917; GFX900-NEXT: ; def v[0:1] 9918; GFX900-NEXT: ;;#ASMEND 9919; GFX900-NEXT: s_mov_b32 s4, 0x5040100 9920; GFX900-NEXT: v_mov_b32_e32 v3, 0 9921; GFX900-NEXT: v_perm_b32 v2, v1, v1, s4 9922; GFX900-NEXT: v_alignbit_b32 v1, v1, v1, 16 9923; GFX900-NEXT: global_store_dwordx2 v3, v[1:2], s[16:17] 9924; GFX900-NEXT: s_waitcnt vmcnt(0) 9925; GFX900-NEXT: s_setpc_b64 s[30:31] 9926; 9927; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_6_6_6: 9928; GFX90A: ; %bb.0: 9929; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9930; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 9931; GFX90A-NEXT: v_mov_b32_e32 v4, 0 9932; GFX90A-NEXT: ;;#ASMSTART 9933; GFX90A-NEXT: ; def v[0:1] 9934; GFX90A-NEXT: ;;#ASMEND 9935; GFX90A-NEXT: v_perm_b32 v3, v1, v1, s4 9936; GFX90A-NEXT: v_alignbit_b32 v2, v1, v1, 16 9937; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17] 9938; GFX90A-NEXT: s_waitcnt 
vmcnt(0) 9939; GFX90A-NEXT: s_setpc_b64 s[30:31] 9940; 9941; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_6_6_6: 9942; GFX940: ; %bb.0: 9943; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9944; GFX940-NEXT: s_mov_b32 s2, 0x5040100 9945; GFX940-NEXT: v_mov_b32_e32 v4, 0 9946; GFX940-NEXT: ;;#ASMSTART 9947; GFX940-NEXT: ; def v[0:1] 9948; GFX940-NEXT: ;;#ASMEND 9949; GFX940-NEXT: s_nop 0 9950; GFX940-NEXT: v_perm_b32 v3, v1, v1, s2 9951; GFX940-NEXT: v_alignbit_b32 v2, v1, v1, 16 9952; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 9953; GFX940-NEXT: s_waitcnt vmcnt(0) 9954; GFX940-NEXT: s_setpc_b64 s[30:31] 9955 %vec0 = call <4 x i16> asm "; def $0", "=v"() 9956 %vec1 = call <4 x i16> asm "; def $0", "=v"() 9957 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 6, i32 6, i32 6> 9958 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 9959 ret void 9960} 9961 9962define void @v_shuffle_v4i16_v4i16__7_u_6_6(ptr addrspace(1) inreg %ptr) { 9963; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_u_6_6: 9964; GFX900: ; %bb.0: 9965; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9966; GFX900-NEXT: ;;#ASMSTART 9967; GFX900-NEXT: ; def v[0:1] 9968; GFX900-NEXT: ;;#ASMEND 9969; GFX900-NEXT: v_alignbit_b32 v0, s4, v1, 16 9970; GFX900-NEXT: s_mov_b32 s4, 0x5040100 9971; GFX900-NEXT: v_mov_b32_e32 v2, 0 9972; GFX900-NEXT: v_perm_b32 v1, v1, v1, s4 9973; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17] 9974; GFX900-NEXT: s_waitcnt vmcnt(0) 9975; GFX900-NEXT: s_setpc_b64 s[30:31] 9976; 9977; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_u_6_6: 9978; GFX90A: ; %bb.0: 9979; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9980; GFX90A-NEXT: ;;#ASMSTART 9981; GFX90A-NEXT: ; def v[0:1] 9982; GFX90A-NEXT: ;;#ASMEND 9983; GFX90A-NEXT: v_alignbit_b32 v0, s4, v1, 16 9984; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 9985; GFX90A-NEXT: v_mov_b32_e32 v2, 0 9986; GFX90A-NEXT: v_perm_b32 v1, v1, v1, s4 9987; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], 
s[16:17] 9988; GFX90A-NEXT: s_waitcnt vmcnt(0) 9989; GFX90A-NEXT: s_setpc_b64 s[30:31] 9990; 9991; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_u_6_6: 9992; GFX940: ; %bb.0: 9993; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9994; GFX940-NEXT: ;;#ASMSTART 9995; GFX940-NEXT: ; def v[0:1] 9996; GFX940-NEXT: ;;#ASMEND 9997; GFX940-NEXT: s_mov_b32 s2, 0x5040100 9998; GFX940-NEXT: v_mov_b32_e32 v2, 0 9999; GFX940-NEXT: v_alignbit_b32 v0, s0, v1, 16 10000; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 10001; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 10002; GFX940-NEXT: s_waitcnt vmcnt(0) 10003; GFX940-NEXT: s_setpc_b64 s[30:31] 10004 %vec0 = call <4 x i16> asm "; def $0", "=v"() 10005 %vec1 = call <4 x i16> asm "; def $0", "=v"() 10006 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 poison, i32 6, i32 6> 10007 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 10008 ret void 10009} 10010 10011define void @v_shuffle_v4i16_v4i16__7_0_6_6(ptr addrspace(1) inreg %ptr) { 10012; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_0_6_6: 10013; GFX900: ; %bb.0: 10014; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10015; GFX900-NEXT: ;;#ASMSTART 10016; GFX900-NEXT: ; def v[0:1] 10017; GFX900-NEXT: ;;#ASMEND 10018; GFX900-NEXT: ;;#ASMSTART 10019; GFX900-NEXT: ; def v[1:2] 10020; GFX900-NEXT: ;;#ASMEND 10021; GFX900-NEXT: s_mov_b32 s4, 0x5040100 10022; GFX900-NEXT: v_mov_b32_e32 v3, 0 10023; GFX900-NEXT: v_alignbit_b32 v0, v0, v2, 16 10024; GFX900-NEXT: v_perm_b32 v1, v2, v2, s4 10025; GFX900-NEXT: global_store_dwordx2 v3, v[0:1], s[16:17] 10026; GFX900-NEXT: s_waitcnt vmcnt(0) 10027; GFX900-NEXT: s_setpc_b64 s[30:31] 10028; 10029; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_0_6_6: 10030; GFX90A: ; %bb.0: 10031; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10032; GFX90A-NEXT: ;;#ASMSTART 10033; GFX90A-NEXT: ; def v[0:1] 10034; GFX90A-NEXT: ;;#ASMEND 10035; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 10036; GFX90A-NEXT: v_mov_b32_e32 v4, 0 
10037; GFX90A-NEXT: ;;#ASMSTART 10038; GFX90A-NEXT: ; def v[2:3] 10039; GFX90A-NEXT: ;;#ASMEND 10040; GFX90A-NEXT: v_alignbit_b32 v0, v0, v3, 16 10041; GFX90A-NEXT: v_perm_b32 v1, v3, v3, s4 10042; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 10043; GFX90A-NEXT: s_waitcnt vmcnt(0) 10044; GFX90A-NEXT: s_setpc_b64 s[30:31] 10045; 10046; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_0_6_6: 10047; GFX940: ; %bb.0: 10048; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10049; GFX940-NEXT: ;;#ASMSTART 10050; GFX940-NEXT: ; def v[0:1] 10051; GFX940-NEXT: ;;#ASMEND 10052; GFX940-NEXT: s_mov_b32 s2, 0x5040100 10053; GFX940-NEXT: v_mov_b32_e32 v4, 0 10054; GFX940-NEXT: ;;#ASMSTART 10055; GFX940-NEXT: ; def v[2:3] 10056; GFX940-NEXT: ;;#ASMEND 10057; GFX940-NEXT: s_nop 0 10058; GFX940-NEXT: v_alignbit_b32 v0, v0, v3, 16 10059; GFX940-NEXT: v_perm_b32 v1, v3, v3, s2 10060; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 10061; GFX940-NEXT: s_waitcnt vmcnt(0) 10062; GFX940-NEXT: s_setpc_b64 s[30:31] 10063 %vec0 = call <4 x i16> asm "; def $0", "=v"() 10064 %vec1 = call <4 x i16> asm "; def $0", "=v"() 10065 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 0, i32 6, i32 6> 10066 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 10067 ret void 10068} 10069 10070define void @v_shuffle_v4i16_v4i16__7_1_6_6(ptr addrspace(1) inreg %ptr) { 10071; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_1_6_6: 10072; GFX900: ; %bb.0: 10073; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10074; GFX900-NEXT: ;;#ASMSTART 10075; GFX900-NEXT: ; def v[0:1] 10076; GFX900-NEXT: ;;#ASMEND 10077; GFX900-NEXT: s_mov_b32 s4, 0x7060302 10078; GFX900-NEXT: ;;#ASMSTART 10079; GFX900-NEXT: ; def v[1:2] 10080; GFX900-NEXT: ;;#ASMEND 10081; GFX900-NEXT: v_perm_b32 v0, v0, v2, s4 10082; GFX900-NEXT: s_mov_b32 s4, 0x5040100 10083; GFX900-NEXT: v_mov_b32_e32 v3, 0 10084; GFX900-NEXT: v_perm_b32 v1, v2, v2, s4 10085; GFX900-NEXT: global_store_dwordx2 v3, v[0:1], 
s[16:17] 10086; GFX900-NEXT: s_waitcnt vmcnt(0) 10087; GFX900-NEXT: s_setpc_b64 s[30:31] 10088; 10089; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_1_6_6: 10090; GFX90A: ; %bb.0: 10091; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10092; GFX90A-NEXT: ;;#ASMSTART 10093; GFX90A-NEXT: ; def v[0:1] 10094; GFX90A-NEXT: ;;#ASMEND 10095; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 10096; GFX90A-NEXT: ;;#ASMSTART 10097; GFX90A-NEXT: ; def v[2:3] 10098; GFX90A-NEXT: ;;#ASMEND 10099; GFX90A-NEXT: v_perm_b32 v0, v0, v3, s4 10100; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 10101; GFX90A-NEXT: v_mov_b32_e32 v4, 0 10102; GFX90A-NEXT: v_perm_b32 v1, v3, v3, s4 10103; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 10104; GFX90A-NEXT: s_waitcnt vmcnt(0) 10105; GFX90A-NEXT: s_setpc_b64 s[30:31] 10106; 10107; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_1_6_6: 10108; GFX940: ; %bb.0: 10109; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10110; GFX940-NEXT: ;;#ASMSTART 10111; GFX940-NEXT: ; def v[0:1] 10112; GFX940-NEXT: ;;#ASMEND 10113; GFX940-NEXT: s_mov_b32 s2, 0x7060302 10114; GFX940-NEXT: ;;#ASMSTART 10115; GFX940-NEXT: ; def v[2:3] 10116; GFX940-NEXT: ;;#ASMEND 10117; GFX940-NEXT: v_mov_b32_e32 v4, 0 10118; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 10119; GFX940-NEXT: s_mov_b32 s2, 0x5040100 10120; GFX940-NEXT: v_perm_b32 v1, v3, v3, s2 10121; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 10122; GFX940-NEXT: s_waitcnt vmcnt(0) 10123; GFX940-NEXT: s_setpc_b64 s[30:31] 10124 %vec0 = call <4 x i16> asm "; def $0", "=v"() 10125 %vec1 = call <4 x i16> asm "; def $0", "=v"() 10126 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 1, i32 6, i32 6> 10127 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 10128 ret void 10129} 10130 10131define void @v_shuffle_v4i16_v4i16__7_2_6_6(ptr addrspace(1) inreg %ptr) { 10132; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_2_6_6: 10133; GFX900: ; %bb.0: 10134; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 
10135; GFX900-NEXT: ;;#ASMSTART 10136; GFX900-NEXT: ; def v[0:1] 10137; GFX900-NEXT: ;;#ASMEND 10138; GFX900-NEXT: ;;#ASMSTART 10139; GFX900-NEXT: ; def v[2:3] 10140; GFX900-NEXT: ;;#ASMEND 10141; GFX900-NEXT: s_mov_b32 s4, 0x5040100 10142; GFX900-NEXT: v_mov_b32_e32 v4, 0 10143; GFX900-NEXT: v_perm_b32 v2, v3, v3, s4 10144; GFX900-NEXT: v_alignbit_b32 v1, v1, v3, 16 10145; GFX900-NEXT: global_store_dwordx2 v4, v[1:2], s[16:17] 10146; GFX900-NEXT: s_waitcnt vmcnt(0) 10147; GFX900-NEXT: s_setpc_b64 s[30:31] 10148; 10149; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_2_6_6: 10150; GFX90A: ; %bb.0: 10151; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10152; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 10153; GFX90A-NEXT: v_mov_b32_e32 v6, 0 10154; GFX90A-NEXT: ;;#ASMSTART 10155; GFX90A-NEXT: ; def v[0:1] 10156; GFX90A-NEXT: ;;#ASMEND 10157; GFX90A-NEXT: ;;#ASMSTART 10158; GFX90A-NEXT: ; def v[2:3] 10159; GFX90A-NEXT: ;;#ASMEND 10160; GFX90A-NEXT: v_perm_b32 v5, v3, v3, s4 10161; GFX90A-NEXT: v_alignbit_b32 v4, v1, v3, 16 10162; GFX90A-NEXT: global_store_dwordx2 v6, v[4:5], s[16:17] 10163; GFX90A-NEXT: s_waitcnt vmcnt(0) 10164; GFX90A-NEXT: s_setpc_b64 s[30:31] 10165; 10166; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_2_6_6: 10167; GFX940: ; %bb.0: 10168; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10169; GFX940-NEXT: s_mov_b32 s2, 0x5040100 10170; GFX940-NEXT: v_mov_b32_e32 v6, 0 10171; GFX940-NEXT: ;;#ASMSTART 10172; GFX940-NEXT: ; def v[0:1] 10173; GFX940-NEXT: ;;#ASMEND 10174; GFX940-NEXT: ;;#ASMSTART 10175; GFX940-NEXT: ; def v[2:3] 10176; GFX940-NEXT: ;;#ASMEND 10177; GFX940-NEXT: s_nop 0 10178; GFX940-NEXT: v_perm_b32 v5, v3, v3, s2 10179; GFX940-NEXT: v_alignbit_b32 v4, v1, v3, 16 10180; GFX940-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] sc0 sc1 10181; GFX940-NEXT: s_waitcnt vmcnt(0) 10182; GFX940-NEXT: s_setpc_b64 s[30:31] 10183 %vec0 = call <4 x i16> asm "; def $0", "=v"() 10184 %vec1 = call <4 x i16> asm "; def $0", "=v"() 10185 %shuf = shufflevector <4 x i16> 
%vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 2, i32 6, i32 6> 10186 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 10187 ret void 10188} 10189 10190define void @v_shuffle_v4i16_v4i16__7_3_6_6(ptr addrspace(1) inreg %ptr) { 10191; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_3_6_6: 10192; GFX900: ; %bb.0: 10193; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10194; GFX900-NEXT: ;;#ASMSTART 10195; GFX900-NEXT: ; def v[0:1] 10196; GFX900-NEXT: ;;#ASMEND 10197; GFX900-NEXT: s_mov_b32 s4, 0x7060302 10198; GFX900-NEXT: ;;#ASMSTART 10199; GFX900-NEXT: ; def v[2:3] 10200; GFX900-NEXT: ;;#ASMEND 10201; GFX900-NEXT: v_perm_b32 v0, v1, v3, s4 10202; GFX900-NEXT: s_mov_b32 s4, 0x5040100 10203; GFX900-NEXT: v_mov_b32_e32 v4, 0 10204; GFX900-NEXT: v_perm_b32 v1, v3, v3, s4 10205; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 10206; GFX900-NEXT: s_waitcnt vmcnt(0) 10207; GFX900-NEXT: s_setpc_b64 s[30:31] 10208; 10209; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_3_6_6: 10210; GFX90A: ; %bb.0: 10211; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10212; GFX90A-NEXT: ;;#ASMSTART 10213; GFX90A-NEXT: ; def v[0:1] 10214; GFX90A-NEXT: ;;#ASMEND 10215; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 10216; GFX90A-NEXT: ;;#ASMSTART 10217; GFX90A-NEXT: ; def v[2:3] 10218; GFX90A-NEXT: ;;#ASMEND 10219; GFX90A-NEXT: v_perm_b32 v0, v1, v3, s4 10220; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 10221; GFX90A-NEXT: v_mov_b32_e32 v4, 0 10222; GFX90A-NEXT: v_perm_b32 v1, v3, v3, s4 10223; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 10224; GFX90A-NEXT: s_waitcnt vmcnt(0) 10225; GFX90A-NEXT: s_setpc_b64 s[30:31] 10226; 10227; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_3_6_6: 10228; GFX940: ; %bb.0: 10229; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10230; GFX940-NEXT: ;;#ASMSTART 10231; GFX940-NEXT: ; def v[0:1] 10232; GFX940-NEXT: ;;#ASMEND 10233; GFX940-NEXT: s_mov_b32 s2, 0x7060302 10234; GFX940-NEXT: ;;#ASMSTART 10235; GFX940-NEXT: ; def v[2:3] 10236; GFX940-NEXT: ;;#ASMEND 
10237; GFX940-NEXT: v_mov_b32_e32 v4, 0 10238; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 10239; GFX940-NEXT: s_mov_b32 s2, 0x5040100 10240; GFX940-NEXT: v_perm_b32 v1, v3, v3, s2 10241; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 10242; GFX940-NEXT: s_waitcnt vmcnt(0) 10243; GFX940-NEXT: s_setpc_b64 s[30:31] 10244 %vec0 = call <4 x i16> asm "; def $0", "=v"() 10245 %vec1 = call <4 x i16> asm "; def $0", "=v"() 10246 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 3, i32 6, i32 6> 10247 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 10248 ret void 10249} 10250 10251define void @v_shuffle_v4i16_v4i16__7_4_6_6(ptr addrspace(1) inreg %ptr) { 10252; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_4_6_6: 10253; GFX900: ; %bb.0: 10254; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10255; GFX900-NEXT: ;;#ASMSTART 10256; GFX900-NEXT: ; def v[0:1] 10257; GFX900-NEXT: ;;#ASMEND 10258; GFX900-NEXT: s_mov_b32 s4, 0x5040100 10259; GFX900-NEXT: v_mov_b32_e32 v2, 0 10260; GFX900-NEXT: v_alignbit_b32 v0, v0, v1, 16 10261; GFX900-NEXT: v_perm_b32 v1, v1, v1, s4 10262; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17] 10263; GFX900-NEXT: s_waitcnt vmcnt(0) 10264; GFX900-NEXT: s_setpc_b64 s[30:31] 10265; 10266; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_4_6_6: 10267; GFX90A: ; %bb.0: 10268; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10269; GFX90A-NEXT: ;;#ASMSTART 10270; GFX90A-NEXT: ; def v[0:1] 10271; GFX90A-NEXT: ;;#ASMEND 10272; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 10273; GFX90A-NEXT: v_mov_b32_e32 v2, 0 10274; GFX90A-NEXT: v_alignbit_b32 v0, v0, v1, 16 10275; GFX90A-NEXT: v_perm_b32 v1, v1, v1, s4 10276; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17] 10277; GFX90A-NEXT: s_waitcnt vmcnt(0) 10278; GFX90A-NEXT: s_setpc_b64 s[30:31] 10279; 10280; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_4_6_6: 10281; GFX940: ; %bb.0: 10282; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10283; GFX940-NEXT: ;;#ASMSTART 10284; 
GFX940-NEXT: ; def v[0:1] 10285; GFX940-NEXT: ;;#ASMEND 10286; GFX940-NEXT: s_mov_b32 s2, 0x5040100 10287; GFX940-NEXT: v_mov_b32_e32 v2, 0 10288; GFX940-NEXT: v_alignbit_b32 v0, v0, v1, 16 10289; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 10290; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 10291; GFX940-NEXT: s_waitcnt vmcnt(0) 10292; GFX940-NEXT: s_setpc_b64 s[30:31] 10293 %vec0 = call <4 x i16> asm "; def $0", "=v"() 10294 %vec1 = call <4 x i16> asm "; def $0", "=v"() 10295 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 4, i32 6, i32 6> 10296 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 10297 ret void 10298} 10299 10300define void @v_shuffle_v4i16_v4i16__7_5_6_6(ptr addrspace(1) inreg %ptr) { 10301; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_5_6_6: 10302; GFX900: ; %bb.0: 10303; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10304; GFX900-NEXT: ;;#ASMSTART 10305; GFX900-NEXT: ; def v[0:1] 10306; GFX900-NEXT: ;;#ASMEND 10307; GFX900-NEXT: s_mov_b32 s4, 0x7060302 10308; GFX900-NEXT: v_perm_b32 v0, v0, v1, s4 10309; GFX900-NEXT: s_mov_b32 s4, 0x5040100 10310; GFX900-NEXT: v_mov_b32_e32 v2, 0 10311; GFX900-NEXT: v_perm_b32 v1, v1, v1, s4 10312; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17] 10313; GFX900-NEXT: s_waitcnt vmcnt(0) 10314; GFX900-NEXT: s_setpc_b64 s[30:31] 10315; 10316; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_5_6_6: 10317; GFX90A: ; %bb.0: 10318; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10319; GFX90A-NEXT: ;;#ASMSTART 10320; GFX90A-NEXT: ; def v[0:1] 10321; GFX90A-NEXT: ;;#ASMEND 10322; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 10323; GFX90A-NEXT: v_perm_b32 v0, v0, v1, s4 10324; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 10325; GFX90A-NEXT: v_mov_b32_e32 v2, 0 10326; GFX90A-NEXT: v_perm_b32 v1, v1, v1, s4 10327; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17] 10328; GFX90A-NEXT: s_waitcnt vmcnt(0) 10329; GFX90A-NEXT: s_setpc_b64 s[30:31] 10330; 10331; GFX940-LABEL: 
v_shuffle_v4i16_v4i16__7_5_6_6: 10332; GFX940: ; %bb.0: 10333; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10334; GFX940-NEXT: ;;#ASMSTART 10335; GFX940-NEXT: ; def v[0:1] 10336; GFX940-NEXT: ;;#ASMEND 10337; GFX940-NEXT: s_mov_b32 s2, 0x7060302 10338; GFX940-NEXT: v_perm_b32 v0, v0, v1, s2 10339; GFX940-NEXT: s_mov_b32 s2, 0x5040100 10340; GFX940-NEXT: v_mov_b32_e32 v2, 0 10341; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 10342; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 10343; GFX940-NEXT: s_waitcnt vmcnt(0) 10344; GFX940-NEXT: s_setpc_b64 s[30:31] 10345 %vec0 = call <4 x i16> asm "; def $0", "=v"() 10346 %vec1 = call <4 x i16> asm "; def $0", "=v"() 10347 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 5, i32 6, i32 6> 10348 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 10349 ret void 10350} 10351 10352define void @v_shuffle_v4i16_v4i16__7_7_6_6(ptr addrspace(1) inreg %ptr) { 10353; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_6_6: 10354; GFX900: ; %bb.0: 10355; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10356; GFX900-NEXT: ;;#ASMSTART 10357; GFX900-NEXT: ; def v[0:1] 10358; GFX900-NEXT: ;;#ASMEND 10359; GFX900-NEXT: s_mov_b32 s4, 0x7060302 10360; GFX900-NEXT: v_perm_b32 v0, v1, v1, s4 10361; GFX900-NEXT: s_mov_b32 s4, 0x5040100 10362; GFX900-NEXT: v_mov_b32_e32 v2, 0 10363; GFX900-NEXT: v_perm_b32 v1, v1, v1, s4 10364; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17] 10365; GFX900-NEXT: s_waitcnt vmcnt(0) 10366; GFX900-NEXT: s_setpc_b64 s[30:31] 10367; 10368; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_6_6: 10369; GFX90A: ; %bb.0: 10370; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10371; GFX90A-NEXT: ;;#ASMSTART 10372; GFX90A-NEXT: ; def v[0:1] 10373; GFX90A-NEXT: ;;#ASMEND 10374; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 10375; GFX90A-NEXT: v_perm_b32 v0, v1, v1, s4 10376; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 10377; GFX90A-NEXT: v_mov_b32_e32 v2, 0 10378; GFX90A-NEXT: v_perm_b32 v1, v1, 
v1, s4 10379; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17] 10380; GFX90A-NEXT: s_waitcnt vmcnt(0) 10381; GFX90A-NEXT: s_setpc_b64 s[30:31] 10382; 10383; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_6_6: 10384; GFX940: ; %bb.0: 10385; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10386; GFX940-NEXT: ;;#ASMSTART 10387; GFX940-NEXT: ; def v[0:1] 10388; GFX940-NEXT: ;;#ASMEND 10389; GFX940-NEXT: s_mov_b32 s2, 0x7060302 10390; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 10391; GFX940-NEXT: s_mov_b32 s2, 0x5040100 10392; GFX940-NEXT: v_mov_b32_e32 v2, 0 10393; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 10394; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 10395; GFX940-NEXT: s_waitcnt vmcnt(0) 10396; GFX940-NEXT: s_setpc_b64 s[30:31] 10397 %vec0 = call <4 x i16> asm "; def $0", "=v"() 10398 %vec1 = call <4 x i16> asm "; def $0", "=v"() 10399 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 6> 10400 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 10401 ret void 10402} 10403 10404define void @v_shuffle_v4i16_v4i16__7_7_u_6(ptr addrspace(1) inreg %ptr) { 10405; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_u_6: 10406; GFX900: ; %bb.0: 10407; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10408; GFX900-NEXT: ;;#ASMSTART 10409; GFX900-NEXT: ; def v[0:1] 10410; GFX900-NEXT: ;;#ASMEND 10411; GFX900-NEXT: s_mov_b32 s4, 0x7060302 10412; GFX900-NEXT: v_mov_b32_e32 v2, 0 10413; GFX900-NEXT: v_perm_b32 v0, v1, v1, s4 10414; GFX900-NEXT: v_lshlrev_b32_e32 v1, 16, v1 10415; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17] 10416; GFX900-NEXT: s_waitcnt vmcnt(0) 10417; GFX900-NEXT: s_setpc_b64 s[30:31] 10418; 10419; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_u_6: 10420; GFX90A: ; %bb.0: 10421; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10422; GFX90A-NEXT: ;;#ASMSTART 10423; GFX90A-NEXT: ; def v[0:1] 10424; GFX90A-NEXT: ;;#ASMEND 10425; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 10426; GFX90A-NEXT: v_mov_b32_e32 
v2, 0 10427; GFX90A-NEXT: v_perm_b32 v0, v1, v1, s4 10428; GFX90A-NEXT: v_lshlrev_b32_e32 v1, 16, v1 10429; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17] 10430; GFX90A-NEXT: s_waitcnt vmcnt(0) 10431; GFX90A-NEXT: s_setpc_b64 s[30:31] 10432; 10433; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_u_6: 10434; GFX940: ; %bb.0: 10435; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10436; GFX940-NEXT: ;;#ASMSTART 10437; GFX940-NEXT: ; def v[0:1] 10438; GFX940-NEXT: ;;#ASMEND 10439; GFX940-NEXT: s_mov_b32 s2, 0x7060302 10440; GFX940-NEXT: v_mov_b32_e32 v2, 0 10441; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 10442; GFX940-NEXT: v_lshlrev_b32_e32 v1, 16, v1 10443; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 10444; GFX940-NEXT: s_waitcnt vmcnt(0) 10445; GFX940-NEXT: s_setpc_b64 s[30:31] 10446 %vec0 = call <4 x i16> asm "; def $0", "=v"() 10447 %vec1 = call <4 x i16> asm "; def $0", "=v"() 10448 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 poison, i32 6> 10449 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 10450 ret void 10451} 10452 10453define void @v_shuffle_v4i16_v4i16__7_7_0_6(ptr addrspace(1) inreg %ptr) { 10454; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_0_6: 10455; GFX900: ; %bb.0: 10456; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10457; GFX900-NEXT: ;;#ASMSTART 10458; GFX900-NEXT: ; def v[0:1] 10459; GFX900-NEXT: ;;#ASMEND 10460; GFX900-NEXT: ;;#ASMSTART 10461; GFX900-NEXT: ; def v[1:2] 10462; GFX900-NEXT: ;;#ASMEND 10463; GFX900-NEXT: s_mov_b32 s4, 0x5040100 10464; GFX900-NEXT: v_perm_b32 v1, v2, v0, s4 10465; GFX900-NEXT: s_mov_b32 s4, 0x7060302 10466; GFX900-NEXT: v_mov_b32_e32 v3, 0 10467; GFX900-NEXT: v_perm_b32 v0, v2, v2, s4 10468; GFX900-NEXT: global_store_dwordx2 v3, v[0:1], s[16:17] 10469; GFX900-NEXT: s_waitcnt vmcnt(0) 10470; GFX900-NEXT: s_setpc_b64 s[30:31] 10471; 10472; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_0_6: 10473; GFX90A: ; %bb.0: 10474; GFX90A-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) 10475; GFX90A-NEXT: ;;#ASMSTART 10476; GFX90A-NEXT: ; def v[0:1] 10477; GFX90A-NEXT: ;;#ASMEND 10478; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 10479; GFX90A-NEXT: ;;#ASMSTART 10480; GFX90A-NEXT: ; def v[2:3] 10481; GFX90A-NEXT: ;;#ASMEND 10482; GFX90A-NEXT: v_perm_b32 v1, v3, v0, s4 10483; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 10484; GFX90A-NEXT: v_mov_b32_e32 v4, 0 10485; GFX90A-NEXT: v_perm_b32 v0, v3, v3, s4 10486; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 10487; GFX90A-NEXT: s_waitcnt vmcnt(0) 10488; GFX90A-NEXT: s_setpc_b64 s[30:31] 10489; 10490; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_0_6: 10491; GFX940: ; %bb.0: 10492; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10493; GFX940-NEXT: ;;#ASMSTART 10494; GFX940-NEXT: ; def v[0:1] 10495; GFX940-NEXT: ;;#ASMEND 10496; GFX940-NEXT: s_mov_b32 s2, 0x5040100 10497; GFX940-NEXT: ;;#ASMSTART 10498; GFX940-NEXT: ; def v[2:3] 10499; GFX940-NEXT: ;;#ASMEND 10500; GFX940-NEXT: v_mov_b32_e32 v4, 0 10501; GFX940-NEXT: v_perm_b32 v1, v3, v0, s2 10502; GFX940-NEXT: s_mov_b32 s2, 0x7060302 10503; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 10504; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 10505; GFX940-NEXT: s_waitcnt vmcnt(0) 10506; GFX940-NEXT: s_setpc_b64 s[30:31] 10507 %vec0 = call <4 x i16> asm "; def $0", "=v"() 10508 %vec1 = call <4 x i16> asm "; def $0", "=v"() 10509 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 0, i32 6> 10510 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 10511 ret void 10512} 10513 10514define void @v_shuffle_v4i16_v4i16__7_7_1_6(ptr addrspace(1) inreg %ptr) { 10515; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_1_6: 10516; GFX900: ; %bb.0: 10517; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10518; GFX900-NEXT: ;;#ASMSTART 10519; GFX900-NEXT: ; def v[0:1] 10520; GFX900-NEXT: ;;#ASMEND 10521; GFX900-NEXT: ;;#ASMSTART 10522; GFX900-NEXT: ; def v[1:2] 10523; GFX900-NEXT: ;;#ASMEND 10524; GFX900-NEXT: 
s_mov_b32 s4, 0x7060302 10525; GFX900-NEXT: v_mov_b32_e32 v3, 0 10526; GFX900-NEXT: v_perm_b32 v1, v2, v2, s4 10527; GFX900-NEXT: v_alignbit_b32 v2, v2, v0, 16 10528; GFX900-NEXT: global_store_dwordx2 v3, v[1:2], s[16:17] 10529; GFX900-NEXT: s_waitcnt vmcnt(0) 10530; GFX900-NEXT: s_setpc_b64 s[30:31] 10531; 10532; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_1_6: 10533; GFX90A: ; %bb.0: 10534; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10535; GFX90A-NEXT: ;;#ASMSTART 10536; GFX90A-NEXT: ; def v[2:3] 10537; GFX90A-NEXT: ;;#ASMEND 10538; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 10539; GFX90A-NEXT: v_mov_b32_e32 v4, 0 10540; GFX90A-NEXT: ;;#ASMSTART 10541; GFX90A-NEXT: ; def v[0:1] 10542; GFX90A-NEXT: ;;#ASMEND 10543; GFX90A-NEXT: v_perm_b32 v2, v3, v3, s4 10544; GFX90A-NEXT: v_alignbit_b32 v3, v3, v0, 16 10545; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17] 10546; GFX90A-NEXT: s_waitcnt vmcnt(0) 10547; GFX90A-NEXT: s_setpc_b64 s[30:31] 10548; 10549; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_1_6: 10550; GFX940: ; %bb.0: 10551; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10552; GFX940-NEXT: ;;#ASMSTART 10553; GFX940-NEXT: ; def v[2:3] 10554; GFX940-NEXT: ;;#ASMEND 10555; GFX940-NEXT: s_mov_b32 s2, 0x7060302 10556; GFX940-NEXT: v_mov_b32_e32 v4, 0 10557; GFX940-NEXT: ;;#ASMSTART 10558; GFX940-NEXT: ; def v[0:1] 10559; GFX940-NEXT: ;;#ASMEND 10560; GFX940-NEXT: v_perm_b32 v2, v3, v3, s2 10561; GFX940-NEXT: v_alignbit_b32 v3, v3, v0, 16 10562; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 10563; GFX940-NEXT: s_waitcnt vmcnt(0) 10564; GFX940-NEXT: s_setpc_b64 s[30:31] 10565 %vec0 = call <4 x i16> asm "; def $0", "=v"() 10566 %vec1 = call <4 x i16> asm "; def $0", "=v"() 10567 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 1, i32 6> 10568 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 10569 ret void 10570} 10571 10572define void @v_shuffle_v4i16_v4i16__7_7_2_6(ptr addrspace(1) inreg %ptr) { 10573; 
GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_2_6: 10574; GFX900: ; %bb.0: 10575; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10576; GFX900-NEXT: ;;#ASMSTART 10577; GFX900-NEXT: ; def v[0:1] 10578; GFX900-NEXT: ;;#ASMEND 10579; GFX900-NEXT: s_mov_b32 s4, 0x5040100 10580; GFX900-NEXT: ;;#ASMSTART 10581; GFX900-NEXT: ; def v[2:3] 10582; GFX900-NEXT: ;;#ASMEND 10583; GFX900-NEXT: v_perm_b32 v1, v3, v1, s4 10584; GFX900-NEXT: s_mov_b32 s4, 0x7060302 10585; GFX900-NEXT: v_mov_b32_e32 v4, 0 10586; GFX900-NEXT: v_perm_b32 v0, v3, v3, s4 10587; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 10588; GFX900-NEXT: s_waitcnt vmcnt(0) 10589; GFX900-NEXT: s_setpc_b64 s[30:31] 10590; 10591; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_2_6: 10592; GFX90A: ; %bb.0: 10593; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10594; GFX90A-NEXT: ;;#ASMSTART 10595; GFX90A-NEXT: ; def v[0:1] 10596; GFX90A-NEXT: ;;#ASMEND 10597; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 10598; GFX90A-NEXT: ;;#ASMSTART 10599; GFX90A-NEXT: ; def v[2:3] 10600; GFX90A-NEXT: ;;#ASMEND 10601; GFX90A-NEXT: v_perm_b32 v1, v3, v1, s4 10602; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 10603; GFX90A-NEXT: v_mov_b32_e32 v4, 0 10604; GFX90A-NEXT: v_perm_b32 v0, v3, v3, s4 10605; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 10606; GFX90A-NEXT: s_waitcnt vmcnt(0) 10607; GFX90A-NEXT: s_setpc_b64 s[30:31] 10608; 10609; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_2_6: 10610; GFX940: ; %bb.0: 10611; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10612; GFX940-NEXT: ;;#ASMSTART 10613; GFX940-NEXT: ; def v[0:1] 10614; GFX940-NEXT: ;;#ASMEND 10615; GFX940-NEXT: s_mov_b32 s2, 0x5040100 10616; GFX940-NEXT: ;;#ASMSTART 10617; GFX940-NEXT: ; def v[2:3] 10618; GFX940-NEXT: ;;#ASMEND 10619; GFX940-NEXT: v_mov_b32_e32 v4, 0 10620; GFX940-NEXT: v_perm_b32 v1, v3, v1, s2 10621; GFX940-NEXT: s_mov_b32 s2, 0x7060302 10622; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 10623; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 
10624; GFX940-NEXT: s_waitcnt vmcnt(0) 10625; GFX940-NEXT: s_setpc_b64 s[30:31] 10626 %vec0 = call <4 x i16> asm "; def $0", "=v"() 10627 %vec1 = call <4 x i16> asm "; def $0", "=v"() 10628 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 2, i32 6> 10629 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 10630 ret void 10631} 10632 10633define void @v_shuffle_v4i16_v4i16__7_7_3_6(ptr addrspace(1) inreg %ptr) { 10634; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_3_6: 10635; GFX900: ; %bb.0: 10636; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10637; GFX900-NEXT: ;;#ASMSTART 10638; GFX900-NEXT: ; def v[0:1] 10639; GFX900-NEXT: ;;#ASMEND 10640; GFX900-NEXT: s_mov_b32 s4, 0x7060302 10641; GFX900-NEXT: v_mov_b32_e32 v4, 0 10642; GFX900-NEXT: ;;#ASMSTART 10643; GFX900-NEXT: ; def v[2:3] 10644; GFX900-NEXT: ;;#ASMEND 10645; GFX900-NEXT: v_perm_b32 v0, v3, v3, s4 10646; GFX900-NEXT: v_alignbit_b32 v1, v3, v1, 16 10647; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 10648; GFX900-NEXT: s_waitcnt vmcnt(0) 10649; GFX900-NEXT: s_setpc_b64 s[30:31] 10650; 10651; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_3_6: 10652; GFX90A: ; %bb.0: 10653; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10654; GFX90A-NEXT: ;;#ASMSTART 10655; GFX90A-NEXT: ; def v[0:1] 10656; GFX90A-NEXT: ;;#ASMEND 10657; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 10658; GFX90A-NEXT: v_mov_b32_e32 v4, 0 10659; GFX90A-NEXT: ;;#ASMSTART 10660; GFX90A-NEXT: ; def v[2:3] 10661; GFX90A-NEXT: ;;#ASMEND 10662; GFX90A-NEXT: v_perm_b32 v0, v3, v3, s4 10663; GFX90A-NEXT: v_alignbit_b32 v1, v3, v1, 16 10664; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 10665; GFX90A-NEXT: s_waitcnt vmcnt(0) 10666; GFX90A-NEXT: s_setpc_b64 s[30:31] 10667; 10668; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_3_6: 10669; GFX940: ; %bb.0: 10670; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10671; GFX940-NEXT: ;;#ASMSTART 10672; GFX940-NEXT: ; def v[0:1] 10673; GFX940-NEXT: ;;#ASMEND 
10674; GFX940-NEXT: s_mov_b32 s2, 0x7060302 10675; GFX940-NEXT: v_mov_b32_e32 v4, 0 10676; GFX940-NEXT: ;;#ASMSTART 10677; GFX940-NEXT: ; def v[2:3] 10678; GFX940-NEXT: ;;#ASMEND 10679; GFX940-NEXT: s_nop 0 10680; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 10681; GFX940-NEXT: v_alignbit_b32 v1, v3, v1, 16 10682; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 10683; GFX940-NEXT: s_waitcnt vmcnt(0) 10684; GFX940-NEXT: s_setpc_b64 s[30:31] 10685 %vec0 = call <4 x i16> asm "; def $0", "=v"() 10686 %vec1 = call <4 x i16> asm "; def $0", "=v"() 10687 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 3, i32 6> 10688 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 10689 ret void 10690} 10691 10692define void @v_shuffle_v4i16_v4i16__7_7_4_6(ptr addrspace(1) inreg %ptr) { 10693; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_4_6: 10694; GFX900: ; %bb.0: 10695; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10696; GFX900-NEXT: s_mov_b32 s4, 0x5040100 10697; GFX900-NEXT: ;;#ASMSTART 10698; GFX900-NEXT: ; def v[0:1] 10699; GFX900-NEXT: ;;#ASMEND 10700; GFX900-NEXT: v_perm_b32 v2, v1, v0, s4 10701; GFX900-NEXT: s_mov_b32 s4, 0x7060302 10702; GFX900-NEXT: v_mov_b32_e32 v3, 0 10703; GFX900-NEXT: v_perm_b32 v1, v1, v1, s4 10704; GFX900-NEXT: global_store_dwordx2 v3, v[1:2], s[16:17] 10705; GFX900-NEXT: s_waitcnt vmcnt(0) 10706; GFX900-NEXT: s_setpc_b64 s[30:31] 10707; 10708; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_4_6: 10709; GFX90A: ; %bb.0: 10710; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10711; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 10712; GFX90A-NEXT: ;;#ASMSTART 10713; GFX90A-NEXT: ; def v[0:1] 10714; GFX90A-NEXT: ;;#ASMEND 10715; GFX90A-NEXT: v_perm_b32 v3, v1, v0, s4 10716; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 10717; GFX90A-NEXT: v_mov_b32_e32 v4, 0 10718; GFX90A-NEXT: v_perm_b32 v2, v1, v1, s4 10719; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17] 10720; GFX90A-NEXT: s_waitcnt vmcnt(0) 10721; GFX90A-NEXT: 
s_setpc_b64 s[30:31] 10722; 10723; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_4_6: 10724; GFX940: ; %bb.0: 10725; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10726; GFX940-NEXT: s_mov_b32 s2, 0x5040100 10727; GFX940-NEXT: ;;#ASMSTART 10728; GFX940-NEXT: ; def v[0:1] 10729; GFX940-NEXT: ;;#ASMEND 10730; GFX940-NEXT: v_mov_b32_e32 v4, 0 10731; GFX940-NEXT: v_perm_b32 v3, v1, v0, s2 10732; GFX940-NEXT: s_mov_b32 s2, 0x7060302 10733; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 10734; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 10735; GFX940-NEXT: s_waitcnt vmcnt(0) 10736; GFX940-NEXT: s_setpc_b64 s[30:31] 10737 %vec0 = call <4 x i16> asm "; def $0", "=v"() 10738 %vec1 = call <4 x i16> asm "; def $0", "=v"() 10739 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 4, i32 6> 10740 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 10741 ret void 10742} 10743 10744define void @v_shuffle_v4i16_v4i16__7_7_5_6(ptr addrspace(1) inreg %ptr) { 10745; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_5_6: 10746; GFX900: ; %bb.0: 10747; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10748; GFX900-NEXT: s_mov_b32 s4, 0x7060302 10749; GFX900-NEXT: v_mov_b32_e32 v4, 0 10750; GFX900-NEXT: ;;#ASMSTART 10751; GFX900-NEXT: ; def v[0:1] 10752; GFX900-NEXT: ;;#ASMEND 10753; GFX900-NEXT: v_perm_b32 v2, v1, v1, s4 10754; GFX900-NEXT: v_alignbit_b32 v3, v1, v0, 16 10755; GFX900-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17] 10756; GFX900-NEXT: s_waitcnt vmcnt(0) 10757; GFX900-NEXT: s_setpc_b64 s[30:31] 10758; 10759; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_5_6: 10760; GFX90A: ; %bb.0: 10761; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10762; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 10763; GFX90A-NEXT: v_mov_b32_e32 v4, 0 10764; GFX90A-NEXT: ;;#ASMSTART 10765; GFX90A-NEXT: ; def v[0:1] 10766; GFX90A-NEXT: ;;#ASMEND 10767; GFX90A-NEXT: v_perm_b32 v2, v1, v1, s4 10768; GFX90A-NEXT: v_alignbit_b32 v3, v1, v0, 16 10769; GFX90A-NEXT: 
global_store_dwordx2 v4, v[2:3], s[16:17] 10770; GFX90A-NEXT: s_waitcnt vmcnt(0) 10771; GFX90A-NEXT: s_setpc_b64 s[30:31] 10772; 10773; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_5_6: 10774; GFX940: ; %bb.0: 10775; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10776; GFX940-NEXT: s_mov_b32 s2, 0x7060302 10777; GFX940-NEXT: v_mov_b32_e32 v4, 0 10778; GFX940-NEXT: ;;#ASMSTART 10779; GFX940-NEXT: ; def v[0:1] 10780; GFX940-NEXT: ;;#ASMEND 10781; GFX940-NEXT: s_nop 0 10782; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 10783; GFX940-NEXT: v_alignbit_b32 v3, v1, v0, 16 10784; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 10785; GFX940-NEXT: s_waitcnt vmcnt(0) 10786; GFX940-NEXT: s_setpc_b64 s[30:31] 10787 %vec0 = call <4 x i16> asm "; def $0", "=v"() 10788 %vec1 = call <4 x i16> asm "; def $0", "=v"() 10789 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 5, i32 6> 10790 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 10791 ret void 10792} 10793 10794define void @v_shuffle_v4i16_v4i16__u_7_7_7(ptr addrspace(1) inreg %ptr) { 10795; GFX900-LABEL: v_shuffle_v4i16_v4i16__u_7_7_7: 10796; GFX900: ; %bb.0: 10797; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10798; GFX900-NEXT: s_mov_b32 s4, 0x7060302 10799; GFX900-NEXT: v_mov_b32_e32 v3, 0 10800; GFX900-NEXT: ;;#ASMSTART 10801; GFX900-NEXT: ; def v[0:1] 10802; GFX900-NEXT: ;;#ASMEND 10803; GFX900-NEXT: v_perm_b32 v2, v1, v1, s4 10804; GFX900-NEXT: global_store_dwordx2 v3, v[1:2], s[16:17] 10805; GFX900-NEXT: s_waitcnt vmcnt(0) 10806; GFX900-NEXT: s_setpc_b64 s[30:31] 10807; 10808; GFX90A-LABEL: v_shuffle_v4i16_v4i16__u_7_7_7: 10809; GFX90A: ; %bb.0: 10810; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10811; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 10812; GFX90A-NEXT: v_mov_b32_e32 v4, 0 10813; GFX90A-NEXT: ;;#ASMSTART 10814; GFX90A-NEXT: ; def v[0:1] 10815; GFX90A-NEXT: ;;#ASMEND 10816; GFX90A-NEXT: v_perm_b32 v3, v1, v1, s4 10817; GFX90A-NEXT: v_mov_b32_e32 
v2, v1 10818; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17] 10819; GFX90A-NEXT: s_waitcnt vmcnt(0) 10820; GFX90A-NEXT: s_setpc_b64 s[30:31] 10821; 10822; GFX940-LABEL: v_shuffle_v4i16_v4i16__u_7_7_7: 10823; GFX940: ; %bb.0: 10824; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10825; GFX940-NEXT: s_mov_b32 s2, 0x7060302 10826; GFX940-NEXT: v_mov_b32_e32 v4, 0 10827; GFX940-NEXT: ;;#ASMSTART 10828; GFX940-NEXT: ; def v[0:1] 10829; GFX940-NEXT: ;;#ASMEND 10830; GFX940-NEXT: s_nop 0 10831; GFX940-NEXT: v_perm_b32 v3, v1, v1, s2 10832; GFX940-NEXT: v_mov_b32_e32 v2, v1 10833; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 10834; GFX940-NEXT: s_waitcnt vmcnt(0) 10835; GFX940-NEXT: s_setpc_b64 s[30:31] 10836 %vec0 = call <4 x i16> asm "; def $0", "=v"() 10837 %vec1 = call <4 x i16> asm "; def $0", "=v"() 10838 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 poison, i32 7, i32 7, i32 7> 10839 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 10840 ret void 10841} 10842 10843define void @v_shuffle_v4i16_v4i16__0_7_7_7(ptr addrspace(1) inreg %ptr) { 10844; GFX900-LABEL: v_shuffle_v4i16_v4i16__0_7_7_7: 10845; GFX900: ; %bb.0: 10846; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10847; GFX900-NEXT: ;;#ASMSTART 10848; GFX900-NEXT: ; def v[0:1] 10849; GFX900-NEXT: ;;#ASMEND 10850; GFX900-NEXT: s_mov_b32 s4, 0xffff 10851; GFX900-NEXT: ;;#ASMSTART 10852; GFX900-NEXT: ; def v[1:2] 10853; GFX900-NEXT: ;;#ASMEND 10854; GFX900-NEXT: v_bfi_b32 v0, s4, v0, v2 10855; GFX900-NEXT: s_mov_b32 s4, 0x7060302 10856; GFX900-NEXT: v_mov_b32_e32 v3, 0 10857; GFX900-NEXT: v_perm_b32 v1, v2, v2, s4 10858; GFX900-NEXT: global_store_dwordx2 v3, v[0:1], s[16:17] 10859; GFX900-NEXT: s_waitcnt vmcnt(0) 10860; GFX900-NEXT: s_setpc_b64 s[30:31] 10861; 10862; GFX90A-LABEL: v_shuffle_v4i16_v4i16__0_7_7_7: 10863; GFX90A: ; %bb.0: 10864; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10865; GFX90A-NEXT: ;;#ASMSTART 10866; GFX90A-NEXT: ; def 
v[0:1] 10867; GFX90A-NEXT: ;;#ASMEND 10868; GFX90A-NEXT: s_mov_b32 s4, 0xffff 10869; GFX90A-NEXT: ;;#ASMSTART 10870; GFX90A-NEXT: ; def v[2:3] 10871; GFX90A-NEXT: ;;#ASMEND 10872; GFX90A-NEXT: v_bfi_b32 v0, s4, v0, v3 10873; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 10874; GFX90A-NEXT: v_mov_b32_e32 v4, 0 10875; GFX90A-NEXT: v_perm_b32 v1, v3, v3, s4 10876; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 10877; GFX90A-NEXT: s_waitcnt vmcnt(0) 10878; GFX90A-NEXT: s_setpc_b64 s[30:31] 10879; 10880; GFX940-LABEL: v_shuffle_v4i16_v4i16__0_7_7_7: 10881; GFX940: ; %bb.0: 10882; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10883; GFX940-NEXT: ;;#ASMSTART 10884; GFX940-NEXT: ; def v[0:1] 10885; GFX940-NEXT: ;;#ASMEND 10886; GFX940-NEXT: s_mov_b32 s2, 0xffff 10887; GFX940-NEXT: ;;#ASMSTART 10888; GFX940-NEXT: ; def v[2:3] 10889; GFX940-NEXT: ;;#ASMEND 10890; GFX940-NEXT: v_mov_b32_e32 v4, 0 10891; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v3 10892; GFX940-NEXT: s_mov_b32 s2, 0x7060302 10893; GFX940-NEXT: v_perm_b32 v1, v3, v3, s2 10894; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 10895; GFX940-NEXT: s_waitcnt vmcnt(0) 10896; GFX940-NEXT: s_setpc_b64 s[30:31] 10897 %vec0 = call <4 x i16> asm "; def $0", "=v"() 10898 %vec1 = call <4 x i16> asm "; def $0", "=v"() 10899 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 0, i32 7, i32 7, i32 7> 10900 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 10901 ret void 10902} 10903 10904define void @v_shuffle_v4i16_v4i16__1_7_7_7(ptr addrspace(1) inreg %ptr) { 10905; GFX900-LABEL: v_shuffle_v4i16_v4i16__1_7_7_7: 10906; GFX900: ; %bb.0: 10907; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10908; GFX900-NEXT: ;;#ASMSTART 10909; GFX900-NEXT: ; def v[0:1] 10910; GFX900-NEXT: ;;#ASMEND 10911; GFX900-NEXT: ;;#ASMSTART 10912; GFX900-NEXT: ; def v[1:2] 10913; GFX900-NEXT: ;;#ASMEND 10914; GFX900-NEXT: s_mov_b32 s4, 0x7060302 10915; GFX900-NEXT: v_mov_b32_e32 v3, 0 10916; GFX900-NEXT: 
v_perm_b32 v0, v2, v0, s4 10917; GFX900-NEXT: v_perm_b32 v1, v2, v2, s4 10918; GFX900-NEXT: global_store_dwordx2 v3, v[0:1], s[16:17] 10919; GFX900-NEXT: s_waitcnt vmcnt(0) 10920; GFX900-NEXT: s_setpc_b64 s[30:31] 10921; 10922; GFX90A-LABEL: v_shuffle_v4i16_v4i16__1_7_7_7: 10923; GFX90A: ; %bb.0: 10924; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10925; GFX90A-NEXT: ;;#ASMSTART 10926; GFX90A-NEXT: ; def v[0:1] 10927; GFX90A-NEXT: ;;#ASMEND 10928; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 10929; GFX90A-NEXT: v_mov_b32_e32 v4, 0 10930; GFX90A-NEXT: ;;#ASMSTART 10931; GFX90A-NEXT: ; def v[2:3] 10932; GFX90A-NEXT: ;;#ASMEND 10933; GFX90A-NEXT: v_perm_b32 v0, v3, v0, s4 10934; GFX90A-NEXT: v_perm_b32 v1, v3, v3, s4 10935; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 10936; GFX90A-NEXT: s_waitcnt vmcnt(0) 10937; GFX90A-NEXT: s_setpc_b64 s[30:31] 10938; 10939; GFX940-LABEL: v_shuffle_v4i16_v4i16__1_7_7_7: 10940; GFX940: ; %bb.0: 10941; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10942; GFX940-NEXT: ;;#ASMSTART 10943; GFX940-NEXT: ; def v[0:1] 10944; GFX940-NEXT: ;;#ASMEND 10945; GFX940-NEXT: s_mov_b32 s2, 0x7060302 10946; GFX940-NEXT: v_mov_b32_e32 v4, 0 10947; GFX940-NEXT: ;;#ASMSTART 10948; GFX940-NEXT: ; def v[2:3] 10949; GFX940-NEXT: ;;#ASMEND 10950; GFX940-NEXT: s_nop 0 10951; GFX940-NEXT: v_perm_b32 v0, v3, v0, s2 10952; GFX940-NEXT: v_perm_b32 v1, v3, v3, s2 10953; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 10954; GFX940-NEXT: s_waitcnt vmcnt(0) 10955; GFX940-NEXT: s_setpc_b64 s[30:31] 10956 %vec0 = call <4 x i16> asm "; def $0", "=v"() 10957 %vec1 = call <4 x i16> asm "; def $0", "=v"() 10958 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 1, i32 7, i32 7, i32 7> 10959 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 10960 ret void 10961} 10962 10963define void @v_shuffle_v4i16_v4i16__2_7_7_7(ptr addrspace(1) inreg %ptr) { 10964; GFX900-LABEL: v_shuffle_v4i16_v4i16__2_7_7_7: 10965; GFX900: ; 
%bb.0: 10966; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10967; GFX900-NEXT: ;;#ASMSTART 10968; GFX900-NEXT: ; def v[0:1] 10969; GFX900-NEXT: ;;#ASMEND 10970; GFX900-NEXT: s_mov_b32 s4, 0xffff 10971; GFX900-NEXT: ;;#ASMSTART 10972; GFX900-NEXT: ; def v[2:3] 10973; GFX900-NEXT: ;;#ASMEND 10974; GFX900-NEXT: v_bfi_b32 v0, s4, v1, v3 10975; GFX900-NEXT: s_mov_b32 s4, 0x7060302 10976; GFX900-NEXT: v_mov_b32_e32 v4, 0 10977; GFX900-NEXT: v_perm_b32 v1, v3, v3, s4 10978; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 10979; GFX900-NEXT: s_waitcnt vmcnt(0) 10980; GFX900-NEXT: s_setpc_b64 s[30:31] 10981; 10982; GFX90A-LABEL: v_shuffle_v4i16_v4i16__2_7_7_7: 10983; GFX90A: ; %bb.0: 10984; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10985; GFX90A-NEXT: ;;#ASMSTART 10986; GFX90A-NEXT: ; def v[0:1] 10987; GFX90A-NEXT: ;;#ASMEND 10988; GFX90A-NEXT: s_mov_b32 s4, 0xffff 10989; GFX90A-NEXT: ;;#ASMSTART 10990; GFX90A-NEXT: ; def v[2:3] 10991; GFX90A-NEXT: ;;#ASMEND 10992; GFX90A-NEXT: v_bfi_b32 v0, s4, v1, v3 10993; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 10994; GFX90A-NEXT: v_mov_b32_e32 v4, 0 10995; GFX90A-NEXT: v_perm_b32 v1, v3, v3, s4 10996; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 10997; GFX90A-NEXT: s_waitcnt vmcnt(0) 10998; GFX90A-NEXT: s_setpc_b64 s[30:31] 10999; 11000; GFX940-LABEL: v_shuffle_v4i16_v4i16__2_7_7_7: 11001; GFX940: ; %bb.0: 11002; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11003; GFX940-NEXT: ;;#ASMSTART 11004; GFX940-NEXT: ; def v[0:1] 11005; GFX940-NEXT: ;;#ASMEND 11006; GFX940-NEXT: s_mov_b32 s2, 0xffff 11007; GFX940-NEXT: ;;#ASMSTART 11008; GFX940-NEXT: ; def v[2:3] 11009; GFX940-NEXT: ;;#ASMEND 11010; GFX940-NEXT: v_mov_b32_e32 v4, 0 11011; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v3 11012; GFX940-NEXT: s_mov_b32 s2, 0x7060302 11013; GFX940-NEXT: v_perm_b32 v1, v3, v3, s2 11014; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 11015; GFX940-NEXT: s_waitcnt vmcnt(0) 11016; GFX940-NEXT: s_setpc_b64 
s[30:31] 11017 %vec0 = call <4 x i16> asm "; def $0", "=v"() 11018 %vec1 = call <4 x i16> asm "; def $0", "=v"() 11019 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 2, i32 7, i32 7, i32 7> 11020 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 11021 ret void 11022} 11023 11024define void @v_shuffle_v4i16_v4i16__3_7_7_7(ptr addrspace(1) inreg %ptr) { 11025; GFX900-LABEL: v_shuffle_v4i16_v4i16__3_7_7_7: 11026; GFX900: ; %bb.0: 11027; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11028; GFX900-NEXT: ;;#ASMSTART 11029; GFX900-NEXT: ; def v[0:1] 11030; GFX900-NEXT: ;;#ASMEND 11031; GFX900-NEXT: s_mov_b32 s4, 0x7060302 11032; GFX900-NEXT: v_mov_b32_e32 v4, 0 11033; GFX900-NEXT: ;;#ASMSTART 11034; GFX900-NEXT: ; def v[2:3] 11035; GFX900-NEXT: ;;#ASMEND 11036; GFX900-NEXT: v_perm_b32 v0, v3, v1, s4 11037; GFX900-NEXT: v_perm_b32 v1, v3, v3, s4 11038; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 11039; GFX900-NEXT: s_waitcnt vmcnt(0) 11040; GFX900-NEXT: s_setpc_b64 s[30:31] 11041; 11042; GFX90A-LABEL: v_shuffle_v4i16_v4i16__3_7_7_7: 11043; GFX90A: ; %bb.0: 11044; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11045; GFX90A-NEXT: ;;#ASMSTART 11046; GFX90A-NEXT: ; def v[0:1] 11047; GFX90A-NEXT: ;;#ASMEND 11048; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 11049; GFX90A-NEXT: v_mov_b32_e32 v4, 0 11050; GFX90A-NEXT: ;;#ASMSTART 11051; GFX90A-NEXT: ; def v[2:3] 11052; GFX90A-NEXT: ;;#ASMEND 11053; GFX90A-NEXT: v_perm_b32 v0, v3, v1, s4 11054; GFX90A-NEXT: v_perm_b32 v1, v3, v3, s4 11055; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 11056; GFX90A-NEXT: s_waitcnt vmcnt(0) 11057; GFX90A-NEXT: s_setpc_b64 s[30:31] 11058; 11059; GFX940-LABEL: v_shuffle_v4i16_v4i16__3_7_7_7: 11060; GFX940: ; %bb.0: 11061; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11062; GFX940-NEXT: ;;#ASMSTART 11063; GFX940-NEXT: ; def v[0:1] 11064; GFX940-NEXT: ;;#ASMEND 11065; GFX940-NEXT: s_mov_b32 s2, 0x7060302 11066; GFX940-NEXT: v_mov_b32_e32 v4, 
0 11067; GFX940-NEXT: ;;#ASMSTART 11068; GFX940-NEXT: ; def v[2:3] 11069; GFX940-NEXT: ;;#ASMEND 11070; GFX940-NEXT: s_nop 0 11071; GFX940-NEXT: v_perm_b32 v0, v3, v1, s2 11072; GFX940-NEXT: v_perm_b32 v1, v3, v3, s2 11073; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 11074; GFX940-NEXT: s_waitcnt vmcnt(0) 11075; GFX940-NEXT: s_setpc_b64 s[30:31] 11076 %vec0 = call <4 x i16> asm "; def $0", "=v"() 11077 %vec1 = call <4 x i16> asm "; def $0", "=v"() 11078 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 3, i32 7, i32 7, i32 7> 11079 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 11080 ret void 11081} 11082 11083define void @v_shuffle_v4i16_v4i16__4_7_7_7(ptr addrspace(1) inreg %ptr) { 11084; GFX900-LABEL: v_shuffle_v4i16_v4i16__4_7_7_7: 11085; GFX900: ; %bb.0: 11086; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11087; GFX900-NEXT: ;;#ASMSTART 11088; GFX900-NEXT: ; def v[0:1] 11089; GFX900-NEXT: ;;#ASMEND 11090; GFX900-NEXT: s_mov_b32 s4, 0xffff 11091; GFX900-NEXT: v_bfi_b32 v0, s4, v0, v1 11092; GFX900-NEXT: s_mov_b32 s4, 0x7060302 11093; GFX900-NEXT: v_mov_b32_e32 v2, 0 11094; GFX900-NEXT: v_perm_b32 v1, v1, v1, s4 11095; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17] 11096; GFX900-NEXT: s_waitcnt vmcnt(0) 11097; GFX900-NEXT: s_setpc_b64 s[30:31] 11098; 11099; GFX90A-LABEL: v_shuffle_v4i16_v4i16__4_7_7_7: 11100; GFX90A: ; %bb.0: 11101; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11102; GFX90A-NEXT: ;;#ASMSTART 11103; GFX90A-NEXT: ; def v[0:1] 11104; GFX90A-NEXT: ;;#ASMEND 11105; GFX90A-NEXT: s_mov_b32 s4, 0xffff 11106; GFX90A-NEXT: v_bfi_b32 v0, s4, v0, v1 11107; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 11108; GFX90A-NEXT: v_mov_b32_e32 v2, 0 11109; GFX90A-NEXT: v_perm_b32 v1, v1, v1, s4 11110; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17] 11111; GFX90A-NEXT: s_waitcnt vmcnt(0) 11112; GFX90A-NEXT: s_setpc_b64 s[30:31] 11113; 11114; GFX940-LABEL: v_shuffle_v4i16_v4i16__4_7_7_7: 11115; GFX940: 
; %bb.0: 11116; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11117; GFX940-NEXT: ;;#ASMSTART 11118; GFX940-NEXT: ; def v[0:1] 11119; GFX940-NEXT: ;;#ASMEND 11120; GFX940-NEXT: s_mov_b32 s2, 0xffff 11121; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v1 11122; GFX940-NEXT: s_mov_b32 s2, 0x7060302 11123; GFX940-NEXT: v_mov_b32_e32 v2, 0 11124; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 11125; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 11126; GFX940-NEXT: s_waitcnt vmcnt(0) 11127; GFX940-NEXT: s_setpc_b64 s[30:31] 11128 %vec0 = call <4 x i16> asm "; def $0", "=v"() 11129 %vec1 = call <4 x i16> asm "; def $0", "=v"() 11130 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 4, i32 7, i32 7, i32 7> 11131 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 11132 ret void 11133} 11134 11135define void @v_shuffle_v4i16_v4i16__5_7_7_7(ptr addrspace(1) inreg %ptr) { 11136; GFX900-LABEL: v_shuffle_v4i16_v4i16__5_7_7_7: 11137; GFX900: ; %bb.0: 11138; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11139; GFX900-NEXT: ;;#ASMSTART 11140; GFX900-NEXT: ; def v[0:1] 11141; GFX900-NEXT: ;;#ASMEND 11142; GFX900-NEXT: s_mov_b32 s4, 0x7060302 11143; GFX900-NEXT: v_mov_b32_e32 v2, 0 11144; GFX900-NEXT: v_perm_b32 v0, v1, v0, s4 11145; GFX900-NEXT: v_perm_b32 v1, v1, v1, s4 11146; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17] 11147; GFX900-NEXT: s_waitcnt vmcnt(0) 11148; GFX900-NEXT: s_setpc_b64 s[30:31] 11149; 11150; GFX90A-LABEL: v_shuffle_v4i16_v4i16__5_7_7_7: 11151; GFX90A: ; %bb.0: 11152; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11153; GFX90A-NEXT: ;;#ASMSTART 11154; GFX90A-NEXT: ; def v[0:1] 11155; GFX90A-NEXT: ;;#ASMEND 11156; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 11157; GFX90A-NEXT: v_mov_b32_e32 v2, 0 11158; GFX90A-NEXT: v_perm_b32 v0, v1, v0, s4 11159; GFX90A-NEXT: v_perm_b32 v1, v1, v1, s4 11160; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17] 11161; GFX90A-NEXT: s_waitcnt vmcnt(0) 11162; GFX90A-NEXT: s_setpc_b64 
s[30:31] 11163; 11164; GFX940-LABEL: v_shuffle_v4i16_v4i16__5_7_7_7: 11165; GFX940: ; %bb.0: 11166; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11167; GFX940-NEXT: ;;#ASMSTART 11168; GFX940-NEXT: ; def v[0:1] 11169; GFX940-NEXT: ;;#ASMEND 11170; GFX940-NEXT: s_mov_b32 s2, 0x7060302 11171; GFX940-NEXT: v_mov_b32_e32 v2, 0 11172; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2 11173; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 11174; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 11175; GFX940-NEXT: s_waitcnt vmcnt(0) 11176; GFX940-NEXT: s_setpc_b64 s[30:31] 11177 %vec0 = call <4 x i16> asm "; def $0", "=v"() 11178 %vec1 = call <4 x i16> asm "; def $0", "=v"() 11179 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 5, i32 7, i32 7, i32 7> 11180 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 11181 ret void 11182} 11183 11184define void @v_shuffle_v4i16_v4i16__6_7_7_7(ptr addrspace(1) inreg %ptr) { 11185; GFX900-LABEL: v_shuffle_v4i16_v4i16__6_7_7_7: 11186; GFX900: ; %bb.0: 11187; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11188; GFX900-NEXT: s_mov_b32 s4, 0x7060302 11189; GFX900-NEXT: v_mov_b32_e32 v3, 0 11190; GFX900-NEXT: ;;#ASMSTART 11191; GFX900-NEXT: ; def v[0:1] 11192; GFX900-NEXT: ;;#ASMEND 11193; GFX900-NEXT: v_perm_b32 v2, v1, v1, s4 11194; GFX900-NEXT: global_store_dwordx2 v3, v[1:2], s[16:17] 11195; GFX900-NEXT: s_waitcnt vmcnt(0) 11196; GFX900-NEXT: s_setpc_b64 s[30:31] 11197; 11198; GFX90A-LABEL: v_shuffle_v4i16_v4i16__6_7_7_7: 11199; GFX90A: ; %bb.0: 11200; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11201; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 11202; GFX90A-NEXT: v_mov_b32_e32 v4, 0 11203; GFX90A-NEXT: ;;#ASMSTART 11204; GFX90A-NEXT: ; def v[0:1] 11205; GFX90A-NEXT: ;;#ASMEND 11206; GFX90A-NEXT: v_perm_b32 v3, v1, v1, s4 11207; GFX90A-NEXT: v_mov_b32_e32 v2, v1 11208; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17] 11209; GFX90A-NEXT: s_waitcnt vmcnt(0) 11210; GFX90A-NEXT: s_setpc_b64 
s[30:31] 11211; 11212; GFX940-LABEL: v_shuffle_v4i16_v4i16__6_7_7_7: 11213; GFX940: ; %bb.0: 11214; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11215; GFX940-NEXT: s_mov_b32 s2, 0x7060302 11216; GFX940-NEXT: v_mov_b32_e32 v4, 0 11217; GFX940-NEXT: ;;#ASMSTART 11218; GFX940-NEXT: ; def v[0:1] 11219; GFX940-NEXT: ;;#ASMEND 11220; GFX940-NEXT: s_nop 0 11221; GFX940-NEXT: v_perm_b32 v3, v1, v1, s2 11222; GFX940-NEXT: v_mov_b32_e32 v2, v1 11223; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 11224; GFX940-NEXT: s_waitcnt vmcnt(0) 11225; GFX940-NEXT: s_setpc_b64 s[30:31] 11226 %vec0 = call <4 x i16> asm "; def $0", "=v"() 11227 %vec1 = call <4 x i16> asm "; def $0", "=v"() 11228 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 6, i32 7, i32 7, i32 7> 11229 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 11230 ret void 11231} 11232 11233define void @v_shuffle_v4i16_v4i16__7_u_7_7(ptr addrspace(1) inreg %ptr) { 11234; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_u_7_7: 11235; GFX900: ; %bb.0: 11236; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11237; GFX900-NEXT: ;;#ASMSTART 11238; GFX900-NEXT: ; def v[0:1] 11239; GFX900-NEXT: ;;#ASMEND 11240; GFX900-NEXT: s_mov_b32 s4, 0x7060302 11241; GFX900-NEXT: v_mov_b32_e32 v3, 0 11242; GFX900-NEXT: v_perm_b32 v2, v1, v1, s4 11243; GFX900-NEXT: v_alignbit_b32 v1, s4, v1, 16 11244; GFX900-NEXT: global_store_dwordx2 v3, v[1:2], s[16:17] 11245; GFX900-NEXT: s_waitcnt vmcnt(0) 11246; GFX900-NEXT: s_setpc_b64 s[30:31] 11247; 11248; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_u_7_7: 11249; GFX90A: ; %bb.0: 11250; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11251; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 11252; GFX90A-NEXT: v_mov_b32_e32 v4, 0 11253; GFX90A-NEXT: ;;#ASMSTART 11254; GFX90A-NEXT: ; def v[0:1] 11255; GFX90A-NEXT: ;;#ASMEND 11256; GFX90A-NEXT: v_perm_b32 v3, v1, v1, s4 11257; GFX90A-NEXT: v_alignbit_b32 v2, s4, v1, 16 11258; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], 
s[16:17] 11259; GFX90A-NEXT: s_waitcnt vmcnt(0) 11260; GFX90A-NEXT: s_setpc_b64 s[30:31] 11261; 11262; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_u_7_7: 11263; GFX940: ; %bb.0: 11264; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11265; GFX940-NEXT: s_mov_b32 s2, 0x7060302 11266; GFX940-NEXT: v_mov_b32_e32 v4, 0 11267; GFX940-NEXT: ;;#ASMSTART 11268; GFX940-NEXT: ; def v[0:1] 11269; GFX940-NEXT: ;;#ASMEND 11270; GFX940-NEXT: s_nop 0 11271; GFX940-NEXT: v_perm_b32 v3, v1, v1, s2 11272; GFX940-NEXT: v_alignbit_b32 v2, s0, v1, 16 11273; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 11274; GFX940-NEXT: s_waitcnt vmcnt(0) 11275; GFX940-NEXT: s_setpc_b64 s[30:31] 11276 %vec0 = call <4 x i16> asm "; def $0", "=v"() 11277 %vec1 = call <4 x i16> asm "; def $0", "=v"() 11278 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 poison, i32 7, i32 7> 11279 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 11280 ret void 11281} 11282 11283define void @v_shuffle_v4i16_v4i16__7_0_7_7(ptr addrspace(1) inreg %ptr) { 11284; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_0_7_7: 11285; GFX900: ; %bb.0: 11286; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11287; GFX900-NEXT: ;;#ASMSTART 11288; GFX900-NEXT: ; def v[0:1] 11289; GFX900-NEXT: ;;#ASMEND 11290; GFX900-NEXT: ;;#ASMSTART 11291; GFX900-NEXT: ; def v[1:2] 11292; GFX900-NEXT: ;;#ASMEND 11293; GFX900-NEXT: s_mov_b32 s4, 0x7060302 11294; GFX900-NEXT: v_mov_b32_e32 v3, 0 11295; GFX900-NEXT: v_perm_b32 v1, v2, v2, s4 11296; GFX900-NEXT: v_alignbit_b32 v0, v0, v2, 16 11297; GFX900-NEXT: global_store_dwordx2 v3, v[0:1], s[16:17] 11298; GFX900-NEXT: s_waitcnt vmcnt(0) 11299; GFX900-NEXT: s_setpc_b64 s[30:31] 11300; 11301; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_0_7_7: 11302; GFX90A: ; %bb.0: 11303; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11304; GFX90A-NEXT: ;;#ASMSTART 11305; GFX90A-NEXT: ; def v[0:1] 11306; GFX90A-NEXT: ;;#ASMEND 11307; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 
11308; GFX90A-NEXT: v_mov_b32_e32 v4, 0 11309; GFX90A-NEXT: ;;#ASMSTART 11310; GFX90A-NEXT: ; def v[2:3] 11311; GFX90A-NEXT: ;;#ASMEND 11312; GFX90A-NEXT: v_perm_b32 v1, v3, v3, s4 11313; GFX90A-NEXT: v_alignbit_b32 v0, v0, v3, 16 11314; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 11315; GFX90A-NEXT: s_waitcnt vmcnt(0) 11316; GFX90A-NEXT: s_setpc_b64 s[30:31] 11317; 11318; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_0_7_7: 11319; GFX940: ; %bb.0: 11320; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11321; GFX940-NEXT: ;;#ASMSTART 11322; GFX940-NEXT: ; def v[0:1] 11323; GFX940-NEXT: ;;#ASMEND 11324; GFX940-NEXT: s_mov_b32 s2, 0x7060302 11325; GFX940-NEXT: v_mov_b32_e32 v4, 0 11326; GFX940-NEXT: ;;#ASMSTART 11327; GFX940-NEXT: ; def v[2:3] 11328; GFX940-NEXT: ;;#ASMEND 11329; GFX940-NEXT: s_nop 0 11330; GFX940-NEXT: v_perm_b32 v1, v3, v3, s2 11331; GFX940-NEXT: v_alignbit_b32 v0, v0, v3, 16 11332; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 11333; GFX940-NEXT: s_waitcnt vmcnt(0) 11334; GFX940-NEXT: s_setpc_b64 s[30:31] 11335 %vec0 = call <4 x i16> asm "; def $0", "=v"() 11336 %vec1 = call <4 x i16> asm "; def $0", "=v"() 11337 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 0, i32 7, i32 7> 11338 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 11339 ret void 11340} 11341 11342define void @v_shuffle_v4i16_v4i16__7_1_7_7(ptr addrspace(1) inreg %ptr) { 11343; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_1_7_7: 11344; GFX900: ; %bb.0: 11345; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11346; GFX900-NEXT: ;;#ASMSTART 11347; GFX900-NEXT: ; def v[0:1] 11348; GFX900-NEXT: ;;#ASMEND 11349; GFX900-NEXT: ;;#ASMSTART 11350; GFX900-NEXT: ; def v[1:2] 11351; GFX900-NEXT: ;;#ASMEND 11352; GFX900-NEXT: s_mov_b32 s4, 0x7060302 11353; GFX900-NEXT: v_mov_b32_e32 v3, 0 11354; GFX900-NEXT: v_perm_b32 v0, v0, v2, s4 11355; GFX900-NEXT: v_perm_b32 v1, v2, v2, s4 11356; GFX900-NEXT: global_store_dwordx2 v3, v[0:1], 
s[16:17] 11357; GFX900-NEXT: s_waitcnt vmcnt(0) 11358; GFX900-NEXT: s_setpc_b64 s[30:31] 11359; 11360; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_1_7_7: 11361; GFX90A: ; %bb.0: 11362; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11363; GFX90A-NEXT: ;;#ASMSTART 11364; GFX90A-NEXT: ; def v[0:1] 11365; GFX90A-NEXT: ;;#ASMEND 11366; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 11367; GFX90A-NEXT: v_mov_b32_e32 v4, 0 11368; GFX90A-NEXT: ;;#ASMSTART 11369; GFX90A-NEXT: ; def v[2:3] 11370; GFX90A-NEXT: ;;#ASMEND 11371; GFX90A-NEXT: v_perm_b32 v0, v0, v3, s4 11372; GFX90A-NEXT: v_perm_b32 v1, v3, v3, s4 11373; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 11374; GFX90A-NEXT: s_waitcnt vmcnt(0) 11375; GFX90A-NEXT: s_setpc_b64 s[30:31] 11376; 11377; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_1_7_7: 11378; GFX940: ; %bb.0: 11379; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11380; GFX940-NEXT: ;;#ASMSTART 11381; GFX940-NEXT: ; def v[0:1] 11382; GFX940-NEXT: ;;#ASMEND 11383; GFX940-NEXT: s_mov_b32 s2, 0x7060302 11384; GFX940-NEXT: v_mov_b32_e32 v4, 0 11385; GFX940-NEXT: ;;#ASMSTART 11386; GFX940-NEXT: ; def v[2:3] 11387; GFX940-NEXT: ;;#ASMEND 11388; GFX940-NEXT: s_nop 0 11389; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 11390; GFX940-NEXT: v_perm_b32 v1, v3, v3, s2 11391; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 11392; GFX940-NEXT: s_waitcnt vmcnt(0) 11393; GFX940-NEXT: s_setpc_b64 s[30:31] 11394 %vec0 = call <4 x i16> asm "; def $0", "=v"() 11395 %vec1 = call <4 x i16> asm "; def $0", "=v"() 11396 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 1, i32 7, i32 7> 11397 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 11398 ret void 11399} 11400 11401define void @v_shuffle_v4i16_v4i16__7_2_7_7(ptr addrspace(1) inreg %ptr) { 11402; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_2_7_7: 11403; GFX900: ; %bb.0: 11404; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11405; GFX900-NEXT: ;;#ASMSTART 11406; GFX900-NEXT: ; def 
v[0:1] 11407; GFX900-NEXT: ;;#ASMEND 11408; GFX900-NEXT: ;;#ASMSTART 11409; GFX900-NEXT: ; def v[2:3] 11410; GFX900-NEXT: ;;#ASMEND 11411; GFX900-NEXT: s_mov_b32 s4, 0x7060302 11412; GFX900-NEXT: v_mov_b32_e32 v4, 0 11413; GFX900-NEXT: v_perm_b32 v2, v3, v3, s4 11414; GFX900-NEXT: v_alignbit_b32 v1, v1, v3, 16 11415; GFX900-NEXT: global_store_dwordx2 v4, v[1:2], s[16:17] 11416; GFX900-NEXT: s_waitcnt vmcnt(0) 11417; GFX900-NEXT: s_setpc_b64 s[30:31] 11418; 11419; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_2_7_7: 11420; GFX90A: ; %bb.0: 11421; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11422; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 11423; GFX90A-NEXT: v_mov_b32_e32 v6, 0 11424; GFX90A-NEXT: ;;#ASMSTART 11425; GFX90A-NEXT: ; def v[0:1] 11426; GFX90A-NEXT: ;;#ASMEND 11427; GFX90A-NEXT: ;;#ASMSTART 11428; GFX90A-NEXT: ; def v[2:3] 11429; GFX90A-NEXT: ;;#ASMEND 11430; GFX90A-NEXT: v_perm_b32 v5, v3, v3, s4 11431; GFX90A-NEXT: v_alignbit_b32 v4, v1, v3, 16 11432; GFX90A-NEXT: global_store_dwordx2 v6, v[4:5], s[16:17] 11433; GFX90A-NEXT: s_waitcnt vmcnt(0) 11434; GFX90A-NEXT: s_setpc_b64 s[30:31] 11435; 11436; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_2_7_7: 11437; GFX940: ; %bb.0: 11438; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11439; GFX940-NEXT: s_mov_b32 s2, 0x7060302 11440; GFX940-NEXT: v_mov_b32_e32 v6, 0 11441; GFX940-NEXT: ;;#ASMSTART 11442; GFX940-NEXT: ; def v[0:1] 11443; GFX940-NEXT: ;;#ASMEND 11444; GFX940-NEXT: ;;#ASMSTART 11445; GFX940-NEXT: ; def v[2:3] 11446; GFX940-NEXT: ;;#ASMEND 11447; GFX940-NEXT: s_nop 0 11448; GFX940-NEXT: v_perm_b32 v5, v3, v3, s2 11449; GFX940-NEXT: v_alignbit_b32 v4, v1, v3, 16 11450; GFX940-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] sc0 sc1 11451; GFX940-NEXT: s_waitcnt vmcnt(0) 11452; GFX940-NEXT: s_setpc_b64 s[30:31] 11453 %vec0 = call <4 x i16> asm "; def $0", "=v"() 11454 %vec1 = call <4 x i16> asm "; def $0", "=v"() 11455 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 2, i32 7, 
i32 7> 11456 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 11457 ret void 11458} 11459 11460define void @v_shuffle_v4i16_v4i16__7_3_7_7(ptr addrspace(1) inreg %ptr) { 11461; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_3_7_7: 11462; GFX900: ; %bb.0: 11463; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11464; GFX900-NEXT: ;;#ASMSTART 11465; GFX900-NEXT: ; def v[0:1] 11466; GFX900-NEXT: ;;#ASMEND 11467; GFX900-NEXT: s_mov_b32 s4, 0x7060302 11468; GFX900-NEXT: v_mov_b32_e32 v4, 0 11469; GFX900-NEXT: ;;#ASMSTART 11470; GFX900-NEXT: ; def v[2:3] 11471; GFX900-NEXT: ;;#ASMEND 11472; GFX900-NEXT: v_perm_b32 v0, v1, v3, s4 11473; GFX900-NEXT: v_perm_b32 v1, v3, v3, s4 11474; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 11475; GFX900-NEXT: s_waitcnt vmcnt(0) 11476; GFX900-NEXT: s_setpc_b64 s[30:31] 11477; 11478; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_3_7_7: 11479; GFX90A: ; %bb.0: 11480; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11481; GFX90A-NEXT: ;;#ASMSTART 11482; GFX90A-NEXT: ; def v[0:1] 11483; GFX90A-NEXT: ;;#ASMEND 11484; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 11485; GFX90A-NEXT: v_mov_b32_e32 v4, 0 11486; GFX90A-NEXT: ;;#ASMSTART 11487; GFX90A-NEXT: ; def v[2:3] 11488; GFX90A-NEXT: ;;#ASMEND 11489; GFX90A-NEXT: v_perm_b32 v0, v1, v3, s4 11490; GFX90A-NEXT: v_perm_b32 v1, v3, v3, s4 11491; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 11492; GFX90A-NEXT: s_waitcnt vmcnt(0) 11493; GFX90A-NEXT: s_setpc_b64 s[30:31] 11494; 11495; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_3_7_7: 11496; GFX940: ; %bb.0: 11497; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11498; GFX940-NEXT: ;;#ASMSTART 11499; GFX940-NEXT: ; def v[0:1] 11500; GFX940-NEXT: ;;#ASMEND 11501; GFX940-NEXT: s_mov_b32 s2, 0x7060302 11502; GFX940-NEXT: v_mov_b32_e32 v4, 0 11503; GFX940-NEXT: ;;#ASMSTART 11504; GFX940-NEXT: ; def v[2:3] 11505; GFX940-NEXT: ;;#ASMEND 11506; GFX940-NEXT: s_nop 0 11507; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 11508; GFX940-NEXT: v_perm_b32 v1, 
v3, v3, s2 11509; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 11510; GFX940-NEXT: s_waitcnt vmcnt(0) 11511; GFX940-NEXT: s_setpc_b64 s[30:31] 11512 %vec0 = call <4 x i16> asm "; def $0", "=v"() 11513 %vec1 = call <4 x i16> asm "; def $0", "=v"() 11514 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 3, i32 7, i32 7> 11515 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 11516 ret void 11517} 11518 11519define void @v_shuffle_v4i16_v4i16__7_4_7_7(ptr addrspace(1) inreg %ptr) { 11520; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_4_7_7: 11521; GFX900: ; %bb.0: 11522; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11523; GFX900-NEXT: ;;#ASMSTART 11524; GFX900-NEXT: ; def v[0:1] 11525; GFX900-NEXT: ;;#ASMEND 11526; GFX900-NEXT: s_mov_b32 s4, 0x7060302 11527; GFX900-NEXT: v_mov_b32_e32 v3, 0 11528; GFX900-NEXT: v_perm_b32 v2, v1, v1, s4 11529; GFX900-NEXT: v_alignbit_b32 v1, v0, v1, 16 11530; GFX900-NEXT: global_store_dwordx2 v3, v[1:2], s[16:17] 11531; GFX900-NEXT: s_waitcnt vmcnt(0) 11532; GFX900-NEXT: s_setpc_b64 s[30:31] 11533; 11534; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_4_7_7: 11535; GFX90A: ; %bb.0: 11536; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11537; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 11538; GFX90A-NEXT: v_mov_b32_e32 v4, 0 11539; GFX90A-NEXT: ;;#ASMSTART 11540; GFX90A-NEXT: ; def v[0:1] 11541; GFX90A-NEXT: ;;#ASMEND 11542; GFX90A-NEXT: v_perm_b32 v3, v1, v1, s4 11543; GFX90A-NEXT: v_alignbit_b32 v2, v0, v1, 16 11544; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17] 11545; GFX90A-NEXT: s_waitcnt vmcnt(0) 11546; GFX90A-NEXT: s_setpc_b64 s[30:31] 11547; 11548; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_4_7_7: 11549; GFX940: ; %bb.0: 11550; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11551; GFX940-NEXT: s_mov_b32 s2, 0x7060302 11552; GFX940-NEXT: v_mov_b32_e32 v4, 0 11553; GFX940-NEXT: ;;#ASMSTART 11554; GFX940-NEXT: ; def v[0:1] 11555; GFX940-NEXT: ;;#ASMEND 11556; GFX940-NEXT: s_nop 0 
11557; GFX940-NEXT: v_perm_b32 v3, v1, v1, s2 11558; GFX940-NEXT: v_alignbit_b32 v2, v0, v1, 16 11559; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 11560; GFX940-NEXT: s_waitcnt vmcnt(0) 11561; GFX940-NEXT: s_setpc_b64 s[30:31] 11562 %vec0 = call <4 x i16> asm "; def $0", "=v"() 11563 %vec1 = call <4 x i16> asm "; def $0", "=v"() 11564 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 4, i32 7, i32 7> 11565 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 11566 ret void 11567} 11568 11569define void @v_shuffle_v4i16_v4i16__7_5_7_7(ptr addrspace(1) inreg %ptr) { 11570; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_5_7_7: 11571; GFX900: ; %bb.0: 11572; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11573; GFX900-NEXT: ;;#ASMSTART 11574; GFX900-NEXT: ; def v[0:1] 11575; GFX900-NEXT: ;;#ASMEND 11576; GFX900-NEXT: s_mov_b32 s4, 0x7060302 11577; GFX900-NEXT: v_mov_b32_e32 v2, 0 11578; GFX900-NEXT: v_perm_b32 v0, v0, v1, s4 11579; GFX900-NEXT: v_perm_b32 v1, v1, v1, s4 11580; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17] 11581; GFX900-NEXT: s_waitcnt vmcnt(0) 11582; GFX900-NEXT: s_setpc_b64 s[30:31] 11583; 11584; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_5_7_7: 11585; GFX90A: ; %bb.0: 11586; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11587; GFX90A-NEXT: ;;#ASMSTART 11588; GFX90A-NEXT: ; def v[0:1] 11589; GFX90A-NEXT: ;;#ASMEND 11590; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 11591; GFX90A-NEXT: v_mov_b32_e32 v2, 0 11592; GFX90A-NEXT: v_perm_b32 v0, v0, v1, s4 11593; GFX90A-NEXT: v_perm_b32 v1, v1, v1, s4 11594; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17] 11595; GFX90A-NEXT: s_waitcnt vmcnt(0) 11596; GFX90A-NEXT: s_setpc_b64 s[30:31] 11597; 11598; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_5_7_7: 11599; GFX940: ; %bb.0: 11600; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11601; GFX940-NEXT: ;;#ASMSTART 11602; GFX940-NEXT: ; def v[0:1] 11603; GFX940-NEXT: ;;#ASMEND 11604; GFX940-NEXT: s_mov_b32 s2, 
0x7060302 11605; GFX940-NEXT: v_mov_b32_e32 v2, 0 11606; GFX940-NEXT: v_perm_b32 v0, v0, v1, s2 11607; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 11608; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 11609; GFX940-NEXT: s_waitcnt vmcnt(0) 11610; GFX940-NEXT: s_setpc_b64 s[30:31] 11611 %vec0 = call <4 x i16> asm "; def $0", "=v"() 11612 %vec1 = call <4 x i16> asm "; def $0", "=v"() 11613 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 5, i32 7, i32 7> 11614 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 11615 ret void 11616} 11617 11618define void @v_shuffle_v4i16_v4i16__7_6_7_7(ptr addrspace(1) inreg %ptr) { 11619; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_6_7_7: 11620; GFX900: ; %bb.0: 11621; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11622; GFX900-NEXT: ;;#ASMSTART 11623; GFX900-NEXT: ; def v[0:1] 11624; GFX900-NEXT: ;;#ASMEND 11625; GFX900-NEXT: s_mov_b32 s4, 0x7060302 11626; GFX900-NEXT: v_mov_b32_e32 v3, 0 11627; GFX900-NEXT: v_perm_b32 v2, v1, v1, s4 11628; GFX900-NEXT: v_alignbit_b32 v1, v1, v1, 16 11629; GFX900-NEXT: global_store_dwordx2 v3, v[1:2], s[16:17] 11630; GFX900-NEXT: s_waitcnt vmcnt(0) 11631; GFX900-NEXT: s_setpc_b64 s[30:31] 11632; 11633; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_6_7_7: 11634; GFX90A: ; %bb.0: 11635; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11636; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 11637; GFX90A-NEXT: v_mov_b32_e32 v4, 0 11638; GFX90A-NEXT: ;;#ASMSTART 11639; GFX90A-NEXT: ; def v[0:1] 11640; GFX90A-NEXT: ;;#ASMEND 11641; GFX90A-NEXT: v_perm_b32 v3, v1, v1, s4 11642; GFX90A-NEXT: v_alignbit_b32 v2, v1, v1, 16 11643; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17] 11644; GFX90A-NEXT: s_waitcnt vmcnt(0) 11645; GFX90A-NEXT: s_setpc_b64 s[30:31] 11646; 11647; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_6_7_7: 11648; GFX940: ; %bb.0: 11649; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11650; GFX940-NEXT: s_mov_b32 s2, 0x7060302 11651; GFX940-NEXT: v_mov_b32_e32 
v4, 0 11652; GFX940-NEXT: ;;#ASMSTART 11653; GFX940-NEXT: ; def v[0:1] 11654; GFX940-NEXT: ;;#ASMEND 11655; GFX940-NEXT: s_nop 0 11656; GFX940-NEXT: v_perm_b32 v3, v1, v1, s2 11657; GFX940-NEXT: v_alignbit_b32 v2, v1, v1, 16 11658; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 11659; GFX940-NEXT: s_waitcnt vmcnt(0) 11660; GFX940-NEXT: s_setpc_b64 s[30:31] 11661 %vec0 = call <4 x i16> asm "; def $0", "=v"() 11662 %vec1 = call <4 x i16> asm "; def $0", "=v"() 11663 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 6, i32 7, i32 7> 11664 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 11665 ret void 11666} 11667 11668define void @v_shuffle_v4i16_v4i16__7_7_u_7(ptr addrspace(1) inreg %ptr) { 11669; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_u_7: 11670; GFX900: ; %bb.0: 11671; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11672; GFX900-NEXT: ;;#ASMSTART 11673; GFX900-NEXT: ; def v[0:1] 11674; GFX900-NEXT: ;;#ASMEND 11675; GFX900-NEXT: s_mov_b32 s4, 0x7060302 11676; GFX900-NEXT: v_mov_b32_e32 v2, 0 11677; GFX900-NEXT: v_perm_b32 v0, v1, v1, s4 11678; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17] 11679; GFX900-NEXT: s_waitcnt vmcnt(0) 11680; GFX900-NEXT: s_setpc_b64 s[30:31] 11681; 11682; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_u_7: 11683; GFX90A: ; %bb.0: 11684; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11685; GFX90A-NEXT: ;;#ASMSTART 11686; GFX90A-NEXT: ; def v[0:1] 11687; GFX90A-NEXT: ;;#ASMEND 11688; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 11689; GFX90A-NEXT: v_mov_b32_e32 v2, 0 11690; GFX90A-NEXT: v_perm_b32 v0, v1, v1, s4 11691; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17] 11692; GFX90A-NEXT: s_waitcnt vmcnt(0) 11693; GFX90A-NEXT: s_setpc_b64 s[30:31] 11694; 11695; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_u_7: 11696; GFX940: ; %bb.0: 11697; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11698; GFX940-NEXT: ;;#ASMSTART 11699; GFX940-NEXT: ; def v[0:1] 11700; GFX940-NEXT: ;;#ASMEND 
11701; GFX940-NEXT: s_mov_b32 s2, 0x7060302 11702; GFX940-NEXT: v_mov_b32_e32 v2, 0 11703; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 11704; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 11705; GFX940-NEXT: s_waitcnt vmcnt(0) 11706; GFX940-NEXT: s_setpc_b64 s[30:31] 11707 %vec0 = call <4 x i16> asm "; def $0", "=v"() 11708 %vec1 = call <4 x i16> asm "; def $0", "=v"() 11709 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 poison, i32 7> 11710 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 11711 ret void 11712} 11713 11714define void @v_shuffle_v4i16_v4i16__7_7_0_7(ptr addrspace(1) inreg %ptr) { 11715; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_0_7: 11716; GFX900: ; %bb.0: 11717; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11718; GFX900-NEXT: ;;#ASMSTART 11719; GFX900-NEXT: ; def v[0:1] 11720; GFX900-NEXT: ;;#ASMEND 11721; GFX900-NEXT: ;;#ASMSTART 11722; GFX900-NEXT: ; def v[1:2] 11723; GFX900-NEXT: ;;#ASMEND 11724; GFX900-NEXT: s_mov_b32 s4, 0xffff 11725; GFX900-NEXT: v_bfi_b32 v1, s4, v0, v2 11726; GFX900-NEXT: s_mov_b32 s4, 0x7060302 11727; GFX900-NEXT: v_mov_b32_e32 v3, 0 11728; GFX900-NEXT: v_perm_b32 v0, v2, v2, s4 11729; GFX900-NEXT: global_store_dwordx2 v3, v[0:1], s[16:17] 11730; GFX900-NEXT: s_waitcnt vmcnt(0) 11731; GFX900-NEXT: s_setpc_b64 s[30:31] 11732; 11733; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_0_7: 11734; GFX90A: ; %bb.0: 11735; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11736; GFX90A-NEXT: ;;#ASMSTART 11737; GFX90A-NEXT: ; def v[0:1] 11738; GFX90A-NEXT: ;;#ASMEND 11739; GFX90A-NEXT: s_mov_b32 s4, 0xffff 11740; GFX90A-NEXT: ;;#ASMSTART 11741; GFX90A-NEXT: ; def v[2:3] 11742; GFX90A-NEXT: ;;#ASMEND 11743; GFX90A-NEXT: v_bfi_b32 v1, s4, v0, v3 11744; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 11745; GFX90A-NEXT: v_mov_b32_e32 v4, 0 11746; GFX90A-NEXT: v_perm_b32 v0, v3, v3, s4 11747; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 11748; GFX90A-NEXT: s_waitcnt vmcnt(0) 11749; 
GFX90A-NEXT: s_setpc_b64 s[30:31] 11750; 11751; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_0_7: 11752; GFX940: ; %bb.0: 11753; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11754; GFX940-NEXT: ;;#ASMSTART 11755; GFX940-NEXT: ; def v[0:1] 11756; GFX940-NEXT: ;;#ASMEND 11757; GFX940-NEXT: s_mov_b32 s2, 0xffff 11758; GFX940-NEXT: ;;#ASMSTART 11759; GFX940-NEXT: ; def v[2:3] 11760; GFX940-NEXT: ;;#ASMEND 11761; GFX940-NEXT: v_mov_b32_e32 v4, 0 11762; GFX940-NEXT: v_bfi_b32 v1, s2, v0, v3 11763; GFX940-NEXT: s_mov_b32 s2, 0x7060302 11764; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 11765; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 11766; GFX940-NEXT: s_waitcnt vmcnt(0) 11767; GFX940-NEXT: s_setpc_b64 s[30:31] 11768 %vec0 = call <4 x i16> asm "; def $0", "=v"() 11769 %vec1 = call <4 x i16> asm "; def $0", "=v"() 11770 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 0, i32 7> 11771 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 11772 ret void 11773} 11774 11775define void @v_shuffle_v4i16_v4i16__7_7_1_7(ptr addrspace(1) inreg %ptr) { 11776; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_1_7: 11777; GFX900: ; %bb.0: 11778; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11779; GFX900-NEXT: ;;#ASMSTART 11780; GFX900-NEXT: ; def v[0:1] 11781; GFX900-NEXT: ;;#ASMEND 11782; GFX900-NEXT: ;;#ASMSTART 11783; GFX900-NEXT: ; def v[1:2] 11784; GFX900-NEXT: ;;#ASMEND 11785; GFX900-NEXT: s_mov_b32 s4, 0x7060302 11786; GFX900-NEXT: v_mov_b32_e32 v3, 0 11787; GFX900-NEXT: v_perm_b32 v1, v2, v0, s4 11788; GFX900-NEXT: v_perm_b32 v0, v2, v2, s4 11789; GFX900-NEXT: global_store_dwordx2 v3, v[0:1], s[16:17] 11790; GFX900-NEXT: s_waitcnt vmcnt(0) 11791; GFX900-NEXT: s_setpc_b64 s[30:31] 11792; 11793; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_1_7: 11794; GFX90A: ; %bb.0: 11795; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11796; GFX90A-NEXT: ;;#ASMSTART 11797; GFX90A-NEXT: ; def v[0:1] 11798; GFX90A-NEXT: ;;#ASMEND 11799; 
GFX90A-NEXT: s_mov_b32 s4, 0x7060302 11800; GFX90A-NEXT: v_mov_b32_e32 v4, 0 11801; GFX90A-NEXT: ;;#ASMSTART 11802; GFX90A-NEXT: ; def v[2:3] 11803; GFX90A-NEXT: ;;#ASMEND 11804; GFX90A-NEXT: v_perm_b32 v1, v3, v0, s4 11805; GFX90A-NEXT: v_perm_b32 v0, v3, v3, s4 11806; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 11807; GFX90A-NEXT: s_waitcnt vmcnt(0) 11808; GFX90A-NEXT: s_setpc_b64 s[30:31] 11809; 11810; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_1_7: 11811; GFX940: ; %bb.0: 11812; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11813; GFX940-NEXT: ;;#ASMSTART 11814; GFX940-NEXT: ; def v[0:1] 11815; GFX940-NEXT: ;;#ASMEND 11816; GFX940-NEXT: s_mov_b32 s2, 0x7060302 11817; GFX940-NEXT: v_mov_b32_e32 v4, 0 11818; GFX940-NEXT: ;;#ASMSTART 11819; GFX940-NEXT: ; def v[2:3] 11820; GFX940-NEXT: ;;#ASMEND 11821; GFX940-NEXT: s_nop 0 11822; GFX940-NEXT: v_perm_b32 v1, v3, v0, s2 11823; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 11824; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 11825; GFX940-NEXT: s_waitcnt vmcnt(0) 11826; GFX940-NEXT: s_setpc_b64 s[30:31] 11827 %vec0 = call <4 x i16> asm "; def $0", "=v"() 11828 %vec1 = call <4 x i16> asm "; def $0", "=v"() 11829 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 1, i32 7> 11830 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 11831 ret void 11832} 11833 11834define void @v_shuffle_v4i16_v4i16__7_7_2_7(ptr addrspace(1) inreg %ptr) { 11835; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_2_7: 11836; GFX900: ; %bb.0: 11837; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11838; GFX900-NEXT: ;;#ASMSTART 11839; GFX900-NEXT: ; def v[0:1] 11840; GFX900-NEXT: ;;#ASMEND 11841; GFX900-NEXT: s_mov_b32 s4, 0xffff 11842; GFX900-NEXT: ;;#ASMSTART 11843; GFX900-NEXT: ; def v[2:3] 11844; GFX900-NEXT: ;;#ASMEND 11845; GFX900-NEXT: v_bfi_b32 v1, s4, v1, v3 11846; GFX900-NEXT: s_mov_b32 s4, 0x7060302 11847; GFX900-NEXT: v_mov_b32_e32 v4, 0 11848; GFX900-NEXT: v_perm_b32 v0, 
v3, v3, s4 11849; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 11850; GFX900-NEXT: s_waitcnt vmcnt(0) 11851; GFX900-NEXT: s_setpc_b64 s[30:31] 11852; 11853; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_2_7: 11854; GFX90A: ; %bb.0: 11855; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11856; GFX90A-NEXT: ;;#ASMSTART 11857; GFX90A-NEXT: ; def v[0:1] 11858; GFX90A-NEXT: ;;#ASMEND 11859; GFX90A-NEXT: s_mov_b32 s4, 0xffff 11860; GFX90A-NEXT: ;;#ASMSTART 11861; GFX90A-NEXT: ; def v[2:3] 11862; GFX90A-NEXT: ;;#ASMEND 11863; GFX90A-NEXT: v_bfi_b32 v1, s4, v1, v3 11864; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 11865; GFX90A-NEXT: v_mov_b32_e32 v4, 0 11866; GFX90A-NEXT: v_perm_b32 v0, v3, v3, s4 11867; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 11868; GFX90A-NEXT: s_waitcnt vmcnt(0) 11869; GFX90A-NEXT: s_setpc_b64 s[30:31] 11870; 11871; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_2_7: 11872; GFX940: ; %bb.0: 11873; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11874; GFX940-NEXT: ;;#ASMSTART 11875; GFX940-NEXT: ; def v[0:1] 11876; GFX940-NEXT: ;;#ASMEND 11877; GFX940-NEXT: s_mov_b32 s2, 0xffff 11878; GFX940-NEXT: ;;#ASMSTART 11879; GFX940-NEXT: ; def v[2:3] 11880; GFX940-NEXT: ;;#ASMEND 11881; GFX940-NEXT: v_mov_b32_e32 v4, 0 11882; GFX940-NEXT: v_bfi_b32 v1, s2, v1, v3 11883; GFX940-NEXT: s_mov_b32 s2, 0x7060302 11884; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 11885; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 11886; GFX940-NEXT: s_waitcnt vmcnt(0) 11887; GFX940-NEXT: s_setpc_b64 s[30:31] 11888 %vec0 = call <4 x i16> asm "; def $0", "=v"() 11889 %vec1 = call <4 x i16> asm "; def $0", "=v"() 11890 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 2, i32 7> 11891 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 11892 ret void 11893} 11894 11895define void @v_shuffle_v4i16_v4i16__7_7_3_7(ptr addrspace(1) inreg %ptr) { 11896; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_3_7: 11897; GFX900: ; %bb.0: 11898; 
GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11899; GFX900-NEXT: ;;#ASMSTART 11900; GFX900-NEXT: ; def v[0:1] 11901; GFX900-NEXT: ;;#ASMEND 11902; GFX900-NEXT: s_mov_b32 s4, 0x7060302 11903; GFX900-NEXT: v_mov_b32_e32 v4, 0 11904; GFX900-NEXT: ;;#ASMSTART 11905; GFX900-NEXT: ; def v[2:3] 11906; GFX900-NEXT: ;;#ASMEND 11907; GFX900-NEXT: v_perm_b32 v1, v3, v1, s4 11908; GFX900-NEXT: v_perm_b32 v0, v3, v3, s4 11909; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 11910; GFX900-NEXT: s_waitcnt vmcnt(0) 11911; GFX900-NEXT: s_setpc_b64 s[30:31] 11912; 11913; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_3_7: 11914; GFX90A: ; %bb.0: 11915; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11916; GFX90A-NEXT: ;;#ASMSTART 11917; GFX90A-NEXT: ; def v[0:1] 11918; GFX90A-NEXT: ;;#ASMEND 11919; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 11920; GFX90A-NEXT: v_mov_b32_e32 v4, 0 11921; GFX90A-NEXT: ;;#ASMSTART 11922; GFX90A-NEXT: ; def v[2:3] 11923; GFX90A-NEXT: ;;#ASMEND 11924; GFX90A-NEXT: v_perm_b32 v1, v3, v1, s4 11925; GFX90A-NEXT: v_perm_b32 v0, v3, v3, s4 11926; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] 11927; GFX90A-NEXT: s_waitcnt vmcnt(0) 11928; GFX90A-NEXT: s_setpc_b64 s[30:31] 11929; 11930; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_3_7: 11931; GFX940: ; %bb.0: 11932; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11933; GFX940-NEXT: ;;#ASMSTART 11934; GFX940-NEXT: ; def v[0:1] 11935; GFX940-NEXT: ;;#ASMEND 11936; GFX940-NEXT: s_mov_b32 s2, 0x7060302 11937; GFX940-NEXT: v_mov_b32_e32 v4, 0 11938; GFX940-NEXT: ;;#ASMSTART 11939; GFX940-NEXT: ; def v[2:3] 11940; GFX940-NEXT: ;;#ASMEND 11941; GFX940-NEXT: s_nop 0 11942; GFX940-NEXT: v_perm_b32 v1, v3, v1, s2 11943; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 11944; GFX940-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1 11945; GFX940-NEXT: s_waitcnt vmcnt(0) 11946; GFX940-NEXT: s_setpc_b64 s[30:31] 11947 %vec0 = call <4 x i16> asm "; def $0", "=v"() 11948 %vec1 = call <4 x i16> asm "; def $0", 
"=v"() 11949 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 3, i32 7> 11950 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 11951 ret void 11952} 11953 11954define void @v_shuffle_v4i16_v4i16__7_7_4_7(ptr addrspace(1) inreg %ptr) { 11955; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_4_7: 11956; GFX900: ; %bb.0: 11957; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11958; GFX900-NEXT: s_mov_b32 s4, 0xffff 11959; GFX900-NEXT: ;;#ASMSTART 11960; GFX900-NEXT: ; def v[0:1] 11961; GFX900-NEXT: ;;#ASMEND 11962; GFX900-NEXT: v_bfi_b32 v2, s4, v0, v1 11963; GFX900-NEXT: s_mov_b32 s4, 0x7060302 11964; GFX900-NEXT: v_mov_b32_e32 v3, 0 11965; GFX900-NEXT: v_perm_b32 v1, v1, v1, s4 11966; GFX900-NEXT: global_store_dwordx2 v3, v[1:2], s[16:17] 11967; GFX900-NEXT: s_waitcnt vmcnt(0) 11968; GFX900-NEXT: s_setpc_b64 s[30:31] 11969; 11970; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_4_7: 11971; GFX90A: ; %bb.0: 11972; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11973; GFX90A-NEXT: s_mov_b32 s4, 0xffff 11974; GFX90A-NEXT: ;;#ASMSTART 11975; GFX90A-NEXT: ; def v[0:1] 11976; GFX90A-NEXT: ;;#ASMEND 11977; GFX90A-NEXT: v_bfi_b32 v3, s4, v0, v1 11978; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 11979; GFX90A-NEXT: v_mov_b32_e32 v4, 0 11980; GFX90A-NEXT: v_perm_b32 v2, v1, v1, s4 11981; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17] 11982; GFX90A-NEXT: s_waitcnt vmcnt(0) 11983; GFX90A-NEXT: s_setpc_b64 s[30:31] 11984; 11985; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_4_7: 11986; GFX940: ; %bb.0: 11987; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11988; GFX940-NEXT: s_mov_b32 s2, 0xffff 11989; GFX940-NEXT: ;;#ASMSTART 11990; GFX940-NEXT: ; def v[0:1] 11991; GFX940-NEXT: ;;#ASMEND 11992; GFX940-NEXT: v_mov_b32_e32 v4, 0 11993; GFX940-NEXT: v_bfi_b32 v3, s2, v0, v1 11994; GFX940-NEXT: s_mov_b32 s2, 0x7060302 11995; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 11996; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 11997; 
GFX940-NEXT: s_waitcnt vmcnt(0) 11998; GFX940-NEXT: s_setpc_b64 s[30:31] 11999 %vec0 = call <4 x i16> asm "; def $0", "=v"() 12000 %vec1 = call <4 x i16> asm "; def $0", "=v"() 12001 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 4, i32 7> 12002 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 12003 ret void 12004} 12005 12006define void @v_shuffle_v4i16_v4i16__7_7_5_7(ptr addrspace(1) inreg %ptr) { 12007; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_5_7: 12008; GFX900: ; %bb.0: 12009; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12010; GFX900-NEXT: ;;#ASMSTART 12011; GFX900-NEXT: ; def v[0:1] 12012; GFX900-NEXT: ;;#ASMEND 12013; GFX900-NEXT: s_mov_b32 s4, 0x7060302 12014; GFX900-NEXT: v_mov_b32_e32 v3, 0 12015; GFX900-NEXT: v_perm_b32 v2, v1, v0, s4 12016; GFX900-NEXT: v_perm_b32 v1, v1, v1, s4 12017; GFX900-NEXT: global_store_dwordx2 v3, v[1:2], s[16:17] 12018; GFX900-NEXT: s_waitcnt vmcnt(0) 12019; GFX900-NEXT: s_setpc_b64 s[30:31] 12020; 12021; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_5_7: 12022; GFX90A: ; %bb.0: 12023; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12024; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 12025; GFX90A-NEXT: v_mov_b32_e32 v4, 0 12026; GFX90A-NEXT: ;;#ASMSTART 12027; GFX90A-NEXT: ; def v[0:1] 12028; GFX90A-NEXT: ;;#ASMEND 12029; GFX90A-NEXT: v_perm_b32 v3, v1, v0, s4 12030; GFX90A-NEXT: v_perm_b32 v2, v1, v1, s4 12031; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17] 12032; GFX90A-NEXT: s_waitcnt vmcnt(0) 12033; GFX90A-NEXT: s_setpc_b64 s[30:31] 12034; 12035; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_5_7: 12036; GFX940: ; %bb.0: 12037; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12038; GFX940-NEXT: s_mov_b32 s2, 0x7060302 12039; GFX940-NEXT: v_mov_b32_e32 v4, 0 12040; GFX940-NEXT: ;;#ASMSTART 12041; GFX940-NEXT: ; def v[0:1] 12042; GFX940-NEXT: ;;#ASMEND 12043; GFX940-NEXT: s_nop 0 12044; GFX940-NEXT: v_perm_b32 v3, v1, v0, s2 12045; GFX940-NEXT: v_perm_b32 v2, v1, v1, s2 
12046; GFX940-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1 12047; GFX940-NEXT: s_waitcnt vmcnt(0) 12048; GFX940-NEXT: s_setpc_b64 s[30:31] 12049 %vec0 = call <4 x i16> asm "; def $0", "=v"() 12050 %vec1 = call <4 x i16> asm "; def $0", "=v"() 12051 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 5, i32 7> 12052 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 12053 ret void 12054} 12055 12056define void @v_shuffle_v4i16_v4i16__7_7_6_7(ptr addrspace(1) inreg %ptr) { 12057; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_6_7: 12058; GFX900: ; %bb.0: 12059; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12060; GFX900-NEXT: ;;#ASMSTART 12061; GFX900-NEXT: ; def v[0:1] 12062; GFX900-NEXT: ;;#ASMEND 12063; GFX900-NEXT: s_mov_b32 s4, 0x7060302 12064; GFX900-NEXT: v_mov_b32_e32 v2, 0 12065; GFX900-NEXT: v_perm_b32 v0, v1, v1, s4 12066; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17] 12067; GFX900-NEXT: s_waitcnt vmcnt(0) 12068; GFX900-NEXT: s_setpc_b64 s[30:31] 12069; 12070; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_6_7: 12071; GFX90A: ; %bb.0: 12072; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12073; GFX90A-NEXT: ;;#ASMSTART 12074; GFX90A-NEXT: ; def v[0:1] 12075; GFX90A-NEXT: ;;#ASMEND 12076; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 12077; GFX90A-NEXT: v_mov_b32_e32 v2, 0 12078; GFX90A-NEXT: v_perm_b32 v0, v1, v1, s4 12079; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[16:17] 12080; GFX90A-NEXT: s_waitcnt vmcnt(0) 12081; GFX90A-NEXT: s_setpc_b64 s[30:31] 12082; 12083; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_6_7: 12084; GFX940: ; %bb.0: 12085; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12086; GFX940-NEXT: ;;#ASMSTART 12087; GFX940-NEXT: ; def v[0:1] 12088; GFX940-NEXT: ;;#ASMEND 12089; GFX940-NEXT: s_mov_b32 s2, 0x7060302 12090; GFX940-NEXT: v_mov_b32_e32 v2, 0 12091; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 12092; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 12093; GFX940-NEXT: 
s_waitcnt vmcnt(0) 12094; GFX940-NEXT: s_setpc_b64 s[30:31] 12095 %vec0 = call <4 x i16> asm "; def $0", "=v"() 12096 %vec1 = call <4 x i16> asm "; def $0", "=v"() 12097 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 7> 12098 store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8 12099 ret void 12100} 12101 12102define void @s_shuffle_v4i16_v4i16__u_u_u_u() { 12103; GFX9-LABEL: s_shuffle_v4i16_v4i16__u_u_u_u: 12104; GFX9: ; %bb.0: 12105; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12106; GFX9-NEXT: ;;#ASMSTART 12107; GFX9-NEXT: ; use s[8:9] 12108; GFX9-NEXT: ;;#ASMEND 12109; GFX9-NEXT: s_setpc_b64 s[30:31] 12110 %vec0 = call <4 x i16> asm "; def $0", "=s"() 12111 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> poison 12112 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 12113 ret void 12114} 12115 12116define void @s_shuffle_v4i16_v4i16__0_u_u_u() { 12117; GFX900-LABEL: s_shuffle_v4i16_v4i16__0_u_u_u: 12118; GFX900: ; %bb.0: 12119; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12120; GFX900-NEXT: ;;#ASMSTART 12121; GFX900-NEXT: ; def s[8:9] 12122; GFX900-NEXT: ;;#ASMEND 12123; GFX900-NEXT: ;;#ASMSTART 12124; GFX900-NEXT: ; use s[8:9] 12125; GFX900-NEXT: ;;#ASMEND 12126; GFX900-NEXT: s_setpc_b64 s[30:31] 12127; 12128; GFX90A-LABEL: s_shuffle_v4i16_v4i16__0_u_u_u: 12129; GFX90A: ; %bb.0: 12130; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12131; GFX90A-NEXT: ;;#ASMSTART 12132; GFX90A-NEXT: ; def s[8:9] 12133; GFX90A-NEXT: ;;#ASMEND 12134; GFX90A-NEXT: ;;#ASMSTART 12135; GFX90A-NEXT: ; use s[8:9] 12136; GFX90A-NEXT: ;;#ASMEND 12137; GFX90A-NEXT: s_setpc_b64 s[30:31] 12138; 12139; GFX940-LABEL: s_shuffle_v4i16_v4i16__0_u_u_u: 12140; GFX940: ; %bb.0: 12141; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12142; GFX940-NEXT: ;;#ASMSTART 12143; GFX940-NEXT: ; def s[8:9] 12144; GFX940-NEXT: ;;#ASMEND 12145; GFX940-NEXT: s_nop 0 12146; GFX940-NEXT: ;;#ASMSTART 12147; 
GFX940-NEXT: ; use s[8:9] 12148; GFX940-NEXT: ;;#ASMEND 12149; GFX940-NEXT: s_setpc_b64 s[30:31] 12150 %vec0 = call <4 x i16> asm "; def $0", "=s"() 12151 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison> 12152 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 12153 ret void 12154} 12155 12156define void @s_shuffle_v4i16_v4i16__1_u_u_u() { 12157; GFX900-LABEL: s_shuffle_v4i16_v4i16__1_u_u_u: 12158; GFX900: ; %bb.0: 12159; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12160; GFX900-NEXT: ;;#ASMSTART 12161; GFX900-NEXT: ; def s[4:5] 12162; GFX900-NEXT: ;;#ASMEND 12163; GFX900-NEXT: s_lshr_b32 s8, s4, 16 12164; GFX900-NEXT: ;;#ASMSTART 12165; GFX900-NEXT: ; use s[8:9] 12166; GFX900-NEXT: ;;#ASMEND 12167; GFX900-NEXT: s_setpc_b64 s[30:31] 12168; 12169; GFX90A-LABEL: s_shuffle_v4i16_v4i16__1_u_u_u: 12170; GFX90A: ; %bb.0: 12171; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12172; GFX90A-NEXT: ;;#ASMSTART 12173; GFX90A-NEXT: ; def s[4:5] 12174; GFX90A-NEXT: ;;#ASMEND 12175; GFX90A-NEXT: s_lshr_b32 s8, s4, 16 12176; GFX90A-NEXT: ;;#ASMSTART 12177; GFX90A-NEXT: ; use s[8:9] 12178; GFX90A-NEXT: ;;#ASMEND 12179; GFX90A-NEXT: s_setpc_b64 s[30:31] 12180; 12181; GFX940-LABEL: s_shuffle_v4i16_v4i16__1_u_u_u: 12182; GFX940: ; %bb.0: 12183; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12184; GFX940-NEXT: ;;#ASMSTART 12185; GFX940-NEXT: ; def s[0:1] 12186; GFX940-NEXT: ;;#ASMEND 12187; GFX940-NEXT: s_lshr_b32 s8, s0, 16 12188; GFX940-NEXT: ;;#ASMSTART 12189; GFX940-NEXT: ; use s[8:9] 12190; GFX940-NEXT: ;;#ASMEND 12191; GFX940-NEXT: s_setpc_b64 s[30:31] 12192 %vec0 = call <4 x i16> asm "; def $0", "=s"() 12193 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison> 12194 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 12195 ret void 12196} 12197 12198define void @s_shuffle_v4i16_v4i16__2_u_u_u() { 12199; 
GFX900-LABEL: s_shuffle_v4i16_v4i16__2_u_u_u: 12200; GFX900: ; %bb.0: 12201; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12202; GFX900-NEXT: ;;#ASMSTART 12203; GFX900-NEXT: ; def s[4:5] 12204; GFX900-NEXT: ;;#ASMEND 12205; GFX900-NEXT: s_mov_b32 s8, s5 12206; GFX900-NEXT: ;;#ASMSTART 12207; GFX900-NEXT: ; use s[8:9] 12208; GFX900-NEXT: ;;#ASMEND 12209; GFX900-NEXT: s_setpc_b64 s[30:31] 12210; 12211; GFX90A-LABEL: s_shuffle_v4i16_v4i16__2_u_u_u: 12212; GFX90A: ; %bb.0: 12213; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12214; GFX90A-NEXT: ;;#ASMSTART 12215; GFX90A-NEXT: ; def s[4:5] 12216; GFX90A-NEXT: ;;#ASMEND 12217; GFX90A-NEXT: s_mov_b32 s8, s5 12218; GFX90A-NEXT: ;;#ASMSTART 12219; GFX90A-NEXT: ; use s[8:9] 12220; GFX90A-NEXT: ;;#ASMEND 12221; GFX90A-NEXT: s_setpc_b64 s[30:31] 12222; 12223; GFX940-LABEL: s_shuffle_v4i16_v4i16__2_u_u_u: 12224; GFX940: ; %bb.0: 12225; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12226; GFX940-NEXT: ;;#ASMSTART 12227; GFX940-NEXT: ; def s[0:1] 12228; GFX940-NEXT: ;;#ASMEND 12229; GFX940-NEXT: s_mov_b32 s8, s1 12230; GFX940-NEXT: ;;#ASMSTART 12231; GFX940-NEXT: ; use s[8:9] 12232; GFX940-NEXT: ;;#ASMEND 12233; GFX940-NEXT: s_setpc_b64 s[30:31] 12234 %vec0 = call <4 x i16> asm "; def $0", "=s"() 12235 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 2, i32 poison, i32 poison, i32 poison> 12236 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 12237 ret void 12238} 12239 12240define void @s_shuffle_v4i16_v4i16__3_u_u_u() { 12241; GFX900-LABEL: s_shuffle_v4i16_v4i16__3_u_u_u: 12242; GFX900: ; %bb.0: 12243; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12244; GFX900-NEXT: ;;#ASMSTART 12245; GFX900-NEXT: ; def s[4:5] 12246; GFX900-NEXT: ;;#ASMEND 12247; GFX900-NEXT: s_lshr_b32 s8, s5, 16 12248; GFX900-NEXT: ;;#ASMSTART 12249; GFX900-NEXT: ; use s[8:9] 12250; GFX900-NEXT: ;;#ASMEND 12251; GFX900-NEXT: s_setpc_b64 s[30:31] 12252; 12253; GFX90A-LABEL: 
s_shuffle_v4i16_v4i16__3_u_u_u: 12254; GFX90A: ; %bb.0: 12255; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12256; GFX90A-NEXT: ;;#ASMSTART 12257; GFX90A-NEXT: ; def s[4:5] 12258; GFX90A-NEXT: ;;#ASMEND 12259; GFX90A-NEXT: s_lshr_b32 s8, s5, 16 12260; GFX90A-NEXT: ;;#ASMSTART 12261; GFX90A-NEXT: ; use s[8:9] 12262; GFX90A-NEXT: ;;#ASMEND 12263; GFX90A-NEXT: s_setpc_b64 s[30:31] 12264; 12265; GFX940-LABEL: s_shuffle_v4i16_v4i16__3_u_u_u: 12266; GFX940: ; %bb.0: 12267; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12268; GFX940-NEXT: ;;#ASMSTART 12269; GFX940-NEXT: ; def s[0:1] 12270; GFX940-NEXT: ;;#ASMEND 12271; GFX940-NEXT: s_lshr_b32 s8, s1, 16 12272; GFX940-NEXT: ;;#ASMSTART 12273; GFX940-NEXT: ; use s[8:9] 12274; GFX940-NEXT: ;;#ASMEND 12275; GFX940-NEXT: s_setpc_b64 s[30:31] 12276 %vec0 = call <4 x i16> asm "; def $0", "=s"() 12277 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 3, i32 poison, i32 poison, i32 poison> 12278 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 12279 ret void 12280} 12281 12282define void @s_shuffle_v4i16_v4i16__4_u_u_u() { 12283; GFX9-LABEL: s_shuffle_v4i16_v4i16__4_u_u_u: 12284; GFX9: ; %bb.0: 12285; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12286; GFX9-NEXT: ;;#ASMSTART 12287; GFX9-NEXT: ; use s[8:9] 12288; GFX9-NEXT: ;;#ASMEND 12289; GFX9-NEXT: s_setpc_b64 s[30:31] 12290 %vec0 = call <4 x i16> asm "; def $0", "=s"() 12291 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 4, i32 poison, i32 poison, i32 poison> 12292 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 12293 ret void 12294} 12295 12296define void @s_shuffle_v4i16_v4i16__5_u_u_u() { 12297; GFX900-LABEL: s_shuffle_v4i16_v4i16__5_u_u_u: 12298; GFX900: ; %bb.0: 12299; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12300; GFX900-NEXT: ;;#ASMSTART 12301; GFX900-NEXT: ; def s[4:5] 12302; GFX900-NEXT: ;;#ASMEND 12303; GFX900-NEXT: s_lshr_b32 s8, s4, 16 12304; 
GFX900-NEXT: ;;#ASMSTART 12305; GFX900-NEXT: ; use s[8:9] 12306; GFX900-NEXT: ;;#ASMEND 12307; GFX900-NEXT: s_setpc_b64 s[30:31] 12308; 12309; GFX90A-LABEL: s_shuffle_v4i16_v4i16__5_u_u_u: 12310; GFX90A: ; %bb.0: 12311; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12312; GFX90A-NEXT: ;;#ASMSTART 12313; GFX90A-NEXT: ; def s[4:5] 12314; GFX90A-NEXT: ;;#ASMEND 12315; GFX90A-NEXT: s_lshr_b32 s8, s4, 16 12316; GFX90A-NEXT: ;;#ASMSTART 12317; GFX90A-NEXT: ; use s[8:9] 12318; GFX90A-NEXT: ;;#ASMEND 12319; GFX90A-NEXT: s_setpc_b64 s[30:31] 12320; 12321; GFX940-LABEL: s_shuffle_v4i16_v4i16__5_u_u_u: 12322; GFX940: ; %bb.0: 12323; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12324; GFX940-NEXT: ;;#ASMSTART 12325; GFX940-NEXT: ; def s[0:1] 12326; GFX940-NEXT: ;;#ASMEND 12327; GFX940-NEXT: s_lshr_b32 s8, s0, 16 12328; GFX940-NEXT: ;;#ASMSTART 12329; GFX940-NEXT: ; use s[8:9] 12330; GFX940-NEXT: ;;#ASMEND 12331; GFX940-NEXT: s_setpc_b64 s[30:31] 12332 %vec0 = call <4 x i16> asm "; def $0", "=s"() 12333 %vec1 = call <4 x i16> asm "; def $0", "=s"() 12334 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 5, i32 poison, i32 poison, i32 poison> 12335 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 12336 ret void 12337} 12338 12339define void @s_shuffle_v4i16_v4i16__6_u_u_u() { 12340; GFX900-LABEL: s_shuffle_v4i16_v4i16__6_u_u_u: 12341; GFX900: ; %bb.0: 12342; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12343; GFX900-NEXT: ;;#ASMSTART 12344; GFX900-NEXT: ; def s[4:5] 12345; GFX900-NEXT: ;;#ASMEND 12346; GFX900-NEXT: s_mov_b32 s8, s5 12347; GFX900-NEXT: ;;#ASMSTART 12348; GFX900-NEXT: ; use s[8:9] 12349; GFX900-NEXT: ;;#ASMEND 12350; GFX900-NEXT: s_setpc_b64 s[30:31] 12351; 12352; GFX90A-LABEL: s_shuffle_v4i16_v4i16__6_u_u_u: 12353; GFX90A: ; %bb.0: 12354; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12355; GFX90A-NEXT: ;;#ASMSTART 12356; GFX90A-NEXT: ; def s[4:5] 12357; GFX90A-NEXT: ;;#ASMEND 12358; GFX90A-NEXT: 
s_mov_b32 s8, s5 12359; GFX90A-NEXT: ;;#ASMSTART 12360; GFX90A-NEXT: ; use s[8:9] 12361; GFX90A-NEXT: ;;#ASMEND 12362; GFX90A-NEXT: s_setpc_b64 s[30:31] 12363; 12364; GFX940-LABEL: s_shuffle_v4i16_v4i16__6_u_u_u: 12365; GFX940: ; %bb.0: 12366; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12367; GFX940-NEXT: ;;#ASMSTART 12368; GFX940-NEXT: ; def s[0:1] 12369; GFX940-NEXT: ;;#ASMEND 12370; GFX940-NEXT: s_mov_b32 s8, s1 12371; GFX940-NEXT: ;;#ASMSTART 12372; GFX940-NEXT: ; use s[8:9] 12373; GFX940-NEXT: ;;#ASMEND 12374; GFX940-NEXT: s_setpc_b64 s[30:31] 12375 %vec0 = call <4 x i16> asm "; def $0", "=s"() 12376 %vec1 = call <4 x i16> asm "; def $0", "=s"() 12377 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 6, i32 poison, i32 poison, i32 poison> 12378 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 12379 ret void 12380} 12381 12382define void @s_shuffle_v4i16_v4i16__7_u_u_u() { 12383; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_u_u_u: 12384; GFX900: ; %bb.0: 12385; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12386; GFX900-NEXT: ;;#ASMSTART 12387; GFX900-NEXT: ; def s[4:5] 12388; GFX900-NEXT: ;;#ASMEND 12389; GFX900-NEXT: s_lshr_b32 s8, s5, 16 12390; GFX900-NEXT: ;;#ASMSTART 12391; GFX900-NEXT: ; use s[8:9] 12392; GFX900-NEXT: ;;#ASMEND 12393; GFX900-NEXT: s_setpc_b64 s[30:31] 12394; 12395; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_u_u_u: 12396; GFX90A: ; %bb.0: 12397; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12398; GFX90A-NEXT: ;;#ASMSTART 12399; GFX90A-NEXT: ; def s[4:5] 12400; GFX90A-NEXT: ;;#ASMEND 12401; GFX90A-NEXT: s_lshr_b32 s8, s5, 16 12402; GFX90A-NEXT: ;;#ASMSTART 12403; GFX90A-NEXT: ; use s[8:9] 12404; GFX90A-NEXT: ;;#ASMEND 12405; GFX90A-NEXT: s_setpc_b64 s[30:31] 12406; 12407; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_u_u_u: 12408; GFX940: ; %bb.0: 12409; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12410; GFX940-NEXT: ;;#ASMSTART 12411; GFX940-NEXT: ; def s[0:1] 12412; GFX940-NEXT: 
;;#ASMEND 12413; GFX940-NEXT: s_lshr_b32 s8, s1, 16 12414; GFX940-NEXT: ;;#ASMSTART 12415; GFX940-NEXT: ; use s[8:9] 12416; GFX940-NEXT: ;;#ASMEND 12417; GFX940-NEXT: s_setpc_b64 s[30:31] 12418 %vec0 = call <4 x i16> asm "; def $0", "=s"() 12419 %vec1 = call <4 x i16> asm "; def $0", "=s"() 12420 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 poison, i32 poison, i32 poison> 12421 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 12422 ret void 12423} 12424 12425define void @s_shuffle_v4i16_v4i16__7_0_u_u() { 12426; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_0_u_u: 12427; GFX900: ; %bb.0: 12428; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12429; GFX900-NEXT: ;;#ASMSTART 12430; GFX900-NEXT: ; def s[4:5] 12431; GFX900-NEXT: ;;#ASMEND 12432; GFX900-NEXT: ;;#ASMSTART 12433; GFX900-NEXT: ; def s[6:7] 12434; GFX900-NEXT: ;;#ASMEND 12435; GFX900-NEXT: s_lshr_b32 s5, s7, 16 12436; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4 12437; GFX900-NEXT: ;;#ASMSTART 12438; GFX900-NEXT: ; use s[8:9] 12439; GFX900-NEXT: ;;#ASMEND 12440; GFX900-NEXT: s_setpc_b64 s[30:31] 12441; 12442; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_0_u_u: 12443; GFX90A: ; %bb.0: 12444; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12445; GFX90A-NEXT: ;;#ASMSTART 12446; GFX90A-NEXT: ; def s[4:5] 12447; GFX90A-NEXT: ;;#ASMEND 12448; GFX90A-NEXT: ;;#ASMSTART 12449; GFX90A-NEXT: ; def s[6:7] 12450; GFX90A-NEXT: ;;#ASMEND 12451; GFX90A-NEXT: s_lshr_b32 s5, s7, 16 12452; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4 12453; GFX90A-NEXT: ;;#ASMSTART 12454; GFX90A-NEXT: ; use s[8:9] 12455; GFX90A-NEXT: ;;#ASMEND 12456; GFX90A-NEXT: s_setpc_b64 s[30:31] 12457; 12458; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_0_u_u: 12459; GFX940: ; %bb.0: 12460; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12461; GFX940-NEXT: ;;#ASMSTART 12462; GFX940-NEXT: ; def s[0:1] 12463; GFX940-NEXT: ;;#ASMEND 12464; GFX940-NEXT: ;;#ASMSTART 12465; GFX940-NEXT: ; def s[2:3] 12466; GFX940-NEXT: 
;;#ASMEND 12467; GFX940-NEXT: s_lshr_b32 s1, s3, 16 12468; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 12469; GFX940-NEXT: ;;#ASMSTART 12470; GFX940-NEXT: ; use s[8:9] 12471; GFX940-NEXT: ;;#ASMEND 12472; GFX940-NEXT: s_setpc_b64 s[30:31] 12473 %vec0 = call <4 x i16> asm "; def $0", "=s"() 12474 %vec1 = call <4 x i16> asm "; def $0", "=s"() 12475 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 0, i32 poison, i32 poison> 12476 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 12477 ret void 12478} 12479 12480define void @s_shuffle_v4i16_v4i16__7_1_u_u() { 12481; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_1_u_u: 12482; GFX900: ; %bb.0: 12483; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12484; GFX900-NEXT: ;;#ASMSTART 12485; GFX900-NEXT: ; def s[4:5] 12486; GFX900-NEXT: ;;#ASMEND 12487; GFX900-NEXT: ;;#ASMSTART 12488; GFX900-NEXT: ; def s[6:7] 12489; GFX900-NEXT: ;;#ASMEND 12490; GFX900-NEXT: s_pack_hh_b32_b16 s8, s7, s4 12491; GFX900-NEXT: ;;#ASMSTART 12492; GFX900-NEXT: ; use s[8:9] 12493; GFX900-NEXT: ;;#ASMEND 12494; GFX900-NEXT: s_setpc_b64 s[30:31] 12495; 12496; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_1_u_u: 12497; GFX90A: ; %bb.0: 12498; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12499; GFX90A-NEXT: ;;#ASMSTART 12500; GFX90A-NEXT: ; def s[4:5] 12501; GFX90A-NEXT: ;;#ASMEND 12502; GFX90A-NEXT: ;;#ASMSTART 12503; GFX90A-NEXT: ; def s[6:7] 12504; GFX90A-NEXT: ;;#ASMEND 12505; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s7, s4 12506; GFX90A-NEXT: ;;#ASMSTART 12507; GFX90A-NEXT: ; use s[8:9] 12508; GFX90A-NEXT: ;;#ASMEND 12509; GFX90A-NEXT: s_setpc_b64 s[30:31] 12510; 12511; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_1_u_u: 12512; GFX940: ; %bb.0: 12513; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12514; GFX940-NEXT: ;;#ASMSTART 12515; GFX940-NEXT: ; def s[0:1] 12516; GFX940-NEXT: ;;#ASMEND 12517; GFX940-NEXT: ;;#ASMSTART 12518; GFX940-NEXT: ; def s[2:3] 12519; GFX940-NEXT: ;;#ASMEND 12520; GFX940-NEXT: 
s_pack_hh_b32_b16 s8, s3, s0 12521; GFX940-NEXT: ;;#ASMSTART 12522; GFX940-NEXT: ; use s[8:9] 12523; GFX940-NEXT: ;;#ASMEND 12524; GFX940-NEXT: s_setpc_b64 s[30:31] 12525 %vec0 = call <4 x i16> asm "; def $0", "=s"() 12526 %vec1 = call <4 x i16> asm "; def $0", "=s"() 12527 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 1, i32 poison, i32 poison> 12528 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 12529 ret void 12530} 12531 12532define void @s_shuffle_v4i16_v4i16__7_2_u_u() { 12533; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_2_u_u: 12534; GFX900: ; %bb.0: 12535; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12536; GFX900-NEXT: ;;#ASMSTART 12537; GFX900-NEXT: ; def s[4:5] 12538; GFX900-NEXT: ;;#ASMEND 12539; GFX900-NEXT: ;;#ASMSTART 12540; GFX900-NEXT: ; def s[6:7] 12541; GFX900-NEXT: ;;#ASMEND 12542; GFX900-NEXT: s_lshr_b32 s4, s7, 16 12543; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s5 12544; GFX900-NEXT: ;;#ASMSTART 12545; GFX900-NEXT: ; use s[8:9] 12546; GFX900-NEXT: ;;#ASMEND 12547; GFX900-NEXT: s_setpc_b64 s[30:31] 12548; 12549; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_2_u_u: 12550; GFX90A: ; %bb.0: 12551; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12552; GFX90A-NEXT: ;;#ASMSTART 12553; GFX90A-NEXT: ; def s[4:5] 12554; GFX90A-NEXT: ;;#ASMEND 12555; GFX90A-NEXT: ;;#ASMSTART 12556; GFX90A-NEXT: ; def s[6:7] 12557; GFX90A-NEXT: ;;#ASMEND 12558; GFX90A-NEXT: s_lshr_b32 s4, s7, 16 12559; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s5 12560; GFX90A-NEXT: ;;#ASMSTART 12561; GFX90A-NEXT: ; use s[8:9] 12562; GFX90A-NEXT: ;;#ASMEND 12563; GFX90A-NEXT: s_setpc_b64 s[30:31] 12564; 12565; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_2_u_u: 12566; GFX940: ; %bb.0: 12567; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12568; GFX940-NEXT: ;;#ASMSTART 12569; GFX940-NEXT: ; def s[0:1] 12570; GFX940-NEXT: ;;#ASMEND 12571; GFX940-NEXT: ;;#ASMSTART 12572; GFX940-NEXT: ; def s[2:3] 12573; GFX940-NEXT: ;;#ASMEND 12574; 
GFX940-NEXT: s_lshr_b32 s0, s3, 16 12575; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 12576; GFX940-NEXT: ;;#ASMSTART 12577; GFX940-NEXT: ; use s[8:9] 12578; GFX940-NEXT: ;;#ASMEND 12579; GFX940-NEXT: s_setpc_b64 s[30:31] 12580 %vec0 = call <4 x i16> asm "; def $0", "=s"() 12581 %vec1 = call <4 x i16> asm "; def $0", "=s"() 12582 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 2, i32 poison, i32 poison> 12583 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 12584 ret void 12585} 12586 12587define void @s_shuffle_v4i16_v4i16__7_3_u_u() { 12588; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_3_u_u: 12589; GFX900: ; %bb.0: 12590; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12591; GFX900-NEXT: ;;#ASMSTART 12592; GFX900-NEXT: ; def s[4:5] 12593; GFX900-NEXT: ;;#ASMEND 12594; GFX900-NEXT: ;;#ASMSTART 12595; GFX900-NEXT: ; def s[6:7] 12596; GFX900-NEXT: ;;#ASMEND 12597; GFX900-NEXT: s_pack_hh_b32_b16 s8, s7, s5 12598; GFX900-NEXT: ;;#ASMSTART 12599; GFX900-NEXT: ; use s[8:9] 12600; GFX900-NEXT: ;;#ASMEND 12601; GFX900-NEXT: s_setpc_b64 s[30:31] 12602; 12603; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_3_u_u: 12604; GFX90A: ; %bb.0: 12605; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12606; GFX90A-NEXT: ;;#ASMSTART 12607; GFX90A-NEXT: ; def s[4:5] 12608; GFX90A-NEXT: ;;#ASMEND 12609; GFX90A-NEXT: ;;#ASMSTART 12610; GFX90A-NEXT: ; def s[6:7] 12611; GFX90A-NEXT: ;;#ASMEND 12612; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s7, s5 12613; GFX90A-NEXT: ;;#ASMSTART 12614; GFX90A-NEXT: ; use s[8:9] 12615; GFX90A-NEXT: ;;#ASMEND 12616; GFX90A-NEXT: s_setpc_b64 s[30:31] 12617; 12618; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_3_u_u: 12619; GFX940: ; %bb.0: 12620; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12621; GFX940-NEXT: ;;#ASMSTART 12622; GFX940-NEXT: ; def s[0:1] 12623; GFX940-NEXT: ;;#ASMEND 12624; GFX940-NEXT: ;;#ASMSTART 12625; GFX940-NEXT: ; def s[2:3] 12626; GFX940-NEXT: ;;#ASMEND 12627; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, 
s1 12628; GFX940-NEXT: ;;#ASMSTART 12629; GFX940-NEXT: ; use s[8:9] 12630; GFX940-NEXT: ;;#ASMEND 12631; GFX940-NEXT: s_setpc_b64 s[30:31] 12632 %vec0 = call <4 x i16> asm "; def $0", "=s"() 12633 %vec1 = call <4 x i16> asm "; def $0", "=s"() 12634 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 3, i32 poison, i32 poison> 12635 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 12636 ret void 12637} 12638 12639define void @s_shuffle_v4i16_v4i16__7_4_u_u() { 12640; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_4_u_u: 12641; GFX900: ; %bb.0: 12642; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12643; GFX900-NEXT: ;;#ASMSTART 12644; GFX900-NEXT: ; def s[4:5] 12645; GFX900-NEXT: ;;#ASMEND 12646; GFX900-NEXT: s_lshr_b32 s5, s5, 16 12647; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4 12648; GFX900-NEXT: ;;#ASMSTART 12649; GFX900-NEXT: ; use s[8:9] 12650; GFX900-NEXT: ;;#ASMEND 12651; GFX900-NEXT: s_setpc_b64 s[30:31] 12652; 12653; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_4_u_u: 12654; GFX90A: ; %bb.0: 12655; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12656; GFX90A-NEXT: ;;#ASMSTART 12657; GFX90A-NEXT: ; def s[4:5] 12658; GFX90A-NEXT: ;;#ASMEND 12659; GFX90A-NEXT: s_lshr_b32 s5, s5, 16 12660; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4 12661; GFX90A-NEXT: ;;#ASMSTART 12662; GFX90A-NEXT: ; use s[8:9] 12663; GFX90A-NEXT: ;;#ASMEND 12664; GFX90A-NEXT: s_setpc_b64 s[30:31] 12665; 12666; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_4_u_u: 12667; GFX940: ; %bb.0: 12668; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12669; GFX940-NEXT: ;;#ASMSTART 12670; GFX940-NEXT: ; def s[0:1] 12671; GFX940-NEXT: ;;#ASMEND 12672; GFX940-NEXT: s_lshr_b32 s1, s1, 16 12673; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 12674; GFX940-NEXT: ;;#ASMSTART 12675; GFX940-NEXT: ; use s[8:9] 12676; GFX940-NEXT: ;;#ASMEND 12677; GFX940-NEXT: s_setpc_b64 s[30:31] 12678 %vec0 = call <4 x i16> asm "; def $0", "=s"() 12679 %vec1 = call <4 x i16> asm "; def $0", 
"=s"() 12680 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 4, i32 poison, i32 poison> 12681 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 12682 ret void 12683} 12684 12685define void @s_shuffle_v4i16_v4i16__7_5_u_u() { 12686; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_5_u_u: 12687; GFX900: ; %bb.0: 12688; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12689; GFX900-NEXT: ;;#ASMSTART 12690; GFX900-NEXT: ; def s[4:5] 12691; GFX900-NEXT: ;;#ASMEND 12692; GFX900-NEXT: s_pack_hh_b32_b16 s8, s5, s4 12693; GFX900-NEXT: ;;#ASMSTART 12694; GFX900-NEXT: ; use s[8:9] 12695; GFX900-NEXT: ;;#ASMEND 12696; GFX900-NEXT: s_setpc_b64 s[30:31] 12697; 12698; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_5_u_u: 12699; GFX90A: ; %bb.0: 12700; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12701; GFX90A-NEXT: ;;#ASMSTART 12702; GFX90A-NEXT: ; def s[4:5] 12703; GFX90A-NEXT: ;;#ASMEND 12704; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s5, s4 12705; GFX90A-NEXT: ;;#ASMSTART 12706; GFX90A-NEXT: ; use s[8:9] 12707; GFX90A-NEXT: ;;#ASMEND 12708; GFX90A-NEXT: s_setpc_b64 s[30:31] 12709; 12710; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_5_u_u: 12711; GFX940: ; %bb.0: 12712; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12713; GFX940-NEXT: ;;#ASMSTART 12714; GFX940-NEXT: ; def s[0:1] 12715; GFX940-NEXT: ;;#ASMEND 12716; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s0 12717; GFX940-NEXT: ;;#ASMSTART 12718; GFX940-NEXT: ; use s[8:9] 12719; GFX940-NEXT: ;;#ASMEND 12720; GFX940-NEXT: s_setpc_b64 s[30:31] 12721 %vec0 = call <4 x i16> asm "; def $0", "=s"() 12722 %vec1 = call <4 x i16> asm "; def $0", "=s"() 12723 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 5, i32 poison, i32 poison> 12724 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 12725 ret void 12726} 12727 12728define void @s_shuffle_v4i16_v4i16__7_6_u_u() { 12729; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_6_u_u: 12730; GFX900: ; %bb.0: 12731; GFX900-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12732; GFX900-NEXT: ;;#ASMSTART 12733; GFX900-NEXT: ; def s[4:5] 12734; GFX900-NEXT: ;;#ASMEND 12735; GFX900-NEXT: s_lshr_b32 s4, s5, 16 12736; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s5 12737; GFX900-NEXT: ;;#ASMSTART 12738; GFX900-NEXT: ; use s[8:9] 12739; GFX900-NEXT: ;;#ASMEND 12740; GFX900-NEXT: s_setpc_b64 s[30:31] 12741; 12742; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_6_u_u: 12743; GFX90A: ; %bb.0: 12744; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12745; GFX90A-NEXT: ;;#ASMSTART 12746; GFX90A-NEXT: ; def s[4:5] 12747; GFX90A-NEXT: ;;#ASMEND 12748; GFX90A-NEXT: s_lshr_b32 s4, s5, 16 12749; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s5 12750; GFX90A-NEXT: ;;#ASMSTART 12751; GFX90A-NEXT: ; use s[8:9] 12752; GFX90A-NEXT: ;;#ASMEND 12753; GFX90A-NEXT: s_setpc_b64 s[30:31] 12754; 12755; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_6_u_u: 12756; GFX940: ; %bb.0: 12757; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12758; GFX940-NEXT: ;;#ASMSTART 12759; GFX940-NEXT: ; def s[0:1] 12760; GFX940-NEXT: ;;#ASMEND 12761; GFX940-NEXT: s_lshr_b32 s0, s1, 16 12762; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 12763; GFX940-NEXT: ;;#ASMSTART 12764; GFX940-NEXT: ; use s[8:9] 12765; GFX940-NEXT: ;;#ASMEND 12766; GFX940-NEXT: s_setpc_b64 s[30:31] 12767 %vec0 = call <4 x i16> asm "; def $0", "=s"() 12768 %vec1 = call <4 x i16> asm "; def $0", "=s"() 12769 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 6, i32 poison, i32 poison> 12770 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 12771 ret void 12772} 12773 12774define void @s_shuffle_v4i16_v4i16__7_7_u_u() { 12775; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_u_u: 12776; GFX900: ; %bb.0: 12777; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12778; GFX900-NEXT: ;;#ASMSTART 12779; GFX900-NEXT: ; def s[4:5] 12780; GFX900-NEXT: ;;#ASMEND 12781; GFX900-NEXT: s_pack_hh_b32_b16 s8, s5, s5 12782; GFX900-NEXT: ;;#ASMSTART 12783; GFX900-NEXT: ; use 
s[8:9] 12784; GFX900-NEXT: ;;#ASMEND 12785; GFX900-NEXT: s_setpc_b64 s[30:31] 12786; 12787; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_u_u: 12788; GFX90A: ; %bb.0: 12789; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12790; GFX90A-NEXT: ;;#ASMSTART 12791; GFX90A-NEXT: ; def s[4:5] 12792; GFX90A-NEXT: ;;#ASMEND 12793; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s5, s5 12794; GFX90A-NEXT: ;;#ASMSTART 12795; GFX90A-NEXT: ; use s[8:9] 12796; GFX90A-NEXT: ;;#ASMEND 12797; GFX90A-NEXT: s_setpc_b64 s[30:31] 12798; 12799; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_u_u: 12800; GFX940: ; %bb.0: 12801; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12802; GFX940-NEXT: ;;#ASMSTART 12803; GFX940-NEXT: ; def s[0:1] 12804; GFX940-NEXT: ;;#ASMEND 12805; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s1 12806; GFX940-NEXT: ;;#ASMSTART 12807; GFX940-NEXT: ; use s[8:9] 12808; GFX940-NEXT: ;;#ASMEND 12809; GFX940-NEXT: s_setpc_b64 s[30:31] 12810 %vec0 = call <4 x i16> asm "; def $0", "=s"() 12811 %vec1 = call <4 x i16> asm "; def $0", "=s"() 12812 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 poison, i32 poison> 12813 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 12814 ret void 12815} 12816 12817define void @s_shuffle_v4i16_v4i16__7_7_0_u() { 12818; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_0_u: 12819; GFX900: ; %bb.0: 12820; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12821; GFX900-NEXT: ;;#ASMSTART 12822; GFX900-NEXT: ; def s[4:5] 12823; GFX900-NEXT: ;;#ASMEND 12824; GFX900-NEXT: ;;#ASMSTART 12825; GFX900-NEXT: ; def s[6:7] 12826; GFX900-NEXT: ;;#ASMEND 12827; GFX900-NEXT: s_pack_hh_b32_b16 s8, s7, s7 12828; GFX900-NEXT: s_mov_b32 s9, s4 12829; GFX900-NEXT: ;;#ASMSTART 12830; GFX900-NEXT: ; use s[8:9] 12831; GFX900-NEXT: ;;#ASMEND 12832; GFX900-NEXT: s_setpc_b64 s[30:31] 12833; 12834; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_0_u: 12835; GFX90A: ; %bb.0: 12836; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12837; 
GFX90A-NEXT: ;;#ASMSTART 12838; GFX90A-NEXT: ; def s[4:5] 12839; GFX90A-NEXT: ;;#ASMEND 12840; GFX90A-NEXT: ;;#ASMSTART 12841; GFX90A-NEXT: ; def s[6:7] 12842; GFX90A-NEXT: ;;#ASMEND 12843; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s7, s7 12844; GFX90A-NEXT: s_mov_b32 s9, s4 12845; GFX90A-NEXT: ;;#ASMSTART 12846; GFX90A-NEXT: ; use s[8:9] 12847; GFX90A-NEXT: ;;#ASMEND 12848; GFX90A-NEXT: s_setpc_b64 s[30:31] 12849; 12850; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_0_u: 12851; GFX940: ; %bb.0: 12852; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12853; GFX940-NEXT: ;;#ASMSTART 12854; GFX940-NEXT: ; def s[0:1] 12855; GFX940-NEXT: ;;#ASMEND 12856; GFX940-NEXT: ;;#ASMSTART 12857; GFX940-NEXT: ; def s[2:3] 12858; GFX940-NEXT: ;;#ASMEND 12859; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3 12860; GFX940-NEXT: s_mov_b32 s9, s0 12861; GFX940-NEXT: ;;#ASMSTART 12862; GFX940-NEXT: ; use s[8:9] 12863; GFX940-NEXT: ;;#ASMEND 12864; GFX940-NEXT: s_setpc_b64 s[30:31] 12865 %vec0 = call <4 x i16> asm "; def $0", "=s"() 12866 %vec1 = call <4 x i16> asm "; def $0", "=s"() 12867 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 0, i32 poison> 12868 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 12869 ret void 12870} 12871 12872define void @s_shuffle_v4i16_v4i16__7_7_1_u() { 12873; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_1_u: 12874; GFX900: ; %bb.0: 12875; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12876; GFX900-NEXT: ;;#ASMSTART 12877; GFX900-NEXT: ; def s[4:5] 12878; GFX900-NEXT: ;;#ASMEND 12879; GFX900-NEXT: ;;#ASMSTART 12880; GFX900-NEXT: ; def s[6:7] 12881; GFX900-NEXT: ;;#ASMEND 12882; GFX900-NEXT: s_lshr_b32 s9, s4, 16 12883; GFX900-NEXT: s_pack_hh_b32_b16 s8, s7, s7 12884; GFX900-NEXT: ;;#ASMSTART 12885; GFX900-NEXT: ; use s[8:9] 12886; GFX900-NEXT: ;;#ASMEND 12887; GFX900-NEXT: s_setpc_b64 s[30:31] 12888; 12889; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_1_u: 12890; GFX90A: ; %bb.0: 12891; GFX90A-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) 12892; GFX90A-NEXT: ;;#ASMSTART 12893; GFX90A-NEXT: ; def s[4:5] 12894; GFX90A-NEXT: ;;#ASMEND 12895; GFX90A-NEXT: ;;#ASMSTART 12896; GFX90A-NEXT: ; def s[6:7] 12897; GFX90A-NEXT: ;;#ASMEND 12898; GFX90A-NEXT: s_lshr_b32 s9, s4, 16 12899; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s7, s7 12900; GFX90A-NEXT: ;;#ASMSTART 12901; GFX90A-NEXT: ; use s[8:9] 12902; GFX90A-NEXT: ;;#ASMEND 12903; GFX90A-NEXT: s_setpc_b64 s[30:31] 12904; 12905; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_1_u: 12906; GFX940: ; %bb.0: 12907; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12908; GFX940-NEXT: ;;#ASMSTART 12909; GFX940-NEXT: ; def s[0:1] 12910; GFX940-NEXT: ;;#ASMEND 12911; GFX940-NEXT: ;;#ASMSTART 12912; GFX940-NEXT: ; def s[2:3] 12913; GFX940-NEXT: ;;#ASMEND 12914; GFX940-NEXT: s_lshr_b32 s9, s0, 16 12915; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3 12916; GFX940-NEXT: ;;#ASMSTART 12917; GFX940-NEXT: ; use s[8:9] 12918; GFX940-NEXT: ;;#ASMEND 12919; GFX940-NEXT: s_setpc_b64 s[30:31] 12920 %vec0 = call <4 x i16> asm "; def $0", "=s"() 12921 %vec1 = call <4 x i16> asm "; def $0", "=s"() 12922 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 1, i32 poison> 12923 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 12924 ret void 12925} 12926 12927define void @s_shuffle_v4i16_v4i16__7_7_2_u() { 12928; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_2_u: 12929; GFX900: ; %bb.0: 12930; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12931; GFX900-NEXT: ;;#ASMSTART 12932; GFX900-NEXT: ; def s[8:9] 12933; GFX900-NEXT: ;;#ASMEND 12934; GFX900-NEXT: ;;#ASMSTART 12935; GFX900-NEXT: ; def s[4:5] 12936; GFX900-NEXT: ;;#ASMEND 12937; GFX900-NEXT: s_pack_hh_b32_b16 s8, s5, s5 12938; GFX900-NEXT: ;;#ASMSTART 12939; GFX900-NEXT: ; use s[8:9] 12940; GFX900-NEXT: ;;#ASMEND 12941; GFX900-NEXT: s_setpc_b64 s[30:31] 12942; 12943; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_2_u: 12944; GFX90A: ; %bb.0: 12945; GFX90A-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) 12946; GFX90A-NEXT: ;;#ASMSTART 12947; GFX90A-NEXT: ; def s[8:9] 12948; GFX90A-NEXT: ;;#ASMEND 12949; GFX90A-NEXT: ;;#ASMSTART 12950; GFX90A-NEXT: ; def s[4:5] 12951; GFX90A-NEXT: ;;#ASMEND 12952; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s5, s5 12953; GFX90A-NEXT: ;;#ASMSTART 12954; GFX90A-NEXT: ; use s[8:9] 12955; GFX90A-NEXT: ;;#ASMEND 12956; GFX90A-NEXT: s_setpc_b64 s[30:31] 12957; 12958; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_2_u: 12959; GFX940: ; %bb.0: 12960; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12961; GFX940-NEXT: ;;#ASMSTART 12962; GFX940-NEXT: ; def s[8:9] 12963; GFX940-NEXT: ;;#ASMEND 12964; GFX940-NEXT: ;;#ASMSTART 12965; GFX940-NEXT: ; def s[0:1] 12966; GFX940-NEXT: ;;#ASMEND 12967; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s1 12968; GFX940-NEXT: ;;#ASMSTART 12969; GFX940-NEXT: ; use s[8:9] 12970; GFX940-NEXT: ;;#ASMEND 12971; GFX940-NEXT: s_setpc_b64 s[30:31] 12972 %vec0 = call <4 x i16> asm "; def $0", "=s"() 12973 %vec1 = call <4 x i16> asm "; def $0", "=s"() 12974 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 2, i32 poison> 12975 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 12976 ret void 12977} 12978 12979define void @s_shuffle_v4i16_v4i16__7_7_3_u() { 12980; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_3_u: 12981; GFX900: ; %bb.0: 12982; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12983; GFX900-NEXT: ;;#ASMSTART 12984; GFX900-NEXT: ; def s[4:5] 12985; GFX900-NEXT: ;;#ASMEND 12986; GFX900-NEXT: ;;#ASMSTART 12987; GFX900-NEXT: ; def s[6:7] 12988; GFX900-NEXT: ;;#ASMEND 12989; GFX900-NEXT: s_lshr_b32 s9, s5, 16 12990; GFX900-NEXT: s_pack_hh_b32_b16 s8, s7, s7 12991; GFX900-NEXT: ;;#ASMSTART 12992; GFX900-NEXT: ; use s[8:9] 12993; GFX900-NEXT: ;;#ASMEND 12994; GFX900-NEXT: s_setpc_b64 s[30:31] 12995; 12996; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_3_u: 12997; GFX90A: ; %bb.0: 12998; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12999; GFX90A-NEXT: 
;;#ASMSTART 13000; GFX90A-NEXT: ; def s[4:5] 13001; GFX90A-NEXT: ;;#ASMEND 13002; GFX90A-NEXT: ;;#ASMSTART 13003; GFX90A-NEXT: ; def s[6:7] 13004; GFX90A-NEXT: ;;#ASMEND 13005; GFX90A-NEXT: s_lshr_b32 s9, s5, 16 13006; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s7, s7 13007; GFX90A-NEXT: ;;#ASMSTART 13008; GFX90A-NEXT: ; use s[8:9] 13009; GFX90A-NEXT: ;;#ASMEND 13010; GFX90A-NEXT: s_setpc_b64 s[30:31] 13011; 13012; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_3_u: 13013; GFX940: ; %bb.0: 13014; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13015; GFX940-NEXT: ;;#ASMSTART 13016; GFX940-NEXT: ; def s[0:1] 13017; GFX940-NEXT: ;;#ASMEND 13018; GFX940-NEXT: ;;#ASMSTART 13019; GFX940-NEXT: ; def s[2:3] 13020; GFX940-NEXT: ;;#ASMEND 13021; GFX940-NEXT: s_lshr_b32 s9, s1, 16 13022; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3 13023; GFX940-NEXT: ;;#ASMSTART 13024; GFX940-NEXT: ; use s[8:9] 13025; GFX940-NEXT: ;;#ASMEND 13026; GFX940-NEXT: s_setpc_b64 s[30:31] 13027 %vec0 = call <4 x i16> asm "; def $0", "=s"() 13028 %vec1 = call <4 x i16> asm "; def $0", "=s"() 13029 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 3, i32 poison> 13030 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 13031 ret void 13032} 13033 13034define void @s_shuffle_v4i16_v4i16__7_7_4_u() { 13035; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_4_u: 13036; GFX900: ; %bb.0: 13037; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13038; GFX900-NEXT: ;;#ASMSTART 13039; GFX900-NEXT: ; def s[4:5] 13040; GFX900-NEXT: ;;#ASMEND 13041; GFX900-NEXT: s_pack_hh_b32_b16 s8, s5, s5 13042; GFX900-NEXT: s_mov_b32 s9, s4 13043; GFX900-NEXT: ;;#ASMSTART 13044; GFX900-NEXT: ; use s[8:9] 13045; GFX900-NEXT: ;;#ASMEND 13046; GFX900-NEXT: s_setpc_b64 s[30:31] 13047; 13048; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_4_u: 13049; GFX90A: ; %bb.0: 13050; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13051; GFX90A-NEXT: ;;#ASMSTART 13052; GFX90A-NEXT: ; def s[4:5] 13053; 
GFX90A-NEXT: ;;#ASMEND 13054; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s5, s5 13055; GFX90A-NEXT: s_mov_b32 s9, s4 13056; GFX90A-NEXT: ;;#ASMSTART 13057; GFX90A-NEXT: ; use s[8:9] 13058; GFX90A-NEXT: ;;#ASMEND 13059; GFX90A-NEXT: s_setpc_b64 s[30:31] 13060; 13061; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_4_u: 13062; GFX940: ; %bb.0: 13063; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13064; GFX940-NEXT: ;;#ASMSTART 13065; GFX940-NEXT: ; def s[0:1] 13066; GFX940-NEXT: ;;#ASMEND 13067; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s1 13068; GFX940-NEXT: s_mov_b32 s9, s0 13069; GFX940-NEXT: ;;#ASMSTART 13070; GFX940-NEXT: ; use s[8:9] 13071; GFX940-NEXT: ;;#ASMEND 13072; GFX940-NEXT: s_setpc_b64 s[30:31] 13073 %vec0 = call <4 x i16> asm "; def $0", "=s"() 13074 %vec1 = call <4 x i16> asm "; def $0", "=s"() 13075 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 4, i32 poison> 13076 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 13077 ret void 13078} 13079 13080define void @s_shuffle_v4i16_v4i16__7_7_5_u() { 13081; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_5_u: 13082; GFX900: ; %bb.0: 13083; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13084; GFX900-NEXT: ;;#ASMSTART 13085; GFX900-NEXT: ; def s[4:5] 13086; GFX900-NEXT: ;;#ASMEND 13087; GFX900-NEXT: s_lshr_b32 s9, s4, 16 13088; GFX900-NEXT: s_pack_hh_b32_b16 s8, s5, s5 13089; GFX900-NEXT: ;;#ASMSTART 13090; GFX900-NEXT: ; use s[8:9] 13091; GFX900-NEXT: ;;#ASMEND 13092; GFX900-NEXT: s_setpc_b64 s[30:31] 13093; 13094; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_5_u: 13095; GFX90A: ; %bb.0: 13096; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13097; GFX90A-NEXT: ;;#ASMSTART 13098; GFX90A-NEXT: ; def s[4:5] 13099; GFX90A-NEXT: ;;#ASMEND 13100; GFX90A-NEXT: s_lshr_b32 s9, s4, 16 13101; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s5, s5 13102; GFX90A-NEXT: ;;#ASMSTART 13103; GFX90A-NEXT: ; use s[8:9] 13104; GFX90A-NEXT: ;;#ASMEND 13105; GFX90A-NEXT: s_setpc_b64 s[30:31] 13106; 
13107; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_5_u: 13108; GFX940: ; %bb.0: 13109; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13110; GFX940-NEXT: ;;#ASMSTART 13111; GFX940-NEXT: ; def s[0:1] 13112; GFX940-NEXT: ;;#ASMEND 13113; GFX940-NEXT: s_lshr_b32 s9, s0, 16 13114; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s1 13115; GFX940-NEXT: ;;#ASMSTART 13116; GFX940-NEXT: ; use s[8:9] 13117; GFX940-NEXT: ;;#ASMEND 13118; GFX940-NEXT: s_setpc_b64 s[30:31] 13119 %vec0 = call <4 x i16> asm "; def $0", "=s"() 13120 %vec1 = call <4 x i16> asm "; def $0", "=s"() 13121 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 5, i32 poison> 13122 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 13123 ret void 13124} 13125 13126define void @s_shuffle_v4i16_v4i16__7_7_6_u() { 13127; GFX9-LABEL: s_shuffle_v4i16_v4i16__7_7_6_u: 13128; GFX9: ; %bb.0: 13129; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13130; GFX9-NEXT: ;;#ASMSTART 13131; GFX9-NEXT: ; def s[8:9] 13132; GFX9-NEXT: ;;#ASMEND 13133; GFX9-NEXT: s_pack_hh_b32_b16 s8, s9, s9 13134; GFX9-NEXT: ;;#ASMSTART 13135; GFX9-NEXT: ; use s[8:9] 13136; GFX9-NEXT: ;;#ASMEND 13137; GFX9-NEXT: s_setpc_b64 s[30:31] 13138 %vec0 = call <4 x i16> asm "; def $0", "=s"() 13139 %vec1 = call <4 x i16> asm "; def $0", "=s"() 13140 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 poison> 13141 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 13142 ret void 13143} 13144 13145define void @s_shuffle_v4i16_v4i16__7_7_7_u() { 13146; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_7_u: 13147; GFX900: ; %bb.0: 13148; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13149; GFX900-NEXT: ;;#ASMSTART 13150; GFX900-NEXT: ; def s[4:5] 13151; GFX900-NEXT: ;;#ASMEND 13152; GFX900-NEXT: s_lshr_b32 s9, s5, 16 13153; GFX900-NEXT: s_pack_hh_b32_b16 s8, s5, s5 13154; GFX900-NEXT: ;;#ASMSTART 13155; GFX900-NEXT: ; use s[8:9] 13156; GFX900-NEXT: ;;#ASMEND 
13157; GFX900-NEXT: s_setpc_b64 s[30:31] 13158; 13159; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_7_u: 13160; GFX90A: ; %bb.0: 13161; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13162; GFX90A-NEXT: ;;#ASMSTART 13163; GFX90A-NEXT: ; def s[4:5] 13164; GFX90A-NEXT: ;;#ASMEND 13165; GFX90A-NEXT: s_lshr_b32 s9, s5, 16 13166; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s5, s5 13167; GFX90A-NEXT: ;;#ASMSTART 13168; GFX90A-NEXT: ; use s[8:9] 13169; GFX90A-NEXT: ;;#ASMEND 13170; GFX90A-NEXT: s_setpc_b64 s[30:31] 13171; 13172; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_7_u: 13173; GFX940: ; %bb.0: 13174; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13175; GFX940-NEXT: ;;#ASMSTART 13176; GFX940-NEXT: ; def s[0:1] 13177; GFX940-NEXT: ;;#ASMEND 13178; GFX940-NEXT: s_lshr_b32 s9, s1, 16 13179; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s1 13180; GFX940-NEXT: ;;#ASMSTART 13181; GFX940-NEXT: ; use s[8:9] 13182; GFX940-NEXT: ;;#ASMEND 13183; GFX940-NEXT: s_setpc_b64 s[30:31] 13184 %vec0 = call <4 x i16> asm "; def $0", "=s"() 13185 %vec1 = call <4 x i16> asm "; def $0", "=s"() 13186 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 7, i32 poison> 13187 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 13188 ret void 13189} 13190 13191define void @s_shuffle_v4i16_v4i16__7_7_7_0() { 13192; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_7_0: 13193; GFX900: ; %bb.0: 13194; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13195; GFX900-NEXT: ;;#ASMSTART 13196; GFX900-NEXT: ; def s[4:5] 13197; GFX900-NEXT: ;;#ASMEND 13198; GFX900-NEXT: ;;#ASMSTART 13199; GFX900-NEXT: ; def s[6:7] 13200; GFX900-NEXT: ;;#ASMEND 13201; GFX900-NEXT: s_lshr_b32 s5, s7, 16 13202; GFX900-NEXT: s_pack_ll_b32_b16 s9, s5, s4 13203; GFX900-NEXT: s_pack_hh_b32_b16 s8, s7, s7 13204; GFX900-NEXT: ;;#ASMSTART 13205; GFX900-NEXT: ; use s[8:9] 13206; GFX900-NEXT: ;;#ASMEND 13207; GFX900-NEXT: s_setpc_b64 s[30:31] 13208; 13209; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_7_0: 
13210; GFX90A: ; %bb.0: 13211; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13212; GFX90A-NEXT: ;;#ASMSTART 13213; GFX90A-NEXT: ; def s[4:5] 13214; GFX90A-NEXT: ;;#ASMEND 13215; GFX90A-NEXT: ;;#ASMSTART 13216; GFX90A-NEXT: ; def s[6:7] 13217; GFX90A-NEXT: ;;#ASMEND 13218; GFX90A-NEXT: s_lshr_b32 s5, s7, 16 13219; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s5, s4 13220; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s7, s7 13221; GFX90A-NEXT: ;;#ASMSTART 13222; GFX90A-NEXT: ; use s[8:9] 13223; GFX90A-NEXT: ;;#ASMEND 13224; GFX90A-NEXT: s_setpc_b64 s[30:31] 13225; 13226; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_7_0: 13227; GFX940: ; %bb.0: 13228; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13229; GFX940-NEXT: ;;#ASMSTART 13230; GFX940-NEXT: ; def s[0:1] 13231; GFX940-NEXT: ;;#ASMEND 13232; GFX940-NEXT: ;;#ASMSTART 13233; GFX940-NEXT: ; def s[2:3] 13234; GFX940-NEXT: ;;#ASMEND 13235; GFX940-NEXT: s_lshr_b32 s1, s3, 16 13236; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s0 13237; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3 13238; GFX940-NEXT: ;;#ASMSTART 13239; GFX940-NEXT: ; use s[8:9] 13240; GFX940-NEXT: ;;#ASMEND 13241; GFX940-NEXT: s_setpc_b64 s[30:31] 13242 %vec0 = call <4 x i16> asm "; def $0", "=s"() 13243 %vec1 = call <4 x i16> asm "; def $0", "=s"() 13244 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 7, i32 0> 13245 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 13246 ret void 13247} 13248 13249define void @s_shuffle_v4i16_v4i16__7_7_7_1() { 13250; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_7_1: 13251; GFX900: ; %bb.0: 13252; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13253; GFX900-NEXT: ;;#ASMSTART 13254; GFX900-NEXT: ; def s[4:5] 13255; GFX900-NEXT: ;;#ASMEND 13256; GFX900-NEXT: ;;#ASMSTART 13257; GFX900-NEXT: ; def s[6:7] 13258; GFX900-NEXT: ;;#ASMEND 13259; GFX900-NEXT: s_pack_hh_b32_b16 s9, s7, s4 13260; GFX900-NEXT: s_pack_hh_b32_b16 s8, s7, s7 13261; GFX900-NEXT: ;;#ASMSTART 13262; GFX900-NEXT: ; use 
s[8:9] 13263; GFX900-NEXT: ;;#ASMEND 13264; GFX900-NEXT: s_setpc_b64 s[30:31] 13265; 13266; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_7_1: 13267; GFX90A: ; %bb.0: 13268; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13269; GFX90A-NEXT: ;;#ASMSTART 13270; GFX90A-NEXT: ; def s[4:5] 13271; GFX90A-NEXT: ;;#ASMEND 13272; GFX90A-NEXT: ;;#ASMSTART 13273; GFX90A-NEXT: ; def s[6:7] 13274; GFX90A-NEXT: ;;#ASMEND 13275; GFX90A-NEXT: s_pack_hh_b32_b16 s9, s7, s4 13276; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s7, s7 13277; GFX90A-NEXT: ;;#ASMSTART 13278; GFX90A-NEXT: ; use s[8:9] 13279; GFX90A-NEXT: ;;#ASMEND 13280; GFX90A-NEXT: s_setpc_b64 s[30:31] 13281; 13282; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_7_1: 13283; GFX940: ; %bb.0: 13284; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13285; GFX940-NEXT: ;;#ASMSTART 13286; GFX940-NEXT: ; def s[0:1] 13287; GFX940-NEXT: ;;#ASMEND 13288; GFX940-NEXT: ;;#ASMSTART 13289; GFX940-NEXT: ; def s[2:3] 13290; GFX940-NEXT: ;;#ASMEND 13291; GFX940-NEXT: s_pack_hh_b32_b16 s9, s3, s0 13292; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3 13293; GFX940-NEXT: ;;#ASMSTART 13294; GFX940-NEXT: ; use s[8:9] 13295; GFX940-NEXT: ;;#ASMEND 13296; GFX940-NEXT: s_setpc_b64 s[30:31] 13297 %vec0 = call <4 x i16> asm "; def $0", "=s"() 13298 %vec1 = call <4 x i16> asm "; def $0", "=s"() 13299 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 7, i32 1> 13300 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 13301 ret void 13302} 13303 13304define void @s_shuffle_v4i16_v4i16__7_7_7_2() { 13305; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_7_2: 13306; GFX900: ; %bb.0: 13307; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13308; GFX900-NEXT: ;;#ASMSTART 13309; GFX900-NEXT: ; def s[4:5] 13310; GFX900-NEXT: ;;#ASMEND 13311; GFX900-NEXT: ;;#ASMSTART 13312; GFX900-NEXT: ; def s[6:7] 13313; GFX900-NEXT: ;;#ASMEND 13314; GFX900-NEXT: s_lshr_b32 s4, s7, 16 13315; GFX900-NEXT: s_pack_ll_b32_b16 s9, s4, s5 13316; 
GFX900-NEXT: s_pack_hh_b32_b16 s8, s7, s7 13317; GFX900-NEXT: ;;#ASMSTART 13318; GFX900-NEXT: ; use s[8:9] 13319; GFX900-NEXT: ;;#ASMEND 13320; GFX900-NEXT: s_setpc_b64 s[30:31] 13321; 13322; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_7_2: 13323; GFX90A: ; %bb.0: 13324; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13325; GFX90A-NEXT: ;;#ASMSTART 13326; GFX90A-NEXT: ; def s[4:5] 13327; GFX90A-NEXT: ;;#ASMEND 13328; GFX90A-NEXT: ;;#ASMSTART 13329; GFX90A-NEXT: ; def s[6:7] 13330; GFX90A-NEXT: ;;#ASMEND 13331; GFX90A-NEXT: s_lshr_b32 s4, s7, 16 13332; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s4, s5 13333; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s7, s7 13334; GFX90A-NEXT: ;;#ASMSTART 13335; GFX90A-NEXT: ; use s[8:9] 13336; GFX90A-NEXT: ;;#ASMEND 13337; GFX90A-NEXT: s_setpc_b64 s[30:31] 13338; 13339; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_7_2: 13340; GFX940: ; %bb.0: 13341; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13342; GFX940-NEXT: ;;#ASMSTART 13343; GFX940-NEXT: ; def s[0:1] 13344; GFX940-NEXT: ;;#ASMEND 13345; GFX940-NEXT: ;;#ASMSTART 13346; GFX940-NEXT: ; def s[2:3] 13347; GFX940-NEXT: ;;#ASMEND 13348; GFX940-NEXT: s_lshr_b32 s0, s3, 16 13349; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s1 13350; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3 13351; GFX940-NEXT: ;;#ASMSTART 13352; GFX940-NEXT: ; use s[8:9] 13353; GFX940-NEXT: ;;#ASMEND 13354; GFX940-NEXT: s_setpc_b64 s[30:31] 13355 %vec0 = call <4 x i16> asm "; def $0", "=s"() 13356 %vec1 = call <4 x i16> asm "; def $0", "=s"() 13357 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 7, i32 2> 13358 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 13359 ret void 13360} 13361 13362define void @s_shuffle_v4i16_v4i16__7_7_7_3() { 13363; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_7_3: 13364; GFX900: ; %bb.0: 13365; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13366; GFX900-NEXT: ;;#ASMSTART 13367; GFX900-NEXT: ; def s[4:5] 13368; GFX900-NEXT: ;;#ASMEND 13369; 
GFX900-NEXT: ;;#ASMSTART 13370; GFX900-NEXT: ; def s[6:7] 13371; GFX900-NEXT: ;;#ASMEND 13372; GFX900-NEXT: s_pack_hh_b32_b16 s9, s7, s5 13373; GFX900-NEXT: s_pack_hh_b32_b16 s8, s7, s7 13374; GFX900-NEXT: ;;#ASMSTART 13375; GFX900-NEXT: ; use s[8:9] 13376; GFX900-NEXT: ;;#ASMEND 13377; GFX900-NEXT: s_setpc_b64 s[30:31] 13378; 13379; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_7_3: 13380; GFX90A: ; %bb.0: 13381; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13382; GFX90A-NEXT: ;;#ASMSTART 13383; GFX90A-NEXT: ; def s[4:5] 13384; GFX90A-NEXT: ;;#ASMEND 13385; GFX90A-NEXT: ;;#ASMSTART 13386; GFX90A-NEXT: ; def s[6:7] 13387; GFX90A-NEXT: ;;#ASMEND 13388; GFX90A-NEXT: s_pack_hh_b32_b16 s9, s7, s5 13389; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s7, s7 13390; GFX90A-NEXT: ;;#ASMSTART 13391; GFX90A-NEXT: ; use s[8:9] 13392; GFX90A-NEXT: ;;#ASMEND 13393; GFX90A-NEXT: s_setpc_b64 s[30:31] 13394; 13395; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_7_3: 13396; GFX940: ; %bb.0: 13397; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13398; GFX940-NEXT: ;;#ASMSTART 13399; GFX940-NEXT: ; def s[0:1] 13400; GFX940-NEXT: ;;#ASMEND 13401; GFX940-NEXT: ;;#ASMSTART 13402; GFX940-NEXT: ; def s[2:3] 13403; GFX940-NEXT: ;;#ASMEND 13404; GFX940-NEXT: s_pack_hh_b32_b16 s9, s3, s1 13405; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3 13406; GFX940-NEXT: ;;#ASMSTART 13407; GFX940-NEXT: ; use s[8:9] 13408; GFX940-NEXT: ;;#ASMEND 13409; GFX940-NEXT: s_setpc_b64 s[30:31] 13410 %vec0 = call <4 x i16> asm "; def $0", "=s"() 13411 %vec1 = call <4 x i16> asm "; def $0", "=s"() 13412 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 7, i32 3> 13413 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 13414 ret void 13415} 13416 13417define void @s_shuffle_v4i16_v4i16__7_7_7_4() { 13418; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_7_4: 13419; GFX900: ; %bb.0: 13420; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13421; GFX900-NEXT: ;;#ASMSTART 13422; 
GFX900-NEXT: ; def s[4:5] 13423; GFX900-NEXT: ;;#ASMEND 13424; GFX900-NEXT: s_lshr_b32 s6, s5, 16 13425; GFX900-NEXT: s_pack_ll_b32_b16 s9, s6, s4 13426; GFX900-NEXT: s_pack_hh_b32_b16 s8, s5, s5 13427; GFX900-NEXT: ;;#ASMSTART 13428; GFX900-NEXT: ; use s[8:9] 13429; GFX900-NEXT: ;;#ASMEND 13430; GFX900-NEXT: s_setpc_b64 s[30:31] 13431; 13432; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_7_4: 13433; GFX90A: ; %bb.0: 13434; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13435; GFX90A-NEXT: ;;#ASMSTART 13436; GFX90A-NEXT: ; def s[4:5] 13437; GFX90A-NEXT: ;;#ASMEND 13438; GFX90A-NEXT: s_lshr_b32 s6, s5, 16 13439; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s6, s4 13440; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s5, s5 13441; GFX90A-NEXT: ;;#ASMSTART 13442; GFX90A-NEXT: ; use s[8:9] 13443; GFX90A-NEXT: ;;#ASMEND 13444; GFX90A-NEXT: s_setpc_b64 s[30:31] 13445; 13446; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_7_4: 13447; GFX940: ; %bb.0: 13448; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13449; GFX940-NEXT: ;;#ASMSTART 13450; GFX940-NEXT: ; def s[0:1] 13451; GFX940-NEXT: ;;#ASMEND 13452; GFX940-NEXT: s_lshr_b32 s2, s1, 16 13453; GFX940-NEXT: s_pack_ll_b32_b16 s9, s2, s0 13454; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s1 13455; GFX940-NEXT: ;;#ASMSTART 13456; GFX940-NEXT: ; use s[8:9] 13457; GFX940-NEXT: ;;#ASMEND 13458; GFX940-NEXT: s_setpc_b64 s[30:31] 13459 %vec0 = call <4 x i16> asm "; def $0", "=s"() 13460 %vec1 = call <4 x i16> asm "; def $0", "=s"() 13461 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 7, i32 4> 13462 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 13463 ret void 13464} 13465 13466define void @s_shuffle_v4i16_v4i16__7_7_7_5() { 13467; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_7_5: 13468; GFX900: ; %bb.0: 13469; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13470; GFX900-NEXT: ;;#ASMSTART 13471; GFX900-NEXT: ; def s[4:5] 13472; GFX900-NEXT: ;;#ASMEND 13473; GFX900-NEXT: s_pack_hh_b32_b16 s9, s5, s4 
13474; GFX900-NEXT: s_pack_hh_b32_b16 s8, s5, s5 13475; GFX900-NEXT: ;;#ASMSTART 13476; GFX900-NEXT: ; use s[8:9] 13477; GFX900-NEXT: ;;#ASMEND 13478; GFX900-NEXT: s_setpc_b64 s[30:31] 13479; 13480; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_7_5: 13481; GFX90A: ; %bb.0: 13482; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13483; GFX90A-NEXT: ;;#ASMSTART 13484; GFX90A-NEXT: ; def s[4:5] 13485; GFX90A-NEXT: ;;#ASMEND 13486; GFX90A-NEXT: s_pack_hh_b32_b16 s9, s5, s4 13487; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s5, s5 13488; GFX90A-NEXT: ;;#ASMSTART 13489; GFX90A-NEXT: ; use s[8:9] 13490; GFX90A-NEXT: ;;#ASMEND 13491; GFX90A-NEXT: s_setpc_b64 s[30:31] 13492; 13493; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_7_5: 13494; GFX940: ; %bb.0: 13495; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13496; GFX940-NEXT: ;;#ASMSTART 13497; GFX940-NEXT: ; def s[0:1] 13498; GFX940-NEXT: ;;#ASMEND 13499; GFX940-NEXT: s_pack_hh_b32_b16 s9, s1, s0 13500; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s1 13501; GFX940-NEXT: ;;#ASMSTART 13502; GFX940-NEXT: ; use s[8:9] 13503; GFX940-NEXT: ;;#ASMEND 13504; GFX940-NEXT: s_setpc_b64 s[30:31] 13505 %vec0 = call <4 x i16> asm "; def $0", "=s"() 13506 %vec1 = call <4 x i16> asm "; def $0", "=s"() 13507 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 7, i32 5> 13508 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 13509 ret void 13510} 13511 13512define void @s_shuffle_v4i16_v4i16__7_7_7_6() { 13513; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_7_6: 13514; GFX900: ; %bb.0: 13515; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13516; GFX900-NEXT: ;;#ASMSTART 13517; GFX900-NEXT: ; def s[4:5] 13518; GFX900-NEXT: ;;#ASMEND 13519; GFX900-NEXT: s_lshr_b32 s4, s5, 16 13520; GFX900-NEXT: s_pack_ll_b32_b16 s9, s4, s5 13521; GFX900-NEXT: s_pack_hh_b32_b16 s8, s5, s5 13522; GFX900-NEXT: ;;#ASMSTART 13523; GFX900-NEXT: ; use s[8:9] 13524; GFX900-NEXT: ;;#ASMEND 13525; GFX900-NEXT: s_setpc_b64 s[30:31] 
13526; 13527; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_7_6: 13528; GFX90A: ; %bb.0: 13529; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13530; GFX90A-NEXT: ;;#ASMSTART 13531; GFX90A-NEXT: ; def s[4:5] 13532; GFX90A-NEXT: ;;#ASMEND 13533; GFX90A-NEXT: s_lshr_b32 s4, s5, 16 13534; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s4, s5 13535; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s5, s5 13536; GFX90A-NEXT: ;;#ASMSTART 13537; GFX90A-NEXT: ; use s[8:9] 13538; GFX90A-NEXT: ;;#ASMEND 13539; GFX90A-NEXT: s_setpc_b64 s[30:31] 13540; 13541; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_7_6: 13542; GFX940: ; %bb.0: 13543; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13544; GFX940-NEXT: ;;#ASMSTART 13545; GFX940-NEXT: ; def s[0:1] 13546; GFX940-NEXT: ;;#ASMEND 13547; GFX940-NEXT: s_lshr_b32 s0, s1, 16 13548; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s1 13549; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s1 13550; GFX940-NEXT: ;;#ASMSTART 13551; GFX940-NEXT: ; use s[8:9] 13552; GFX940-NEXT: ;;#ASMEND 13553; GFX940-NEXT: s_setpc_b64 s[30:31] 13554 %vec0 = call <4 x i16> asm "; def $0", "=s"() 13555 %vec1 = call <4 x i16> asm "; def $0", "=s"() 13556 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 7, i32 6> 13557 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 13558 ret void 13559} 13560 13561define void @s_shuffle_v4i16_v4i16__7_7_7_7() { 13562; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_7_7: 13563; GFX900: ; %bb.0: 13564; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13565; GFX900-NEXT: ;;#ASMSTART 13566; GFX900-NEXT: ; def s[4:5] 13567; GFX900-NEXT: ;;#ASMEND 13568; GFX900-NEXT: s_pack_hh_b32_b16 s8, s5, s5 13569; GFX900-NEXT: s_mov_b32 s9, s8 13570; GFX900-NEXT: ;;#ASMSTART 13571; GFX900-NEXT: ; use s[8:9] 13572; GFX900-NEXT: ;;#ASMEND 13573; GFX900-NEXT: s_setpc_b64 s[30:31] 13574; 13575; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_7_7: 13576; GFX90A: ; %bb.0: 13577; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13578; 
GFX90A-NEXT: ;;#ASMSTART 13579; GFX90A-NEXT: ; def s[4:5] 13580; GFX90A-NEXT: ;;#ASMEND 13581; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s5, s5 13582; GFX90A-NEXT: s_mov_b32 s9, s8 13583; GFX90A-NEXT: ;;#ASMSTART 13584; GFX90A-NEXT: ; use s[8:9] 13585; GFX90A-NEXT: ;;#ASMEND 13586; GFX90A-NEXT: s_setpc_b64 s[30:31] 13587; 13588; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_7_7: 13589; GFX940: ; %bb.0: 13590; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13591; GFX940-NEXT: ;;#ASMSTART 13592; GFX940-NEXT: ; def s[0:1] 13593; GFX940-NEXT: ;;#ASMEND 13594; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s1 13595; GFX940-NEXT: s_mov_b32 s9, s8 13596; GFX940-NEXT: ;;#ASMSTART 13597; GFX940-NEXT: ; use s[8:9] 13598; GFX940-NEXT: ;;#ASMEND 13599; GFX940-NEXT: s_setpc_b64 s[30:31] 13600 %vec0 = call <4 x i16> asm "; def $0", "=s"() 13601 %vec1 = call <4 x i16> asm "; def $0", "=s"() 13602 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 7, i32 7> 13603 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 13604 ret void 13605} 13606 13607define void @s_shuffle_v4i16_v4i16__u_0_0_0() { 13608; GFX900-LABEL: s_shuffle_v4i16_v4i16__u_0_0_0: 13609; GFX900: ; %bb.0: 13610; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13611; GFX900-NEXT: ;;#ASMSTART 13612; GFX900-NEXT: ; def s[4:5] 13613; GFX900-NEXT: ;;#ASMEND 13614; GFX900-NEXT: s_pack_ll_b32_b16 s9, s4, s4 13615; GFX900-NEXT: s_lshl_b32 s8, s4, 16 13616; GFX900-NEXT: ;;#ASMSTART 13617; GFX900-NEXT: ; use s[8:9] 13618; GFX900-NEXT: ;;#ASMEND 13619; GFX900-NEXT: s_setpc_b64 s[30:31] 13620; 13621; GFX90A-LABEL: s_shuffle_v4i16_v4i16__u_0_0_0: 13622; GFX90A: ; %bb.0: 13623; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13624; GFX90A-NEXT: ;;#ASMSTART 13625; GFX90A-NEXT: ; def s[4:5] 13626; GFX90A-NEXT: ;;#ASMEND 13627; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s4, s4 13628; GFX90A-NEXT: s_lshl_b32 s8, s4, 16 13629; GFX90A-NEXT: ;;#ASMSTART 13630; GFX90A-NEXT: ; use s[8:9] 13631; 
GFX90A-NEXT: ;;#ASMEND 13632; GFX90A-NEXT: s_setpc_b64 s[30:31] 13633; 13634; GFX940-LABEL: s_shuffle_v4i16_v4i16__u_0_0_0: 13635; GFX940: ; %bb.0: 13636; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13637; GFX940-NEXT: ;;#ASMSTART 13638; GFX940-NEXT: ; def s[0:1] 13639; GFX940-NEXT: ;;#ASMEND 13640; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 13641; GFX940-NEXT: s_lshl_b32 s8, s0, 16 13642; GFX940-NEXT: ;;#ASMSTART 13643; GFX940-NEXT: ; use s[8:9] 13644; GFX940-NEXT: ;;#ASMEND 13645; GFX940-NEXT: s_setpc_b64 s[30:31] 13646 %vec0 = call <4 x i16> asm "; def $0", "=s"() 13647 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 poison, i32 0, i32 0, i32 0> 13648 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 13649 ret void 13650} 13651 13652define void @s_shuffle_v4i16_v4i16__0_0_0_0() { 13653; GFX900-LABEL: s_shuffle_v4i16_v4i16__0_0_0_0: 13654; GFX900: ; %bb.0: 13655; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13656; GFX900-NEXT: ;;#ASMSTART 13657; GFX900-NEXT: ; def s[4:5] 13658; GFX900-NEXT: ;;#ASMEND 13659; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s4 13660; GFX900-NEXT: s_mov_b32 s9, s8 13661; GFX900-NEXT: ;;#ASMSTART 13662; GFX900-NEXT: ; use s[8:9] 13663; GFX900-NEXT: ;;#ASMEND 13664; GFX900-NEXT: s_setpc_b64 s[30:31] 13665; 13666; GFX90A-LABEL: s_shuffle_v4i16_v4i16__0_0_0_0: 13667; GFX90A: ; %bb.0: 13668; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13669; GFX90A-NEXT: ;;#ASMSTART 13670; GFX90A-NEXT: ; def s[4:5] 13671; GFX90A-NEXT: ;;#ASMEND 13672; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s4 13673; GFX90A-NEXT: s_mov_b32 s9, s8 13674; GFX90A-NEXT: ;;#ASMSTART 13675; GFX90A-NEXT: ; use s[8:9] 13676; GFX90A-NEXT: ;;#ASMEND 13677; GFX90A-NEXT: s_setpc_b64 s[30:31] 13678; 13679; GFX940-LABEL: s_shuffle_v4i16_v4i16__0_0_0_0: 13680; GFX940: ; %bb.0: 13681; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13682; GFX940-NEXT: ;;#ASMSTART 13683; GFX940-NEXT: ; def s[0:1] 13684; GFX940-NEXT: ;;#ASMEND 
13685; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 13686; GFX940-NEXT: s_mov_b32 s9, s8 13687; GFX940-NEXT: ;;#ASMSTART 13688; GFX940-NEXT: ; use s[8:9] 13689; GFX940-NEXT: ;;#ASMEND 13690; GFX940-NEXT: s_setpc_b64 s[30:31] 13691 %vec0 = call <4 x i16> asm "; def $0", "=s"() 13692 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> zeroinitializer 13693 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 13694 ret void 13695} 13696 13697define void @s_shuffle_v4i16_v4i16__1_0_0_0() { 13698; GFX900-LABEL: s_shuffle_v4i16_v4i16__1_0_0_0: 13699; GFX900: ; %bb.0: 13700; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13701; GFX900-NEXT: ;;#ASMSTART 13702; GFX900-NEXT: ; def s[4:5] 13703; GFX900-NEXT: ;;#ASMEND 13704; GFX900-NEXT: s_lshr_b32 s5, s4, 16 13705; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4 13706; GFX900-NEXT: s_pack_ll_b32_b16 s9, s4, s4 13707; GFX900-NEXT: ;;#ASMSTART 13708; GFX900-NEXT: ; use s[8:9] 13709; GFX900-NEXT: ;;#ASMEND 13710; GFX900-NEXT: s_setpc_b64 s[30:31] 13711; 13712; GFX90A-LABEL: s_shuffle_v4i16_v4i16__1_0_0_0: 13713; GFX90A: ; %bb.0: 13714; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13715; GFX90A-NEXT: ;;#ASMSTART 13716; GFX90A-NEXT: ; def s[4:5] 13717; GFX90A-NEXT: ;;#ASMEND 13718; GFX90A-NEXT: s_lshr_b32 s5, s4, 16 13719; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4 13720; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s4, s4 13721; GFX90A-NEXT: ;;#ASMSTART 13722; GFX90A-NEXT: ; use s[8:9] 13723; GFX90A-NEXT: ;;#ASMEND 13724; GFX90A-NEXT: s_setpc_b64 s[30:31] 13725; 13726; GFX940-LABEL: s_shuffle_v4i16_v4i16__1_0_0_0: 13727; GFX940: ; %bb.0: 13728; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13729; GFX940-NEXT: ;;#ASMSTART 13730; GFX940-NEXT: ; def s[0:1] 13731; GFX940-NEXT: ;;#ASMEND 13732; GFX940-NEXT: s_lshr_b32 s1, s0, 16 13733; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 13734; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 13735; GFX940-NEXT: ;;#ASMSTART 13736; GFX940-NEXT: ; use s[8:9] 13737; 
GFX940-NEXT: ;;#ASMEND 13738; GFX940-NEXT: s_setpc_b64 s[30:31] 13739 %vec0 = call <4 x i16> asm "; def $0", "=s"() 13740 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 1, i32 0, i32 0, i32 0> 13741 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 13742 ret void 13743} 13744 13745define void @s_shuffle_v4i16_v4i16__2_0_0_0() { 13746; GFX900-LABEL: s_shuffle_v4i16_v4i16__2_0_0_0: 13747; GFX900: ; %bb.0: 13748; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13749; GFX900-NEXT: ;;#ASMSTART 13750; GFX900-NEXT: ; def s[4:5] 13751; GFX900-NEXT: ;;#ASMEND 13752; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4 13753; GFX900-NEXT: s_pack_ll_b32_b16 s9, s4, s4 13754; GFX900-NEXT: ;;#ASMSTART 13755; GFX900-NEXT: ; use s[8:9] 13756; GFX900-NEXT: ;;#ASMEND 13757; GFX900-NEXT: s_setpc_b64 s[30:31] 13758; 13759; GFX90A-LABEL: s_shuffle_v4i16_v4i16__2_0_0_0: 13760; GFX90A: ; %bb.0: 13761; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13762; GFX90A-NEXT: ;;#ASMSTART 13763; GFX90A-NEXT: ; def s[4:5] 13764; GFX90A-NEXT: ;;#ASMEND 13765; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4 13766; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s4, s4 13767; GFX90A-NEXT: ;;#ASMSTART 13768; GFX90A-NEXT: ; use s[8:9] 13769; GFX90A-NEXT: ;;#ASMEND 13770; GFX90A-NEXT: s_setpc_b64 s[30:31] 13771; 13772; GFX940-LABEL: s_shuffle_v4i16_v4i16__2_0_0_0: 13773; GFX940: ; %bb.0: 13774; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13775; GFX940-NEXT: ;;#ASMSTART 13776; GFX940-NEXT: ; def s[0:1] 13777; GFX940-NEXT: ;;#ASMEND 13778; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 13779; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 13780; GFX940-NEXT: ;;#ASMSTART 13781; GFX940-NEXT: ; use s[8:9] 13782; GFX940-NEXT: ;;#ASMEND 13783; GFX940-NEXT: s_setpc_b64 s[30:31] 13784 %vec0 = call <4 x i16> asm "; def $0", "=s"() 13785 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 2, i32 0, i32 0, i32 0> 13786 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> 
%shuf) 13787 ret void 13788} 13789 13790define void @s_shuffle_v4i16_v4i16__3_0_0_0() { 13791; GFX900-LABEL: s_shuffle_v4i16_v4i16__3_0_0_0: 13792; GFX900: ; %bb.0: 13793; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13794; GFX900-NEXT: ;;#ASMSTART 13795; GFX900-NEXT: ; def s[4:5] 13796; GFX900-NEXT: ;;#ASMEND 13797; GFX900-NEXT: s_lshr_b32 s5, s5, 16 13798; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4 13799; GFX900-NEXT: s_pack_ll_b32_b16 s9, s4, s4 13800; GFX900-NEXT: ;;#ASMSTART 13801; GFX900-NEXT: ; use s[8:9] 13802; GFX900-NEXT: ;;#ASMEND 13803; GFX900-NEXT: s_setpc_b64 s[30:31] 13804; 13805; GFX90A-LABEL: s_shuffle_v4i16_v4i16__3_0_0_0: 13806; GFX90A: ; %bb.0: 13807; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13808; GFX90A-NEXT: ;;#ASMSTART 13809; GFX90A-NEXT: ; def s[4:5] 13810; GFX90A-NEXT: ;;#ASMEND 13811; GFX90A-NEXT: s_lshr_b32 s5, s5, 16 13812; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4 13813; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s4, s4 13814; GFX90A-NEXT: ;;#ASMSTART 13815; GFX90A-NEXT: ; use s[8:9] 13816; GFX90A-NEXT: ;;#ASMEND 13817; GFX90A-NEXT: s_setpc_b64 s[30:31] 13818; 13819; GFX940-LABEL: s_shuffle_v4i16_v4i16__3_0_0_0: 13820; GFX940: ; %bb.0: 13821; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13822; GFX940-NEXT: ;;#ASMSTART 13823; GFX940-NEXT: ; def s[0:1] 13824; GFX940-NEXT: ;;#ASMEND 13825; GFX940-NEXT: s_lshr_b32 s1, s1, 16 13826; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 13827; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 13828; GFX940-NEXT: ;;#ASMSTART 13829; GFX940-NEXT: ; use s[8:9] 13830; GFX940-NEXT: ;;#ASMEND 13831; GFX940-NEXT: s_setpc_b64 s[30:31] 13832 %vec0 = call <4 x i16> asm "; def $0", "=s"() 13833 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 3, i32 0, i32 0, i32 0> 13834 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 13835 ret void 13836} 13837 13838define void @s_shuffle_v4i16_v4i16__4_0_0_0() { 13839; GFX900-LABEL: s_shuffle_v4i16_v4i16__4_0_0_0: 13840; GFX900: 
; %bb.0: 13841; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13842; GFX900-NEXT: ;;#ASMSTART 13843; GFX900-NEXT: ; def s[4:5] 13844; GFX900-NEXT: ;;#ASMEND 13845; GFX900-NEXT: s_pack_ll_b32_b16 s9, s4, s4 13846; GFX900-NEXT: s_lshl_b32 s8, s4, 16 13847; GFX900-NEXT: ;;#ASMSTART 13848; GFX900-NEXT: ; use s[8:9] 13849; GFX900-NEXT: ;;#ASMEND 13850; GFX900-NEXT: s_setpc_b64 s[30:31] 13851; 13852; GFX90A-LABEL: s_shuffle_v4i16_v4i16__4_0_0_0: 13853; GFX90A: ; %bb.0: 13854; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13855; GFX90A-NEXT: ;;#ASMSTART 13856; GFX90A-NEXT: ; def s[4:5] 13857; GFX90A-NEXT: ;;#ASMEND 13858; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s4, s4 13859; GFX90A-NEXT: s_lshl_b32 s8, s4, 16 13860; GFX90A-NEXT: ;;#ASMSTART 13861; GFX90A-NEXT: ; use s[8:9] 13862; GFX90A-NEXT: ;;#ASMEND 13863; GFX90A-NEXT: s_setpc_b64 s[30:31] 13864; 13865; GFX940-LABEL: s_shuffle_v4i16_v4i16__4_0_0_0: 13866; GFX940: ; %bb.0: 13867; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13868; GFX940-NEXT: ;;#ASMSTART 13869; GFX940-NEXT: ; def s[0:1] 13870; GFX940-NEXT: ;;#ASMEND 13871; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 13872; GFX940-NEXT: s_lshl_b32 s8, s0, 16 13873; GFX940-NEXT: ;;#ASMSTART 13874; GFX940-NEXT: ; use s[8:9] 13875; GFX940-NEXT: ;;#ASMEND 13876; GFX940-NEXT: s_setpc_b64 s[30:31] 13877 %vec0 = call <4 x i16> asm "; def $0", "=s"() 13878 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 4, i32 0, i32 0, i32 0> 13879 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 13880 ret void 13881} 13882 13883define void @s_shuffle_v4i16_v4i16__5_0_0_0() { 13884; GFX900-LABEL: s_shuffle_v4i16_v4i16__5_0_0_0: 13885; GFX900: ; %bb.0: 13886; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13887; GFX900-NEXT: ;;#ASMSTART 13888; GFX900-NEXT: ; def s[4:5] 13889; GFX900-NEXT: ;;#ASMEND 13890; GFX900-NEXT: ;;#ASMSTART 13891; GFX900-NEXT: ; def s[6:7] 13892; GFX900-NEXT: ;;#ASMEND 13893; GFX900-NEXT: s_lshr_b32 s5, s6, 16 
13894; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4 13895; GFX900-NEXT: s_pack_ll_b32_b16 s9, s4, s4 13896; GFX900-NEXT: ;;#ASMSTART 13897; GFX900-NEXT: ; use s[8:9] 13898; GFX900-NEXT: ;;#ASMEND 13899; GFX900-NEXT: s_setpc_b64 s[30:31] 13900; 13901; GFX90A-LABEL: s_shuffle_v4i16_v4i16__5_0_0_0: 13902; GFX90A: ; %bb.0: 13903; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13904; GFX90A-NEXT: ;;#ASMSTART 13905; GFX90A-NEXT: ; def s[4:5] 13906; GFX90A-NEXT: ;;#ASMEND 13907; GFX90A-NEXT: ;;#ASMSTART 13908; GFX90A-NEXT: ; def s[6:7] 13909; GFX90A-NEXT: ;;#ASMEND 13910; GFX90A-NEXT: s_lshr_b32 s5, s6, 16 13911; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4 13912; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s4, s4 13913; GFX90A-NEXT: ;;#ASMSTART 13914; GFX90A-NEXT: ; use s[8:9] 13915; GFX90A-NEXT: ;;#ASMEND 13916; GFX90A-NEXT: s_setpc_b64 s[30:31] 13917; 13918; GFX940-LABEL: s_shuffle_v4i16_v4i16__5_0_0_0: 13919; GFX940: ; %bb.0: 13920; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13921; GFX940-NEXT: ;;#ASMSTART 13922; GFX940-NEXT: ; def s[0:1] 13923; GFX940-NEXT: ;;#ASMEND 13924; GFX940-NEXT: ;;#ASMSTART 13925; GFX940-NEXT: ; def s[2:3] 13926; GFX940-NEXT: ;;#ASMEND 13927; GFX940-NEXT: s_lshr_b32 s1, s2, 16 13928; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 13929; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 13930; GFX940-NEXT: ;;#ASMSTART 13931; GFX940-NEXT: ; use s[8:9] 13932; GFX940-NEXT: ;;#ASMEND 13933; GFX940-NEXT: s_setpc_b64 s[30:31] 13934 %vec0 = call <4 x i16> asm "; def $0", "=s"() 13935 %vec1 = call <4 x i16> asm "; def $0", "=s"() 13936 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 5, i32 0, i32 0, i32 0> 13937 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 13938 ret void 13939} 13940 13941define void @s_shuffle_v4i16_v4i16__6_0_0_0() { 13942; GFX900-LABEL: s_shuffle_v4i16_v4i16__6_0_0_0: 13943; GFX900: ; %bb.0: 13944; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13945; GFX900-NEXT: ;;#ASMSTART 13946; GFX900-NEXT: 
; def s[4:5] 13947; GFX900-NEXT: ;;#ASMEND 13948; GFX900-NEXT: ;;#ASMSTART 13949; GFX900-NEXT: ; def s[6:7] 13950; GFX900-NEXT: ;;#ASMEND 13951; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s4 13952; GFX900-NEXT: s_pack_ll_b32_b16 s9, s4, s4 13953; GFX900-NEXT: ;;#ASMSTART 13954; GFX900-NEXT: ; use s[8:9] 13955; GFX900-NEXT: ;;#ASMEND 13956; GFX900-NEXT: s_setpc_b64 s[30:31] 13957; 13958; GFX90A-LABEL: s_shuffle_v4i16_v4i16__6_0_0_0: 13959; GFX90A: ; %bb.0: 13960; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13961; GFX90A-NEXT: ;;#ASMSTART 13962; GFX90A-NEXT: ; def s[4:5] 13963; GFX90A-NEXT: ;;#ASMEND 13964; GFX90A-NEXT: ;;#ASMSTART 13965; GFX90A-NEXT: ; def s[6:7] 13966; GFX90A-NEXT: ;;#ASMEND 13967; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s4 13968; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s4, s4 13969; GFX90A-NEXT: ;;#ASMSTART 13970; GFX90A-NEXT: ; use s[8:9] 13971; GFX90A-NEXT: ;;#ASMEND 13972; GFX90A-NEXT: s_setpc_b64 s[30:31] 13973; 13974; GFX940-LABEL: s_shuffle_v4i16_v4i16__6_0_0_0: 13975; GFX940: ; %bb.0: 13976; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13977; GFX940-NEXT: ;;#ASMSTART 13978; GFX940-NEXT: ; def s[0:1] 13979; GFX940-NEXT: ;;#ASMEND 13980; GFX940-NEXT: ;;#ASMSTART 13981; GFX940-NEXT: ; def s[2:3] 13982; GFX940-NEXT: ;;#ASMEND 13983; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 13984; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 13985; GFX940-NEXT: ;;#ASMSTART 13986; GFX940-NEXT: ; use s[8:9] 13987; GFX940-NEXT: ;;#ASMEND 13988; GFX940-NEXT: s_setpc_b64 s[30:31] 13989 %vec0 = call <4 x i16> asm "; def $0", "=s"() 13990 %vec1 = call <4 x i16> asm "; def $0", "=s"() 13991 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 6, i32 0, i32 0, i32 0> 13992 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 13993 ret void 13994} 13995 13996define void @s_shuffle_v4i16_v4i16__7_0_0_0() { 13997; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_0_0_0: 13998; GFX900: ; %bb.0: 13999; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) 14000; GFX900-NEXT: ;;#ASMSTART 14001; GFX900-NEXT: ; def s[4:5] 14002; GFX900-NEXT: ;;#ASMEND 14003; GFX900-NEXT: ;;#ASMSTART 14004; GFX900-NEXT: ; def s[6:7] 14005; GFX900-NEXT: ;;#ASMEND 14006; GFX900-NEXT: s_lshr_b32 s5, s7, 16 14007; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4 14008; GFX900-NEXT: s_pack_ll_b32_b16 s9, s4, s4 14009; GFX900-NEXT: ;;#ASMSTART 14010; GFX900-NEXT: ; use s[8:9] 14011; GFX900-NEXT: ;;#ASMEND 14012; GFX900-NEXT: s_setpc_b64 s[30:31] 14013; 14014; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_0_0_0: 14015; GFX90A: ; %bb.0: 14016; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14017; GFX90A-NEXT: ;;#ASMSTART 14018; GFX90A-NEXT: ; def s[4:5] 14019; GFX90A-NEXT: ;;#ASMEND 14020; GFX90A-NEXT: ;;#ASMSTART 14021; GFX90A-NEXT: ; def s[6:7] 14022; GFX90A-NEXT: ;;#ASMEND 14023; GFX90A-NEXT: s_lshr_b32 s5, s7, 16 14024; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4 14025; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s4, s4 14026; GFX90A-NEXT: ;;#ASMSTART 14027; GFX90A-NEXT: ; use s[8:9] 14028; GFX90A-NEXT: ;;#ASMEND 14029; GFX90A-NEXT: s_setpc_b64 s[30:31] 14030; 14031; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_0_0_0: 14032; GFX940: ; %bb.0: 14033; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14034; GFX940-NEXT: ;;#ASMSTART 14035; GFX940-NEXT: ; def s[0:1] 14036; GFX940-NEXT: ;;#ASMEND 14037; GFX940-NEXT: ;;#ASMSTART 14038; GFX940-NEXT: ; def s[2:3] 14039; GFX940-NEXT: ;;#ASMEND 14040; GFX940-NEXT: s_lshr_b32 s1, s3, 16 14041; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 14042; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 14043; GFX940-NEXT: ;;#ASMSTART 14044; GFX940-NEXT: ; use s[8:9] 14045; GFX940-NEXT: ;;#ASMEND 14046; GFX940-NEXT: s_setpc_b64 s[30:31] 14047 %vec0 = call <4 x i16> asm "; def $0", "=s"() 14048 %vec1 = call <4 x i16> asm "; def $0", "=s"() 14049 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 0, i32 0, i32 0> 14050 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 14051 ret void 14052} 
14053 14054define void @s_shuffle_v4i16_v4i16__7_u_0_0() { 14055; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_u_0_0: 14056; GFX900: ; %bb.0: 14057; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14058; GFX900-NEXT: ;;#ASMSTART 14059; GFX900-NEXT: ; def s[4:5] 14060; GFX900-NEXT: ;;#ASMEND 14061; GFX900-NEXT: ;;#ASMSTART 14062; GFX900-NEXT: ; def s[6:7] 14063; GFX900-NEXT: ;;#ASMEND 14064; GFX900-NEXT: s_lshr_b32 s8, s7, 16 14065; GFX900-NEXT: s_pack_ll_b32_b16 s9, s4, s4 14066; GFX900-NEXT: ;;#ASMSTART 14067; GFX900-NEXT: ; use s[8:9] 14068; GFX900-NEXT: ;;#ASMEND 14069; GFX900-NEXT: s_setpc_b64 s[30:31] 14070; 14071; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_u_0_0: 14072; GFX90A: ; %bb.0: 14073; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14074; GFX90A-NEXT: ;;#ASMSTART 14075; GFX90A-NEXT: ; def s[4:5] 14076; GFX90A-NEXT: ;;#ASMEND 14077; GFX90A-NEXT: ;;#ASMSTART 14078; GFX90A-NEXT: ; def s[6:7] 14079; GFX90A-NEXT: ;;#ASMEND 14080; GFX90A-NEXT: s_lshr_b32 s8, s7, 16 14081; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s4, s4 14082; GFX90A-NEXT: ;;#ASMSTART 14083; GFX90A-NEXT: ; use s[8:9] 14084; GFX90A-NEXT: ;;#ASMEND 14085; GFX90A-NEXT: s_setpc_b64 s[30:31] 14086; 14087; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_u_0_0: 14088; GFX940: ; %bb.0: 14089; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14090; GFX940-NEXT: ;;#ASMSTART 14091; GFX940-NEXT: ; def s[0:1] 14092; GFX940-NEXT: ;;#ASMEND 14093; GFX940-NEXT: ;;#ASMSTART 14094; GFX940-NEXT: ; def s[2:3] 14095; GFX940-NEXT: ;;#ASMEND 14096; GFX940-NEXT: s_lshr_b32 s8, s3, 16 14097; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 14098; GFX940-NEXT: ;;#ASMSTART 14099; GFX940-NEXT: ; use s[8:9] 14100; GFX940-NEXT: ;;#ASMEND 14101; GFX940-NEXT: s_setpc_b64 s[30:31] 14102 %vec0 = call <4 x i16> asm "; def $0", "=s"() 14103 %vec1 = call <4 x i16> asm "; def $0", "=s"() 14104 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 poison, i32 0, i32 0> 14105 call void asm sideeffect "; use $0", 
"{s[8:9]}"(<4 x i16> %shuf) 14106 ret void 14107} 14108 14109define void @s_shuffle_v4i16_v4i16__7_1_0_0() { 14110; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_1_0_0: 14111; GFX900: ; %bb.0: 14112; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14113; GFX900-NEXT: ;;#ASMSTART 14114; GFX900-NEXT: ; def s[4:5] 14115; GFX900-NEXT: ;;#ASMEND 14116; GFX900-NEXT: ;;#ASMSTART 14117; GFX900-NEXT: ; def s[6:7] 14118; GFX900-NEXT: ;;#ASMEND 14119; GFX900-NEXT: s_pack_hh_b32_b16 s8, s7, s4 14120; GFX900-NEXT: s_pack_ll_b32_b16 s9, s4, s4 14121; GFX900-NEXT: ;;#ASMSTART 14122; GFX900-NEXT: ; use s[8:9] 14123; GFX900-NEXT: ;;#ASMEND 14124; GFX900-NEXT: s_setpc_b64 s[30:31] 14125; 14126; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_1_0_0: 14127; GFX90A: ; %bb.0: 14128; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14129; GFX90A-NEXT: ;;#ASMSTART 14130; GFX90A-NEXT: ; def s[4:5] 14131; GFX90A-NEXT: ;;#ASMEND 14132; GFX90A-NEXT: ;;#ASMSTART 14133; GFX90A-NEXT: ; def s[6:7] 14134; GFX90A-NEXT: ;;#ASMEND 14135; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s7, s4 14136; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s4, s4 14137; GFX90A-NEXT: ;;#ASMSTART 14138; GFX90A-NEXT: ; use s[8:9] 14139; GFX90A-NEXT: ;;#ASMEND 14140; GFX90A-NEXT: s_setpc_b64 s[30:31] 14141; 14142; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_1_0_0: 14143; GFX940: ; %bb.0: 14144; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14145; GFX940-NEXT: ;;#ASMSTART 14146; GFX940-NEXT: ; def s[0:1] 14147; GFX940-NEXT: ;;#ASMEND 14148; GFX940-NEXT: ;;#ASMSTART 14149; GFX940-NEXT: ; def s[2:3] 14150; GFX940-NEXT: ;;#ASMEND 14151; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s0 14152; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 14153; GFX940-NEXT: ;;#ASMSTART 14154; GFX940-NEXT: ; use s[8:9] 14155; GFX940-NEXT: ;;#ASMEND 14156; GFX940-NEXT: s_setpc_b64 s[30:31] 14157 %vec0 = call <4 x i16> asm "; def $0", "=s"() 14158 %vec1 = call <4 x i16> asm "; def $0", "=s"() 14159 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 1, 
i32 0, i32 0> 14160 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 14161 ret void 14162} 14163 14164define void @s_shuffle_v4i16_v4i16__7_2_0_0() { 14165; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_2_0_0: 14166; GFX900: ; %bb.0: 14167; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14168; GFX900-NEXT: ;;#ASMSTART 14169; GFX900-NEXT: ; def s[6:7] 14170; GFX900-NEXT: ;;#ASMEND 14171; GFX900-NEXT: s_lshr_b32 s6, s7, 16 14172; GFX900-NEXT: ;;#ASMSTART 14173; GFX900-NEXT: ; def s[4:5] 14174; GFX900-NEXT: ;;#ASMEND 14175; GFX900-NEXT: s_pack_ll_b32_b16 s8, s6, s5 14176; GFX900-NEXT: s_pack_ll_b32_b16 s9, s4, s4 14177; GFX900-NEXT: ;;#ASMSTART 14178; GFX900-NEXT: ; use s[8:9] 14179; GFX900-NEXT: ;;#ASMEND 14180; GFX900-NEXT: s_setpc_b64 s[30:31] 14181; 14182; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_2_0_0: 14183; GFX90A: ; %bb.0: 14184; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14185; GFX90A-NEXT: ;;#ASMSTART 14186; GFX90A-NEXT: ; def s[6:7] 14187; GFX90A-NEXT: ;;#ASMEND 14188; GFX90A-NEXT: s_lshr_b32 s6, s7, 16 14189; GFX90A-NEXT: ;;#ASMSTART 14190; GFX90A-NEXT: ; def s[4:5] 14191; GFX90A-NEXT: ;;#ASMEND 14192; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s6, s5 14193; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s4, s4 14194; GFX90A-NEXT: ;;#ASMSTART 14195; GFX90A-NEXT: ; use s[8:9] 14196; GFX90A-NEXT: ;;#ASMEND 14197; GFX90A-NEXT: s_setpc_b64 s[30:31] 14198; 14199; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_2_0_0: 14200; GFX940: ; %bb.0: 14201; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14202; GFX940-NEXT: ;;#ASMSTART 14203; GFX940-NEXT: ; def s[2:3] 14204; GFX940-NEXT: ;;#ASMEND 14205; GFX940-NEXT: s_lshr_b32 s2, s3, 16 14206; GFX940-NEXT: ;;#ASMSTART 14207; GFX940-NEXT: ; def s[0:1] 14208; GFX940-NEXT: ;;#ASMEND 14209; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s1 14210; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 14211; GFX940-NEXT: ;;#ASMSTART 14212; GFX940-NEXT: ; use s[8:9] 14213; GFX940-NEXT: ;;#ASMEND 14214; GFX940-NEXT: s_setpc_b64 s[30:31] 14215 
%vec0 = call <4 x i16> asm "; def $0", "=s"() 14216 %vec1 = call <4 x i16> asm "; def $0", "=s"() 14217 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 2, i32 0, i32 0> 14218 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 14219 ret void 14220} 14221 14222define void @s_shuffle_v4i16_v4i16__7_3_0_0() { 14223; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_3_0_0: 14224; GFX900: ; %bb.0: 14225; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14226; GFX900-NEXT: ;;#ASMSTART 14227; GFX900-NEXT: ; def s[4:5] 14228; GFX900-NEXT: ;;#ASMEND 14229; GFX900-NEXT: ;;#ASMSTART 14230; GFX900-NEXT: ; def s[6:7] 14231; GFX900-NEXT: ;;#ASMEND 14232; GFX900-NEXT: s_pack_hh_b32_b16 s8, s7, s5 14233; GFX900-NEXT: s_pack_ll_b32_b16 s9, s4, s4 14234; GFX900-NEXT: ;;#ASMSTART 14235; GFX900-NEXT: ; use s[8:9] 14236; GFX900-NEXT: ;;#ASMEND 14237; GFX900-NEXT: s_setpc_b64 s[30:31] 14238; 14239; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_3_0_0: 14240; GFX90A: ; %bb.0: 14241; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14242; GFX90A-NEXT: ;;#ASMSTART 14243; GFX90A-NEXT: ; def s[4:5] 14244; GFX90A-NEXT: ;;#ASMEND 14245; GFX90A-NEXT: ;;#ASMSTART 14246; GFX90A-NEXT: ; def s[6:7] 14247; GFX90A-NEXT: ;;#ASMEND 14248; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s7, s5 14249; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s4, s4 14250; GFX90A-NEXT: ;;#ASMSTART 14251; GFX90A-NEXT: ; use s[8:9] 14252; GFX90A-NEXT: ;;#ASMEND 14253; GFX90A-NEXT: s_setpc_b64 s[30:31] 14254; 14255; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_3_0_0: 14256; GFX940: ; %bb.0: 14257; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14258; GFX940-NEXT: ;;#ASMSTART 14259; GFX940-NEXT: ; def s[0:1] 14260; GFX940-NEXT: ;;#ASMEND 14261; GFX940-NEXT: ;;#ASMSTART 14262; GFX940-NEXT: ; def s[2:3] 14263; GFX940-NEXT: ;;#ASMEND 14264; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s1 14265; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 14266; GFX940-NEXT: ;;#ASMSTART 14267; GFX940-NEXT: ; use s[8:9] 14268; GFX940-NEXT: 
;;#ASMEND 14269; GFX940-NEXT: s_setpc_b64 s[30:31] 14270 %vec0 = call <4 x i16> asm "; def $0", "=s"() 14271 %vec1 = call <4 x i16> asm "; def $0", "=s"() 14272 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 3, i32 0, i32 0> 14273 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 14274 ret void 14275} 14276 14277define void @s_shuffle_v4i16_v4i16__7_4_0_0() { 14278; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_4_0_0: 14279; GFX900: ; %bb.0: 14280; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14281; GFX900-NEXT: ;;#ASMSTART 14282; GFX900-NEXT: ; def s[4:5] 14283; GFX900-NEXT: ;;#ASMEND 14284; GFX900-NEXT: ;;#ASMSTART 14285; GFX900-NEXT: ; def s[6:7] 14286; GFX900-NEXT: ;;#ASMEND 14287; GFX900-NEXT: s_lshr_b32 s5, s7, 16 14288; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s6 14289; GFX900-NEXT: s_pack_ll_b32_b16 s9, s4, s4 14290; GFX900-NEXT: ;;#ASMSTART 14291; GFX900-NEXT: ; use s[8:9] 14292; GFX900-NEXT: ;;#ASMEND 14293; GFX900-NEXT: s_setpc_b64 s[30:31] 14294; 14295; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_4_0_0: 14296; GFX90A: ; %bb.0: 14297; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14298; GFX90A-NEXT: ;;#ASMSTART 14299; GFX90A-NEXT: ; def s[4:5] 14300; GFX90A-NEXT: ;;#ASMEND 14301; GFX90A-NEXT: ;;#ASMSTART 14302; GFX90A-NEXT: ; def s[6:7] 14303; GFX90A-NEXT: ;;#ASMEND 14304; GFX90A-NEXT: s_lshr_b32 s5, s7, 16 14305; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s6 14306; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s4, s4 14307; GFX90A-NEXT: ;;#ASMSTART 14308; GFX90A-NEXT: ; use s[8:9] 14309; GFX90A-NEXT: ;;#ASMEND 14310; GFX90A-NEXT: s_setpc_b64 s[30:31] 14311; 14312; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_4_0_0: 14313; GFX940: ; %bb.0: 14314; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14315; GFX940-NEXT: ;;#ASMSTART 14316; GFX940-NEXT: ; def s[0:1] 14317; GFX940-NEXT: ;;#ASMEND 14318; GFX940-NEXT: ;;#ASMSTART 14319; GFX940-NEXT: ; def s[2:3] 14320; GFX940-NEXT: ;;#ASMEND 14321; GFX940-NEXT: s_lshr_b32 s1, s3, 16 
14322; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s2 14323; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 14324; GFX940-NEXT: ;;#ASMSTART 14325; GFX940-NEXT: ; use s[8:9] 14326; GFX940-NEXT: ;;#ASMEND 14327; GFX940-NEXT: s_setpc_b64 s[30:31] 14328 %vec0 = call <4 x i16> asm "; def $0", "=s"() 14329 %vec1 = call <4 x i16> asm "; def $0", "=s"() 14330 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 4, i32 0, i32 0> 14331 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 14332 ret void 14333} 14334 14335define void @s_shuffle_v4i16_v4i16__7_5_0_0() { 14336; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_5_0_0: 14337; GFX900: ; %bb.0: 14338; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14339; GFX900-NEXT: ;;#ASMSTART 14340; GFX900-NEXT: ; def s[4:5] 14341; GFX900-NEXT: ;;#ASMEND 14342; GFX900-NEXT: ;;#ASMSTART 14343; GFX900-NEXT: ; def s[6:7] 14344; GFX900-NEXT: ;;#ASMEND 14345; GFX900-NEXT: s_pack_hh_b32_b16 s8, s7, s6 14346; GFX900-NEXT: s_pack_ll_b32_b16 s9, s4, s4 14347; GFX900-NEXT: ;;#ASMSTART 14348; GFX900-NEXT: ; use s[8:9] 14349; GFX900-NEXT: ;;#ASMEND 14350; GFX900-NEXT: s_setpc_b64 s[30:31] 14351; 14352; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_5_0_0: 14353; GFX90A: ; %bb.0: 14354; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14355; GFX90A-NEXT: ;;#ASMSTART 14356; GFX90A-NEXT: ; def s[4:5] 14357; GFX90A-NEXT: ;;#ASMEND 14358; GFX90A-NEXT: ;;#ASMSTART 14359; GFX90A-NEXT: ; def s[6:7] 14360; GFX90A-NEXT: ;;#ASMEND 14361; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s7, s6 14362; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s4, s4 14363; GFX90A-NEXT: ;;#ASMSTART 14364; GFX90A-NEXT: ; use s[8:9] 14365; GFX90A-NEXT: ;;#ASMEND 14366; GFX90A-NEXT: s_setpc_b64 s[30:31] 14367; 14368; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_5_0_0: 14369; GFX940: ; %bb.0: 14370; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14371; GFX940-NEXT: ;;#ASMSTART 14372; GFX940-NEXT: ; def s[0:1] 14373; GFX940-NEXT: ;;#ASMEND 14374; GFX940-NEXT: ;;#ASMSTART 14375; 
GFX940-NEXT: ; def s[2:3] 14376; GFX940-NEXT: ;;#ASMEND 14377; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s2 14378; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 14379; GFX940-NEXT: ;;#ASMSTART 14380; GFX940-NEXT: ; use s[8:9] 14381; GFX940-NEXT: ;;#ASMEND 14382; GFX940-NEXT: s_setpc_b64 s[30:31] 14383 %vec0 = call <4 x i16> asm "; def $0", "=s"() 14384 %vec1 = call <4 x i16> asm "; def $0", "=s"() 14385 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 5, i32 0, i32 0> 14386 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 14387 ret void 14388} 14389 14390define void @s_shuffle_v4i16_v4i16__7_6_0_0() { 14391; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_6_0_0: 14392; GFX900: ; %bb.0: 14393; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14394; GFX900-NEXT: ;;#ASMSTART 14395; GFX900-NEXT: ; def s[4:5] 14396; GFX900-NEXT: ;;#ASMEND 14397; GFX900-NEXT: ;;#ASMSTART 14398; GFX900-NEXT: ; def s[6:7] 14399; GFX900-NEXT: ;;#ASMEND 14400; GFX900-NEXT: s_lshr_b32 s5, s7, 16 14401; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s7 14402; GFX900-NEXT: s_pack_ll_b32_b16 s9, s4, s4 14403; GFX900-NEXT: ;;#ASMSTART 14404; GFX900-NEXT: ; use s[8:9] 14405; GFX900-NEXT: ;;#ASMEND 14406; GFX900-NEXT: s_setpc_b64 s[30:31] 14407; 14408; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_6_0_0: 14409; GFX90A: ; %bb.0: 14410; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14411; GFX90A-NEXT: ;;#ASMSTART 14412; GFX90A-NEXT: ; def s[4:5] 14413; GFX90A-NEXT: ;;#ASMEND 14414; GFX90A-NEXT: ;;#ASMSTART 14415; GFX90A-NEXT: ; def s[6:7] 14416; GFX90A-NEXT: ;;#ASMEND 14417; GFX90A-NEXT: s_lshr_b32 s5, s7, 16 14418; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s7 14419; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s4, s4 14420; GFX90A-NEXT: ;;#ASMSTART 14421; GFX90A-NEXT: ; use s[8:9] 14422; GFX90A-NEXT: ;;#ASMEND 14423; GFX90A-NEXT: s_setpc_b64 s[30:31] 14424; 14425; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_6_0_0: 14426; GFX940: ; %bb.0: 14427; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) 14428; GFX940-NEXT: ;;#ASMSTART 14429; GFX940-NEXT: ; def s[0:1] 14430; GFX940-NEXT: ;;#ASMEND 14431; GFX940-NEXT: ;;#ASMSTART 14432; GFX940-NEXT: ; def s[2:3] 14433; GFX940-NEXT: ;;#ASMEND 14434; GFX940-NEXT: s_lshr_b32 s1, s3, 16 14435; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s3 14436; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 14437; GFX940-NEXT: ;;#ASMSTART 14438; GFX940-NEXT: ; use s[8:9] 14439; GFX940-NEXT: ;;#ASMEND 14440; GFX940-NEXT: s_setpc_b64 s[30:31] 14441 %vec0 = call <4 x i16> asm "; def $0", "=s"() 14442 %vec1 = call <4 x i16> asm "; def $0", "=s"() 14443 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 6, i32 0, i32 0> 14444 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 14445 ret void 14446} 14447 14448define void @s_shuffle_v4i16_v4i16__7_7_0_0() { 14449; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_0_0: 14450; GFX900: ; %bb.0: 14451; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14452; GFX900-NEXT: ;;#ASMSTART 14453; GFX900-NEXT: ; def s[4:5] 14454; GFX900-NEXT: ;;#ASMEND 14455; GFX900-NEXT: ;;#ASMSTART 14456; GFX900-NEXT: ; def s[6:7] 14457; GFX900-NEXT: ;;#ASMEND 14458; GFX900-NEXT: s_pack_ll_b32_b16 s9, s4, s4 14459; GFX900-NEXT: s_pack_hh_b32_b16 s8, s7, s7 14460; GFX900-NEXT: ;;#ASMSTART 14461; GFX900-NEXT: ; use s[8:9] 14462; GFX900-NEXT: ;;#ASMEND 14463; GFX900-NEXT: s_setpc_b64 s[30:31] 14464; 14465; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_0_0: 14466; GFX90A: ; %bb.0: 14467; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14468; GFX90A-NEXT: ;;#ASMSTART 14469; GFX90A-NEXT: ; def s[4:5] 14470; GFX90A-NEXT: ;;#ASMEND 14471; GFX90A-NEXT: ;;#ASMSTART 14472; GFX90A-NEXT: ; def s[6:7] 14473; GFX90A-NEXT: ;;#ASMEND 14474; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s4, s4 14475; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s7, s7 14476; GFX90A-NEXT: ;;#ASMSTART 14477; GFX90A-NEXT: ; use s[8:9] 14478; GFX90A-NEXT: ;;#ASMEND 14479; GFX90A-NEXT: s_setpc_b64 s[30:31] 14480; 14481; GFX940-LABEL: 
s_shuffle_v4i16_v4i16__7_7_0_0: 14482; GFX940: ; %bb.0: 14483; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14484; GFX940-NEXT: ;;#ASMSTART 14485; GFX940-NEXT: ; def s[0:1] 14486; GFX940-NEXT: ;;#ASMEND 14487; GFX940-NEXT: ;;#ASMSTART 14488; GFX940-NEXT: ; def s[2:3] 14489; GFX940-NEXT: ;;#ASMEND 14490; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 14491; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3 14492; GFX940-NEXT: ;;#ASMSTART 14493; GFX940-NEXT: ; use s[8:9] 14494; GFX940-NEXT: ;;#ASMEND 14495; GFX940-NEXT: s_setpc_b64 s[30:31] 14496 %vec0 = call <4 x i16> asm "; def $0", "=s"() 14497 %vec1 = call <4 x i16> asm "; def $0", "=s"() 14498 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 0, i32 0> 14499 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 14500 ret void 14501} 14502 14503define void @s_shuffle_v4i16_v4i16__7_7_u_0() { 14504; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_u_0: 14505; GFX900: ; %bb.0: 14506; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14507; GFX900-NEXT: ;;#ASMSTART 14508; GFX900-NEXT: ; def s[4:5] 14509; GFX900-NEXT: ;;#ASMEND 14510; GFX900-NEXT: ;;#ASMSTART 14511; GFX900-NEXT: ; def s[6:7] 14512; GFX900-NEXT: ;;#ASMEND 14513; GFX900-NEXT: s_lshl_b32 s9, s4, 16 14514; GFX900-NEXT: s_pack_hh_b32_b16 s8, s7, s7 14515; GFX900-NEXT: ;;#ASMSTART 14516; GFX900-NEXT: ; use s[8:9] 14517; GFX900-NEXT: ;;#ASMEND 14518; GFX900-NEXT: s_setpc_b64 s[30:31] 14519; 14520; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_u_0: 14521; GFX90A: ; %bb.0: 14522; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14523; GFX90A-NEXT: ;;#ASMSTART 14524; GFX90A-NEXT: ; def s[4:5] 14525; GFX90A-NEXT: ;;#ASMEND 14526; GFX90A-NEXT: ;;#ASMSTART 14527; GFX90A-NEXT: ; def s[6:7] 14528; GFX90A-NEXT: ;;#ASMEND 14529; GFX90A-NEXT: s_lshl_b32 s9, s4, 16 14530; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s7, s7 14531; GFX90A-NEXT: ;;#ASMSTART 14532; GFX90A-NEXT: ; use s[8:9] 14533; GFX90A-NEXT: ;;#ASMEND 14534; GFX90A-NEXT: 
s_setpc_b64 s[30:31] 14535; 14536; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_u_0: 14537; GFX940: ; %bb.0: 14538; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14539; GFX940-NEXT: ;;#ASMSTART 14540; GFX940-NEXT: ; def s[0:1] 14541; GFX940-NEXT: ;;#ASMEND 14542; GFX940-NEXT: ;;#ASMSTART 14543; GFX940-NEXT: ; def s[2:3] 14544; GFX940-NEXT: ;;#ASMEND 14545; GFX940-NEXT: s_lshl_b32 s9, s0, 16 14546; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3 14547; GFX940-NEXT: ;;#ASMSTART 14548; GFX940-NEXT: ; use s[8:9] 14549; GFX940-NEXT: ;;#ASMEND 14550; GFX940-NEXT: s_setpc_b64 s[30:31] 14551 %vec0 = call <4 x i16> asm "; def $0", "=s"() 14552 %vec1 = call <4 x i16> asm "; def $0", "=s"() 14553 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 poison, i32 0> 14554 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 14555 ret void 14556} 14557 14558define void @s_shuffle_v4i16_v4i16__7_7_1_0() { 14559; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_1_0: 14560; GFX900: ; %bb.0: 14561; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14562; GFX900-NEXT: ;;#ASMSTART 14563; GFX900-NEXT: ; def s[4:5] 14564; GFX900-NEXT: ;;#ASMEND 14565; GFX900-NEXT: s_lshr_b32 s5, s4, 16 14566; GFX900-NEXT: ;;#ASMSTART 14567; GFX900-NEXT: ; def s[6:7] 14568; GFX900-NEXT: ;;#ASMEND 14569; GFX900-NEXT: s_pack_ll_b32_b16 s9, s5, s4 14570; GFX900-NEXT: s_pack_hh_b32_b16 s8, s7, s7 14571; GFX900-NEXT: ;;#ASMSTART 14572; GFX900-NEXT: ; use s[8:9] 14573; GFX900-NEXT: ;;#ASMEND 14574; GFX900-NEXT: s_setpc_b64 s[30:31] 14575; 14576; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_1_0: 14577; GFX90A: ; %bb.0: 14578; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14579; GFX90A-NEXT: ;;#ASMSTART 14580; GFX90A-NEXT: ; def s[4:5] 14581; GFX90A-NEXT: ;;#ASMEND 14582; GFX90A-NEXT: s_lshr_b32 s5, s4, 16 14583; GFX90A-NEXT: ;;#ASMSTART 14584; GFX90A-NEXT: ; def s[6:7] 14585; GFX90A-NEXT: ;;#ASMEND 14586; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s5, s4 14587; GFX90A-NEXT: 
s_pack_hh_b32_b16 s8, s7, s7 14588; GFX90A-NEXT: ;;#ASMSTART 14589; GFX90A-NEXT: ; use s[8:9] 14590; GFX90A-NEXT: ;;#ASMEND 14591; GFX90A-NEXT: s_setpc_b64 s[30:31] 14592; 14593; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_1_0: 14594; GFX940: ; %bb.0: 14595; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14596; GFX940-NEXT: ;;#ASMSTART 14597; GFX940-NEXT: ; def s[0:1] 14598; GFX940-NEXT: ;;#ASMEND 14599; GFX940-NEXT: s_lshr_b32 s1, s0, 16 14600; GFX940-NEXT: ;;#ASMSTART 14601; GFX940-NEXT: ; def s[2:3] 14602; GFX940-NEXT: ;;#ASMEND 14603; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s0 14604; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3 14605; GFX940-NEXT: ;;#ASMSTART 14606; GFX940-NEXT: ; use s[8:9] 14607; GFX940-NEXT: ;;#ASMEND 14608; GFX940-NEXT: s_setpc_b64 s[30:31] 14609 %vec0 = call <4 x i16> asm "; def $0", "=s"() 14610 %vec1 = call <4 x i16> asm "; def $0", "=s"() 14611 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 1, i32 0> 14612 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 14613 ret void 14614} 14615 14616define void @s_shuffle_v4i16_v4i16__7_7_2_0() { 14617; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_2_0: 14618; GFX900: ; %bb.0: 14619; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14620; GFX900-NEXT: ;;#ASMSTART 14621; GFX900-NEXT: ; def s[4:5] 14622; GFX900-NEXT: ;;#ASMEND 14623; GFX900-NEXT: ;;#ASMSTART 14624; GFX900-NEXT: ; def s[6:7] 14625; GFX900-NEXT: ;;#ASMEND 14626; GFX900-NEXT: s_pack_ll_b32_b16 s9, s5, s4 14627; GFX900-NEXT: s_pack_hh_b32_b16 s8, s7, s7 14628; GFX900-NEXT: ;;#ASMSTART 14629; GFX900-NEXT: ; use s[8:9] 14630; GFX900-NEXT: ;;#ASMEND 14631; GFX900-NEXT: s_setpc_b64 s[30:31] 14632; 14633; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_2_0: 14634; GFX90A: ; %bb.0: 14635; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14636; GFX90A-NEXT: ;;#ASMSTART 14637; GFX90A-NEXT: ; def s[4:5] 14638; GFX90A-NEXT: ;;#ASMEND 14639; GFX90A-NEXT: ;;#ASMSTART 14640; GFX90A-NEXT: ; def s[6:7] 
14641; GFX90A-NEXT: ;;#ASMEND 14642; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s5, s4 14643; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s7, s7 14644; GFX90A-NEXT: ;;#ASMSTART 14645; GFX90A-NEXT: ; use s[8:9] 14646; GFX90A-NEXT: ;;#ASMEND 14647; GFX90A-NEXT: s_setpc_b64 s[30:31] 14648; 14649; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_2_0: 14650; GFX940: ; %bb.0: 14651; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14652; GFX940-NEXT: ;;#ASMSTART 14653; GFX940-NEXT: ; def s[0:1] 14654; GFX940-NEXT: ;;#ASMEND 14655; GFX940-NEXT: ;;#ASMSTART 14656; GFX940-NEXT: ; def s[2:3] 14657; GFX940-NEXT: ;;#ASMEND 14658; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s0 14659; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3 14660; GFX940-NEXT: ;;#ASMSTART 14661; GFX940-NEXT: ; use s[8:9] 14662; GFX940-NEXT: ;;#ASMEND 14663; GFX940-NEXT: s_setpc_b64 s[30:31] 14664 %vec0 = call <4 x i16> asm "; def $0", "=s"() 14665 %vec1 = call <4 x i16> asm "; def $0", "=s"() 14666 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 2, i32 0> 14667 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 14668 ret void 14669} 14670 14671define void @s_shuffle_v4i16_v4i16__7_7_3_0() { 14672; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_3_0: 14673; GFX900: ; %bb.0: 14674; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14675; GFX900-NEXT: ;;#ASMSTART 14676; GFX900-NEXT: ; def s[4:5] 14677; GFX900-NEXT: ;;#ASMEND 14678; GFX900-NEXT: s_lshr_b32 s5, s5, 16 14679; GFX900-NEXT: ;;#ASMSTART 14680; GFX900-NEXT: ; def s[6:7] 14681; GFX900-NEXT: ;;#ASMEND 14682; GFX900-NEXT: s_pack_ll_b32_b16 s9, s5, s4 14683; GFX900-NEXT: s_pack_hh_b32_b16 s8, s7, s7 14684; GFX900-NEXT: ;;#ASMSTART 14685; GFX900-NEXT: ; use s[8:9] 14686; GFX900-NEXT: ;;#ASMEND 14687; GFX900-NEXT: s_setpc_b64 s[30:31] 14688; 14689; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_3_0: 14690; GFX90A: ; %bb.0: 14691; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14692; GFX90A-NEXT: ;;#ASMSTART 14693; GFX90A-NEXT: ; def 
s[4:5] 14694; GFX90A-NEXT: ;;#ASMEND 14695; GFX90A-NEXT: s_lshr_b32 s5, s5, 16 14696; GFX90A-NEXT: ;;#ASMSTART 14697; GFX90A-NEXT: ; def s[6:7] 14698; GFX90A-NEXT: ;;#ASMEND 14699; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s5, s4 14700; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s7, s7 14701; GFX90A-NEXT: ;;#ASMSTART 14702; GFX90A-NEXT: ; use s[8:9] 14703; GFX90A-NEXT: ;;#ASMEND 14704; GFX90A-NEXT: s_setpc_b64 s[30:31] 14705; 14706; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_3_0: 14707; GFX940: ; %bb.0: 14708; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14709; GFX940-NEXT: ;;#ASMSTART 14710; GFX940-NEXT: ; def s[0:1] 14711; GFX940-NEXT: ;;#ASMEND 14712; GFX940-NEXT: s_lshr_b32 s1, s1, 16 14713; GFX940-NEXT: ;;#ASMSTART 14714; GFX940-NEXT: ; def s[2:3] 14715; GFX940-NEXT: ;;#ASMEND 14716; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s0 14717; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3 14718; GFX940-NEXT: ;;#ASMSTART 14719; GFX940-NEXT: ; use s[8:9] 14720; GFX940-NEXT: ;;#ASMEND 14721; GFX940-NEXT: s_setpc_b64 s[30:31] 14722 %vec0 = call <4 x i16> asm "; def $0", "=s"() 14723 %vec1 = call <4 x i16> asm "; def $0", "=s"() 14724 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 3, i32 0> 14725 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 14726 ret void 14727} 14728 14729define void @s_shuffle_v4i16_v4i16__7_7_4_0() { 14730; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_4_0: 14731; GFX900: ; %bb.0: 14732; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14733; GFX900-NEXT: ;;#ASMSTART 14734; GFX900-NEXT: ; def s[4:5] 14735; GFX900-NEXT: ;;#ASMEND 14736; GFX900-NEXT: ;;#ASMSTART 14737; GFX900-NEXT: ; def s[6:7] 14738; GFX900-NEXT: ;;#ASMEND 14739; GFX900-NEXT: s_pack_ll_b32_b16 s9, s6, s4 14740; GFX900-NEXT: s_pack_hh_b32_b16 s8, s7, s7 14741; GFX900-NEXT: ;;#ASMSTART 14742; GFX900-NEXT: ; use s[8:9] 14743; GFX900-NEXT: ;;#ASMEND 14744; GFX900-NEXT: s_setpc_b64 s[30:31] 14745; 14746; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_4_0: 
14747; GFX90A: ; %bb.0: 14748; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14749; GFX90A-NEXT: ;;#ASMSTART 14750; GFX90A-NEXT: ; def s[4:5] 14751; GFX90A-NEXT: ;;#ASMEND 14752; GFX90A-NEXT: ;;#ASMSTART 14753; GFX90A-NEXT: ; def s[6:7] 14754; GFX90A-NEXT: ;;#ASMEND 14755; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s6, s4 14756; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s7, s7 14757; GFX90A-NEXT: ;;#ASMSTART 14758; GFX90A-NEXT: ; use s[8:9] 14759; GFX90A-NEXT: ;;#ASMEND 14760; GFX90A-NEXT: s_setpc_b64 s[30:31] 14761; 14762; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_4_0: 14763; GFX940: ; %bb.0: 14764; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14765; GFX940-NEXT: ;;#ASMSTART 14766; GFX940-NEXT: ; def s[0:1] 14767; GFX940-NEXT: ;;#ASMEND 14768; GFX940-NEXT: ;;#ASMSTART 14769; GFX940-NEXT: ; def s[2:3] 14770; GFX940-NEXT: ;;#ASMEND 14771; GFX940-NEXT: s_pack_ll_b32_b16 s9, s2, s0 14772; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3 14773; GFX940-NEXT: ;;#ASMSTART 14774; GFX940-NEXT: ; use s[8:9] 14775; GFX940-NEXT: ;;#ASMEND 14776; GFX940-NEXT: s_setpc_b64 s[30:31] 14777 %vec0 = call <4 x i16> asm "; def $0", "=s"() 14778 %vec1 = call <4 x i16> asm "; def $0", "=s"() 14779 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 4, i32 0> 14780 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 14781 ret void 14782} 14783 14784define void @s_shuffle_v4i16_v4i16__7_7_5_0() { 14785; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_5_0: 14786; GFX900: ; %bb.0: 14787; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14788; GFX900-NEXT: ;;#ASMSTART 14789; GFX900-NEXT: ; def s[4:5] 14790; GFX900-NEXT: ;;#ASMEND 14791; GFX900-NEXT: ;;#ASMSTART 14792; GFX900-NEXT: ; def s[6:7] 14793; GFX900-NEXT: ;;#ASMEND 14794; GFX900-NEXT: s_lshr_b32 s5, s6, 16 14795; GFX900-NEXT: s_pack_ll_b32_b16 s9, s5, s4 14796; GFX900-NEXT: s_pack_hh_b32_b16 s8, s7, s7 14797; GFX900-NEXT: ;;#ASMSTART 14798; GFX900-NEXT: ; use s[8:9] 14799; GFX900-NEXT: ;;#ASMEND 
14800; GFX900-NEXT: s_setpc_b64 s[30:31] 14801; 14802; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_5_0: 14803; GFX90A: ; %bb.0: 14804; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14805; GFX90A-NEXT: ;;#ASMSTART 14806; GFX90A-NEXT: ; def s[4:5] 14807; GFX90A-NEXT: ;;#ASMEND 14808; GFX90A-NEXT: ;;#ASMSTART 14809; GFX90A-NEXT: ; def s[6:7] 14810; GFX90A-NEXT: ;;#ASMEND 14811; GFX90A-NEXT: s_lshr_b32 s5, s6, 16 14812; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s5, s4 14813; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s7, s7 14814; GFX90A-NEXT: ;;#ASMSTART 14815; GFX90A-NEXT: ; use s[8:9] 14816; GFX90A-NEXT: ;;#ASMEND 14817; GFX90A-NEXT: s_setpc_b64 s[30:31] 14818; 14819; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_5_0: 14820; GFX940: ; %bb.0: 14821; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14822; GFX940-NEXT: ;;#ASMSTART 14823; GFX940-NEXT: ; def s[0:1] 14824; GFX940-NEXT: ;;#ASMEND 14825; GFX940-NEXT: ;;#ASMSTART 14826; GFX940-NEXT: ; def s[2:3] 14827; GFX940-NEXT: ;;#ASMEND 14828; GFX940-NEXT: s_lshr_b32 s1, s2, 16 14829; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s0 14830; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3 14831; GFX940-NEXT: ;;#ASMSTART 14832; GFX940-NEXT: ; use s[8:9] 14833; GFX940-NEXT: ;;#ASMEND 14834; GFX940-NEXT: s_setpc_b64 s[30:31] 14835 %vec0 = call <4 x i16> asm "; def $0", "=s"() 14836 %vec1 = call <4 x i16> asm "; def $0", "=s"() 14837 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 5, i32 0> 14838 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 14839 ret void 14840} 14841 14842define void @s_shuffle_v4i16_v4i16__7_7_6_0() { 14843; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_6_0: 14844; GFX900: ; %bb.0: 14845; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14846; GFX900-NEXT: ;;#ASMSTART 14847; GFX900-NEXT: ; def s[4:5] 14848; GFX900-NEXT: ;;#ASMEND 14849; GFX900-NEXT: ;;#ASMSTART 14850; GFX900-NEXT: ; def s[6:7] 14851; GFX900-NEXT: ;;#ASMEND 14852; GFX900-NEXT: s_pack_ll_b32_b16 s9, s7, s4 
14853; GFX900-NEXT: s_pack_hh_b32_b16 s8, s7, s7 14854; GFX900-NEXT: ;;#ASMSTART 14855; GFX900-NEXT: ; use s[8:9] 14856; GFX900-NEXT: ;;#ASMEND 14857; GFX900-NEXT: s_setpc_b64 s[30:31] 14858; 14859; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_6_0: 14860; GFX90A: ; %bb.0: 14861; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14862; GFX90A-NEXT: ;;#ASMSTART 14863; GFX90A-NEXT: ; def s[4:5] 14864; GFX90A-NEXT: ;;#ASMEND 14865; GFX90A-NEXT: ;;#ASMSTART 14866; GFX90A-NEXT: ; def s[6:7] 14867; GFX90A-NEXT: ;;#ASMEND 14868; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s7, s4 14869; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s7, s7 14870; GFX90A-NEXT: ;;#ASMSTART 14871; GFX90A-NEXT: ; use s[8:9] 14872; GFX90A-NEXT: ;;#ASMEND 14873; GFX90A-NEXT: s_setpc_b64 s[30:31] 14874; 14875; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_6_0: 14876; GFX940: ; %bb.0: 14877; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14878; GFX940-NEXT: ;;#ASMSTART 14879; GFX940-NEXT: ; def s[0:1] 14880; GFX940-NEXT: ;;#ASMEND 14881; GFX940-NEXT: ;;#ASMSTART 14882; GFX940-NEXT: ; def s[2:3] 14883; GFX940-NEXT: ;;#ASMEND 14884; GFX940-NEXT: s_pack_ll_b32_b16 s9, s3, s0 14885; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3 14886; GFX940-NEXT: ;;#ASMSTART 14887; GFX940-NEXT: ; use s[8:9] 14888; GFX940-NEXT: ;;#ASMEND 14889; GFX940-NEXT: s_setpc_b64 s[30:31] 14890 %vec0 = call <4 x i16> asm "; def $0", "=s"() 14891 %vec1 = call <4 x i16> asm "; def $0", "=s"() 14892 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 0> 14893 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 14894 ret void 14895} 14896 14897define void @s_shuffle_v4i16_v4i16__u_1_1_1() { 14898; GFX9-LABEL: s_shuffle_v4i16_v4i16__u_1_1_1: 14899; GFX9: ; %bb.0: 14900; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14901; GFX9-NEXT: ;;#ASMSTART 14902; GFX9-NEXT: ; def s[8:9] 14903; GFX9-NEXT: ;;#ASMEND 14904; GFX9-NEXT: s_pack_hh_b32_b16 s9, s8, s8 14905; GFX9-NEXT: ;;#ASMSTART 14906; GFX9-NEXT: ; use 
s[8:9] 14907; GFX9-NEXT: ;;#ASMEND 14908; GFX9-NEXT: s_setpc_b64 s[30:31] 14909 %vec0 = call <4 x i16> asm "; def $0", "=s"() 14910 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 poison, i32 1, i32 1, i32 1> 14911 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 14912 ret void 14913} 14914 14915define void @s_shuffle_v4i16_v4i16__0_1_1_1() { 14916; GFX9-LABEL: s_shuffle_v4i16_v4i16__0_1_1_1: 14917; GFX9: ; %bb.0: 14918; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14919; GFX9-NEXT: ;;#ASMSTART 14920; GFX9-NEXT: ; def s[8:9] 14921; GFX9-NEXT: ;;#ASMEND 14922; GFX9-NEXT: s_pack_hh_b32_b16 s9, s8, s8 14923; GFX9-NEXT: ;;#ASMSTART 14924; GFX9-NEXT: ; use s[8:9] 14925; GFX9-NEXT: ;;#ASMEND 14926; GFX9-NEXT: s_setpc_b64 s[30:31] 14927 %vec0 = call <4 x i16> asm "; def $0", "=s"() 14928 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 1> 14929 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 14930 ret void 14931} 14932 14933define void @s_shuffle_v4i16_v4i16__1_1_1_1() { 14934; GFX900-LABEL: s_shuffle_v4i16_v4i16__1_1_1_1: 14935; GFX900: ; %bb.0: 14936; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14937; GFX900-NEXT: ;;#ASMSTART 14938; GFX900-NEXT: ; def s[4:5] 14939; GFX900-NEXT: ;;#ASMEND 14940; GFX900-NEXT: s_pack_hh_b32_b16 s8, s4, s4 14941; GFX900-NEXT: s_mov_b32 s9, s8 14942; GFX900-NEXT: ;;#ASMSTART 14943; GFX900-NEXT: ; use s[8:9] 14944; GFX900-NEXT: ;;#ASMEND 14945; GFX900-NEXT: s_setpc_b64 s[30:31] 14946; 14947; GFX90A-LABEL: s_shuffle_v4i16_v4i16__1_1_1_1: 14948; GFX90A: ; %bb.0: 14949; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14950; GFX90A-NEXT: ;;#ASMSTART 14951; GFX90A-NEXT: ; def s[4:5] 14952; GFX90A-NEXT: ;;#ASMEND 14953; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s4, s4 14954; GFX90A-NEXT: s_mov_b32 s9, s8 14955; GFX90A-NEXT: ;;#ASMSTART 14956; GFX90A-NEXT: ; use s[8:9] 14957; GFX90A-NEXT: ;;#ASMEND 14958; GFX90A-NEXT: s_setpc_b64 
s[30:31] 14959; 14960; GFX940-LABEL: s_shuffle_v4i16_v4i16__1_1_1_1: 14961; GFX940: ; %bb.0: 14962; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14963; GFX940-NEXT: ;;#ASMSTART 14964; GFX940-NEXT: ; def s[0:1] 14965; GFX940-NEXT: ;;#ASMEND 14966; GFX940-NEXT: s_pack_hh_b32_b16 s8, s0, s0 14967; GFX940-NEXT: s_mov_b32 s9, s8 14968; GFX940-NEXT: ;;#ASMSTART 14969; GFX940-NEXT: ; use s[8:9] 14970; GFX940-NEXT: ;;#ASMEND 14971; GFX940-NEXT: s_setpc_b64 s[30:31] 14972 %vec0 = call <4 x i16> asm "; def $0", "=s"() 14973 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1> 14974 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 14975 ret void 14976} 14977 14978define void @s_shuffle_v4i16_v4i16__2_1_1_1() { 14979; GFX900-LABEL: s_shuffle_v4i16_v4i16__2_1_1_1: 14980; GFX900: ; %bb.0: 14981; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14982; GFX900-NEXT: ;;#ASMSTART 14983; GFX900-NEXT: ; def s[4:5] 14984; GFX900-NEXT: ;;#ASMEND 14985; GFX900-NEXT: s_pack_lh_b32_b16 s8, s5, s4 14986; GFX900-NEXT: s_pack_hh_b32_b16 s9, s4, s4 14987; GFX900-NEXT: ;;#ASMSTART 14988; GFX900-NEXT: ; use s[8:9] 14989; GFX900-NEXT: ;;#ASMEND 14990; GFX900-NEXT: s_setpc_b64 s[30:31] 14991; 14992; GFX90A-LABEL: s_shuffle_v4i16_v4i16__2_1_1_1: 14993; GFX90A: ; %bb.0: 14994; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14995; GFX90A-NEXT: ;;#ASMSTART 14996; GFX90A-NEXT: ; def s[4:5] 14997; GFX90A-NEXT: ;;#ASMEND 14998; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s5, s4 14999; GFX90A-NEXT: s_pack_hh_b32_b16 s9, s4, s4 15000; GFX90A-NEXT: ;;#ASMSTART 15001; GFX90A-NEXT: ; use s[8:9] 15002; GFX90A-NEXT: ;;#ASMEND 15003; GFX90A-NEXT: s_setpc_b64 s[30:31] 15004; 15005; GFX940-LABEL: s_shuffle_v4i16_v4i16__2_1_1_1: 15006; GFX940: ; %bb.0: 15007; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15008; GFX940-NEXT: ;;#ASMSTART 15009; GFX940-NEXT: ; def s[0:1] 15010; GFX940-NEXT: ;;#ASMEND 15011; GFX940-NEXT: s_pack_lh_b32_b16 s8, s1, 
s0 15012; GFX940-NEXT: s_pack_hh_b32_b16 s9, s0, s0 15013; GFX940-NEXT: ;;#ASMSTART 15014; GFX940-NEXT: ; use s[8:9] 15015; GFX940-NEXT: ;;#ASMEND 15016; GFX940-NEXT: s_setpc_b64 s[30:31] 15017 %vec0 = call <4 x i16> asm "; def $0", "=s"() 15018 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 2, i32 1, i32 1, i32 1> 15019 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 15020 ret void 15021} 15022 15023define void @s_shuffle_v4i16_v4i16__3_1_1_1() { 15024; GFX900-LABEL: s_shuffle_v4i16_v4i16__3_1_1_1: 15025; GFX900: ; %bb.0: 15026; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15027; GFX900-NEXT: ;;#ASMSTART 15028; GFX900-NEXT: ; def s[4:5] 15029; GFX900-NEXT: ;;#ASMEND 15030; GFX900-NEXT: s_pack_hh_b32_b16 s8, s5, s4 15031; GFX900-NEXT: s_pack_hh_b32_b16 s9, s4, s4 15032; GFX900-NEXT: ;;#ASMSTART 15033; GFX900-NEXT: ; use s[8:9] 15034; GFX900-NEXT: ;;#ASMEND 15035; GFX900-NEXT: s_setpc_b64 s[30:31] 15036; 15037; GFX90A-LABEL: s_shuffle_v4i16_v4i16__3_1_1_1: 15038; GFX90A: ; %bb.0: 15039; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15040; GFX90A-NEXT: ;;#ASMSTART 15041; GFX90A-NEXT: ; def s[4:5] 15042; GFX90A-NEXT: ;;#ASMEND 15043; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s5, s4 15044; GFX90A-NEXT: s_pack_hh_b32_b16 s9, s4, s4 15045; GFX90A-NEXT: ;;#ASMSTART 15046; GFX90A-NEXT: ; use s[8:9] 15047; GFX90A-NEXT: ;;#ASMEND 15048; GFX90A-NEXT: s_setpc_b64 s[30:31] 15049; 15050; GFX940-LABEL: s_shuffle_v4i16_v4i16__3_1_1_1: 15051; GFX940: ; %bb.0: 15052; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15053; GFX940-NEXT: ;;#ASMSTART 15054; GFX940-NEXT: ; def s[0:1] 15055; GFX940-NEXT: ;;#ASMEND 15056; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s0 15057; GFX940-NEXT: s_pack_hh_b32_b16 s9, s0, s0 15058; GFX940-NEXT: ;;#ASMSTART 15059; GFX940-NEXT: ; use s[8:9] 15060; GFX940-NEXT: ;;#ASMEND 15061; GFX940-NEXT: s_setpc_b64 s[30:31] 15062 %vec0 = call <4 x i16> asm "; def $0", "=s"() 15063 %shuf = shufflevector <4 x i16> 
%vec0, <4 x i16> poison, <4 x i32> <i32 3, i32 1, i32 1, i32 1> 15064 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 15065 ret void 15066} 15067 15068define void @s_shuffle_v4i16_v4i16__4_1_1_1() { 15069; GFX9-LABEL: s_shuffle_v4i16_v4i16__4_1_1_1: 15070; GFX9: ; %bb.0: 15071; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15072; GFX9-NEXT: ;;#ASMSTART 15073; GFX9-NEXT: ; def s[8:9] 15074; GFX9-NEXT: ;;#ASMEND 15075; GFX9-NEXT: s_pack_hh_b32_b16 s9, s8, s8 15076; GFX9-NEXT: ;;#ASMSTART 15077; GFX9-NEXT: ; use s[8:9] 15078; GFX9-NEXT: ;;#ASMEND 15079; GFX9-NEXT: s_setpc_b64 s[30:31] 15080 %vec0 = call <4 x i16> asm "; def $0", "=s"() 15081 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 4, i32 1, i32 1, i32 1> 15082 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 15083 ret void 15084} 15085 15086define void @s_shuffle_v4i16_v4i16__5_1_1_1() { 15087; GFX900-LABEL: s_shuffle_v4i16_v4i16__5_1_1_1: 15088; GFX900: ; %bb.0: 15089; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15090; GFX900-NEXT: ;;#ASMSTART 15091; GFX900-NEXT: ; def s[4:5] 15092; GFX900-NEXT: ;;#ASMEND 15093; GFX900-NEXT: ;;#ASMSTART 15094; GFX900-NEXT: ; def s[6:7] 15095; GFX900-NEXT: ;;#ASMEND 15096; GFX900-NEXT: s_pack_hh_b32_b16 s8, s6, s4 15097; GFX900-NEXT: s_pack_hh_b32_b16 s9, s4, s4 15098; GFX900-NEXT: ;;#ASMSTART 15099; GFX900-NEXT: ; use s[8:9] 15100; GFX900-NEXT: ;;#ASMEND 15101; GFX900-NEXT: s_setpc_b64 s[30:31] 15102; 15103; GFX90A-LABEL: s_shuffle_v4i16_v4i16__5_1_1_1: 15104; GFX90A: ; %bb.0: 15105; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15106; GFX90A-NEXT: ;;#ASMSTART 15107; GFX90A-NEXT: ; def s[4:5] 15108; GFX90A-NEXT: ;;#ASMEND 15109; GFX90A-NEXT: ;;#ASMSTART 15110; GFX90A-NEXT: ; def s[6:7] 15111; GFX90A-NEXT: ;;#ASMEND 15112; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s6, s4 15113; GFX90A-NEXT: s_pack_hh_b32_b16 s9, s4, s4 15114; GFX90A-NEXT: ;;#ASMSTART 15115; GFX90A-NEXT: ; use s[8:9] 15116; GFX90A-NEXT: 
;;#ASMEND 15117; GFX90A-NEXT: s_setpc_b64 s[30:31] 15118; 15119; GFX940-LABEL: s_shuffle_v4i16_v4i16__5_1_1_1: 15120; GFX940: ; %bb.0: 15121; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15122; GFX940-NEXT: ;;#ASMSTART 15123; GFX940-NEXT: ; def s[0:1] 15124; GFX940-NEXT: ;;#ASMEND 15125; GFX940-NEXT: ;;#ASMSTART 15126; GFX940-NEXT: ; def s[2:3] 15127; GFX940-NEXT: ;;#ASMEND 15128; GFX940-NEXT: s_pack_hh_b32_b16 s8, s2, s0 15129; GFX940-NEXT: s_pack_hh_b32_b16 s9, s0, s0 15130; GFX940-NEXT: ;;#ASMSTART 15131; GFX940-NEXT: ; use s[8:9] 15132; GFX940-NEXT: ;;#ASMEND 15133; GFX940-NEXT: s_setpc_b64 s[30:31] 15134 %vec0 = call <4 x i16> asm "; def $0", "=s"() 15135 %vec1 = call <4 x i16> asm "; def $0", "=s"() 15136 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 5, i32 1, i32 1, i32 1> 15137 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 15138 ret void 15139} 15140 15141define void @s_shuffle_v4i16_v4i16__6_1_1_1() { 15142; GFX900-LABEL: s_shuffle_v4i16_v4i16__6_1_1_1: 15143; GFX900: ; %bb.0: 15144; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15145; GFX900-NEXT: ;;#ASMSTART 15146; GFX900-NEXT: ; def s[4:5] 15147; GFX900-NEXT: ;;#ASMEND 15148; GFX900-NEXT: ;;#ASMSTART 15149; GFX900-NEXT: ; def s[6:7] 15150; GFX900-NEXT: ;;#ASMEND 15151; GFX900-NEXT: s_pack_lh_b32_b16 s8, s7, s4 15152; GFX900-NEXT: s_pack_hh_b32_b16 s9, s4, s4 15153; GFX900-NEXT: ;;#ASMSTART 15154; GFX900-NEXT: ; use s[8:9] 15155; GFX900-NEXT: ;;#ASMEND 15156; GFX900-NEXT: s_setpc_b64 s[30:31] 15157; 15158; GFX90A-LABEL: s_shuffle_v4i16_v4i16__6_1_1_1: 15159; GFX90A: ; %bb.0: 15160; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15161; GFX90A-NEXT: ;;#ASMSTART 15162; GFX90A-NEXT: ; def s[4:5] 15163; GFX90A-NEXT: ;;#ASMEND 15164; GFX90A-NEXT: ;;#ASMSTART 15165; GFX90A-NEXT: ; def s[6:7] 15166; GFX90A-NEXT: ;;#ASMEND 15167; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s7, s4 15168; GFX90A-NEXT: s_pack_hh_b32_b16 s9, s4, s4 15169; GFX90A-NEXT: 
;;#ASMSTART 15170; GFX90A-NEXT: ; use s[8:9] 15171; GFX90A-NEXT: ;;#ASMEND 15172; GFX90A-NEXT: s_setpc_b64 s[30:31] 15173; 15174; GFX940-LABEL: s_shuffle_v4i16_v4i16__6_1_1_1: 15175; GFX940: ; %bb.0: 15176; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15177; GFX940-NEXT: ;;#ASMSTART 15178; GFX940-NEXT: ; def s[0:1] 15179; GFX940-NEXT: ;;#ASMEND 15180; GFX940-NEXT: ;;#ASMSTART 15181; GFX940-NEXT: ; def s[2:3] 15182; GFX940-NEXT: ;;#ASMEND 15183; GFX940-NEXT: s_pack_lh_b32_b16 s8, s3, s0 15184; GFX940-NEXT: s_pack_hh_b32_b16 s9, s0, s0 15185; GFX940-NEXT: ;;#ASMSTART 15186; GFX940-NEXT: ; use s[8:9] 15187; GFX940-NEXT: ;;#ASMEND 15188; GFX940-NEXT: s_setpc_b64 s[30:31] 15189 %vec0 = call <4 x i16> asm "; def $0", "=s"() 15190 %vec1 = call <4 x i16> asm "; def $0", "=s"() 15191 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 6, i32 1, i32 1, i32 1> 15192 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 15193 ret void 15194} 15195 15196define void @s_shuffle_v4i16_v4i16__7_1_1_1() { 15197; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_1_1_1: 15198; GFX900: ; %bb.0: 15199; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15200; GFX900-NEXT: ;;#ASMSTART 15201; GFX900-NEXT: ; def s[4:5] 15202; GFX900-NEXT: ;;#ASMEND 15203; GFX900-NEXT: ;;#ASMSTART 15204; GFX900-NEXT: ; def s[6:7] 15205; GFX900-NEXT: ;;#ASMEND 15206; GFX900-NEXT: s_pack_hh_b32_b16 s8, s7, s4 15207; GFX900-NEXT: s_pack_hh_b32_b16 s9, s4, s4 15208; GFX900-NEXT: ;;#ASMSTART 15209; GFX900-NEXT: ; use s[8:9] 15210; GFX900-NEXT: ;;#ASMEND 15211; GFX900-NEXT: s_setpc_b64 s[30:31] 15212; 15213; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_1_1_1: 15214; GFX90A: ; %bb.0: 15215; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15216; GFX90A-NEXT: ;;#ASMSTART 15217; GFX90A-NEXT: ; def s[4:5] 15218; GFX90A-NEXT: ;;#ASMEND 15219; GFX90A-NEXT: ;;#ASMSTART 15220; GFX90A-NEXT: ; def s[6:7] 15221; GFX90A-NEXT: ;;#ASMEND 15222; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s7, s4 15223; 
GFX90A-NEXT: s_pack_hh_b32_b16 s9, s4, s4 15224; GFX90A-NEXT: ;;#ASMSTART 15225; GFX90A-NEXT: ; use s[8:9] 15226; GFX90A-NEXT: ;;#ASMEND 15227; GFX90A-NEXT: s_setpc_b64 s[30:31] 15228; 15229; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_1_1_1: 15230; GFX940: ; %bb.0: 15231; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15232; GFX940-NEXT: ;;#ASMSTART 15233; GFX940-NEXT: ; def s[0:1] 15234; GFX940-NEXT: ;;#ASMEND 15235; GFX940-NEXT: ;;#ASMSTART 15236; GFX940-NEXT: ; def s[2:3] 15237; GFX940-NEXT: ;;#ASMEND 15238; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s0 15239; GFX940-NEXT: s_pack_hh_b32_b16 s9, s0, s0 15240; GFX940-NEXT: ;;#ASMSTART 15241; GFX940-NEXT: ; use s[8:9] 15242; GFX940-NEXT: ;;#ASMEND 15243; GFX940-NEXT: s_setpc_b64 s[30:31] 15244 %vec0 = call <4 x i16> asm "; def $0", "=s"() 15245 %vec1 = call <4 x i16> asm "; def $0", "=s"() 15246 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 1, i32 1, i32 1> 15247 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 15248 ret void 15249} 15250 15251define void @s_shuffle_v4i16_v4i16__7_u_1_1() { 15252; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_u_1_1: 15253; GFX900: ; %bb.0: 15254; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15255; GFX900-NEXT: ;;#ASMSTART 15256; GFX900-NEXT: ; def s[4:5] 15257; GFX900-NEXT: ;;#ASMEND 15258; GFX900-NEXT: ;;#ASMSTART 15259; GFX900-NEXT: ; def s[6:7] 15260; GFX900-NEXT: ;;#ASMEND 15261; GFX900-NEXT: s_lshr_b32 s8, s7, 16 15262; GFX900-NEXT: s_pack_hh_b32_b16 s9, s4, s4 15263; GFX900-NEXT: ;;#ASMSTART 15264; GFX900-NEXT: ; use s[8:9] 15265; GFX900-NEXT: ;;#ASMEND 15266; GFX900-NEXT: s_setpc_b64 s[30:31] 15267; 15268; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_u_1_1: 15269; GFX90A: ; %bb.0: 15270; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15271; GFX90A-NEXT: ;;#ASMSTART 15272; GFX90A-NEXT: ; def s[4:5] 15273; GFX90A-NEXT: ;;#ASMEND 15274; GFX90A-NEXT: ;;#ASMSTART 15275; GFX90A-NEXT: ; def s[6:7] 15276; GFX90A-NEXT: ;;#ASMEND 15277; 
GFX90A-NEXT: s_lshr_b32 s8, s7, 16 15278; GFX90A-NEXT: s_pack_hh_b32_b16 s9, s4, s4 15279; GFX90A-NEXT: ;;#ASMSTART 15280; GFX90A-NEXT: ; use s[8:9] 15281; GFX90A-NEXT: ;;#ASMEND 15282; GFX90A-NEXT: s_setpc_b64 s[30:31] 15283; 15284; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_u_1_1: 15285; GFX940: ; %bb.0: 15286; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15287; GFX940-NEXT: ;;#ASMSTART 15288; GFX940-NEXT: ; def s[0:1] 15289; GFX940-NEXT: ;;#ASMEND 15290; GFX940-NEXT: ;;#ASMSTART 15291; GFX940-NEXT: ; def s[2:3] 15292; GFX940-NEXT: ;;#ASMEND 15293; GFX940-NEXT: s_lshr_b32 s8, s3, 16 15294; GFX940-NEXT: s_pack_hh_b32_b16 s9, s0, s0 15295; GFX940-NEXT: ;;#ASMSTART 15296; GFX940-NEXT: ; use s[8:9] 15297; GFX940-NEXT: ;;#ASMEND 15298; GFX940-NEXT: s_setpc_b64 s[30:31] 15299 %vec0 = call <4 x i16> asm "; def $0", "=s"() 15300 %vec1 = call <4 x i16> asm "; def $0", "=s"() 15301 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 poison, i32 1, i32 1> 15302 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 15303 ret void 15304} 15305 15306define void @s_shuffle_v4i16_v4i16__7_0_1_1() { 15307; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_0_1_1: 15308; GFX900: ; %bb.0: 15309; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15310; GFX900-NEXT: ;;#ASMSTART 15311; GFX900-NEXT: ; def s[4:5] 15312; GFX900-NEXT: ;;#ASMEND 15313; GFX900-NEXT: ;;#ASMSTART 15314; GFX900-NEXT: ; def s[6:7] 15315; GFX900-NEXT: ;;#ASMEND 15316; GFX900-NEXT: s_lshr_b32 s5, s7, 16 15317; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4 15318; GFX900-NEXT: s_pack_hh_b32_b16 s9, s4, s4 15319; GFX900-NEXT: ;;#ASMSTART 15320; GFX900-NEXT: ; use s[8:9] 15321; GFX900-NEXT: ;;#ASMEND 15322; GFX900-NEXT: s_setpc_b64 s[30:31] 15323; 15324; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_0_1_1: 15325; GFX90A: ; %bb.0: 15326; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15327; GFX90A-NEXT: ;;#ASMSTART 15328; GFX90A-NEXT: ; def s[4:5] 15329; GFX90A-NEXT: ;;#ASMEND 15330; 
GFX90A-NEXT: ;;#ASMSTART 15331; GFX90A-NEXT: ; def s[6:7] 15332; GFX90A-NEXT: ;;#ASMEND 15333; GFX90A-NEXT: s_lshr_b32 s5, s7, 16 15334; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4 15335; GFX90A-NEXT: s_pack_hh_b32_b16 s9, s4, s4 15336; GFX90A-NEXT: ;;#ASMSTART 15337; GFX90A-NEXT: ; use s[8:9] 15338; GFX90A-NEXT: ;;#ASMEND 15339; GFX90A-NEXT: s_setpc_b64 s[30:31] 15340; 15341; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_0_1_1: 15342; GFX940: ; %bb.0: 15343; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15344; GFX940-NEXT: ;;#ASMSTART 15345; GFX940-NEXT: ; def s[0:1] 15346; GFX940-NEXT: ;;#ASMEND 15347; GFX940-NEXT: ;;#ASMSTART 15348; GFX940-NEXT: ; def s[2:3] 15349; GFX940-NEXT: ;;#ASMEND 15350; GFX940-NEXT: s_lshr_b32 s1, s3, 16 15351; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 15352; GFX940-NEXT: s_pack_hh_b32_b16 s9, s0, s0 15353; GFX940-NEXT: ;;#ASMSTART 15354; GFX940-NEXT: ; use s[8:9] 15355; GFX940-NEXT: ;;#ASMEND 15356; GFX940-NEXT: s_setpc_b64 s[30:31] 15357 %vec0 = call <4 x i16> asm "; def $0", "=s"() 15358 %vec1 = call <4 x i16> asm "; def $0", "=s"() 15359 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 0, i32 1, i32 1> 15360 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 15361 ret void 15362} 15363 15364define void @s_shuffle_v4i16_v4i16__7_2_1_1() { 15365; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_2_1_1: 15366; GFX900: ; %bb.0: 15367; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15368; GFX900-NEXT: ;;#ASMSTART 15369; GFX900-NEXT: ; def s[6:7] 15370; GFX900-NEXT: ;;#ASMEND 15371; GFX900-NEXT: s_lshr_b32 s6, s7, 16 15372; GFX900-NEXT: ;;#ASMSTART 15373; GFX900-NEXT: ; def s[4:5] 15374; GFX900-NEXT: ;;#ASMEND 15375; GFX900-NEXT: s_pack_ll_b32_b16 s8, s6, s5 15376; GFX900-NEXT: s_pack_hh_b32_b16 s9, s4, s4 15377; GFX900-NEXT: ;;#ASMSTART 15378; GFX900-NEXT: ; use s[8:9] 15379; GFX900-NEXT: ;;#ASMEND 15380; GFX900-NEXT: s_setpc_b64 s[30:31] 15381; 15382; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_2_1_1: 
15383; GFX90A: ; %bb.0: 15384; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15385; GFX90A-NEXT: ;;#ASMSTART 15386; GFX90A-NEXT: ; def s[6:7] 15387; GFX90A-NEXT: ;;#ASMEND 15388; GFX90A-NEXT: s_lshr_b32 s6, s7, 16 15389; GFX90A-NEXT: ;;#ASMSTART 15390; GFX90A-NEXT: ; def s[4:5] 15391; GFX90A-NEXT: ;;#ASMEND 15392; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s6, s5 15393; GFX90A-NEXT: s_pack_hh_b32_b16 s9, s4, s4 15394; GFX90A-NEXT: ;;#ASMSTART 15395; GFX90A-NEXT: ; use s[8:9] 15396; GFX90A-NEXT: ;;#ASMEND 15397; GFX90A-NEXT: s_setpc_b64 s[30:31] 15398; 15399; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_2_1_1: 15400; GFX940: ; %bb.0: 15401; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15402; GFX940-NEXT: ;;#ASMSTART 15403; GFX940-NEXT: ; def s[2:3] 15404; GFX940-NEXT: ;;#ASMEND 15405; GFX940-NEXT: s_lshr_b32 s2, s3, 16 15406; GFX940-NEXT: ;;#ASMSTART 15407; GFX940-NEXT: ; def s[0:1] 15408; GFX940-NEXT: ;;#ASMEND 15409; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s1 15410; GFX940-NEXT: s_pack_hh_b32_b16 s9, s0, s0 15411; GFX940-NEXT: ;;#ASMSTART 15412; GFX940-NEXT: ; use s[8:9] 15413; GFX940-NEXT: ;;#ASMEND 15414; GFX940-NEXT: s_setpc_b64 s[30:31] 15415 %vec0 = call <4 x i16> asm "; def $0", "=s"() 15416 %vec1 = call <4 x i16> asm "; def $0", "=s"() 15417 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 2, i32 1, i32 1> 15418 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 15419 ret void 15420} 15421 15422define void @s_shuffle_v4i16_v4i16__7_3_1_1() { 15423; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_3_1_1: 15424; GFX900: ; %bb.0: 15425; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15426; GFX900-NEXT: ;;#ASMSTART 15427; GFX900-NEXT: ; def s[4:5] 15428; GFX900-NEXT: ;;#ASMEND 15429; GFX900-NEXT: ;;#ASMSTART 15430; GFX900-NEXT: ; def s[6:7] 15431; GFX900-NEXT: ;;#ASMEND 15432; GFX900-NEXT: s_pack_hh_b32_b16 s8, s7, s5 15433; GFX900-NEXT: s_pack_hh_b32_b16 s9, s4, s4 15434; GFX900-NEXT: ;;#ASMSTART 15435; GFX900-NEXT: ; use 
s[8:9] 15436; GFX900-NEXT: ;;#ASMEND 15437; GFX900-NEXT: s_setpc_b64 s[30:31] 15438; 15439; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_3_1_1: 15440; GFX90A: ; %bb.0: 15441; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15442; GFX90A-NEXT: ;;#ASMSTART 15443; GFX90A-NEXT: ; def s[4:5] 15444; GFX90A-NEXT: ;;#ASMEND 15445; GFX90A-NEXT: ;;#ASMSTART 15446; GFX90A-NEXT: ; def s[6:7] 15447; GFX90A-NEXT: ;;#ASMEND 15448; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s7, s5 15449; GFX90A-NEXT: s_pack_hh_b32_b16 s9, s4, s4 15450; GFX90A-NEXT: ;;#ASMSTART 15451; GFX90A-NEXT: ; use s[8:9] 15452; GFX90A-NEXT: ;;#ASMEND 15453; GFX90A-NEXT: s_setpc_b64 s[30:31] 15454; 15455; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_3_1_1: 15456; GFX940: ; %bb.0: 15457; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15458; GFX940-NEXT: ;;#ASMSTART 15459; GFX940-NEXT: ; def s[0:1] 15460; GFX940-NEXT: ;;#ASMEND 15461; GFX940-NEXT: ;;#ASMSTART 15462; GFX940-NEXT: ; def s[2:3] 15463; GFX940-NEXT: ;;#ASMEND 15464; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s1 15465; GFX940-NEXT: s_pack_hh_b32_b16 s9, s0, s0 15466; GFX940-NEXT: ;;#ASMSTART 15467; GFX940-NEXT: ; use s[8:9] 15468; GFX940-NEXT: ;;#ASMEND 15469; GFX940-NEXT: s_setpc_b64 s[30:31] 15470 %vec0 = call <4 x i16> asm "; def $0", "=s"() 15471 %vec1 = call <4 x i16> asm "; def $0", "=s"() 15472 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 3, i32 1, i32 1> 15473 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 15474 ret void 15475} 15476 15477define void @s_shuffle_v4i16_v4i16__7_4_1_1() { 15478; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_4_1_1: 15479; GFX900: ; %bb.0: 15480; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15481; GFX900-NEXT: ;;#ASMSTART 15482; GFX900-NEXT: ; def s[4:5] 15483; GFX900-NEXT: ;;#ASMEND 15484; GFX900-NEXT: ;;#ASMSTART 15485; GFX900-NEXT: ; def s[6:7] 15486; GFX900-NEXT: ;;#ASMEND 15487; GFX900-NEXT: s_lshr_b32 s5, s7, 16 15488; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s6 15489; 
GFX900-NEXT: s_pack_hh_b32_b16 s9, s4, s4 15490; GFX900-NEXT: ;;#ASMSTART 15491; GFX900-NEXT: ; use s[8:9] 15492; GFX900-NEXT: ;;#ASMEND 15493; GFX900-NEXT: s_setpc_b64 s[30:31] 15494; 15495; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_4_1_1: 15496; GFX90A: ; %bb.0: 15497; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15498; GFX90A-NEXT: ;;#ASMSTART 15499; GFX90A-NEXT: ; def s[4:5] 15500; GFX90A-NEXT: ;;#ASMEND 15501; GFX90A-NEXT: ;;#ASMSTART 15502; GFX90A-NEXT: ; def s[6:7] 15503; GFX90A-NEXT: ;;#ASMEND 15504; GFX90A-NEXT: s_lshr_b32 s5, s7, 16 15505; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s6 15506; GFX90A-NEXT: s_pack_hh_b32_b16 s9, s4, s4 15507; GFX90A-NEXT: ;;#ASMSTART 15508; GFX90A-NEXT: ; use s[8:9] 15509; GFX90A-NEXT: ;;#ASMEND 15510; GFX90A-NEXT: s_setpc_b64 s[30:31] 15511; 15512; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_4_1_1: 15513; GFX940: ; %bb.0: 15514; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15515; GFX940-NEXT: ;;#ASMSTART 15516; GFX940-NEXT: ; def s[0:1] 15517; GFX940-NEXT: ;;#ASMEND 15518; GFX940-NEXT: ;;#ASMSTART 15519; GFX940-NEXT: ; def s[2:3] 15520; GFX940-NEXT: ;;#ASMEND 15521; GFX940-NEXT: s_lshr_b32 s1, s3, 16 15522; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s2 15523; GFX940-NEXT: s_pack_hh_b32_b16 s9, s0, s0 15524; GFX940-NEXT: ;;#ASMSTART 15525; GFX940-NEXT: ; use s[8:9] 15526; GFX940-NEXT: ;;#ASMEND 15527; GFX940-NEXT: s_setpc_b64 s[30:31] 15528 %vec0 = call <4 x i16> asm "; def $0", "=s"() 15529 %vec1 = call <4 x i16> asm "; def $0", "=s"() 15530 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 4, i32 1, i32 1> 15531 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 15532 ret void 15533} 15534 15535define void @s_shuffle_v4i16_v4i16__7_5_1_1() { 15536; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_5_1_1: 15537; GFX900: ; %bb.0: 15538; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15539; GFX900-NEXT: ;;#ASMSTART 15540; GFX900-NEXT: ; def s[4:5] 15541; GFX900-NEXT: ;;#ASMEND 15542; 
GFX900-NEXT: ;;#ASMSTART 15543; GFX900-NEXT: ; def s[6:7] 15544; GFX900-NEXT: ;;#ASMEND 15545; GFX900-NEXT: s_pack_hh_b32_b16 s8, s7, s6 15546; GFX900-NEXT: s_pack_hh_b32_b16 s9, s4, s4 15547; GFX900-NEXT: ;;#ASMSTART 15548; GFX900-NEXT: ; use s[8:9] 15549; GFX900-NEXT: ;;#ASMEND 15550; GFX900-NEXT: s_setpc_b64 s[30:31] 15551; 15552; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_5_1_1: 15553; GFX90A: ; %bb.0: 15554; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15555; GFX90A-NEXT: ;;#ASMSTART 15556; GFX90A-NEXT: ; def s[4:5] 15557; GFX90A-NEXT: ;;#ASMEND 15558; GFX90A-NEXT: ;;#ASMSTART 15559; GFX90A-NEXT: ; def s[6:7] 15560; GFX90A-NEXT: ;;#ASMEND 15561; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s7, s6 15562; GFX90A-NEXT: s_pack_hh_b32_b16 s9, s4, s4 15563; GFX90A-NEXT: ;;#ASMSTART 15564; GFX90A-NEXT: ; use s[8:9] 15565; GFX90A-NEXT: ;;#ASMEND 15566; GFX90A-NEXT: s_setpc_b64 s[30:31] 15567; 15568; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_5_1_1: 15569; GFX940: ; %bb.0: 15570; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15571; GFX940-NEXT: ;;#ASMSTART 15572; GFX940-NEXT: ; def s[0:1] 15573; GFX940-NEXT: ;;#ASMEND 15574; GFX940-NEXT: ;;#ASMSTART 15575; GFX940-NEXT: ; def s[2:3] 15576; GFX940-NEXT: ;;#ASMEND 15577; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s2 15578; GFX940-NEXT: s_pack_hh_b32_b16 s9, s0, s0 15579; GFX940-NEXT: ;;#ASMSTART 15580; GFX940-NEXT: ; use s[8:9] 15581; GFX940-NEXT: ;;#ASMEND 15582; GFX940-NEXT: s_setpc_b64 s[30:31] 15583 %vec0 = call <4 x i16> asm "; def $0", "=s"() 15584 %vec1 = call <4 x i16> asm "; def $0", "=s"() 15585 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 5, i32 1, i32 1> 15586 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 15587 ret void 15588} 15589 15590define void @s_shuffle_v4i16_v4i16__7_6_1_1() { 15591; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_6_1_1: 15592; GFX900: ; %bb.0: 15593; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15594; GFX900-NEXT: ;;#ASMSTART 15595; 
GFX900-NEXT: ; def s[4:5] 15596; GFX900-NEXT: ;;#ASMEND 15597; GFX900-NEXT: ;;#ASMSTART 15598; GFX900-NEXT: ; def s[6:7] 15599; GFX900-NEXT: ;;#ASMEND 15600; GFX900-NEXT: s_lshr_b32 s5, s7, 16 15601; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s7 15602; GFX900-NEXT: s_pack_hh_b32_b16 s9, s4, s4 15603; GFX900-NEXT: ;;#ASMSTART 15604; GFX900-NEXT: ; use s[8:9] 15605; GFX900-NEXT: ;;#ASMEND 15606; GFX900-NEXT: s_setpc_b64 s[30:31] 15607; 15608; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_6_1_1: 15609; GFX90A: ; %bb.0: 15610; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15611; GFX90A-NEXT: ;;#ASMSTART 15612; GFX90A-NEXT: ; def s[4:5] 15613; GFX90A-NEXT: ;;#ASMEND 15614; GFX90A-NEXT: ;;#ASMSTART 15615; GFX90A-NEXT: ; def s[6:7] 15616; GFX90A-NEXT: ;;#ASMEND 15617; GFX90A-NEXT: s_lshr_b32 s5, s7, 16 15618; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s7 15619; GFX90A-NEXT: s_pack_hh_b32_b16 s9, s4, s4 15620; GFX90A-NEXT: ;;#ASMSTART 15621; GFX90A-NEXT: ; use s[8:9] 15622; GFX90A-NEXT: ;;#ASMEND 15623; GFX90A-NEXT: s_setpc_b64 s[30:31] 15624; 15625; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_6_1_1: 15626; GFX940: ; %bb.0: 15627; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15628; GFX940-NEXT: ;;#ASMSTART 15629; GFX940-NEXT: ; def s[0:1] 15630; GFX940-NEXT: ;;#ASMEND 15631; GFX940-NEXT: ;;#ASMSTART 15632; GFX940-NEXT: ; def s[2:3] 15633; GFX940-NEXT: ;;#ASMEND 15634; GFX940-NEXT: s_lshr_b32 s1, s3, 16 15635; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s3 15636; GFX940-NEXT: s_pack_hh_b32_b16 s9, s0, s0 15637; GFX940-NEXT: ;;#ASMSTART 15638; GFX940-NEXT: ; use s[8:9] 15639; GFX940-NEXT: ;;#ASMEND 15640; GFX940-NEXT: s_setpc_b64 s[30:31] 15641 %vec0 = call <4 x i16> asm "; def $0", "=s"() 15642 %vec1 = call <4 x i16> asm "; def $0", "=s"() 15643 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 6, i32 1, i32 1> 15644 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 15645 ret void 15646} 15647 15648define void 
@s_shuffle_v4i16_v4i16__7_7_1_1() { 15649; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_1_1: 15650; GFX900: ; %bb.0: 15651; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15652; GFX900-NEXT: ;;#ASMSTART 15653; GFX900-NEXT: ; def s[4:5] 15654; GFX900-NEXT: ;;#ASMEND 15655; GFX900-NEXT: ;;#ASMSTART 15656; GFX900-NEXT: ; def s[6:7] 15657; GFX900-NEXT: ;;#ASMEND 15658; GFX900-NEXT: s_pack_hh_b32_b16 s9, s4, s4 15659; GFX900-NEXT: s_pack_hh_b32_b16 s8, s7, s7 15660; GFX900-NEXT: ;;#ASMSTART 15661; GFX900-NEXT: ; use s[8:9] 15662; GFX900-NEXT: ;;#ASMEND 15663; GFX900-NEXT: s_setpc_b64 s[30:31] 15664; 15665; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_1_1: 15666; GFX90A: ; %bb.0: 15667; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15668; GFX90A-NEXT: ;;#ASMSTART 15669; GFX90A-NEXT: ; def s[4:5] 15670; GFX90A-NEXT: ;;#ASMEND 15671; GFX90A-NEXT: ;;#ASMSTART 15672; GFX90A-NEXT: ; def s[6:7] 15673; GFX90A-NEXT: ;;#ASMEND 15674; GFX90A-NEXT: s_pack_hh_b32_b16 s9, s4, s4 15675; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s7, s7 15676; GFX90A-NEXT: ;;#ASMSTART 15677; GFX90A-NEXT: ; use s[8:9] 15678; GFX90A-NEXT: ;;#ASMEND 15679; GFX90A-NEXT: s_setpc_b64 s[30:31] 15680; 15681; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_1_1: 15682; GFX940: ; %bb.0: 15683; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15684; GFX940-NEXT: ;;#ASMSTART 15685; GFX940-NEXT: ; def s[0:1] 15686; GFX940-NEXT: ;;#ASMEND 15687; GFX940-NEXT: ;;#ASMSTART 15688; GFX940-NEXT: ; def s[2:3] 15689; GFX940-NEXT: ;;#ASMEND 15690; GFX940-NEXT: s_pack_hh_b32_b16 s9, s0, s0 15691; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3 15692; GFX940-NEXT: ;;#ASMSTART 15693; GFX940-NEXT: ; use s[8:9] 15694; GFX940-NEXT: ;;#ASMEND 15695; GFX940-NEXT: s_setpc_b64 s[30:31] 15696 %vec0 = call <4 x i16> asm "; def $0", "=s"() 15697 %vec1 = call <4 x i16> asm "; def $0", "=s"() 15698 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 1, i32 1> 15699 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x 
i16> %shuf) 15700 ret void 15701} 15702 15703define void @s_shuffle_v4i16_v4i16__7_7_u_1() { 15704; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_u_1: 15705; GFX900: ; %bb.0: 15706; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15707; GFX900-NEXT: ;;#ASMSTART 15708; GFX900-NEXT: ; def s[4:5] 15709; GFX900-NEXT: ;;#ASMEND 15710; GFX900-NEXT: ;;#ASMSTART 15711; GFX900-NEXT: ; def s[6:7] 15712; GFX900-NEXT: ;;#ASMEND 15713; GFX900-NEXT: s_pack_hh_b32_b16 s8, s7, s7 15714; GFX900-NEXT: s_mov_b32 s9, s4 15715; GFX900-NEXT: ;;#ASMSTART 15716; GFX900-NEXT: ; use s[8:9] 15717; GFX900-NEXT: ;;#ASMEND 15718; GFX900-NEXT: s_setpc_b64 s[30:31] 15719; 15720; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_u_1: 15721; GFX90A: ; %bb.0: 15722; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15723; GFX90A-NEXT: ;;#ASMSTART 15724; GFX90A-NEXT: ; def s[4:5] 15725; GFX90A-NEXT: ;;#ASMEND 15726; GFX90A-NEXT: ;;#ASMSTART 15727; GFX90A-NEXT: ; def s[6:7] 15728; GFX90A-NEXT: ;;#ASMEND 15729; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s7, s7 15730; GFX90A-NEXT: s_mov_b32 s9, s4 15731; GFX90A-NEXT: ;;#ASMSTART 15732; GFX90A-NEXT: ; use s[8:9] 15733; GFX90A-NEXT: ;;#ASMEND 15734; GFX90A-NEXT: s_setpc_b64 s[30:31] 15735; 15736; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_u_1: 15737; GFX940: ; %bb.0: 15738; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15739; GFX940-NEXT: ;;#ASMSTART 15740; GFX940-NEXT: ; def s[0:1] 15741; GFX940-NEXT: ;;#ASMEND 15742; GFX940-NEXT: ;;#ASMSTART 15743; GFX940-NEXT: ; def s[2:3] 15744; GFX940-NEXT: ;;#ASMEND 15745; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3 15746; GFX940-NEXT: s_mov_b32 s9, s0 15747; GFX940-NEXT: ;;#ASMSTART 15748; GFX940-NEXT: ; use s[8:9] 15749; GFX940-NEXT: ;;#ASMEND 15750; GFX940-NEXT: s_setpc_b64 s[30:31] 15751 %vec0 = call <4 x i16> asm "; def $0", "=s"() 15752 %vec1 = call <4 x i16> asm "; def $0", "=s"() 15753 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 poison, i32 1> 15754 call void asm sideeffect "; 
use $0", "{s[8:9]}"(<4 x i16> %shuf) 15755 ret void 15756} 15757 15758define void @s_shuffle_v4i16_v4i16__7_7_0_1() { 15759; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_0_1: 15760; GFX900: ; %bb.0: 15761; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15762; GFX900-NEXT: ;;#ASMSTART 15763; GFX900-NEXT: ; def s[4:5] 15764; GFX900-NEXT: ;;#ASMEND 15765; GFX900-NEXT: ;;#ASMSTART 15766; GFX900-NEXT: ; def s[6:7] 15767; GFX900-NEXT: ;;#ASMEND 15768; GFX900-NEXT: s_pack_hh_b32_b16 s8, s7, s7 15769; GFX900-NEXT: s_mov_b32 s9, s4 15770; GFX900-NEXT: ;;#ASMSTART 15771; GFX900-NEXT: ; use s[8:9] 15772; GFX900-NEXT: ;;#ASMEND 15773; GFX900-NEXT: s_setpc_b64 s[30:31] 15774; 15775; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_0_1: 15776; GFX90A: ; %bb.0: 15777; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15778; GFX90A-NEXT: ;;#ASMSTART 15779; GFX90A-NEXT: ; def s[4:5] 15780; GFX90A-NEXT: ;;#ASMEND 15781; GFX90A-NEXT: ;;#ASMSTART 15782; GFX90A-NEXT: ; def s[6:7] 15783; GFX90A-NEXT: ;;#ASMEND 15784; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s7, s7 15785; GFX90A-NEXT: s_mov_b32 s9, s4 15786; GFX90A-NEXT: ;;#ASMSTART 15787; GFX90A-NEXT: ; use s[8:9] 15788; GFX90A-NEXT: ;;#ASMEND 15789; GFX90A-NEXT: s_setpc_b64 s[30:31] 15790; 15791; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_0_1: 15792; GFX940: ; %bb.0: 15793; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15794; GFX940-NEXT: ;;#ASMSTART 15795; GFX940-NEXT: ; def s[0:1] 15796; GFX940-NEXT: ;;#ASMEND 15797; GFX940-NEXT: ;;#ASMSTART 15798; GFX940-NEXT: ; def s[2:3] 15799; GFX940-NEXT: ;;#ASMEND 15800; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3 15801; GFX940-NEXT: s_mov_b32 s9, s0 15802; GFX940-NEXT: ;;#ASMSTART 15803; GFX940-NEXT: ; use s[8:9] 15804; GFX940-NEXT: ;;#ASMEND 15805; GFX940-NEXT: s_setpc_b64 s[30:31] 15806 %vec0 = call <4 x i16> asm "; def $0", "=s"() 15807 %vec1 = call <4 x i16> asm "; def $0", "=s"() 15808 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 0, i32 1> 15809 call void 
asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 15810 ret void 15811} 15812 15813define void @s_shuffle_v4i16_v4i16__7_7_2_1() { 15814; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_2_1: 15815; GFX900: ; %bb.0: 15816; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15817; GFX900-NEXT: ;;#ASMSTART 15818; GFX900-NEXT: ; def s[4:5] 15819; GFX900-NEXT: ;;#ASMEND 15820; GFX900-NEXT: ;;#ASMSTART 15821; GFX900-NEXT: ; def s[6:7] 15822; GFX900-NEXT: ;;#ASMEND 15823; GFX900-NEXT: s_pack_lh_b32_b16 s9, s5, s4 15824; GFX900-NEXT: s_pack_hh_b32_b16 s8, s7, s7 15825; GFX900-NEXT: ;;#ASMSTART 15826; GFX900-NEXT: ; use s[8:9] 15827; GFX900-NEXT: ;;#ASMEND 15828; GFX900-NEXT: s_setpc_b64 s[30:31] 15829; 15830; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_2_1: 15831; GFX90A: ; %bb.0: 15832; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15833; GFX90A-NEXT: ;;#ASMSTART 15834; GFX90A-NEXT: ; def s[4:5] 15835; GFX90A-NEXT: ;;#ASMEND 15836; GFX90A-NEXT: ;;#ASMSTART 15837; GFX90A-NEXT: ; def s[6:7] 15838; GFX90A-NEXT: ;;#ASMEND 15839; GFX90A-NEXT: s_pack_lh_b32_b16 s9, s5, s4 15840; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s7, s7 15841; GFX90A-NEXT: ;;#ASMSTART 15842; GFX90A-NEXT: ; use s[8:9] 15843; GFX90A-NEXT: ;;#ASMEND 15844; GFX90A-NEXT: s_setpc_b64 s[30:31] 15845; 15846; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_2_1: 15847; GFX940: ; %bb.0: 15848; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15849; GFX940-NEXT: ;;#ASMSTART 15850; GFX940-NEXT: ; def s[0:1] 15851; GFX940-NEXT: ;;#ASMEND 15852; GFX940-NEXT: ;;#ASMSTART 15853; GFX940-NEXT: ; def s[2:3] 15854; GFX940-NEXT: ;;#ASMEND 15855; GFX940-NEXT: s_pack_lh_b32_b16 s9, s1, s0 15856; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3 15857; GFX940-NEXT: ;;#ASMSTART 15858; GFX940-NEXT: ; use s[8:9] 15859; GFX940-NEXT: ;;#ASMEND 15860; GFX940-NEXT: s_setpc_b64 s[30:31] 15861 %vec0 = call <4 x i16> asm "; def $0", "=s"() 15862 %vec1 = call <4 x i16> asm "; def $0", "=s"() 15863 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, 
<4 x i32> <i32 7, i32 7, i32 2, i32 1> 15864 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 15865 ret void 15866} 15867 15868define void @s_shuffle_v4i16_v4i16__7_7_3_1() { 15869; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_3_1: 15870; GFX900: ; %bb.0: 15871; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15872; GFX900-NEXT: ;;#ASMSTART 15873; GFX900-NEXT: ; def s[4:5] 15874; GFX900-NEXT: ;;#ASMEND 15875; GFX900-NEXT: ;;#ASMSTART 15876; GFX900-NEXT: ; def s[6:7] 15877; GFX900-NEXT: ;;#ASMEND 15878; GFX900-NEXT: s_pack_hh_b32_b16 s9, s5, s4 15879; GFX900-NEXT: s_pack_hh_b32_b16 s8, s7, s7 15880; GFX900-NEXT: ;;#ASMSTART 15881; GFX900-NEXT: ; use s[8:9] 15882; GFX900-NEXT: ;;#ASMEND 15883; GFX900-NEXT: s_setpc_b64 s[30:31] 15884; 15885; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_3_1: 15886; GFX90A: ; %bb.0: 15887; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15888; GFX90A-NEXT: ;;#ASMSTART 15889; GFX90A-NEXT: ; def s[4:5] 15890; GFX90A-NEXT: ;;#ASMEND 15891; GFX90A-NEXT: ;;#ASMSTART 15892; GFX90A-NEXT: ; def s[6:7] 15893; GFX90A-NEXT: ;;#ASMEND 15894; GFX90A-NEXT: s_pack_hh_b32_b16 s9, s5, s4 15895; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s7, s7 15896; GFX90A-NEXT: ;;#ASMSTART 15897; GFX90A-NEXT: ; use s[8:9] 15898; GFX90A-NEXT: ;;#ASMEND 15899; GFX90A-NEXT: s_setpc_b64 s[30:31] 15900; 15901; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_3_1: 15902; GFX940: ; %bb.0: 15903; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15904; GFX940-NEXT: ;;#ASMSTART 15905; GFX940-NEXT: ; def s[0:1] 15906; GFX940-NEXT: ;;#ASMEND 15907; GFX940-NEXT: ;;#ASMSTART 15908; GFX940-NEXT: ; def s[2:3] 15909; GFX940-NEXT: ;;#ASMEND 15910; GFX940-NEXT: s_pack_hh_b32_b16 s9, s1, s0 15911; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3 15912; GFX940-NEXT: ;;#ASMSTART 15913; GFX940-NEXT: ; use s[8:9] 15914; GFX940-NEXT: ;;#ASMEND 15915; GFX940-NEXT: s_setpc_b64 s[30:31] 15916 %vec0 = call <4 x i16> asm "; def $0", "=s"() 15917 %vec1 = call <4 x i16> asm "; def $0", "=s"() 15918 
%shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 3, i32 1> 15919 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 15920 ret void 15921} 15922 15923define void @s_shuffle_v4i16_v4i16__7_7_4_1() { 15924; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_4_1: 15925; GFX900: ; %bb.0: 15926; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15927; GFX900-NEXT: ;;#ASMSTART 15928; GFX900-NEXT: ; def s[4:5] 15929; GFX900-NEXT: ;;#ASMEND 15930; GFX900-NEXT: ;;#ASMSTART 15931; GFX900-NEXT: ; def s[6:7] 15932; GFX900-NEXT: ;;#ASMEND 15933; GFX900-NEXT: s_pack_lh_b32_b16 s9, s6, s4 15934; GFX900-NEXT: s_pack_hh_b32_b16 s8, s7, s7 15935; GFX900-NEXT: ;;#ASMSTART 15936; GFX900-NEXT: ; use s[8:9] 15937; GFX900-NEXT: ;;#ASMEND 15938; GFX900-NEXT: s_setpc_b64 s[30:31] 15939; 15940; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_4_1: 15941; GFX90A: ; %bb.0: 15942; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15943; GFX90A-NEXT: ;;#ASMSTART 15944; GFX90A-NEXT: ; def s[4:5] 15945; GFX90A-NEXT: ;;#ASMEND 15946; GFX90A-NEXT: ;;#ASMSTART 15947; GFX90A-NEXT: ; def s[6:7] 15948; GFX90A-NEXT: ;;#ASMEND 15949; GFX90A-NEXT: s_pack_lh_b32_b16 s9, s6, s4 15950; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s7, s7 15951; GFX90A-NEXT: ;;#ASMSTART 15952; GFX90A-NEXT: ; use s[8:9] 15953; GFX90A-NEXT: ;;#ASMEND 15954; GFX90A-NEXT: s_setpc_b64 s[30:31] 15955; 15956; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_4_1: 15957; GFX940: ; %bb.0: 15958; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15959; GFX940-NEXT: ;;#ASMSTART 15960; GFX940-NEXT: ; def s[0:1] 15961; GFX940-NEXT: ;;#ASMEND 15962; GFX940-NEXT: ;;#ASMSTART 15963; GFX940-NEXT: ; def s[2:3] 15964; GFX940-NEXT: ;;#ASMEND 15965; GFX940-NEXT: s_pack_lh_b32_b16 s9, s2, s0 15966; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3 15967; GFX940-NEXT: ;;#ASMSTART 15968; GFX940-NEXT: ; use s[8:9] 15969; GFX940-NEXT: ;;#ASMEND 15970; GFX940-NEXT: s_setpc_b64 s[30:31] 15971 %vec0 = call <4 x i16> asm "; def $0", "=s"() 
15972 %vec1 = call <4 x i16> asm "; def $0", "=s"() 15973 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 4, i32 1> 15974 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 15975 ret void 15976} 15977 15978define void @s_shuffle_v4i16_v4i16__7_7_5_1() { 15979; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_5_1: 15980; GFX900: ; %bb.0: 15981; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15982; GFX900-NEXT: ;;#ASMSTART 15983; GFX900-NEXT: ; def s[4:5] 15984; GFX900-NEXT: ;;#ASMEND 15985; GFX900-NEXT: ;;#ASMSTART 15986; GFX900-NEXT: ; def s[6:7] 15987; GFX900-NEXT: ;;#ASMEND 15988; GFX900-NEXT: s_pack_hh_b32_b16 s9, s6, s4 15989; GFX900-NEXT: s_pack_hh_b32_b16 s8, s7, s7 15990; GFX900-NEXT: ;;#ASMSTART 15991; GFX900-NEXT: ; use s[8:9] 15992; GFX900-NEXT: ;;#ASMEND 15993; GFX900-NEXT: s_setpc_b64 s[30:31] 15994; 15995; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_5_1: 15996; GFX90A: ; %bb.0: 15997; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15998; GFX90A-NEXT: ;;#ASMSTART 15999; GFX90A-NEXT: ; def s[4:5] 16000; GFX90A-NEXT: ;;#ASMEND 16001; GFX90A-NEXT: ;;#ASMSTART 16002; GFX90A-NEXT: ; def s[6:7] 16003; GFX90A-NEXT: ;;#ASMEND 16004; GFX90A-NEXT: s_pack_hh_b32_b16 s9, s6, s4 16005; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s7, s7 16006; GFX90A-NEXT: ;;#ASMSTART 16007; GFX90A-NEXT: ; use s[8:9] 16008; GFX90A-NEXT: ;;#ASMEND 16009; GFX90A-NEXT: s_setpc_b64 s[30:31] 16010; 16011; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_5_1: 16012; GFX940: ; %bb.0: 16013; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16014; GFX940-NEXT: ;;#ASMSTART 16015; GFX940-NEXT: ; def s[0:1] 16016; GFX940-NEXT: ;;#ASMEND 16017; GFX940-NEXT: ;;#ASMSTART 16018; GFX940-NEXT: ; def s[2:3] 16019; GFX940-NEXT: ;;#ASMEND 16020; GFX940-NEXT: s_pack_hh_b32_b16 s9, s2, s0 16021; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3 16022; GFX940-NEXT: ;;#ASMSTART 16023; GFX940-NEXT: ; use s[8:9] 16024; GFX940-NEXT: ;;#ASMEND 16025; GFX940-NEXT: s_setpc_b64 
s[30:31] 16026 %vec0 = call <4 x i16> asm "; def $0", "=s"() 16027 %vec1 = call <4 x i16> asm "; def $0", "=s"() 16028 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 5, i32 1> 16029 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 16030 ret void 16031} 16032 16033define void @s_shuffle_v4i16_v4i16__7_7_6_1() { 16034; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_6_1: 16035; GFX900: ; %bb.0: 16036; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16037; GFX900-NEXT: ;;#ASMSTART 16038; GFX900-NEXT: ; def s[4:5] 16039; GFX900-NEXT: ;;#ASMEND 16040; GFX900-NEXT: ;;#ASMSTART 16041; GFX900-NEXT: ; def s[6:7] 16042; GFX900-NEXT: ;;#ASMEND 16043; GFX900-NEXT: s_pack_lh_b32_b16 s9, s7, s4 16044; GFX900-NEXT: s_pack_hh_b32_b16 s8, s7, s7 16045; GFX900-NEXT: ;;#ASMSTART 16046; GFX900-NEXT: ; use s[8:9] 16047; GFX900-NEXT: ;;#ASMEND 16048; GFX900-NEXT: s_setpc_b64 s[30:31] 16049; 16050; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_6_1: 16051; GFX90A: ; %bb.0: 16052; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16053; GFX90A-NEXT: ;;#ASMSTART 16054; GFX90A-NEXT: ; def s[4:5] 16055; GFX90A-NEXT: ;;#ASMEND 16056; GFX90A-NEXT: ;;#ASMSTART 16057; GFX90A-NEXT: ; def s[6:7] 16058; GFX90A-NEXT: ;;#ASMEND 16059; GFX90A-NEXT: s_pack_lh_b32_b16 s9, s7, s4 16060; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s7, s7 16061; GFX90A-NEXT: ;;#ASMSTART 16062; GFX90A-NEXT: ; use s[8:9] 16063; GFX90A-NEXT: ;;#ASMEND 16064; GFX90A-NEXT: s_setpc_b64 s[30:31] 16065; 16066; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_6_1: 16067; GFX940: ; %bb.0: 16068; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16069; GFX940-NEXT: ;;#ASMSTART 16070; GFX940-NEXT: ; def s[0:1] 16071; GFX940-NEXT: ;;#ASMEND 16072; GFX940-NEXT: ;;#ASMSTART 16073; GFX940-NEXT: ; def s[2:3] 16074; GFX940-NEXT: ;;#ASMEND 16075; GFX940-NEXT: s_pack_lh_b32_b16 s9, s3, s0 16076; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3 16077; GFX940-NEXT: ;;#ASMSTART 16078; GFX940-NEXT: ; use s[8:9] 16079; 
GFX940-NEXT: ;;#ASMEND 16080; GFX940-NEXT: s_setpc_b64 s[30:31] 16081 %vec0 = call <4 x i16> asm "; def $0", "=s"() 16082 %vec1 = call <4 x i16> asm "; def $0", "=s"() 16083 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 1> 16084 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 16085 ret void 16086} 16087 16088define void @s_shuffle_v4i16_v4i16__u_2_2_2() { 16089; GFX900-LABEL: s_shuffle_v4i16_v4i16__u_2_2_2: 16090; GFX900: ; %bb.0: 16091; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16092; GFX900-NEXT: ;;#ASMSTART 16093; GFX900-NEXT: ; def s[4:5] 16094; GFX900-NEXT: ;;#ASMEND 16095; GFX900-NEXT: s_pack_ll_b32_b16 s9, s5, s5 16096; GFX900-NEXT: s_lshl_b32 s8, s5, 16 16097; GFX900-NEXT: ;;#ASMSTART 16098; GFX900-NEXT: ; use s[8:9] 16099; GFX900-NEXT: ;;#ASMEND 16100; GFX900-NEXT: s_setpc_b64 s[30:31] 16101; 16102; GFX90A-LABEL: s_shuffle_v4i16_v4i16__u_2_2_2: 16103; GFX90A: ; %bb.0: 16104; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16105; GFX90A-NEXT: ;;#ASMSTART 16106; GFX90A-NEXT: ; def s[4:5] 16107; GFX90A-NEXT: ;;#ASMEND 16108; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s5, s5 16109; GFX90A-NEXT: s_lshl_b32 s8, s5, 16 16110; GFX90A-NEXT: ;;#ASMSTART 16111; GFX90A-NEXT: ; use s[8:9] 16112; GFX90A-NEXT: ;;#ASMEND 16113; GFX90A-NEXT: s_setpc_b64 s[30:31] 16114; 16115; GFX940-LABEL: s_shuffle_v4i16_v4i16__u_2_2_2: 16116; GFX940: ; %bb.0: 16117; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16118; GFX940-NEXT: ;;#ASMSTART 16119; GFX940-NEXT: ; def s[0:1] 16120; GFX940-NEXT: ;;#ASMEND 16121; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 16122; GFX940-NEXT: s_lshl_b32 s8, s1, 16 16123; GFX940-NEXT: ;;#ASMSTART 16124; GFX940-NEXT: ; use s[8:9] 16125; GFX940-NEXT: ;;#ASMEND 16126; GFX940-NEXT: s_setpc_b64 s[30:31] 16127 %vec0 = call <4 x i16> asm "; def $0", "=s"() 16128 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 poison, i32 2, i32 2, i32 2> 16129 call void asm 
sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 16130 ret void 16131} 16132 16133define void @s_shuffle_v4i16_v4i16__0_2_2_2() { 16134; GFX900-LABEL: s_shuffle_v4i16_v4i16__0_2_2_2: 16135; GFX900: ; %bb.0: 16136; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16137; GFX900-NEXT: ;;#ASMSTART 16138; GFX900-NEXT: ; def s[4:5] 16139; GFX900-NEXT: ;;#ASMEND 16140; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s5 16141; GFX900-NEXT: s_pack_ll_b32_b16 s9, s5, s5 16142; GFX900-NEXT: ;;#ASMSTART 16143; GFX900-NEXT: ; use s[8:9] 16144; GFX900-NEXT: ;;#ASMEND 16145; GFX900-NEXT: s_setpc_b64 s[30:31] 16146; 16147; GFX90A-LABEL: s_shuffle_v4i16_v4i16__0_2_2_2: 16148; GFX90A: ; %bb.0: 16149; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16150; GFX90A-NEXT: ;;#ASMSTART 16151; GFX90A-NEXT: ; def s[4:5] 16152; GFX90A-NEXT: ;;#ASMEND 16153; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s5 16154; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s5, s5 16155; GFX90A-NEXT: ;;#ASMSTART 16156; GFX90A-NEXT: ; use s[8:9] 16157; GFX90A-NEXT: ;;#ASMEND 16158; GFX90A-NEXT: s_setpc_b64 s[30:31] 16159; 16160; GFX940-LABEL: s_shuffle_v4i16_v4i16__0_2_2_2: 16161; GFX940: ; %bb.0: 16162; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16163; GFX940-NEXT: ;;#ASMSTART 16164; GFX940-NEXT: ; def s[0:1] 16165; GFX940-NEXT: ;;#ASMEND 16166; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 16167; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 16168; GFX940-NEXT: ;;#ASMSTART 16169; GFX940-NEXT: ; use s[8:9] 16170; GFX940-NEXT: ;;#ASMEND 16171; GFX940-NEXT: s_setpc_b64 s[30:31] 16172 %vec0 = call <4 x i16> asm "; def $0", "=s"() 16173 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 0, i32 2, i32 2, i32 2> 16174 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 16175 ret void 16176} 16177 16178define void @s_shuffle_v4i16_v4i16__1_2_2_2() { 16179; GFX900-LABEL: s_shuffle_v4i16_v4i16__1_2_2_2: 16180; GFX900: ; %bb.0: 16181; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16182; 
GFX900-NEXT: ;;#ASMSTART 16183; GFX900-NEXT: ; def s[4:5] 16184; GFX900-NEXT: ;;#ASMEND 16185; GFX900-NEXT: s_lshr_b32 s4, s4, 16 16186; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s5 16187; GFX900-NEXT: s_pack_ll_b32_b16 s9, s5, s5 16188; GFX900-NEXT: ;;#ASMSTART 16189; GFX900-NEXT: ; use s[8:9] 16190; GFX900-NEXT: ;;#ASMEND 16191; GFX900-NEXT: s_setpc_b64 s[30:31] 16192; 16193; GFX90A-LABEL: s_shuffle_v4i16_v4i16__1_2_2_2: 16194; GFX90A: ; %bb.0: 16195; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16196; GFX90A-NEXT: ;;#ASMSTART 16197; GFX90A-NEXT: ; def s[4:5] 16198; GFX90A-NEXT: ;;#ASMEND 16199; GFX90A-NEXT: s_lshr_b32 s4, s4, 16 16200; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s5 16201; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s5, s5 16202; GFX90A-NEXT: ;;#ASMSTART 16203; GFX90A-NEXT: ; use s[8:9] 16204; GFX90A-NEXT: ;;#ASMEND 16205; GFX90A-NEXT: s_setpc_b64 s[30:31] 16206; 16207; GFX940-LABEL: s_shuffle_v4i16_v4i16__1_2_2_2: 16208; GFX940: ; %bb.0: 16209; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16210; GFX940-NEXT: ;;#ASMSTART 16211; GFX940-NEXT: ; def s[0:1] 16212; GFX940-NEXT: ;;#ASMEND 16213; GFX940-NEXT: s_lshr_b32 s0, s0, 16 16214; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 16215; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 16216; GFX940-NEXT: ;;#ASMSTART 16217; GFX940-NEXT: ; use s[8:9] 16218; GFX940-NEXT: ;;#ASMEND 16219; GFX940-NEXT: s_setpc_b64 s[30:31] 16220 %vec0 = call <4 x i16> asm "; def $0", "=s"() 16221 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 1, i32 2, i32 2, i32 2> 16222 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 16223 ret void 16224} 16225 16226define void @s_shuffle_v4i16_v4i16__2_2_2_2() { 16227; GFX900-LABEL: s_shuffle_v4i16_v4i16__2_2_2_2: 16228; GFX900: ; %bb.0: 16229; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16230; GFX900-NEXT: ;;#ASMSTART 16231; GFX900-NEXT: ; def s[4:5] 16232; GFX900-NEXT: ;;#ASMEND 16233; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s5 16234; 
GFX900-NEXT: s_mov_b32 s9, s8 16235; GFX900-NEXT: ;;#ASMSTART 16236; GFX900-NEXT: ; use s[8:9] 16237; GFX900-NEXT: ;;#ASMEND 16238; GFX900-NEXT: s_setpc_b64 s[30:31] 16239; 16240; GFX90A-LABEL: s_shuffle_v4i16_v4i16__2_2_2_2: 16241; GFX90A: ; %bb.0: 16242; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16243; GFX90A-NEXT: ;;#ASMSTART 16244; GFX90A-NEXT: ; def s[4:5] 16245; GFX90A-NEXT: ;;#ASMEND 16246; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s5 16247; GFX90A-NEXT: s_mov_b32 s9, s8 16248; GFX90A-NEXT: ;;#ASMSTART 16249; GFX90A-NEXT: ; use s[8:9] 16250; GFX90A-NEXT: ;;#ASMEND 16251; GFX90A-NEXT: s_setpc_b64 s[30:31] 16252; 16253; GFX940-LABEL: s_shuffle_v4i16_v4i16__2_2_2_2: 16254; GFX940: ; %bb.0: 16255; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16256; GFX940-NEXT: ;;#ASMSTART 16257; GFX940-NEXT: ; def s[0:1] 16258; GFX940-NEXT: ;;#ASMEND 16259; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 16260; GFX940-NEXT: s_mov_b32 s9, s8 16261; GFX940-NEXT: ;;#ASMSTART 16262; GFX940-NEXT: ; use s[8:9] 16263; GFX940-NEXT: ;;#ASMEND 16264; GFX940-NEXT: s_setpc_b64 s[30:31] 16265 %vec0 = call <4 x i16> asm "; def $0", "=s"() 16266 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2> 16267 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 16268 ret void 16269} 16270 16271define void @s_shuffle_v4i16_v4i16__3_2_2_2() { 16272; GFX900-LABEL: s_shuffle_v4i16_v4i16__3_2_2_2: 16273; GFX900: ; %bb.0: 16274; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16275; GFX900-NEXT: ;;#ASMSTART 16276; GFX900-NEXT: ; def s[4:5] 16277; GFX900-NEXT: ;;#ASMEND 16278; GFX900-NEXT: s_lshr_b32 s4, s5, 16 16279; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s5 16280; GFX900-NEXT: s_pack_ll_b32_b16 s9, s5, s5 16281; GFX900-NEXT: ;;#ASMSTART 16282; GFX900-NEXT: ; use s[8:9] 16283; GFX900-NEXT: ;;#ASMEND 16284; GFX900-NEXT: s_setpc_b64 s[30:31] 16285; 16286; GFX90A-LABEL: s_shuffle_v4i16_v4i16__3_2_2_2: 16287; GFX90A: ; %bb.0: 16288; 
GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16289; GFX90A-NEXT: ;;#ASMSTART 16290; GFX90A-NEXT: ; def s[4:5] 16291; GFX90A-NEXT: ;;#ASMEND 16292; GFX90A-NEXT: s_lshr_b32 s4, s5, 16 16293; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s5 16294; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s5, s5 16295; GFX90A-NEXT: ;;#ASMSTART 16296; GFX90A-NEXT: ; use s[8:9] 16297; GFX90A-NEXT: ;;#ASMEND 16298; GFX90A-NEXT: s_setpc_b64 s[30:31] 16299; 16300; GFX940-LABEL: s_shuffle_v4i16_v4i16__3_2_2_2: 16301; GFX940: ; %bb.0: 16302; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16303; GFX940-NEXT: ;;#ASMSTART 16304; GFX940-NEXT: ; def s[0:1] 16305; GFX940-NEXT: ;;#ASMEND 16306; GFX940-NEXT: s_lshr_b32 s0, s1, 16 16307; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 16308; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 16309; GFX940-NEXT: ;;#ASMSTART 16310; GFX940-NEXT: ; use s[8:9] 16311; GFX940-NEXT: ;;#ASMEND 16312; GFX940-NEXT: s_setpc_b64 s[30:31] 16313 %vec0 = call <4 x i16> asm "; def $0", "=s"() 16314 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 3, i32 2, i32 2, i32 2> 16315 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 16316 ret void 16317} 16318 16319define void @s_shuffle_v4i16_v4i16__4_2_2_2() { 16320; GFX900-LABEL: s_shuffle_v4i16_v4i16__4_2_2_2: 16321; GFX900: ; %bb.0: 16322; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16323; GFX900-NEXT: ;;#ASMSTART 16324; GFX900-NEXT: ; def s[4:5] 16325; GFX900-NEXT: ;;#ASMEND 16326; GFX900-NEXT: s_pack_ll_b32_b16 s9, s5, s5 16327; GFX900-NEXT: s_lshl_b32 s8, s5, 16 16328; GFX900-NEXT: ;;#ASMSTART 16329; GFX900-NEXT: ; use s[8:9] 16330; GFX900-NEXT: ;;#ASMEND 16331; GFX900-NEXT: s_setpc_b64 s[30:31] 16332; 16333; GFX90A-LABEL: s_shuffle_v4i16_v4i16__4_2_2_2: 16334; GFX90A: ; %bb.0: 16335; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16336; GFX90A-NEXT: ;;#ASMSTART 16337; GFX90A-NEXT: ; def s[4:5] 16338; GFX90A-NEXT: ;;#ASMEND 16339; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s5, s5 16340; 
GFX90A-NEXT: s_lshl_b32 s8, s5, 16 16341; GFX90A-NEXT: ;;#ASMSTART 16342; GFX90A-NEXT: ; use s[8:9] 16343; GFX90A-NEXT: ;;#ASMEND 16344; GFX90A-NEXT: s_setpc_b64 s[30:31] 16345; 16346; GFX940-LABEL: s_shuffle_v4i16_v4i16__4_2_2_2: 16347; GFX940: ; %bb.0: 16348; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16349; GFX940-NEXT: ;;#ASMSTART 16350; GFX940-NEXT: ; def s[0:1] 16351; GFX940-NEXT: ;;#ASMEND 16352; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 16353; GFX940-NEXT: s_lshl_b32 s8, s1, 16 16354; GFX940-NEXT: ;;#ASMSTART 16355; GFX940-NEXT: ; use s[8:9] 16356; GFX940-NEXT: ;;#ASMEND 16357; GFX940-NEXT: s_setpc_b64 s[30:31] 16358 %vec0 = call <4 x i16> asm "; def $0", "=s"() 16359 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 4, i32 2, i32 2, i32 2> 16360 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 16361 ret void 16362} 16363 16364define void @s_shuffle_v4i16_v4i16__5_2_2_2() { 16365; GFX900-LABEL: s_shuffle_v4i16_v4i16__5_2_2_2: 16366; GFX900: ; %bb.0: 16367; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16368; GFX900-NEXT: ;;#ASMSTART 16369; GFX900-NEXT: ; def s[4:5] 16370; GFX900-NEXT: ;;#ASMEND 16371; GFX900-NEXT: ;;#ASMSTART 16372; GFX900-NEXT: ; def s[6:7] 16373; GFX900-NEXT: ;;#ASMEND 16374; GFX900-NEXT: s_lshr_b32 s4, s6, 16 16375; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s5 16376; GFX900-NEXT: s_pack_ll_b32_b16 s9, s5, s5 16377; GFX900-NEXT: ;;#ASMSTART 16378; GFX900-NEXT: ; use s[8:9] 16379; GFX900-NEXT: ;;#ASMEND 16380; GFX900-NEXT: s_setpc_b64 s[30:31] 16381; 16382; GFX90A-LABEL: s_shuffle_v4i16_v4i16__5_2_2_2: 16383; GFX90A: ; %bb.0: 16384; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16385; GFX90A-NEXT: ;;#ASMSTART 16386; GFX90A-NEXT: ; def s[4:5] 16387; GFX90A-NEXT: ;;#ASMEND 16388; GFX90A-NEXT: ;;#ASMSTART 16389; GFX90A-NEXT: ; def s[6:7] 16390; GFX90A-NEXT: ;;#ASMEND 16391; GFX90A-NEXT: s_lshr_b32 s4, s6, 16 16392; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s5 16393; GFX90A-NEXT: 
s_pack_ll_b32_b16 s9, s5, s5 16394; GFX90A-NEXT: ;;#ASMSTART 16395; GFX90A-NEXT: ; use s[8:9] 16396; GFX90A-NEXT: ;;#ASMEND 16397; GFX90A-NEXT: s_setpc_b64 s[30:31] 16398; 16399; GFX940-LABEL: s_shuffle_v4i16_v4i16__5_2_2_2: 16400; GFX940: ; %bb.0: 16401; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16402; GFX940-NEXT: ;;#ASMSTART 16403; GFX940-NEXT: ; def s[0:1] 16404; GFX940-NEXT: ;;#ASMEND 16405; GFX940-NEXT: ;;#ASMSTART 16406; GFX940-NEXT: ; def s[2:3] 16407; GFX940-NEXT: ;;#ASMEND 16408; GFX940-NEXT: s_lshr_b32 s0, s2, 16 16409; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 16410; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 16411; GFX940-NEXT: ;;#ASMSTART 16412; GFX940-NEXT: ; use s[8:9] 16413; GFX940-NEXT: ;;#ASMEND 16414; GFX940-NEXT: s_setpc_b64 s[30:31] 16415 %vec0 = call <4 x i16> asm "; def $0", "=s"() 16416 %vec1 = call <4 x i16> asm "; def $0", "=s"() 16417 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 5, i32 2, i32 2, i32 2> 16418 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 16419 ret void 16420} 16421 16422define void @s_shuffle_v4i16_v4i16__6_2_2_2() { 16423; GFX900-LABEL: s_shuffle_v4i16_v4i16__6_2_2_2: 16424; GFX900: ; %bb.0: 16425; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16426; GFX900-NEXT: ;;#ASMSTART 16427; GFX900-NEXT: ; def s[4:5] 16428; GFX900-NEXT: ;;#ASMEND 16429; GFX900-NEXT: ;;#ASMSTART 16430; GFX900-NEXT: ; def s[6:7] 16431; GFX900-NEXT: ;;#ASMEND 16432; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s5 16433; GFX900-NEXT: s_pack_ll_b32_b16 s9, s5, s5 16434; GFX900-NEXT: ;;#ASMSTART 16435; GFX900-NEXT: ; use s[8:9] 16436; GFX900-NEXT: ;;#ASMEND 16437; GFX900-NEXT: s_setpc_b64 s[30:31] 16438; 16439; GFX90A-LABEL: s_shuffle_v4i16_v4i16__6_2_2_2: 16440; GFX90A: ; %bb.0: 16441; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16442; GFX90A-NEXT: ;;#ASMSTART 16443; GFX90A-NEXT: ; def s[4:5] 16444; GFX90A-NEXT: ;;#ASMEND 16445; GFX90A-NEXT: ;;#ASMSTART 16446; GFX90A-NEXT: ; def s[6:7] 
16447; GFX90A-NEXT: ;;#ASMEND 16448; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s5 16449; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s5, s5 16450; GFX90A-NEXT: ;;#ASMSTART 16451; GFX90A-NEXT: ; use s[8:9] 16452; GFX90A-NEXT: ;;#ASMEND 16453; GFX90A-NEXT: s_setpc_b64 s[30:31] 16454; 16455; GFX940-LABEL: s_shuffle_v4i16_v4i16__6_2_2_2: 16456; GFX940: ; %bb.0: 16457; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16458; GFX940-NEXT: ;;#ASMSTART 16459; GFX940-NEXT: ; def s[0:1] 16460; GFX940-NEXT: ;;#ASMEND 16461; GFX940-NEXT: ;;#ASMSTART 16462; GFX940-NEXT: ; def s[2:3] 16463; GFX940-NEXT: ;;#ASMEND 16464; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s1 16465; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 16466; GFX940-NEXT: ;;#ASMSTART 16467; GFX940-NEXT: ; use s[8:9] 16468; GFX940-NEXT: ;;#ASMEND 16469; GFX940-NEXT: s_setpc_b64 s[30:31] 16470 %vec0 = call <4 x i16> asm "; def $0", "=s"() 16471 %vec1 = call <4 x i16> asm "; def $0", "=s"() 16472 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 6, i32 2, i32 2, i32 2> 16473 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 16474 ret void 16475} 16476 16477define void @s_shuffle_v4i16_v4i16__7_2_2_2() { 16478; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_2_2_2: 16479; GFX900: ; %bb.0: 16480; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16481; GFX900-NEXT: ;;#ASMSTART 16482; GFX900-NEXT: ; def s[4:5] 16483; GFX900-NEXT: ;;#ASMEND 16484; GFX900-NEXT: ;;#ASMSTART 16485; GFX900-NEXT: ; def s[6:7] 16486; GFX900-NEXT: ;;#ASMEND 16487; GFX900-NEXT: s_lshr_b32 s4, s7, 16 16488; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s5 16489; GFX900-NEXT: s_pack_ll_b32_b16 s9, s5, s5 16490; GFX900-NEXT: ;;#ASMSTART 16491; GFX900-NEXT: ; use s[8:9] 16492; GFX900-NEXT: ;;#ASMEND 16493; GFX900-NEXT: s_setpc_b64 s[30:31] 16494; 16495; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_2_2_2: 16496; GFX90A: ; %bb.0: 16497; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16498; GFX90A-NEXT: ;;#ASMSTART 16499; GFX90A-NEXT: ; def 
s[4:5] 16500; GFX90A-NEXT: ;;#ASMEND 16501; GFX90A-NEXT: ;;#ASMSTART 16502; GFX90A-NEXT: ; def s[6:7] 16503; GFX90A-NEXT: ;;#ASMEND 16504; GFX90A-NEXT: s_lshr_b32 s4, s7, 16 16505; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s5 16506; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s5, s5 16507; GFX90A-NEXT: ;;#ASMSTART 16508; GFX90A-NEXT: ; use s[8:9] 16509; GFX90A-NEXT: ;;#ASMEND 16510; GFX90A-NEXT: s_setpc_b64 s[30:31] 16511; 16512; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_2_2_2: 16513; GFX940: ; %bb.0: 16514; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16515; GFX940-NEXT: ;;#ASMSTART 16516; GFX940-NEXT: ; def s[0:1] 16517; GFX940-NEXT: ;;#ASMEND 16518; GFX940-NEXT: ;;#ASMSTART 16519; GFX940-NEXT: ; def s[2:3] 16520; GFX940-NEXT: ;;#ASMEND 16521; GFX940-NEXT: s_lshr_b32 s0, s3, 16 16522; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 16523; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 16524; GFX940-NEXT: ;;#ASMSTART 16525; GFX940-NEXT: ; use s[8:9] 16526; GFX940-NEXT: ;;#ASMEND 16527; GFX940-NEXT: s_setpc_b64 s[30:31] 16528 %vec0 = call <4 x i16> asm "; def $0", "=s"() 16529 %vec1 = call <4 x i16> asm "; def $0", "=s"() 16530 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 2, i32 2, i32 2> 16531 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 16532 ret void 16533} 16534 16535define void @s_shuffle_v4i16_v4i16__7_u_2_2() { 16536; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_u_2_2: 16537; GFX900: ; %bb.0: 16538; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16539; GFX900-NEXT: ;;#ASMSTART 16540; GFX900-NEXT: ; def s[4:5] 16541; GFX900-NEXT: ;;#ASMEND 16542; GFX900-NEXT: ;;#ASMSTART 16543; GFX900-NEXT: ; def s[6:7] 16544; GFX900-NEXT: ;;#ASMEND 16545; GFX900-NEXT: s_lshr_b32 s8, s7, 16 16546; GFX900-NEXT: s_pack_ll_b32_b16 s9, s5, s5 16547; GFX900-NEXT: ;;#ASMSTART 16548; GFX900-NEXT: ; use s[8:9] 16549; GFX900-NEXT: ;;#ASMEND 16550; GFX900-NEXT: s_setpc_b64 s[30:31] 16551; 16552; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_u_2_2: 16553; 
GFX90A: ; %bb.0: 16554; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16555; GFX90A-NEXT: ;;#ASMSTART 16556; GFX90A-NEXT: ; def s[4:5] 16557; GFX90A-NEXT: ;;#ASMEND 16558; GFX90A-NEXT: ;;#ASMSTART 16559; GFX90A-NEXT: ; def s[6:7] 16560; GFX90A-NEXT: ;;#ASMEND 16561; GFX90A-NEXT: s_lshr_b32 s8, s7, 16 16562; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s5, s5 16563; GFX90A-NEXT: ;;#ASMSTART 16564; GFX90A-NEXT: ; use s[8:9] 16565; GFX90A-NEXT: ;;#ASMEND 16566; GFX90A-NEXT: s_setpc_b64 s[30:31] 16567; 16568; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_u_2_2: 16569; GFX940: ; %bb.0: 16570; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16571; GFX940-NEXT: ;;#ASMSTART 16572; GFX940-NEXT: ; def s[0:1] 16573; GFX940-NEXT: ;;#ASMEND 16574; GFX940-NEXT: ;;#ASMSTART 16575; GFX940-NEXT: ; def s[2:3] 16576; GFX940-NEXT: ;;#ASMEND 16577; GFX940-NEXT: s_lshr_b32 s8, s3, 16 16578; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 16579; GFX940-NEXT: ;;#ASMSTART 16580; GFX940-NEXT: ; use s[8:9] 16581; GFX940-NEXT: ;;#ASMEND 16582; GFX940-NEXT: s_setpc_b64 s[30:31] 16583 %vec0 = call <4 x i16> asm "; def $0", "=s"() 16584 %vec1 = call <4 x i16> asm "; def $0", "=s"() 16585 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 poison, i32 2, i32 2> 16586 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 16587 ret void 16588} 16589 16590define void @s_shuffle_v4i16_v4i16__7_0_2_2() { 16591; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_0_2_2: 16592; GFX900: ; %bb.0: 16593; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16594; GFX900-NEXT: ;;#ASMSTART 16595; GFX900-NEXT: ; def s[6:7] 16596; GFX900-NEXT: ;;#ASMEND 16597; GFX900-NEXT: s_lshr_b32 s6, s7, 16 16598; GFX900-NEXT: ;;#ASMSTART 16599; GFX900-NEXT: ; def s[4:5] 16600; GFX900-NEXT: ;;#ASMEND 16601; GFX900-NEXT: s_pack_ll_b32_b16 s8, s6, s4 16602; GFX900-NEXT: s_pack_ll_b32_b16 s9, s5, s5 16603; GFX900-NEXT: ;;#ASMSTART 16604; GFX900-NEXT: ; use s[8:9] 16605; GFX900-NEXT: ;;#ASMEND 16606; GFX900-NEXT: 
s_setpc_b64 s[30:31] 16607; 16608; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_0_2_2: 16609; GFX90A: ; %bb.0: 16610; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16611; GFX90A-NEXT: ;;#ASMSTART 16612; GFX90A-NEXT: ; def s[6:7] 16613; GFX90A-NEXT: ;;#ASMEND 16614; GFX90A-NEXT: s_lshr_b32 s6, s7, 16 16615; GFX90A-NEXT: ;;#ASMSTART 16616; GFX90A-NEXT: ; def s[4:5] 16617; GFX90A-NEXT: ;;#ASMEND 16618; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s6, s4 16619; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s5, s5 16620; GFX90A-NEXT: ;;#ASMSTART 16621; GFX90A-NEXT: ; use s[8:9] 16622; GFX90A-NEXT: ;;#ASMEND 16623; GFX90A-NEXT: s_setpc_b64 s[30:31] 16624; 16625; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_0_2_2: 16626; GFX940: ; %bb.0: 16627; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16628; GFX940-NEXT: ;;#ASMSTART 16629; GFX940-NEXT: ; def s[2:3] 16630; GFX940-NEXT: ;;#ASMEND 16631; GFX940-NEXT: s_lshr_b32 s2, s3, 16 16632; GFX940-NEXT: ;;#ASMSTART 16633; GFX940-NEXT: ; def s[0:1] 16634; GFX940-NEXT: ;;#ASMEND 16635; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s0 16636; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 16637; GFX940-NEXT: ;;#ASMSTART 16638; GFX940-NEXT: ; use s[8:9] 16639; GFX940-NEXT: ;;#ASMEND 16640; GFX940-NEXT: s_setpc_b64 s[30:31] 16641 %vec0 = call <4 x i16> asm "; def $0", "=s"() 16642 %vec1 = call <4 x i16> asm "; def $0", "=s"() 16643 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 0, i32 2, i32 2> 16644 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 16645 ret void 16646} 16647 16648define void @s_shuffle_v4i16_v4i16__7_1_2_2() { 16649; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_1_2_2: 16650; GFX900: ; %bb.0: 16651; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16652; GFX900-NEXT: ;;#ASMSTART 16653; GFX900-NEXT: ; def s[4:5] 16654; GFX900-NEXT: ;;#ASMEND 16655; GFX900-NEXT: ;;#ASMSTART 16656; GFX900-NEXT: ; def s[6:7] 16657; GFX900-NEXT: ;;#ASMEND 16658; GFX900-NEXT: s_pack_hh_b32_b16 s8, s7, s4 16659; GFX900-NEXT: 
s_pack_ll_b32_b16 s9, s5, s5 16660; GFX900-NEXT: ;;#ASMSTART 16661; GFX900-NEXT: ; use s[8:9] 16662; GFX900-NEXT: ;;#ASMEND 16663; GFX900-NEXT: s_setpc_b64 s[30:31] 16664; 16665; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_1_2_2: 16666; GFX90A: ; %bb.0: 16667; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16668; GFX90A-NEXT: ;;#ASMSTART 16669; GFX90A-NEXT: ; def s[4:5] 16670; GFX90A-NEXT: ;;#ASMEND 16671; GFX90A-NEXT: ;;#ASMSTART 16672; GFX90A-NEXT: ; def s[6:7] 16673; GFX90A-NEXT: ;;#ASMEND 16674; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s7, s4 16675; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s5, s5 16676; GFX90A-NEXT: ;;#ASMSTART 16677; GFX90A-NEXT: ; use s[8:9] 16678; GFX90A-NEXT: ;;#ASMEND 16679; GFX90A-NEXT: s_setpc_b64 s[30:31] 16680; 16681; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_1_2_2: 16682; GFX940: ; %bb.0: 16683; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16684; GFX940-NEXT: ;;#ASMSTART 16685; GFX940-NEXT: ; def s[0:1] 16686; GFX940-NEXT: ;;#ASMEND 16687; GFX940-NEXT: ;;#ASMSTART 16688; GFX940-NEXT: ; def s[2:3] 16689; GFX940-NEXT: ;;#ASMEND 16690; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s0 16691; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 16692; GFX940-NEXT: ;;#ASMSTART 16693; GFX940-NEXT: ; use s[8:9] 16694; GFX940-NEXT: ;;#ASMEND 16695; GFX940-NEXT: s_setpc_b64 s[30:31] 16696 %vec0 = call <4 x i16> asm "; def $0", "=s"() 16697 %vec1 = call <4 x i16> asm "; def $0", "=s"() 16698 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 1, i32 2, i32 2> 16699 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 16700 ret void 16701} 16702 16703define void @s_shuffle_v4i16_v4i16__7_3_2_2() { 16704; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_3_2_2: 16705; GFX900: ; %bb.0: 16706; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16707; GFX900-NEXT: ;;#ASMSTART 16708; GFX900-NEXT: ; def s[4:5] 16709; GFX900-NEXT: ;;#ASMEND 16710; GFX900-NEXT: ;;#ASMSTART 16711; GFX900-NEXT: ; def s[6:7] 16712; GFX900-NEXT: ;;#ASMEND 16713; 
GFX900-NEXT: s_pack_hh_b32_b16 s8, s7, s5 16714; GFX900-NEXT: s_pack_ll_b32_b16 s9, s5, s5 16715; GFX900-NEXT: ;;#ASMSTART 16716; GFX900-NEXT: ; use s[8:9] 16717; GFX900-NEXT: ;;#ASMEND 16718; GFX900-NEXT: s_setpc_b64 s[30:31] 16719; 16720; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_3_2_2: 16721; GFX90A: ; %bb.0: 16722; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16723; GFX90A-NEXT: ;;#ASMSTART 16724; GFX90A-NEXT: ; def s[4:5] 16725; GFX90A-NEXT: ;;#ASMEND 16726; GFX90A-NEXT: ;;#ASMSTART 16727; GFX90A-NEXT: ; def s[6:7] 16728; GFX90A-NEXT: ;;#ASMEND 16729; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s7, s5 16730; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s5, s5 16731; GFX90A-NEXT: ;;#ASMSTART 16732; GFX90A-NEXT: ; use s[8:9] 16733; GFX90A-NEXT: ;;#ASMEND 16734; GFX90A-NEXT: s_setpc_b64 s[30:31] 16735; 16736; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_3_2_2: 16737; GFX940: ; %bb.0: 16738; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16739; GFX940-NEXT: ;;#ASMSTART 16740; GFX940-NEXT: ; def s[0:1] 16741; GFX940-NEXT: ;;#ASMEND 16742; GFX940-NEXT: ;;#ASMSTART 16743; GFX940-NEXT: ; def s[2:3] 16744; GFX940-NEXT: ;;#ASMEND 16745; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s1 16746; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 16747; GFX940-NEXT: ;;#ASMSTART 16748; GFX940-NEXT: ; use s[8:9] 16749; GFX940-NEXT: ;;#ASMEND 16750; GFX940-NEXT: s_setpc_b64 s[30:31] 16751 %vec0 = call <4 x i16> asm "; def $0", "=s"() 16752 %vec1 = call <4 x i16> asm "; def $0", "=s"() 16753 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 3, i32 2, i32 2> 16754 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 16755 ret void 16756} 16757 16758define void @s_shuffle_v4i16_v4i16__7_4_2_2() { 16759; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_4_2_2: 16760; GFX900: ; %bb.0: 16761; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16762; GFX900-NEXT: ;;#ASMSTART 16763; GFX900-NEXT: ; def s[4:5] 16764; GFX900-NEXT: ;;#ASMEND 16765; GFX900-NEXT: ;;#ASMSTART 16766; 
GFX900-NEXT: ; def s[6:7] 16767; GFX900-NEXT: ;;#ASMEND 16768; GFX900-NEXT: s_lshr_b32 s4, s7, 16 16769; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s6 16770; GFX900-NEXT: s_pack_ll_b32_b16 s9, s5, s5 16771; GFX900-NEXT: ;;#ASMSTART 16772; GFX900-NEXT: ; use s[8:9] 16773; GFX900-NEXT: ;;#ASMEND 16774; GFX900-NEXT: s_setpc_b64 s[30:31] 16775; 16776; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_4_2_2: 16777; GFX90A: ; %bb.0: 16778; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16779; GFX90A-NEXT: ;;#ASMSTART 16780; GFX90A-NEXT: ; def s[4:5] 16781; GFX90A-NEXT: ;;#ASMEND 16782; GFX90A-NEXT: ;;#ASMSTART 16783; GFX90A-NEXT: ; def s[6:7] 16784; GFX90A-NEXT: ;;#ASMEND 16785; GFX90A-NEXT: s_lshr_b32 s4, s7, 16 16786; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s6 16787; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s5, s5 16788; GFX90A-NEXT: ;;#ASMSTART 16789; GFX90A-NEXT: ; use s[8:9] 16790; GFX90A-NEXT: ;;#ASMEND 16791; GFX90A-NEXT: s_setpc_b64 s[30:31] 16792; 16793; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_4_2_2: 16794; GFX940: ; %bb.0: 16795; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16796; GFX940-NEXT: ;;#ASMSTART 16797; GFX940-NEXT: ; def s[0:1] 16798; GFX940-NEXT: ;;#ASMEND 16799; GFX940-NEXT: ;;#ASMSTART 16800; GFX940-NEXT: ; def s[2:3] 16801; GFX940-NEXT: ;;#ASMEND 16802; GFX940-NEXT: s_lshr_b32 s0, s3, 16 16803; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s2 16804; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 16805; GFX940-NEXT: ;;#ASMSTART 16806; GFX940-NEXT: ; use s[8:9] 16807; GFX940-NEXT: ;;#ASMEND 16808; GFX940-NEXT: s_setpc_b64 s[30:31] 16809 %vec0 = call <4 x i16> asm "; def $0", "=s"() 16810 %vec1 = call <4 x i16> asm "; def $0", "=s"() 16811 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 4, i32 2, i32 2> 16812 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 16813 ret void 16814} 16815 16816define void @s_shuffle_v4i16_v4i16__7_5_2_2() { 16817; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_5_2_2: 16818; GFX900: ; %bb.0: 16819; 
GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16820; GFX900-NEXT: ;;#ASMSTART 16821; GFX900-NEXT: ; def s[4:5] 16822; GFX900-NEXT: ;;#ASMEND 16823; GFX900-NEXT: ;;#ASMSTART 16824; GFX900-NEXT: ; def s[6:7] 16825; GFX900-NEXT: ;;#ASMEND 16826; GFX900-NEXT: s_pack_hh_b32_b16 s8, s7, s6 16827; GFX900-NEXT: s_pack_ll_b32_b16 s9, s5, s5 16828; GFX900-NEXT: ;;#ASMSTART 16829; GFX900-NEXT: ; use s[8:9] 16830; GFX900-NEXT: ;;#ASMEND 16831; GFX900-NEXT: s_setpc_b64 s[30:31] 16832; 16833; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_5_2_2: 16834; GFX90A: ; %bb.0: 16835; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16836; GFX90A-NEXT: ;;#ASMSTART 16837; GFX90A-NEXT: ; def s[4:5] 16838; GFX90A-NEXT: ;;#ASMEND 16839; GFX90A-NEXT: ;;#ASMSTART 16840; GFX90A-NEXT: ; def s[6:7] 16841; GFX90A-NEXT: ;;#ASMEND 16842; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s7, s6 16843; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s5, s5 16844; GFX90A-NEXT: ;;#ASMSTART 16845; GFX90A-NEXT: ; use s[8:9] 16846; GFX90A-NEXT: ;;#ASMEND 16847; GFX90A-NEXT: s_setpc_b64 s[30:31] 16848; 16849; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_5_2_2: 16850; GFX940: ; %bb.0: 16851; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16852; GFX940-NEXT: ;;#ASMSTART 16853; GFX940-NEXT: ; def s[0:1] 16854; GFX940-NEXT: ;;#ASMEND 16855; GFX940-NEXT: ;;#ASMSTART 16856; GFX940-NEXT: ; def s[2:3] 16857; GFX940-NEXT: ;;#ASMEND 16858; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s2 16859; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 16860; GFX940-NEXT: ;;#ASMSTART 16861; GFX940-NEXT: ; use s[8:9] 16862; GFX940-NEXT: ;;#ASMEND 16863; GFX940-NEXT: s_setpc_b64 s[30:31] 16864 %vec0 = call <4 x i16> asm "; def $0", "=s"() 16865 %vec1 = call <4 x i16> asm "; def $0", "=s"() 16866 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 5, i32 2, i32 2> 16867 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 16868 ret void 16869} 16870 16871define void @s_shuffle_v4i16_v4i16__7_6_2_2() { 16872; GFX900-LABEL: 
s_shuffle_v4i16_v4i16__7_6_2_2: 16873; GFX900: ; %bb.0: 16874; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16875; GFX900-NEXT: ;;#ASMSTART 16876; GFX900-NEXT: ; def s[4:5] 16877; GFX900-NEXT: ;;#ASMEND 16878; GFX900-NEXT: ;;#ASMSTART 16879; GFX900-NEXT: ; def s[6:7] 16880; GFX900-NEXT: ;;#ASMEND 16881; GFX900-NEXT: s_lshr_b32 s4, s7, 16 16882; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s7 16883; GFX900-NEXT: s_pack_ll_b32_b16 s9, s5, s5 16884; GFX900-NEXT: ;;#ASMSTART 16885; GFX900-NEXT: ; use s[8:9] 16886; GFX900-NEXT: ;;#ASMEND 16887; GFX900-NEXT: s_setpc_b64 s[30:31] 16888; 16889; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_6_2_2: 16890; GFX90A: ; %bb.0: 16891; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16892; GFX90A-NEXT: ;;#ASMSTART 16893; GFX90A-NEXT: ; def s[4:5] 16894; GFX90A-NEXT: ;;#ASMEND 16895; GFX90A-NEXT: ;;#ASMSTART 16896; GFX90A-NEXT: ; def s[6:7] 16897; GFX90A-NEXT: ;;#ASMEND 16898; GFX90A-NEXT: s_lshr_b32 s4, s7, 16 16899; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s7 16900; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s5, s5 16901; GFX90A-NEXT: ;;#ASMSTART 16902; GFX90A-NEXT: ; use s[8:9] 16903; GFX90A-NEXT: ;;#ASMEND 16904; GFX90A-NEXT: s_setpc_b64 s[30:31] 16905; 16906; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_6_2_2: 16907; GFX940: ; %bb.0: 16908; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16909; GFX940-NEXT: ;;#ASMSTART 16910; GFX940-NEXT: ; def s[0:1] 16911; GFX940-NEXT: ;;#ASMEND 16912; GFX940-NEXT: ;;#ASMSTART 16913; GFX940-NEXT: ; def s[2:3] 16914; GFX940-NEXT: ;;#ASMEND 16915; GFX940-NEXT: s_lshr_b32 s0, s3, 16 16916; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3 16917; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 16918; GFX940-NEXT: ;;#ASMSTART 16919; GFX940-NEXT: ; use s[8:9] 16920; GFX940-NEXT: ;;#ASMEND 16921; GFX940-NEXT: s_setpc_b64 s[30:31] 16922 %vec0 = call <4 x i16> asm "; def $0", "=s"() 16923 %vec1 = call <4 x i16> asm "; def $0", "=s"() 16924 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 6, i32 2, 
i32 2> 16925 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 16926 ret void 16927} 16928 16929define void @s_shuffle_v4i16_v4i16__7_7_2_2() { 16930; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_2_2: 16931; GFX900: ; %bb.0: 16932; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16933; GFX900-NEXT: ;;#ASMSTART 16934; GFX900-NEXT: ; def s[4:5] 16935; GFX900-NEXT: ;;#ASMEND 16936; GFX900-NEXT: ;;#ASMSTART 16937; GFX900-NEXT: ; def s[6:7] 16938; GFX900-NEXT: ;;#ASMEND 16939; GFX900-NEXT: s_pack_hh_b32_b16 s8, s7, s7 16940; GFX900-NEXT: s_pack_ll_b32_b16 s9, s5, s5 16941; GFX900-NEXT: ;;#ASMSTART 16942; GFX900-NEXT: ; use s[8:9] 16943; GFX900-NEXT: ;;#ASMEND 16944; GFX900-NEXT: s_setpc_b64 s[30:31] 16945; 16946; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_2_2: 16947; GFX90A: ; %bb.0: 16948; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16949; GFX90A-NEXT: ;;#ASMSTART 16950; GFX90A-NEXT: ; def s[4:5] 16951; GFX90A-NEXT: ;;#ASMEND 16952; GFX90A-NEXT: ;;#ASMSTART 16953; GFX90A-NEXT: ; def s[6:7] 16954; GFX90A-NEXT: ;;#ASMEND 16955; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s7, s7 16956; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s5, s5 16957; GFX90A-NEXT: ;;#ASMSTART 16958; GFX90A-NEXT: ; use s[8:9] 16959; GFX90A-NEXT: ;;#ASMEND 16960; GFX90A-NEXT: s_setpc_b64 s[30:31] 16961; 16962; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_2_2: 16963; GFX940: ; %bb.0: 16964; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16965; GFX940-NEXT: ;;#ASMSTART 16966; GFX940-NEXT: ; def s[0:1] 16967; GFX940-NEXT: ;;#ASMEND 16968; GFX940-NEXT: ;;#ASMSTART 16969; GFX940-NEXT: ; def s[2:3] 16970; GFX940-NEXT: ;;#ASMEND 16971; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3 16972; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 16973; GFX940-NEXT: ;;#ASMSTART 16974; GFX940-NEXT: ; use s[8:9] 16975; GFX940-NEXT: ;;#ASMEND 16976; GFX940-NEXT: s_setpc_b64 s[30:31] 16977 %vec0 = call <4 x i16> asm "; def $0", "=s"() 16978 %vec1 = call <4 x i16> asm "; def $0", "=s"() 16979 %shuf = shufflevector <4 x i16> 
%vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 2, i32 2> 16980 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 16981 ret void 16982} 16983 16984define void @s_shuffle_v4i16_v4i16__7_7_u_2() { 16985; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_u_2: 16986; GFX900: ; %bb.0: 16987; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16988; GFX900-NEXT: ;;#ASMSTART 16989; GFX900-NEXT: ; def s[4:5] 16990; GFX900-NEXT: ;;#ASMEND 16991; GFX900-NEXT: ;;#ASMSTART 16992; GFX900-NEXT: ; def s[6:7] 16993; GFX900-NEXT: ;;#ASMEND 16994; GFX900-NEXT: s_pack_hh_b32_b16 s8, s7, s7 16995; GFX900-NEXT: s_lshl_b32 s9, s5, 16 16996; GFX900-NEXT: ;;#ASMSTART 16997; GFX900-NEXT: ; use s[8:9] 16998; GFX900-NEXT: ;;#ASMEND 16999; GFX900-NEXT: s_setpc_b64 s[30:31] 17000; 17001; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_u_2: 17002; GFX90A: ; %bb.0: 17003; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 17004; GFX90A-NEXT: ;;#ASMSTART 17005; GFX90A-NEXT: ; def s[4:5] 17006; GFX90A-NEXT: ;;#ASMEND 17007; GFX90A-NEXT: ;;#ASMSTART 17008; GFX90A-NEXT: ; def s[6:7] 17009; GFX90A-NEXT: ;;#ASMEND 17010; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s7, s7 17011; GFX90A-NEXT: s_lshl_b32 s9, s5, 16 17012; GFX90A-NEXT: ;;#ASMSTART 17013; GFX90A-NEXT: ; use s[8:9] 17014; GFX90A-NEXT: ;;#ASMEND 17015; GFX90A-NEXT: s_setpc_b64 s[30:31] 17016; 17017; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_u_2: 17018; GFX940: ; %bb.0: 17019; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 17020; GFX940-NEXT: ;;#ASMSTART 17021; GFX940-NEXT: ; def s[0:1] 17022; GFX940-NEXT: ;;#ASMEND 17023; GFX940-NEXT: ;;#ASMSTART 17024; GFX940-NEXT: ; def s[2:3] 17025; GFX940-NEXT: ;;#ASMEND 17026; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3 17027; GFX940-NEXT: s_lshl_b32 s9, s1, 16 17028; GFX940-NEXT: ;;#ASMSTART 17029; GFX940-NEXT: ; use s[8:9] 17030; GFX940-NEXT: ;;#ASMEND 17031; GFX940-NEXT: s_setpc_b64 s[30:31] 17032 %vec0 = call <4 x i16> asm "; def $0", "=s"() 17033 %vec1 = call <4 x i16> asm "; def $0", "=s"() 
%shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 poison, i32 2>
  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
  ret void
}

; Mask <7,7,0,2>: lanes 0-1 = %vec1[3], lane 2 = %vec0[0], lane 3 = %vec0[2].
; Both inputs are "=s" inline-asm defs; the shuffled value feeds an asm use tied to s[8:9].
define void @s_shuffle_v4i16_v4i16__7_7_0_2() {
; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_0_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[6:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_pack_ll_b32_b16 s9, s4, s5
; GFX900-NEXT: s_pack_hh_b32_b16 s8, s7, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_0_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[6:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s4, s5
; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s7, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_0_2:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[0:1]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[2:3]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s1
; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; use s[8:9]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=s"()
  %vec1 = call <4 x i16> asm "; def $0", "=s"()
  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 0, i32 2>
  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
  ret void
}

; Mask <7,7,1,2>: lanes 0-1 = %vec1[3], lane 2 = %vec0[1], lane 3 = %vec0[2].
; Both inputs are "=s" inline-asm defs; the shuffled value feeds an asm use tied to s[8:9].
define void @s_shuffle_v4i16_v4i16__7_7_1_2() {
; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_1_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_lshr_b32 s4, s4, 16
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[6:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_pack_ll_b32_b16 s9, s4, s5
; GFX900-NEXT: s_pack_hh_b32_b16 s8, s7, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_1_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_lshr_b32 s4, s4, 16
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[6:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s4, s5
; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s7, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_1_2:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[0:1]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_lshr_b32 s0, s0, 16
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[2:3]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s1
; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; use s[8:9]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=s"()
  %vec1 = call <4 x i16> asm "; def $0", "=s"()
  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 1, i32 2>
  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
  ret void
}

; Mask <7,7,3,2>: lanes 0-1 = %vec1[3], lane 2 = %vec0[3], lane 3 = %vec0[2].
; Both inputs are "=s" inline-asm defs; the shuffled value feeds an asm use tied to s[8:9].
define void @s_shuffle_v4i16_v4i16__7_7_3_2() {
; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_3_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_lshr_b32 s4, s5, 16
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[6:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_pack_ll_b32_b16 s9, s4, s5
; GFX900-NEXT: s_pack_hh_b32_b16 s8, s7, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_3_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_lshr_b32 s4, s5, 16
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[6:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s4, s5
; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s7, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_3_2:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[0:1]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_lshr_b32 s0, s1, 16
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[2:3]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s1
; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; use s[8:9]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=s"()
  %vec1 = call <4 x i16> asm "; def $0", "=s"()
  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 3, i32 2>
  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
  ret void
}

; Mask <7,7,4,2>: lanes 0-1 = %vec1[3], lane 2 = %vec1[0], lane 3 = %vec0[2].
; Both inputs are "=s" inline-asm defs; the shuffled value feeds an asm use tied to s[8:9].
define void @s_shuffle_v4i16_v4i16__7_7_4_2() {
; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_4_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[6:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_pack_ll_b32_b16 s9, s6, s5
; GFX900-NEXT: s_pack_hh_b32_b16 s8, s7, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_4_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[6:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s6, s5
; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s7, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_4_2:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[0:1]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[2:3]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_pack_ll_b32_b16 s9, s2, s1
; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; use s[8:9]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=s"()
  %vec1 = call <4 x i16> asm "; def $0", "=s"()
  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 4, i32 2>
  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
  ret void
}

; Mask <7,7,5,2>: lanes 0-1 = %vec1[3], lane 2 = %vec1[1], lane 3 = %vec0[2].
; Both inputs are "=s" inline-asm defs; the shuffled value feeds an asm use tied to s[8:9].
define void @s_shuffle_v4i16_v4i16__7_7_5_2() {
; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_5_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[6:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_lshr_b32 s4, s6, 16
; GFX900-NEXT: s_pack_ll_b32_b16 s9, s4, s5
; GFX900-NEXT: s_pack_hh_b32_b16 s8, s7, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_5_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[6:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_lshr_b32 s4, s6, 16
; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s4, s5
; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s7, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_5_2:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[0:1]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[2:3]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_lshr_b32 s0, s2, 16
; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s1
; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; use s[8:9]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=s"()
  %vec1 = call <4 x i16> asm "; def $0", "=s"()
  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 5, i32 2>
  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
  ret void
}

; Mask <7,7,6,2>: lanes 0-1 = %vec1[3], lane 2 = %vec1[2], lane 3 = %vec0[2].
; Both inputs are "=s" inline-asm defs; the shuffled value feeds an asm use tied to s[8:9].
define void @s_shuffle_v4i16_v4i16__7_7_6_2() {
; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_6_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[6:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_pack_ll_b32_b16 s9, s7, s5
; GFX900-NEXT: s_pack_hh_b32_b16 s8, s7, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_6_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[6:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s7, s5
; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s7, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_6_2:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[0:1]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[2:3]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_pack_ll_b32_b16 s9, s3, s1
; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; use s[8:9]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=s"()
  %vec1 = call <4 x i16> asm "; def $0", "=s"()
  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 2>
  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
  ret void
}

; Mask <poison,3,3,3> over a single input (second operand is poison):
; lane 0 is a poison index, lanes 1-3 = %vec0[3].
; The input is an "=s" inline-asm def; the shuffled value feeds an asm use tied to s[8:9].
define void @s_shuffle_v4i16_v4i16__u_3_3_3() {
; GFX900-LABEL: s_shuffle_v4i16_v4i16__u_3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_pack_hh_b32_b16 s9, s5, s5
; GFX900-NEXT: s_mov_b32 s8, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v4i16_v4i16__u_3_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_pack_hh_b32_b16 s9, s5, s5
; GFX90A-NEXT: s_mov_b32 s8, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v4i16_v4i16__u_3_3_3:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[0:1]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_pack_hh_b32_b16 s9, s1, s1
; GFX940-NEXT: s_mov_b32 s8, s1
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; use s[8:9]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=s"()
  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 poison, i32 3, i32 3, i32 3>
  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
  ret void
}

; Mask <0,3,3,3> over a single input (second operand is poison):
; lane 0 = %vec0[0], lanes 1-3 = %vec0[3].
; The input is an "=s" inline-asm def; the shuffled value feeds an asm use tied to s[8:9].
define void @s_shuffle_v4i16_v4i16__0_3_3_3() {
; GFX900-LABEL: s_shuffle_v4i16_v4i16__0_3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_pack_lh_b32_b16 s8, s4, s5
; GFX900-NEXT: s_pack_hh_b32_b16 s9, s5, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v4i16_v4i16__0_3_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s4, s5
; GFX90A-NEXT: s_pack_hh_b32_b16 s9, s5, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v4i16_v4i16__0_3_3_3:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[0:1]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_pack_lh_b32_b16 s8, s0, s1
; GFX940-NEXT: s_pack_hh_b32_b16 s9, s1, s1
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; use s[8:9]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=s"()
  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 0, i32 3, i32 3, i32 3>
  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
  ret void
}

; Mask <1,3,3,3> over a single input (second operand is poison):
; lane 0 = %vec0[1], lanes 1-3 = %vec0[3].
; The input is an "=s" inline-asm def; the shuffled value feeds an asm use tied to s[8:9].
define void @s_shuffle_v4i16_v4i16__1_3_3_3() {
; GFX900-LABEL: s_shuffle_v4i16_v4i16__1_3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_pack_hh_b32_b16 s8, s4, s5
; GFX900-NEXT: s_pack_hh_b32_b16 s9, s5, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v4i16_v4i16__1_3_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s4, s5
; GFX90A-NEXT: s_pack_hh_b32_b16 s9, s5, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v4i16_v4i16__1_3_3_3:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[0:1]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_pack_hh_b32_b16 s8, s0, s1
; GFX940-NEXT: s_pack_hh_b32_b16 s9, s1, s1
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; use s[8:9]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=s"()
  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 1, i32 3, i32 3, i32 3>
  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
  ret void
}

; Mask <2,3,3,3> over a single input (second operand is poison):
; lane 0 = %vec0[2], lanes 1-3 = %vec0[3].
; The input is an "=s" inline-asm def; the shuffled value feeds an asm use tied to s[8:9].
define void @s_shuffle_v4i16_v4i16__2_3_3_3() {
; GFX900-LABEL: s_shuffle_v4i16_v4i16__2_3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_pack_hh_b32_b16 s9, s5, s5
; GFX900-NEXT: s_mov_b32 s8, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v4i16_v4i16__2_3_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_pack_hh_b32_b16 s9, s5, s5
; GFX90A-NEXT: s_mov_b32 s8, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v4i16_v4i16__2_3_3_3:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[0:1]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_pack_hh_b32_b16 s9, s1, s1
; GFX940-NEXT: s_mov_b32 s8, s1
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; use s[8:9]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=s"()
  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 2, i32 3, i32 3, i32 3>
  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
  ret void
}

; Mask <3,3,3,3> over a single input (second operand is poison):
; every lane = %vec0[3].
; The input is an "=s" inline-asm def; the shuffled value feeds an asm use tied to s[8:9].
define void @s_shuffle_v4i16_v4i16__3_3_3_3() {
; GFX900-LABEL: s_shuffle_v4i16_v4i16__3_3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_pack_hh_b32_b16 s8, s5, s5
; GFX900-NEXT: s_mov_b32 s9, s8
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v4i16_v4i16__3_3_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s5, s5
; GFX90A-NEXT: s_mov_b32 s9, s8
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v4i16_v4i16__3_3_3_3:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[0:1]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s1
; GFX940-NEXT: s_mov_b32 s9, s8
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; use s[8:9]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=s"()
  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
  ret void
}

; Mask <4,3,3,3> with a poison second operand: index 4 selects element 0 of
; that poison operand, so lane 0 is poison; lanes 1-3 = %vec0[3].
; REVIEW: unlike the 5_3_3_3 / 6_3_3_3 / 7_3_3_3 variants, no %vec1 is defined
; here, and the expected code below matches the u_3_3_3 case exactly. Confirm
; whether the test generator was meant to pass %vec1 as the second operand; if
; so, fix it there and regenerate the assertions rather than hand-editing them.
define void @s_shuffle_v4i16_v4i16__4_3_3_3() {
; GFX900-LABEL: s_shuffle_v4i16_v4i16__4_3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_pack_hh_b32_b16 s9, s5, s5
; GFX900-NEXT: s_mov_b32 s8, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v4i16_v4i16__4_3_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_pack_hh_b32_b16 s9, s5, s5
; GFX90A-NEXT: s_mov_b32 s8, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v4i16_v4i16__4_3_3_3:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[0:1]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_pack_hh_b32_b16 s9, s1, s1
; GFX940-NEXT: s_mov_b32 s8, s1
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; use s[8:9]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=s"()
  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 4, i32 3, i32 3, i32 3>
  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
  ret void
}

; Mask <5,3,3,3>: lane 0 = %vec1[1], lanes 1-3 = %vec0[3].
; Both inputs are "=s" inline-asm defs; the shuffled value feeds an asm use tied to s[8:9].
define void @s_shuffle_v4i16_v4i16__5_3_3_3() {
; GFX900-LABEL: s_shuffle_v4i16_v4i16__5_3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[6:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_pack_hh_b32_b16 s8, s6, s5
; GFX900-NEXT: s_pack_hh_b32_b16 s9, s5, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v4i16_v4i16__5_3_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[6:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s6, s5
; GFX90A-NEXT: s_pack_hh_b32_b16 s9, s5, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v4i16_v4i16__5_3_3_3:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[0:1]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[2:3]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_pack_hh_b32_b16 s8, s2, s1
; GFX940-NEXT: s_pack_hh_b32_b16 s9, s1, s1
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; use s[8:9]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=s"()
  %vec1 = call <4 x i16> asm "; def $0", "=s"()
  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 5, i32 3, i32 3, i32 3>
  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
  ret void
}

; Mask <6,3,3,3>: lane 0 = %vec1[2], lanes 1-3 = %vec0[3].
; Both inputs are "=s" inline-asm defs; the shuffled value feeds an asm use tied to s[8:9].
define void @s_shuffle_v4i16_v4i16__6_3_3_3() {
; GFX900-LABEL: s_shuffle_v4i16_v4i16__6_3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[6:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_pack_lh_b32_b16 s8, s7, s5
; GFX900-NEXT: s_pack_hh_b32_b16 s9, s5, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v4i16_v4i16__6_3_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[6:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s7, s5
; GFX90A-NEXT: s_pack_hh_b32_b16 s9, s5, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v4i16_v4i16__6_3_3_3:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[0:1]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[2:3]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_pack_lh_b32_b16 s8, s3, s1
; GFX940-NEXT: s_pack_hh_b32_b16 s9, s1, s1
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; use s[8:9]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=s"()
  %vec1 = call <4 x i16> asm "; def $0", "=s"()
  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 6, i32 3, i32 3, i32 3>
  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
  ret void
}

; Mask <7,3,3,3>: lane 0 = %vec1[3], lanes 1-3 = %vec0[3].
; Both inputs are "=s" inline-asm defs; the shuffled value feeds an asm use tied to s[8:9].
define void @s_shuffle_v4i16_v4i16__7_3_3_3() {
; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[6:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_pack_hh_b32_b16 s8, s7, s5
; GFX900-NEXT: s_pack_hh_b32_b16 s9, s5, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_3_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[6:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s7, s5
; GFX90A-NEXT: s_pack_hh_b32_b16 s9, s5, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_3_3_3:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[0:1]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[2:3]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s1
; GFX940-NEXT: s_pack_hh_b32_b16 s9, s1, s1
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; use s[8:9]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=s"()
  %vec1 = call <4 x i16> asm "; def $0", "=s"()
  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 3, i32 3, i32 3>
  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
  ret void
}

; Mask <7,poison,3,3>: lane 0 = %vec1[3], lane 1 is a poison index, lanes 2-3 = %vec0[3].
; Both inputs are "=s" inline-asm defs; the shuffled value feeds an asm use tied to s[8:9].
define void @s_shuffle_v4i16_v4i16__7_u_3_3() {
; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_u_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[6:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_lshr_b32 s8, s7, 16
; GFX900-NEXT: s_pack_hh_b32_b16 s9, s5, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_u_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[6:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_lshr_b32 s8, s7, 16
; GFX90A-NEXT: s_pack_hh_b32_b16 s9, s5, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_u_3_3:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[0:1]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[2:3]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_lshr_b32 s8, s3, 16
; GFX940-NEXT: s_pack_hh_b32_b16 s9, s1, s1
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; use s[8:9]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=s"()
  %vec1 = call <4 x i16> asm "; def $0", "=s"()
  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 poison, i32 3, i32 3>
  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
  ret void
}

; Mask <7,0,3,3>: lane 0 = %vec1[3], lane 1 = %vec0[0], lanes 2-3 = %vec0[3].
; Both inputs are "=s" inline-asm defs; the shuffled value feeds an asm use tied to s[8:9].
define void @s_shuffle_v4i16_v4i16__7_0_3_3() {
; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_0_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[6:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_lshr_b32 s6, s7, 16
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_pack_ll_b32_b16 s8, s6, s4
; GFX900-NEXT: s_pack_hh_b32_b16 s9, s5, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_0_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[6:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_lshr_b32 s6, s7, 16
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s6, s4
; GFX90A-NEXT: s_pack_hh_b32_b16 s9, s5, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_0_3_3:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[2:3]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_lshr_b32 s2, s3, 16
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[0:1]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s0
; GFX940-NEXT: s_pack_hh_b32_b16 s9, s1, s1
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; use s[8:9]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=s"()
  %vec1 = call <4 x i16> asm "; def $0", "=s"()
  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 0, i32 3, i32 3>
  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
  ret void
}

; Mask <7,1,3,3>: lane 0 = %vec1[3], lane 1 = %vec0[1], lanes 2-3 = %vec0[3].
; Both inputs are "=s" inline-asm defs; the shuffled value feeds an asm use tied to s[8:9].
define void @s_shuffle_v4i16_v4i16__7_1_3_3() {
; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_1_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[6:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_pack_hh_b32_b16 s8, s7, s4
; GFX900-NEXT: s_pack_hh_b32_b16 s9, s5, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_1_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[6:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s7, s4
; GFX90A-NEXT: s_pack_hh_b32_b16 s9, s5, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_1_3_3:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[0:1]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[2:3]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s0
; GFX940-NEXT: s_pack_hh_b32_b16 s9, s1, s1
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; use s[8:9]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=s"()
  %vec1 = call <4 x i16> asm "; def $0", "=s"()
  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 1, i32 3, i32 3>
  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
  ret void
}

; Mask <7,2,3,3>: lane 0 = %vec1[3], lane 1 = %vec0[2], lanes 2-3 = %vec0[3].
; Both inputs are "=s" inline-asm defs; the shuffled value feeds an asm use tied to s[8:9].
define void @s_shuffle_v4i16_v4i16__7_2_3_3() {
; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_2_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[6:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_lshr_b32 s4, s7, 16
; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s5
; GFX900-NEXT: s_pack_hh_b32_b16 s9, s5, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_2_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[6:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_lshr_b32 s4, s7, 16
; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s5
; GFX90A-NEXT: s_pack_hh_b32_b16 s9, s5, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_2_3_3:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[0:1]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[2:3]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_lshr_b32 s0, s3, 16
; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1
; GFX940-NEXT: s_pack_hh_b32_b16 s9, s1, s1
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; use s[8:9]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=s"()
  %vec1 = call <4 x i16> asm "; def $0", "=s"()
  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 2, i32 3, i32 3>
  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
  ret void
}

; Mask <7,4,3,3>: lane 0 = %vec1[3], lane 1 = %vec1[0], lanes 2-3 = %vec0[3].
; Both inputs are "=s" inline-asm defs; the shuffled value feeds an asm use tied to s[8:9].
define void @s_shuffle_v4i16_v4i16__7_4_3_3() {
; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_4_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[6:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_lshr_b32 s4, s7, 16
; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s6
; GFX900-NEXT: s_pack_hh_b32_b16 s9, s5, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_4_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[6:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_lshr_b32 s4, s7, 16
; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s6
; GFX90A-NEXT: s_pack_hh_b32_b16 s9, s5, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_4_3_3:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[0:1]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[2:3]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_lshr_b32 s0, s3, 16
; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s2
; GFX940-NEXT: s_pack_hh_b32_b16 s9, s1, s1
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; use s[8:9]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <4 x i16> asm "; def $0", "=s"()
  %vec1 = call <4 x i16> asm "; def $0", "=s"()
  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 4, i32 3, i32 3>
  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
  ret void
}

define void
@s_shuffle_v4i16_v4i16__7_5_3_3() { 18098; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_5_3_3: 18099; GFX900: ; %bb.0: 18100; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18101; GFX900-NEXT: ;;#ASMSTART 18102; GFX900-NEXT: ; def s[4:5] 18103; GFX900-NEXT: ;;#ASMEND 18104; GFX900-NEXT: ;;#ASMSTART 18105; GFX900-NEXT: ; def s[6:7] 18106; GFX900-NEXT: ;;#ASMEND 18107; GFX900-NEXT: s_pack_hh_b32_b16 s8, s7, s6 18108; GFX900-NEXT: s_pack_hh_b32_b16 s9, s5, s5 18109; GFX900-NEXT: ;;#ASMSTART 18110; GFX900-NEXT: ; use s[8:9] 18111; GFX900-NEXT: ;;#ASMEND 18112; GFX900-NEXT: s_setpc_b64 s[30:31] 18113; 18114; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_5_3_3: 18115; GFX90A: ; %bb.0: 18116; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18117; GFX90A-NEXT: ;;#ASMSTART 18118; GFX90A-NEXT: ; def s[4:5] 18119; GFX90A-NEXT: ;;#ASMEND 18120; GFX90A-NEXT: ;;#ASMSTART 18121; GFX90A-NEXT: ; def s[6:7] 18122; GFX90A-NEXT: ;;#ASMEND 18123; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s7, s6 18124; GFX90A-NEXT: s_pack_hh_b32_b16 s9, s5, s5 18125; GFX90A-NEXT: ;;#ASMSTART 18126; GFX90A-NEXT: ; use s[8:9] 18127; GFX90A-NEXT: ;;#ASMEND 18128; GFX90A-NEXT: s_setpc_b64 s[30:31] 18129; 18130; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_5_3_3: 18131; GFX940: ; %bb.0: 18132; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18133; GFX940-NEXT: ;;#ASMSTART 18134; GFX940-NEXT: ; def s[0:1] 18135; GFX940-NEXT: ;;#ASMEND 18136; GFX940-NEXT: ;;#ASMSTART 18137; GFX940-NEXT: ; def s[2:3] 18138; GFX940-NEXT: ;;#ASMEND 18139; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s2 18140; GFX940-NEXT: s_pack_hh_b32_b16 s9, s1, s1 18141; GFX940-NEXT: ;;#ASMSTART 18142; GFX940-NEXT: ; use s[8:9] 18143; GFX940-NEXT: ;;#ASMEND 18144; GFX940-NEXT: s_setpc_b64 s[30:31] 18145 %vec0 = call <4 x i16> asm "; def $0", "=s"() 18146 %vec1 = call <4 x i16> asm "; def $0", "=s"() 18147 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 5, i32 3, i32 3> 18148 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x 
i16> %shuf) 18149 ret void 18150} 18151 18152define void @s_shuffle_v4i16_v4i16__7_6_3_3() { 18153; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_6_3_3: 18154; GFX900: ; %bb.0: 18155; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18156; GFX900-NEXT: ;;#ASMSTART 18157; GFX900-NEXT: ; def s[4:5] 18158; GFX900-NEXT: ;;#ASMEND 18159; GFX900-NEXT: ;;#ASMSTART 18160; GFX900-NEXT: ; def s[6:7] 18161; GFX900-NEXT: ;;#ASMEND 18162; GFX900-NEXT: s_lshr_b32 s4, s7, 16 18163; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s7 18164; GFX900-NEXT: s_pack_hh_b32_b16 s9, s5, s5 18165; GFX900-NEXT: ;;#ASMSTART 18166; GFX900-NEXT: ; use s[8:9] 18167; GFX900-NEXT: ;;#ASMEND 18168; GFX900-NEXT: s_setpc_b64 s[30:31] 18169; 18170; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_6_3_3: 18171; GFX90A: ; %bb.0: 18172; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18173; GFX90A-NEXT: ;;#ASMSTART 18174; GFX90A-NEXT: ; def s[4:5] 18175; GFX90A-NEXT: ;;#ASMEND 18176; GFX90A-NEXT: ;;#ASMSTART 18177; GFX90A-NEXT: ; def s[6:7] 18178; GFX90A-NEXT: ;;#ASMEND 18179; GFX90A-NEXT: s_lshr_b32 s4, s7, 16 18180; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s7 18181; GFX90A-NEXT: s_pack_hh_b32_b16 s9, s5, s5 18182; GFX90A-NEXT: ;;#ASMSTART 18183; GFX90A-NEXT: ; use s[8:9] 18184; GFX90A-NEXT: ;;#ASMEND 18185; GFX90A-NEXT: s_setpc_b64 s[30:31] 18186; 18187; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_6_3_3: 18188; GFX940: ; %bb.0: 18189; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18190; GFX940-NEXT: ;;#ASMSTART 18191; GFX940-NEXT: ; def s[0:1] 18192; GFX940-NEXT: ;;#ASMEND 18193; GFX940-NEXT: ;;#ASMSTART 18194; GFX940-NEXT: ; def s[2:3] 18195; GFX940-NEXT: ;;#ASMEND 18196; GFX940-NEXT: s_lshr_b32 s0, s3, 16 18197; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3 18198; GFX940-NEXT: s_pack_hh_b32_b16 s9, s1, s1 18199; GFX940-NEXT: ;;#ASMSTART 18200; GFX940-NEXT: ; use s[8:9] 18201; GFX940-NEXT: ;;#ASMEND 18202; GFX940-NEXT: s_setpc_b64 s[30:31] 18203 %vec0 = call <4 x i16> asm "; def $0", "=s"() 18204 %vec1 = call <4 x i16> 
asm "; def $0", "=s"() 18205 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 6, i32 3, i32 3> 18206 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 18207 ret void 18208} 18209 18210define void @s_shuffle_v4i16_v4i16__7_7_3_3() { 18211; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_3_3: 18212; GFX900: ; %bb.0: 18213; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18214; GFX900-NEXT: ;;#ASMSTART 18215; GFX900-NEXT: ; def s[4:5] 18216; GFX900-NEXT: ;;#ASMEND 18217; GFX900-NEXT: ;;#ASMSTART 18218; GFX900-NEXT: ; def s[6:7] 18219; GFX900-NEXT: ;;#ASMEND 18220; GFX900-NEXT: s_pack_hh_b32_b16 s9, s5, s5 18221; GFX900-NEXT: s_pack_hh_b32_b16 s8, s7, s7 18222; GFX900-NEXT: ;;#ASMSTART 18223; GFX900-NEXT: ; use s[8:9] 18224; GFX900-NEXT: ;;#ASMEND 18225; GFX900-NEXT: s_setpc_b64 s[30:31] 18226; 18227; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_3_3: 18228; GFX90A: ; %bb.0: 18229; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18230; GFX90A-NEXT: ;;#ASMSTART 18231; GFX90A-NEXT: ; def s[4:5] 18232; GFX90A-NEXT: ;;#ASMEND 18233; GFX90A-NEXT: ;;#ASMSTART 18234; GFX90A-NEXT: ; def s[6:7] 18235; GFX90A-NEXT: ;;#ASMEND 18236; GFX90A-NEXT: s_pack_hh_b32_b16 s9, s5, s5 18237; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s7, s7 18238; GFX90A-NEXT: ;;#ASMSTART 18239; GFX90A-NEXT: ; use s[8:9] 18240; GFX90A-NEXT: ;;#ASMEND 18241; GFX90A-NEXT: s_setpc_b64 s[30:31] 18242; 18243; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_3_3: 18244; GFX940: ; %bb.0: 18245; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18246; GFX940-NEXT: ;;#ASMSTART 18247; GFX940-NEXT: ; def s[0:1] 18248; GFX940-NEXT: ;;#ASMEND 18249; GFX940-NEXT: ;;#ASMSTART 18250; GFX940-NEXT: ; def s[2:3] 18251; GFX940-NEXT: ;;#ASMEND 18252; GFX940-NEXT: s_pack_hh_b32_b16 s9, s1, s1 18253; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3 18254; GFX940-NEXT: ;;#ASMSTART 18255; GFX940-NEXT: ; use s[8:9] 18256; GFX940-NEXT: ;;#ASMEND 18257; GFX940-NEXT: s_setpc_b64 s[30:31] 18258 %vec0 = call <4 x 
i16> asm "; def $0", "=s"() 18259 %vec1 = call <4 x i16> asm "; def $0", "=s"() 18260 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 3, i32 3> 18261 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 18262 ret void 18263} 18264 18265define void @s_shuffle_v4i16_v4i16__7_7_u_3() { 18266; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_u_3: 18267; GFX900: ; %bb.0: 18268; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18269; GFX900-NEXT: ;;#ASMSTART 18270; GFX900-NEXT: ; def s[8:9] 18271; GFX900-NEXT: ;;#ASMEND 18272; GFX900-NEXT: ;;#ASMSTART 18273; GFX900-NEXT: ; def s[4:5] 18274; GFX900-NEXT: ;;#ASMEND 18275; GFX900-NEXT: s_pack_hh_b32_b16 s8, s5, s5 18276; GFX900-NEXT: ;;#ASMSTART 18277; GFX900-NEXT: ; use s[8:9] 18278; GFX900-NEXT: ;;#ASMEND 18279; GFX900-NEXT: s_setpc_b64 s[30:31] 18280; 18281; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_u_3: 18282; GFX90A: ; %bb.0: 18283; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18284; GFX90A-NEXT: ;;#ASMSTART 18285; GFX90A-NEXT: ; def s[8:9] 18286; GFX90A-NEXT: ;;#ASMEND 18287; GFX90A-NEXT: ;;#ASMSTART 18288; GFX90A-NEXT: ; def s[4:5] 18289; GFX90A-NEXT: ;;#ASMEND 18290; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s5, s5 18291; GFX90A-NEXT: ;;#ASMSTART 18292; GFX90A-NEXT: ; use s[8:9] 18293; GFX90A-NEXT: ;;#ASMEND 18294; GFX90A-NEXT: s_setpc_b64 s[30:31] 18295; 18296; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_u_3: 18297; GFX940: ; %bb.0: 18298; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18299; GFX940-NEXT: ;;#ASMSTART 18300; GFX940-NEXT: ; def s[8:9] 18301; GFX940-NEXT: ;;#ASMEND 18302; GFX940-NEXT: ;;#ASMSTART 18303; GFX940-NEXT: ; def s[0:1] 18304; GFX940-NEXT: ;;#ASMEND 18305; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s1 18306; GFX940-NEXT: ;;#ASMSTART 18307; GFX940-NEXT: ; use s[8:9] 18308; GFX940-NEXT: ;;#ASMEND 18309; GFX940-NEXT: s_setpc_b64 s[30:31] 18310 %vec0 = call <4 x i16> asm "; def $0", "=s"() 18311 %vec1 = call <4 x i16> asm "; def $0", "=s"() 18312 %shuf 
= shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 poison, i32 3> 18313 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 18314 ret void 18315} 18316 18317define void @s_shuffle_v4i16_v4i16__7_7_0_3() { 18318; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_0_3: 18319; GFX900: ; %bb.0: 18320; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18321; GFX900-NEXT: ;;#ASMSTART 18322; GFX900-NEXT: ; def s[4:5] 18323; GFX900-NEXT: ;;#ASMEND 18324; GFX900-NEXT: ;;#ASMSTART 18325; GFX900-NEXT: ; def s[6:7] 18326; GFX900-NEXT: ;;#ASMEND 18327; GFX900-NEXT: s_pack_lh_b32_b16 s9, s4, s5 18328; GFX900-NEXT: s_pack_hh_b32_b16 s8, s7, s7 18329; GFX900-NEXT: ;;#ASMSTART 18330; GFX900-NEXT: ; use s[8:9] 18331; GFX900-NEXT: ;;#ASMEND 18332; GFX900-NEXT: s_setpc_b64 s[30:31] 18333; 18334; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_0_3: 18335; GFX90A: ; %bb.0: 18336; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18337; GFX90A-NEXT: ;;#ASMSTART 18338; GFX90A-NEXT: ; def s[4:5] 18339; GFX90A-NEXT: ;;#ASMEND 18340; GFX90A-NEXT: ;;#ASMSTART 18341; GFX90A-NEXT: ; def s[6:7] 18342; GFX90A-NEXT: ;;#ASMEND 18343; GFX90A-NEXT: s_pack_lh_b32_b16 s9, s4, s5 18344; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s7, s7 18345; GFX90A-NEXT: ;;#ASMSTART 18346; GFX90A-NEXT: ; use s[8:9] 18347; GFX90A-NEXT: ;;#ASMEND 18348; GFX90A-NEXT: s_setpc_b64 s[30:31] 18349; 18350; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_0_3: 18351; GFX940: ; %bb.0: 18352; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18353; GFX940-NEXT: ;;#ASMSTART 18354; GFX940-NEXT: ; def s[0:1] 18355; GFX940-NEXT: ;;#ASMEND 18356; GFX940-NEXT: ;;#ASMSTART 18357; GFX940-NEXT: ; def s[2:3] 18358; GFX940-NEXT: ;;#ASMEND 18359; GFX940-NEXT: s_pack_lh_b32_b16 s9, s0, s1 18360; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3 18361; GFX940-NEXT: ;;#ASMSTART 18362; GFX940-NEXT: ; use s[8:9] 18363; GFX940-NEXT: ;;#ASMEND 18364; GFX940-NEXT: s_setpc_b64 s[30:31] 18365 %vec0 = call <4 x i16> asm "; def $0", "=s"() 
18366 %vec1 = call <4 x i16> asm "; def $0", "=s"() 18367 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 0, i32 3> 18368 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 18369 ret void 18370} 18371 18372define void @s_shuffle_v4i16_v4i16__7_7_1_3() { 18373; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_1_3: 18374; GFX900: ; %bb.0: 18375; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18376; GFX900-NEXT: ;;#ASMSTART 18377; GFX900-NEXT: ; def s[4:5] 18378; GFX900-NEXT: ;;#ASMEND 18379; GFX900-NEXT: ;;#ASMSTART 18380; GFX900-NEXT: ; def s[6:7] 18381; GFX900-NEXT: ;;#ASMEND 18382; GFX900-NEXT: s_pack_hh_b32_b16 s9, s4, s5 18383; GFX900-NEXT: s_pack_hh_b32_b16 s8, s7, s7 18384; GFX900-NEXT: ;;#ASMSTART 18385; GFX900-NEXT: ; use s[8:9] 18386; GFX900-NEXT: ;;#ASMEND 18387; GFX900-NEXT: s_setpc_b64 s[30:31] 18388; 18389; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_1_3: 18390; GFX90A: ; %bb.0: 18391; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18392; GFX90A-NEXT: ;;#ASMSTART 18393; GFX90A-NEXT: ; def s[4:5] 18394; GFX90A-NEXT: ;;#ASMEND 18395; GFX90A-NEXT: ;;#ASMSTART 18396; GFX90A-NEXT: ; def s[6:7] 18397; GFX90A-NEXT: ;;#ASMEND 18398; GFX90A-NEXT: s_pack_hh_b32_b16 s9, s4, s5 18399; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s7, s7 18400; GFX90A-NEXT: ;;#ASMSTART 18401; GFX90A-NEXT: ; use s[8:9] 18402; GFX90A-NEXT: ;;#ASMEND 18403; GFX90A-NEXT: s_setpc_b64 s[30:31] 18404; 18405; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_1_3: 18406; GFX940: ; %bb.0: 18407; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18408; GFX940-NEXT: ;;#ASMSTART 18409; GFX940-NEXT: ; def s[0:1] 18410; GFX940-NEXT: ;;#ASMEND 18411; GFX940-NEXT: ;;#ASMSTART 18412; GFX940-NEXT: ; def s[2:3] 18413; GFX940-NEXT: ;;#ASMEND 18414; GFX940-NEXT: s_pack_hh_b32_b16 s9, s0, s1 18415; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3 18416; GFX940-NEXT: ;;#ASMSTART 18417; GFX940-NEXT: ; use s[8:9] 18418; GFX940-NEXT: ;;#ASMEND 18419; GFX940-NEXT: s_setpc_b64 
s[30:31] 18420 %vec0 = call <4 x i16> asm "; def $0", "=s"() 18421 %vec1 = call <4 x i16> asm "; def $0", "=s"() 18422 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 1, i32 3> 18423 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 18424 ret void 18425} 18426 18427define void @s_shuffle_v4i16_v4i16__7_7_2_3() { 18428; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_2_3: 18429; GFX900: ; %bb.0: 18430; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18431; GFX900-NEXT: ;;#ASMSTART 18432; GFX900-NEXT: ; def s[8:9] 18433; GFX900-NEXT: ;;#ASMEND 18434; GFX900-NEXT: ;;#ASMSTART 18435; GFX900-NEXT: ; def s[4:5] 18436; GFX900-NEXT: ;;#ASMEND 18437; GFX900-NEXT: s_pack_hh_b32_b16 s8, s5, s5 18438; GFX900-NEXT: ;;#ASMSTART 18439; GFX900-NEXT: ; use s[8:9] 18440; GFX900-NEXT: ;;#ASMEND 18441; GFX900-NEXT: s_setpc_b64 s[30:31] 18442; 18443; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_2_3: 18444; GFX90A: ; %bb.0: 18445; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18446; GFX90A-NEXT: ;;#ASMSTART 18447; GFX90A-NEXT: ; def s[8:9] 18448; GFX90A-NEXT: ;;#ASMEND 18449; GFX90A-NEXT: ;;#ASMSTART 18450; GFX90A-NEXT: ; def s[4:5] 18451; GFX90A-NEXT: ;;#ASMEND 18452; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s5, s5 18453; GFX90A-NEXT: ;;#ASMSTART 18454; GFX90A-NEXT: ; use s[8:9] 18455; GFX90A-NEXT: ;;#ASMEND 18456; GFX90A-NEXT: s_setpc_b64 s[30:31] 18457; 18458; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_2_3: 18459; GFX940: ; %bb.0: 18460; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18461; GFX940-NEXT: ;;#ASMSTART 18462; GFX940-NEXT: ; def s[8:9] 18463; GFX940-NEXT: ;;#ASMEND 18464; GFX940-NEXT: ;;#ASMSTART 18465; GFX940-NEXT: ; def s[0:1] 18466; GFX940-NEXT: ;;#ASMEND 18467; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s1 18468; GFX940-NEXT: ;;#ASMSTART 18469; GFX940-NEXT: ; use s[8:9] 18470; GFX940-NEXT: ;;#ASMEND 18471; GFX940-NEXT: s_setpc_b64 s[30:31] 18472 %vec0 = call <4 x i16> asm "; def $0", "=s"() 18473 %vec1 = call <4 x i16> 
asm "; def $0", "=s"() 18474 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 2, i32 3> 18475 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 18476 ret void 18477} 18478 18479define void @s_shuffle_v4i16_v4i16__7_7_4_3() { 18480; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_4_3: 18481; GFX900: ; %bb.0: 18482; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18483; GFX900-NEXT: ;;#ASMSTART 18484; GFX900-NEXT: ; def s[4:5] 18485; GFX900-NEXT: ;;#ASMEND 18486; GFX900-NEXT: ;;#ASMSTART 18487; GFX900-NEXT: ; def s[6:7] 18488; GFX900-NEXT: ;;#ASMEND 18489; GFX900-NEXT: s_pack_lh_b32_b16 s9, s6, s5 18490; GFX900-NEXT: s_pack_hh_b32_b16 s8, s7, s7 18491; GFX900-NEXT: ;;#ASMSTART 18492; GFX900-NEXT: ; use s[8:9] 18493; GFX900-NEXT: ;;#ASMEND 18494; GFX900-NEXT: s_setpc_b64 s[30:31] 18495; 18496; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_4_3: 18497; GFX90A: ; %bb.0: 18498; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18499; GFX90A-NEXT: ;;#ASMSTART 18500; GFX90A-NEXT: ; def s[4:5] 18501; GFX90A-NEXT: ;;#ASMEND 18502; GFX90A-NEXT: ;;#ASMSTART 18503; GFX90A-NEXT: ; def s[6:7] 18504; GFX90A-NEXT: ;;#ASMEND 18505; GFX90A-NEXT: s_pack_lh_b32_b16 s9, s6, s5 18506; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s7, s7 18507; GFX90A-NEXT: ;;#ASMSTART 18508; GFX90A-NEXT: ; use s[8:9] 18509; GFX90A-NEXT: ;;#ASMEND 18510; GFX90A-NEXT: s_setpc_b64 s[30:31] 18511; 18512; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_4_3: 18513; GFX940: ; %bb.0: 18514; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18515; GFX940-NEXT: ;;#ASMSTART 18516; GFX940-NEXT: ; def s[0:1] 18517; GFX940-NEXT: ;;#ASMEND 18518; GFX940-NEXT: ;;#ASMSTART 18519; GFX940-NEXT: ; def s[2:3] 18520; GFX940-NEXT: ;;#ASMEND 18521; GFX940-NEXT: s_pack_lh_b32_b16 s9, s2, s1 18522; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3 18523; GFX940-NEXT: ;;#ASMSTART 18524; GFX940-NEXT: ; use s[8:9] 18525; GFX940-NEXT: ;;#ASMEND 18526; GFX940-NEXT: s_setpc_b64 s[30:31] 18527 %vec0 = call <4 x 
i16> asm "; def $0", "=s"() 18528 %vec1 = call <4 x i16> asm "; def $0", "=s"() 18529 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 4, i32 3> 18530 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 18531 ret void 18532} 18533 18534define void @s_shuffle_v4i16_v4i16__7_7_5_3() { 18535; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_5_3: 18536; GFX900: ; %bb.0: 18537; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18538; GFX900-NEXT: ;;#ASMSTART 18539; GFX900-NEXT: ; def s[4:5] 18540; GFX900-NEXT: ;;#ASMEND 18541; GFX900-NEXT: ;;#ASMSTART 18542; GFX900-NEXT: ; def s[6:7] 18543; GFX900-NEXT: ;;#ASMEND 18544; GFX900-NEXT: s_pack_hh_b32_b16 s9, s6, s5 18545; GFX900-NEXT: s_pack_hh_b32_b16 s8, s7, s7 18546; GFX900-NEXT: ;;#ASMSTART 18547; GFX900-NEXT: ; use s[8:9] 18548; GFX900-NEXT: ;;#ASMEND 18549; GFX900-NEXT: s_setpc_b64 s[30:31] 18550; 18551; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_5_3: 18552; GFX90A: ; %bb.0: 18553; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18554; GFX90A-NEXT: ;;#ASMSTART 18555; GFX90A-NEXT: ; def s[4:5] 18556; GFX90A-NEXT: ;;#ASMEND 18557; GFX90A-NEXT: ;;#ASMSTART 18558; GFX90A-NEXT: ; def s[6:7] 18559; GFX90A-NEXT: ;;#ASMEND 18560; GFX90A-NEXT: s_pack_hh_b32_b16 s9, s6, s5 18561; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s7, s7 18562; GFX90A-NEXT: ;;#ASMSTART 18563; GFX90A-NEXT: ; use s[8:9] 18564; GFX90A-NEXT: ;;#ASMEND 18565; GFX90A-NEXT: s_setpc_b64 s[30:31] 18566; 18567; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_5_3: 18568; GFX940: ; %bb.0: 18569; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18570; GFX940-NEXT: ;;#ASMSTART 18571; GFX940-NEXT: ; def s[0:1] 18572; GFX940-NEXT: ;;#ASMEND 18573; GFX940-NEXT: ;;#ASMSTART 18574; GFX940-NEXT: ; def s[2:3] 18575; GFX940-NEXT: ;;#ASMEND 18576; GFX940-NEXT: s_pack_hh_b32_b16 s9, s2, s1 18577; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3 18578; GFX940-NEXT: ;;#ASMSTART 18579; GFX940-NEXT: ; use s[8:9] 18580; GFX940-NEXT: ;;#ASMEND 18581; 
GFX940-NEXT: s_setpc_b64 s[30:31] 18582 %vec0 = call <4 x i16> asm "; def $0", "=s"() 18583 %vec1 = call <4 x i16> asm "; def $0", "=s"() 18584 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 5, i32 3> 18585 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 18586 ret void 18587} 18588 18589define void @s_shuffle_v4i16_v4i16__7_7_6_3() { 18590; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_6_3: 18591; GFX900: ; %bb.0: 18592; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18593; GFX900-NEXT: ;;#ASMSTART 18594; GFX900-NEXT: ; def s[4:5] 18595; GFX900-NEXT: ;;#ASMEND 18596; GFX900-NEXT: ;;#ASMSTART 18597; GFX900-NEXT: ; def s[6:7] 18598; GFX900-NEXT: ;;#ASMEND 18599; GFX900-NEXT: s_pack_lh_b32_b16 s9, s7, s5 18600; GFX900-NEXT: s_pack_hh_b32_b16 s8, s7, s7 18601; GFX900-NEXT: ;;#ASMSTART 18602; GFX900-NEXT: ; use s[8:9] 18603; GFX900-NEXT: ;;#ASMEND 18604; GFX900-NEXT: s_setpc_b64 s[30:31] 18605; 18606; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_6_3: 18607; GFX90A: ; %bb.0: 18608; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18609; GFX90A-NEXT: ;;#ASMSTART 18610; GFX90A-NEXT: ; def s[4:5] 18611; GFX90A-NEXT: ;;#ASMEND 18612; GFX90A-NEXT: ;;#ASMSTART 18613; GFX90A-NEXT: ; def s[6:7] 18614; GFX90A-NEXT: ;;#ASMEND 18615; GFX90A-NEXT: s_pack_lh_b32_b16 s9, s7, s5 18616; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s7, s7 18617; GFX90A-NEXT: ;;#ASMSTART 18618; GFX90A-NEXT: ; use s[8:9] 18619; GFX90A-NEXT: ;;#ASMEND 18620; GFX90A-NEXT: s_setpc_b64 s[30:31] 18621; 18622; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_6_3: 18623; GFX940: ; %bb.0: 18624; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18625; GFX940-NEXT: ;;#ASMSTART 18626; GFX940-NEXT: ; def s[0:1] 18627; GFX940-NEXT: ;;#ASMEND 18628; GFX940-NEXT: ;;#ASMSTART 18629; GFX940-NEXT: ; def s[2:3] 18630; GFX940-NEXT: ;;#ASMEND 18631; GFX940-NEXT: s_pack_lh_b32_b16 s9, s3, s1 18632; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3 18633; GFX940-NEXT: ;;#ASMSTART 18634; 
GFX940-NEXT: ; use s[8:9] 18635; GFX940-NEXT: ;;#ASMEND 18636; GFX940-NEXT: s_setpc_b64 s[30:31] 18637 %vec0 = call <4 x i16> asm "; def $0", "=s"() 18638 %vec1 = call <4 x i16> asm "; def $0", "=s"() 18639 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 3> 18640 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 18641 ret void 18642} 18643; NOTE(review): operand 2 of the shufflevector below is poison, so mask index 4 names a poison lane; with lane 0 also poison the whole result is poison and the checks contain no def, only the use. Confirm a real second source was not intended. 18644define void @s_shuffle_v4i16_v4i16__u_4_4_4() { 18645; GFX9-LABEL: s_shuffle_v4i16_v4i16__u_4_4_4: 18646; GFX9: ; %bb.0: 18647; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18648; GFX9-NEXT: ;;#ASMSTART 18649; GFX9-NEXT: ; use s[8:9] 18650; GFX9-NEXT: ;;#ASMEND 18651; GFX9-NEXT: s_setpc_b64 s[30:31] 18652 %vec0 = call <4 x i16> asm "; def $0", "=s"() 18653 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 poison, i32 4, i32 4, i32 4> 18654 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 18655 ret void 18656} 18657; NOTE(review): operand 2 of the shufflevector below is poison, so lanes 1-3 (mask index 4) are poison and only lane 0 (element 0 of %vec0) is defined. Confirm a real second source was not intended. 18658define void @s_shuffle_v4i16_v4i16__0_4_4_4() { 18659; GFX900-LABEL: s_shuffle_v4i16_v4i16__0_4_4_4: 18660; GFX900: ; %bb.0: 18661; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18662; GFX900-NEXT: ;;#ASMSTART 18663; GFX900-NEXT: ; def s[8:9] 18664; GFX900-NEXT: ;;#ASMEND 18665; GFX900-NEXT: ;;#ASMSTART 18666; GFX900-NEXT: ; use s[8:9] 18667; GFX900-NEXT: ;;#ASMEND 18668; GFX900-NEXT: s_setpc_b64 s[30:31] 18669; 18670; GFX90A-LABEL: s_shuffle_v4i16_v4i16__0_4_4_4: 18671; GFX90A: ; %bb.0: 18672; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18673; GFX90A-NEXT: ;;#ASMSTART 18674; GFX90A-NEXT: ; def s[8:9] 18675; GFX90A-NEXT: ;;#ASMEND 18676; GFX90A-NEXT: ;;#ASMSTART 18677; GFX90A-NEXT: ; use s[8:9] 18678; GFX90A-NEXT: ;;#ASMEND 18679; GFX90A-NEXT: s_setpc_b64 s[30:31] 18680; 18681; GFX940-LABEL: s_shuffle_v4i16_v4i16__0_4_4_4: 18682; GFX940: ; %bb.0: 18683; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18684; GFX940-NEXT: ;;#ASMSTART 18685; GFX940-NEXT: ; def s[8:9] 18686; GFX940-NEXT: 
;;#ASMEND 18687; GFX940-NEXT: s_nop 0 18688; GFX940-NEXT: ;;#ASMSTART 18689; GFX940-NEXT: ; use s[8:9] 18690; GFX940-NEXT: ;;#ASMEND 18691; GFX940-NEXT: s_setpc_b64 s[30:31] 18692 %vec0 = call <4 x i16> asm "; def $0", "=s"() 18693 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 0, i32 4, i32 4, i32 4> 18694 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 18695 ret void 18696} 18697; NOTE(review): operand 2 of the shufflevector below is poison, so lanes 1-3 (mask index 4) are poison and only lane 0 (element 1 of %vec0) is defined. Confirm a real second source was not intended. 18698define void @s_shuffle_v4i16_v4i16__1_4_4_4() { 18699; GFX900-LABEL: s_shuffle_v4i16_v4i16__1_4_4_4: 18700; GFX900: ; %bb.0: 18701; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18702; GFX900-NEXT: ;;#ASMSTART 18703; GFX900-NEXT: ; def s[4:5] 18704; GFX900-NEXT: ;;#ASMEND 18705; GFX900-NEXT: s_lshr_b32 s8, s4, 16 18706; GFX900-NEXT: ;;#ASMSTART 18707; GFX900-NEXT: ; use s[8:9] 18708; GFX900-NEXT: ;;#ASMEND 18709; GFX900-NEXT: s_setpc_b64 s[30:31] 18710; 18711; GFX90A-LABEL: s_shuffle_v4i16_v4i16__1_4_4_4: 18712; GFX90A: ; %bb.0: 18713; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18714; GFX90A-NEXT: ;;#ASMSTART 18715; GFX90A-NEXT: ; def s[4:5] 18716; GFX90A-NEXT: ;;#ASMEND 18717; GFX90A-NEXT: s_lshr_b32 s8, s4, 16 18718; GFX90A-NEXT: ;;#ASMSTART 18719; GFX90A-NEXT: ; use s[8:9] 18720; GFX90A-NEXT: ;;#ASMEND 18721; GFX90A-NEXT: s_setpc_b64 s[30:31] 18722; 18723; GFX940-LABEL: s_shuffle_v4i16_v4i16__1_4_4_4: 18724; GFX940: ; %bb.0: 18725; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18726; GFX940-NEXT: ;;#ASMSTART 18727; GFX940-NEXT: ; def s[0:1] 18728; GFX940-NEXT: ;;#ASMEND 18729; GFX940-NEXT: s_lshr_b32 s8, s0, 16 18730; GFX940-NEXT: ;;#ASMSTART 18731; GFX940-NEXT: ; use s[8:9] 18732; GFX940-NEXT: ;;#ASMEND 18733; GFX940-NEXT: s_setpc_b64 s[30:31] 18734 %vec0 = call <4 x i16> asm "; def $0", "=s"() 18735 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 1, i32 4, i32 4, i32 4> 18736 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 18737 ret void 18738} 18739; NOTE(review): operand 2 of the shufflevector in the function below is poison, so lanes 1-3 (mask index 4) are poison and only lane 0 (element 2 of %vec0) is defined. Confirm a real second source was not intended. 18740define void 
@s_shuffle_v4i16_v4i16__2_4_4_4() { 18741; GFX900-LABEL: s_shuffle_v4i16_v4i16__2_4_4_4: 18742; GFX900: ; %bb.0: 18743; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18744; GFX900-NEXT: ;;#ASMSTART 18745; GFX900-NEXT: ; def s[4:5] 18746; GFX900-NEXT: ;;#ASMEND 18747; GFX900-NEXT: s_mov_b32 s8, s5 18748; GFX900-NEXT: ;;#ASMSTART 18749; GFX900-NEXT: ; use s[8:9] 18750; GFX900-NEXT: ;;#ASMEND 18751; GFX900-NEXT: s_setpc_b64 s[30:31] 18752; 18753; GFX90A-LABEL: s_shuffle_v4i16_v4i16__2_4_4_4: 18754; GFX90A: ; %bb.0: 18755; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18756; GFX90A-NEXT: ;;#ASMSTART 18757; GFX90A-NEXT: ; def s[4:5] 18758; GFX90A-NEXT: ;;#ASMEND 18759; GFX90A-NEXT: s_mov_b32 s8, s5 18760; GFX90A-NEXT: ;;#ASMSTART 18761; GFX90A-NEXT: ; use s[8:9] 18762; GFX90A-NEXT: ;;#ASMEND 18763; GFX90A-NEXT: s_setpc_b64 s[30:31] 18764; 18765; GFX940-LABEL: s_shuffle_v4i16_v4i16__2_4_4_4: 18766; GFX940: ; %bb.0: 18767; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18768; GFX940-NEXT: ;;#ASMSTART 18769; GFX940-NEXT: ; def s[0:1] 18770; GFX940-NEXT: ;;#ASMEND 18771; GFX940-NEXT: s_mov_b32 s8, s1 18772; GFX940-NEXT: ;;#ASMSTART 18773; GFX940-NEXT: ; use s[8:9] 18774; GFX940-NEXT: ;;#ASMEND 18775; GFX940-NEXT: s_setpc_b64 s[30:31] 18776 %vec0 = call <4 x i16> asm "; def $0", "=s"() 18777 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 2, i32 4, i32 4, i32 4> 18778 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 18779 ret void 18780} 18781 18782define void @s_shuffle_v4i16_v4i16__3_4_4_4() { 18783; GFX900-LABEL: s_shuffle_v4i16_v4i16__3_4_4_4: 18784; GFX900: ; %bb.0: 18785; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18786; GFX900-NEXT: ;;#ASMSTART 18787; GFX900-NEXT: ; def s[4:5] 18788; GFX900-NEXT: ;;#ASMEND 18789; GFX900-NEXT: s_lshr_b32 s8, s5, 16 18790; GFX900-NEXT: ;;#ASMSTART 18791; GFX900-NEXT: ; use s[8:9] 18792; GFX900-NEXT: ;;#ASMEND 18793; GFX900-NEXT: s_setpc_b64 s[30:31] 18794; 18795; 
GFX90A-LABEL: s_shuffle_v4i16_v4i16__3_4_4_4: 18796; GFX90A: ; %bb.0: 18797; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18798; GFX90A-NEXT: ;;#ASMSTART 18799; GFX90A-NEXT: ; def s[4:5] 18800; GFX90A-NEXT: ;;#ASMEND 18801; GFX90A-NEXT: s_lshr_b32 s8, s5, 16 18802; GFX90A-NEXT: ;;#ASMSTART 18803; GFX90A-NEXT: ; use s[8:9] 18804; GFX90A-NEXT: ;;#ASMEND 18805; GFX90A-NEXT: s_setpc_b64 s[30:31] 18806; 18807; GFX940-LABEL: s_shuffle_v4i16_v4i16__3_4_4_4: 18808; GFX940: ; %bb.0: 18809; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18810; GFX940-NEXT: ;;#ASMSTART 18811; GFX940-NEXT: ; def s[0:1] 18812; GFX940-NEXT: ;;#ASMEND 18813; GFX940-NEXT: s_lshr_b32 s8, s1, 16 18814; GFX940-NEXT: ;;#ASMSTART 18815; GFX940-NEXT: ; use s[8:9] 18816; GFX940-NEXT: ;;#ASMEND 18817; GFX940-NEXT: s_setpc_b64 s[30:31] 18818 %vec0 = call <4 x i16> asm "; def $0", "=s"() 18819 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 3, i32 4, i32 4, i32 4> 18820 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 18821 ret void 18822} 18823 18824define void @s_shuffle_v4i16_v4i16__4_4_4_4() { 18825; GFX9-LABEL: s_shuffle_v4i16_v4i16__4_4_4_4: 18826; GFX9: ; %bb.0: 18827; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18828; GFX9-NEXT: ;;#ASMSTART 18829; GFX9-NEXT: ; use s[8:9] 18830; GFX9-NEXT: ;;#ASMEND 18831; GFX9-NEXT: s_setpc_b64 s[30:31] 18832 %vec0 = call <4 x i16> asm "; def $0", "=s"() 18833 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 4, i32 4, i32 4, i32 4> 18834 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 18835 ret void 18836} 18837 18838define void @s_shuffle_v4i16_v4i16__5_4_4_4() { 18839; GFX900-LABEL: s_shuffle_v4i16_v4i16__5_4_4_4: 18840; GFX900: ; %bb.0: 18841; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18842; GFX900-NEXT: ;;#ASMSTART 18843; GFX900-NEXT: ; def s[4:5] 18844; GFX900-NEXT: ;;#ASMEND 18845; GFX900-NEXT: s_lshr_b32 s5, s4, 16 18846; GFX900-NEXT: 
s_pack_ll_b32_b16 s8, s5, s4 18847; GFX900-NEXT: s_pack_ll_b32_b16 s9, s4, s4 18848; GFX900-NEXT: ;;#ASMSTART 18849; GFX900-NEXT: ; use s[8:9] 18850; GFX900-NEXT: ;;#ASMEND 18851; GFX900-NEXT: s_setpc_b64 s[30:31] 18852; 18853; GFX90A-LABEL: s_shuffle_v4i16_v4i16__5_4_4_4: 18854; GFX90A: ; %bb.0: 18855; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18856; GFX90A-NEXT: ;;#ASMSTART 18857; GFX90A-NEXT: ; def s[4:5] 18858; GFX90A-NEXT: ;;#ASMEND 18859; GFX90A-NEXT: s_lshr_b32 s5, s4, 16 18860; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4 18861; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s4, s4 18862; GFX90A-NEXT: ;;#ASMSTART 18863; GFX90A-NEXT: ; use s[8:9] 18864; GFX90A-NEXT: ;;#ASMEND 18865; GFX90A-NEXT: s_setpc_b64 s[30:31] 18866; 18867; GFX940-LABEL: s_shuffle_v4i16_v4i16__5_4_4_4: 18868; GFX940: ; %bb.0: 18869; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18870; GFX940-NEXT: ;;#ASMSTART 18871; GFX940-NEXT: ; def s[0:1] 18872; GFX940-NEXT: ;;#ASMEND 18873; GFX940-NEXT: s_lshr_b32 s1, s0, 16 18874; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 18875; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 18876; GFX940-NEXT: ;;#ASMSTART 18877; GFX940-NEXT: ; use s[8:9] 18878; GFX940-NEXT: ;;#ASMEND 18879; GFX940-NEXT: s_setpc_b64 s[30:31] 18880 %vec0 = call <4 x i16> asm "; def $0", "=s"() 18881 %vec1 = call <4 x i16> asm "; def $0", "=s"() 18882 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 5, i32 4, i32 4, i32 4> 18883 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 18884 ret void 18885} 18886 18887define void @s_shuffle_v4i16_v4i16__6_4_4_4() { 18888; GFX900-LABEL: s_shuffle_v4i16_v4i16__6_4_4_4: 18889; GFX900: ; %bb.0: 18890; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18891; GFX900-NEXT: ;;#ASMSTART 18892; GFX900-NEXT: ; def s[4:5] 18893; GFX900-NEXT: ;;#ASMEND 18894; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4 18895; GFX900-NEXT: s_pack_ll_b32_b16 s9, s4, s4 18896; GFX900-NEXT: ;;#ASMSTART 18897; GFX900-NEXT: ; use s[8:9] 
18898; GFX900-NEXT: ;;#ASMEND 18899; GFX900-NEXT: s_setpc_b64 s[30:31] 18900; 18901; GFX90A-LABEL: s_shuffle_v4i16_v4i16__6_4_4_4: 18902; GFX90A: ; %bb.0: 18903; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18904; GFX90A-NEXT: ;;#ASMSTART 18905; GFX90A-NEXT: ; def s[4:5] 18906; GFX90A-NEXT: ;;#ASMEND 18907; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4 18908; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s4, s4 18909; GFX90A-NEXT: ;;#ASMSTART 18910; GFX90A-NEXT: ; use s[8:9] 18911; GFX90A-NEXT: ;;#ASMEND 18912; GFX90A-NEXT: s_setpc_b64 s[30:31] 18913; 18914; GFX940-LABEL: s_shuffle_v4i16_v4i16__6_4_4_4: 18915; GFX940: ; %bb.0: 18916; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18917; GFX940-NEXT: ;;#ASMSTART 18918; GFX940-NEXT: ; def s[0:1] 18919; GFX940-NEXT: ;;#ASMEND 18920; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 18921; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 18922; GFX940-NEXT: ;;#ASMSTART 18923; GFX940-NEXT: ; use s[8:9] 18924; GFX940-NEXT: ;;#ASMEND 18925; GFX940-NEXT: s_setpc_b64 s[30:31] 18926 %vec0 = call <4 x i16> asm "; def $0", "=s"() 18927 %vec1 = call <4 x i16> asm "; def $0", "=s"() 18928 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 6, i32 4, i32 4, i32 4> 18929 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 18930 ret void 18931} 18932 18933define void @s_shuffle_v4i16_v4i16__7_4_4_4() { 18934; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_4_4_4: 18935; GFX900: ; %bb.0: 18936; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18937; GFX900-NEXT: ;;#ASMSTART 18938; GFX900-NEXT: ; def s[4:5] 18939; GFX900-NEXT: ;;#ASMEND 18940; GFX900-NEXT: s_lshr_b32 s5, s5, 16 18941; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4 18942; GFX900-NEXT: s_pack_ll_b32_b16 s9, s4, s4 18943; GFX900-NEXT: ;;#ASMSTART 18944; GFX900-NEXT: ; use s[8:9] 18945; GFX900-NEXT: ;;#ASMEND 18946; GFX900-NEXT: s_setpc_b64 s[30:31] 18947; 18948; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_4_4_4: 18949; GFX90A: ; %bb.0: 18950; GFX90A-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) 18951; GFX90A-NEXT: ;;#ASMSTART 18952; GFX90A-NEXT: ; def s[4:5] 18953; GFX90A-NEXT: ;;#ASMEND 18954; GFX90A-NEXT: s_lshr_b32 s5, s5, 16 18955; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4 18956; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s4, s4 18957; GFX90A-NEXT: ;;#ASMSTART 18958; GFX90A-NEXT: ; use s[8:9] 18959; GFX90A-NEXT: ;;#ASMEND 18960; GFX90A-NEXT: s_setpc_b64 s[30:31] 18961; 18962; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_4_4_4: 18963; GFX940: ; %bb.0: 18964; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18965; GFX940-NEXT: ;;#ASMSTART 18966; GFX940-NEXT: ; def s[0:1] 18967; GFX940-NEXT: ;;#ASMEND 18968; GFX940-NEXT: s_lshr_b32 s1, s1, 16 18969; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 18970; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 18971; GFX940-NEXT: ;;#ASMSTART 18972; GFX940-NEXT: ; use s[8:9] 18973; GFX940-NEXT: ;;#ASMEND 18974; GFX940-NEXT: s_setpc_b64 s[30:31] 18975 %vec0 = call <4 x i16> asm "; def $0", "=s"() 18976 %vec1 = call <4 x i16> asm "; def $0", "=s"() 18977 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 4, i32 4, i32 4> 18978 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 18979 ret void 18980} 18981 18982define void @s_shuffle_v4i16_v4i16__7_u_4_4() { 18983; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_u_4_4: 18984; GFX900: ; %bb.0: 18985; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18986; GFX900-NEXT: ;;#ASMSTART 18987; GFX900-NEXT: ; def s[4:5] 18988; GFX900-NEXT: ;;#ASMEND 18989; GFX900-NEXT: s_lshr_b32 s8, s5, 16 18990; GFX900-NEXT: s_pack_ll_b32_b16 s9, s4, s4 18991; GFX900-NEXT: ;;#ASMSTART 18992; GFX900-NEXT: ; use s[8:9] 18993; GFX900-NEXT: ;;#ASMEND 18994; GFX900-NEXT: s_setpc_b64 s[30:31] 18995; 18996; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_u_4_4: 18997; GFX90A: ; %bb.0: 18998; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18999; GFX90A-NEXT: ;;#ASMSTART 19000; GFX90A-NEXT: ; def s[4:5] 19001; GFX90A-NEXT: ;;#ASMEND 19002; GFX90A-NEXT: 
s_lshr_b32 s8, s5, 16 19003; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s4, s4 19004; GFX90A-NEXT: ;;#ASMSTART 19005; GFX90A-NEXT: ; use s[8:9] 19006; GFX90A-NEXT: ;;#ASMEND 19007; GFX90A-NEXT: s_setpc_b64 s[30:31] 19008; 19009; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_u_4_4: 19010; GFX940: ; %bb.0: 19011; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19012; GFX940-NEXT: ;;#ASMSTART 19013; GFX940-NEXT: ; def s[0:1] 19014; GFX940-NEXT: ;;#ASMEND 19015; GFX940-NEXT: s_lshr_b32 s8, s1, 16 19016; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 19017; GFX940-NEXT: ;;#ASMSTART 19018; GFX940-NEXT: ; use s[8:9] 19019; GFX940-NEXT: ;;#ASMEND 19020; GFX940-NEXT: s_setpc_b64 s[30:31] 19021 %vec0 = call <4 x i16> asm "; def $0", "=s"() 19022 %vec1 = call <4 x i16> asm "; def $0", "=s"() 19023 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 poison, i32 4, i32 4> 19024 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 19025 ret void 19026} 19027 19028define void @s_shuffle_v4i16_v4i16__7_0_4_4() { 19029; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_0_4_4: 19030; GFX900: ; %bb.0: 19031; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19032; GFX900-NEXT: ;;#ASMSTART 19033; GFX900-NEXT: ; def s[4:5] 19034; GFX900-NEXT: ;;#ASMEND 19035; GFX900-NEXT: ;;#ASMSTART 19036; GFX900-NEXT: ; def s[6:7] 19037; GFX900-NEXT: ;;#ASMEND 19038; GFX900-NEXT: s_lshr_b32 s5, s7, 16 19039; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4 19040; GFX900-NEXT: s_pack_ll_b32_b16 s9, s6, s6 19041; GFX900-NEXT: ;;#ASMSTART 19042; GFX900-NEXT: ; use s[8:9] 19043; GFX900-NEXT: ;;#ASMEND 19044; GFX900-NEXT: s_setpc_b64 s[30:31] 19045; 19046; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_0_4_4: 19047; GFX90A: ; %bb.0: 19048; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19049; GFX90A-NEXT: ;;#ASMSTART 19050; GFX90A-NEXT: ; def s[4:5] 19051; GFX90A-NEXT: ;;#ASMEND 19052; GFX90A-NEXT: ;;#ASMSTART 19053; GFX90A-NEXT: ; def s[6:7] 19054; GFX90A-NEXT: ;;#ASMEND 19055; GFX90A-NEXT: 
s_lshr_b32 s5, s7, 16 19056; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4 19057; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s6, s6 19058; GFX90A-NEXT: ;;#ASMSTART 19059; GFX90A-NEXT: ; use s[8:9] 19060; GFX90A-NEXT: ;;#ASMEND 19061; GFX90A-NEXT: s_setpc_b64 s[30:31] 19062; 19063; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_0_4_4: 19064; GFX940: ; %bb.0: 19065; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19066; GFX940-NEXT: ;;#ASMSTART 19067; GFX940-NEXT: ; def s[0:1] 19068; GFX940-NEXT: ;;#ASMEND 19069; GFX940-NEXT: ;;#ASMSTART 19070; GFX940-NEXT: ; def s[2:3] 19071; GFX940-NEXT: ;;#ASMEND 19072; GFX940-NEXT: s_lshr_b32 s1, s3, 16 19073; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 19074; GFX940-NEXT: s_pack_ll_b32_b16 s9, s2, s2 19075; GFX940-NEXT: ;;#ASMSTART 19076; GFX940-NEXT: ; use s[8:9] 19077; GFX940-NEXT: ;;#ASMEND 19078; GFX940-NEXT: s_setpc_b64 s[30:31] 19079 %vec0 = call <4 x i16> asm "; def $0", "=s"() 19080 %vec1 = call <4 x i16> asm "; def $0", "=s"() 19081 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 0, i32 4, i32 4> 19082 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 19083 ret void 19084} 19085 19086define void @s_shuffle_v4i16_v4i16__7_1_4_4() { 19087; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_1_4_4: 19088; GFX900: ; %bb.0: 19089; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19090; GFX900-NEXT: ;;#ASMSTART 19091; GFX900-NEXT: ; def s[4:5] 19092; GFX900-NEXT: ;;#ASMEND 19093; GFX900-NEXT: ;;#ASMSTART 19094; GFX900-NEXT: ; def s[6:7] 19095; GFX900-NEXT: ;;#ASMEND 19096; GFX900-NEXT: s_pack_hh_b32_b16 s8, s7, s4 19097; GFX900-NEXT: s_pack_ll_b32_b16 s9, s6, s6 19098; GFX900-NEXT: ;;#ASMSTART 19099; GFX900-NEXT: ; use s[8:9] 19100; GFX900-NEXT: ;;#ASMEND 19101; GFX900-NEXT: s_setpc_b64 s[30:31] 19102; 19103; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_1_4_4: 19104; GFX90A: ; %bb.0: 19105; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19106; GFX90A-NEXT: ;;#ASMSTART 19107; GFX90A-NEXT: ; def s[4:5] 19108; 
GFX90A-NEXT: ;;#ASMEND 19109; GFX90A-NEXT: ;;#ASMSTART 19110; GFX90A-NEXT: ; def s[6:7] 19111; GFX90A-NEXT: ;;#ASMEND 19112; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s7, s4 19113; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s6, s6 19114; GFX90A-NEXT: ;;#ASMSTART 19115; GFX90A-NEXT: ; use s[8:9] 19116; GFX90A-NEXT: ;;#ASMEND 19117; GFX90A-NEXT: s_setpc_b64 s[30:31] 19118; 19119; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_1_4_4: 19120; GFX940: ; %bb.0: 19121; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19122; GFX940-NEXT: ;;#ASMSTART 19123; GFX940-NEXT: ; def s[0:1] 19124; GFX940-NEXT: ;;#ASMEND 19125; GFX940-NEXT: ;;#ASMSTART 19126; GFX940-NEXT: ; def s[2:3] 19127; GFX940-NEXT: ;;#ASMEND 19128; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s0 19129; GFX940-NEXT: s_pack_ll_b32_b16 s9, s2, s2 19130; GFX940-NEXT: ;;#ASMSTART 19131; GFX940-NEXT: ; use s[8:9] 19132; GFX940-NEXT: ;;#ASMEND 19133; GFX940-NEXT: s_setpc_b64 s[30:31] 19134 %vec0 = call <4 x i16> asm "; def $0", "=s"() 19135 %vec1 = call <4 x i16> asm "; def $0", "=s"() 19136 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 1, i32 4, i32 4> 19137 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 19138 ret void 19139} 19140 19141define void @s_shuffle_v4i16_v4i16__7_2_4_4() { 19142; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_2_4_4: 19143; GFX900: ; %bb.0: 19144; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19145; GFX900-NEXT: ;;#ASMSTART 19146; GFX900-NEXT: ; def s[4:5] 19147; GFX900-NEXT: ;;#ASMEND 19148; GFX900-NEXT: ;;#ASMSTART 19149; GFX900-NEXT: ; def s[6:7] 19150; GFX900-NEXT: ;;#ASMEND 19151; GFX900-NEXT: s_lshr_b32 s4, s7, 16 19152; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s5 19153; GFX900-NEXT: s_pack_ll_b32_b16 s9, s6, s6 19154; GFX900-NEXT: ;;#ASMSTART 19155; GFX900-NEXT: ; use s[8:9] 19156; GFX900-NEXT: ;;#ASMEND 19157; GFX900-NEXT: s_setpc_b64 s[30:31] 19158; 19159; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_2_4_4: 19160; GFX90A: ; %bb.0: 19161; GFX90A-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) 19162; GFX90A-NEXT: ;;#ASMSTART 19163; GFX90A-NEXT: ; def s[4:5] 19164; GFX90A-NEXT: ;;#ASMEND 19165; GFX90A-NEXT: ;;#ASMSTART 19166; GFX90A-NEXT: ; def s[6:7] 19167; GFX90A-NEXT: ;;#ASMEND 19168; GFX90A-NEXT: s_lshr_b32 s4, s7, 16 19169; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s5 19170; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s6, s6 19171; GFX90A-NEXT: ;;#ASMSTART 19172; GFX90A-NEXT: ; use s[8:9] 19173; GFX90A-NEXT: ;;#ASMEND 19174; GFX90A-NEXT: s_setpc_b64 s[30:31] 19175; 19176; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_2_4_4: 19177; GFX940: ; %bb.0: 19178; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19179; GFX940-NEXT: ;;#ASMSTART 19180; GFX940-NEXT: ; def s[0:1] 19181; GFX940-NEXT: ;;#ASMEND 19182; GFX940-NEXT: ;;#ASMSTART 19183; GFX940-NEXT: ; def s[2:3] 19184; GFX940-NEXT: ;;#ASMEND 19185; GFX940-NEXT: s_lshr_b32 s0, s3, 16 19186; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 19187; GFX940-NEXT: s_pack_ll_b32_b16 s9, s2, s2 19188; GFX940-NEXT: ;;#ASMSTART 19189; GFX940-NEXT: ; use s[8:9] 19190; GFX940-NEXT: ;;#ASMEND 19191; GFX940-NEXT: s_setpc_b64 s[30:31] 19192 %vec0 = call <4 x i16> asm "; def $0", "=s"() 19193 %vec1 = call <4 x i16> asm "; def $0", "=s"() 19194 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 2, i32 4, i32 4> 19195 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 19196 ret void 19197} 19198 19199define void @s_shuffle_v4i16_v4i16__7_3_4_4() { 19200; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_3_4_4: 19201; GFX900: ; %bb.0: 19202; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19203; GFX900-NEXT: ;;#ASMSTART 19204; GFX900-NEXT: ; def s[4:5] 19205; GFX900-NEXT: ;;#ASMEND 19206; GFX900-NEXT: ;;#ASMSTART 19207; GFX900-NEXT: ; def s[6:7] 19208; GFX900-NEXT: ;;#ASMEND 19209; GFX900-NEXT: s_pack_hh_b32_b16 s8, s7, s5 19210; GFX900-NEXT: s_pack_ll_b32_b16 s9, s6, s6 19211; GFX900-NEXT: ;;#ASMSTART 19212; GFX900-NEXT: ; use s[8:9] 19213; GFX900-NEXT: ;;#ASMEND 19214; 
GFX900-NEXT: s_setpc_b64 s[30:31] 19215; 19216; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_3_4_4: 19217; GFX90A: ; %bb.0: 19218; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19219; GFX90A-NEXT: ;;#ASMSTART 19220; GFX90A-NEXT: ; def s[4:5] 19221; GFX90A-NEXT: ;;#ASMEND 19222; GFX90A-NEXT: ;;#ASMSTART 19223; GFX90A-NEXT: ; def s[6:7] 19224; GFX90A-NEXT: ;;#ASMEND 19225; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s7, s5 19226; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s6, s6 19227; GFX90A-NEXT: ;;#ASMSTART 19228; GFX90A-NEXT: ; use s[8:9] 19229; GFX90A-NEXT: ;;#ASMEND 19230; GFX90A-NEXT: s_setpc_b64 s[30:31] 19231; 19232; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_3_4_4: 19233; GFX940: ; %bb.0: 19234; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19235; GFX940-NEXT: ;;#ASMSTART 19236; GFX940-NEXT: ; def s[0:1] 19237; GFX940-NEXT: ;;#ASMEND 19238; GFX940-NEXT: ;;#ASMSTART 19239; GFX940-NEXT: ; def s[2:3] 19240; GFX940-NEXT: ;;#ASMEND 19241; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s1 19242; GFX940-NEXT: s_pack_ll_b32_b16 s9, s2, s2 19243; GFX940-NEXT: ;;#ASMSTART 19244; GFX940-NEXT: ; use s[8:9] 19245; GFX940-NEXT: ;;#ASMEND 19246; GFX940-NEXT: s_setpc_b64 s[30:31] 19247 %vec0 = call <4 x i16> asm "; def $0", "=s"() 19248 %vec1 = call <4 x i16> asm "; def $0", "=s"() 19249 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 3, i32 4, i32 4> 19250 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 19251 ret void 19252} 19253 19254define void @s_shuffle_v4i16_v4i16__7_5_4_4() { 19255; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_5_4_4: 19256; GFX900: ; %bb.0: 19257; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19258; GFX900-NEXT: ;;#ASMSTART 19259; GFX900-NEXT: ; def s[4:5] 19260; GFX900-NEXT: ;;#ASMEND 19261; GFX900-NEXT: s_pack_hh_b32_b16 s8, s5, s4 19262; GFX900-NEXT: s_pack_ll_b32_b16 s9, s4, s4 19263; GFX900-NEXT: ;;#ASMSTART 19264; GFX900-NEXT: ; use s[8:9] 19265; GFX900-NEXT: ;;#ASMEND 19266; GFX900-NEXT: s_setpc_b64 s[30:31] 19267; 
19268; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_5_4_4: 19269; GFX90A: ; %bb.0: 19270; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19271; GFX90A-NEXT: ;;#ASMSTART 19272; GFX90A-NEXT: ; def s[4:5] 19273; GFX90A-NEXT: ;;#ASMEND 19274; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s5, s4 19275; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s4, s4 19276; GFX90A-NEXT: ;;#ASMSTART 19277; GFX90A-NEXT: ; use s[8:9] 19278; GFX90A-NEXT: ;;#ASMEND 19279; GFX90A-NEXT: s_setpc_b64 s[30:31] 19280; 19281; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_5_4_4: 19282; GFX940: ; %bb.0: 19283; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19284; GFX940-NEXT: ;;#ASMSTART 19285; GFX940-NEXT: ; def s[0:1] 19286; GFX940-NEXT: ;;#ASMEND 19287; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s0 19288; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 19289; GFX940-NEXT: ;;#ASMSTART 19290; GFX940-NEXT: ; use s[8:9] 19291; GFX940-NEXT: ;;#ASMEND 19292; GFX940-NEXT: s_setpc_b64 s[30:31] 19293 %vec0 = call <4 x i16> asm "; def $0", "=s"() 19294 %vec1 = call <4 x i16> asm "; def $0", "=s"() 19295 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 5, i32 4, i32 4> 19296 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 19297 ret void 19298} 19299 19300define void @s_shuffle_v4i16_v4i16__7_6_4_4() { 19301; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_6_4_4: 19302; GFX900: ; %bb.0: 19303; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19304; GFX900-NEXT: ;;#ASMSTART 19305; GFX900-NEXT: ; def s[4:5] 19306; GFX900-NEXT: ;;#ASMEND 19307; GFX900-NEXT: s_lshr_b32 s6, s5, 16 19308; GFX900-NEXT: s_pack_ll_b32_b16 s8, s6, s5 19309; GFX900-NEXT: s_pack_ll_b32_b16 s9, s4, s4 19310; GFX900-NEXT: ;;#ASMSTART 19311; GFX900-NEXT: ; use s[8:9] 19312; GFX900-NEXT: ;;#ASMEND 19313; GFX900-NEXT: s_setpc_b64 s[30:31] 19314; 19315; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_6_4_4: 19316; GFX90A: ; %bb.0: 19317; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19318; GFX90A-NEXT: ;;#ASMSTART 19319; GFX90A-NEXT: 
; def s[4:5] 19320; GFX90A-NEXT: ;;#ASMEND 19321; GFX90A-NEXT: s_lshr_b32 s6, s5, 16 19322; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s6, s5 19323; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s4, s4 19324; GFX90A-NEXT: ;;#ASMSTART 19325; GFX90A-NEXT: ; use s[8:9] 19326; GFX90A-NEXT: ;;#ASMEND 19327; GFX90A-NEXT: s_setpc_b64 s[30:31] 19328; 19329; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_6_4_4: 19330; GFX940: ; %bb.0: 19331; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19332; GFX940-NEXT: ;;#ASMSTART 19333; GFX940-NEXT: ; def s[0:1] 19334; GFX940-NEXT: ;;#ASMEND 19335; GFX940-NEXT: s_lshr_b32 s2, s1, 16 19336; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s1 19337; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 19338; GFX940-NEXT: ;;#ASMSTART 19339; GFX940-NEXT: ; use s[8:9] 19340; GFX940-NEXT: ;;#ASMEND 19341; GFX940-NEXT: s_setpc_b64 s[30:31] 19342 %vec0 = call <4 x i16> asm "; def $0", "=s"() 19343 %vec1 = call <4 x i16> asm "; def $0", "=s"() 19344 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 6, i32 4, i32 4> 19345 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 19346 ret void 19347} 19348 19349define void @s_shuffle_v4i16_v4i16__7_7_4_4() { 19350; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_4_4: 19351; GFX900: ; %bb.0: 19352; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19353; GFX900-NEXT: ;;#ASMSTART 19354; GFX900-NEXT: ; def s[4:5] 19355; GFX900-NEXT: ;;#ASMEND 19356; GFX900-NEXT: s_pack_ll_b32_b16 s9, s4, s4 19357; GFX900-NEXT: s_pack_hh_b32_b16 s8, s5, s5 19358; GFX900-NEXT: ;;#ASMSTART 19359; GFX900-NEXT: ; use s[8:9] 19360; GFX900-NEXT: ;;#ASMEND 19361; GFX900-NEXT: s_setpc_b64 s[30:31] 19362; 19363; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_4_4: 19364; GFX90A: ; %bb.0: 19365; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19366; GFX90A-NEXT: ;;#ASMSTART 19367; GFX90A-NEXT: ; def s[4:5] 19368; GFX90A-NEXT: ;;#ASMEND 19369; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s4, s4 19370; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s5, s5 
19371; GFX90A-NEXT: ;;#ASMSTART 19372; GFX90A-NEXT: ; use s[8:9] 19373; GFX90A-NEXT: ;;#ASMEND 19374; GFX90A-NEXT: s_setpc_b64 s[30:31] 19375; 19376; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_4_4: 19377; GFX940: ; %bb.0: 19378; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19379; GFX940-NEXT: ;;#ASMSTART 19380; GFX940-NEXT: ; def s[0:1] 19381; GFX940-NEXT: ;;#ASMEND 19382; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s0 19383; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s1 19384; GFX940-NEXT: ;;#ASMSTART 19385; GFX940-NEXT: ; use s[8:9] 19386; GFX940-NEXT: ;;#ASMEND 19387; GFX940-NEXT: s_setpc_b64 s[30:31] 19388 %vec0 = call <4 x i16> asm "; def $0", "=s"() 19389 %vec1 = call <4 x i16> asm "; def $0", "=s"() 19390 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 4, i32 4> 19391 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 19392 ret void 19393} 19394 19395define void @s_shuffle_v4i16_v4i16__7_7_u_4() { 19396; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_u_4: 19397; GFX900: ; %bb.0: 19398; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19399; GFX900-NEXT: ;;#ASMSTART 19400; GFX900-NEXT: ; def s[4:5] 19401; GFX900-NEXT: ;;#ASMEND 19402; GFX900-NEXT: s_lshl_b32 s9, s4, 16 19403; GFX900-NEXT: s_pack_hh_b32_b16 s8, s5, s5 19404; GFX900-NEXT: ;;#ASMSTART 19405; GFX900-NEXT: ; use s[8:9] 19406; GFX900-NEXT: ;;#ASMEND 19407; GFX900-NEXT: s_setpc_b64 s[30:31] 19408; 19409; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_u_4: 19410; GFX90A: ; %bb.0: 19411; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19412; GFX90A-NEXT: ;;#ASMSTART 19413; GFX90A-NEXT: ; def s[4:5] 19414; GFX90A-NEXT: ;;#ASMEND 19415; GFX90A-NEXT: s_lshl_b32 s9, s4, 16 19416; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s5, s5 19417; GFX90A-NEXT: ;;#ASMSTART 19418; GFX90A-NEXT: ; use s[8:9] 19419; GFX90A-NEXT: ;;#ASMEND 19420; GFX90A-NEXT: s_setpc_b64 s[30:31] 19421; 19422; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_u_4: 19423; GFX940: ; %bb.0: 19424; GFX940-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19425; GFX940-NEXT: ;;#ASMSTART 19426; GFX940-NEXT: ; def s[0:1] 19427; GFX940-NEXT: ;;#ASMEND 19428; GFX940-NEXT: s_lshl_b32 s9, s0, 16 19429; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s1 19430; GFX940-NEXT: ;;#ASMSTART 19431; GFX940-NEXT: ; use s[8:9] 19432; GFX940-NEXT: ;;#ASMEND 19433; GFX940-NEXT: s_setpc_b64 s[30:31] 19434 %vec0 = call <4 x i16> asm "; def $0", "=s"() 19435 %vec1 = call <4 x i16> asm "; def $0", "=s"() 19436 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 poison, i32 4> 19437 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 19438 ret void 19439} 19440 19441define void @s_shuffle_v4i16_v4i16__7_7_0_4() { 19442; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_0_4: 19443; GFX900: ; %bb.0: 19444; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19445; GFX900-NEXT: ;;#ASMSTART 19446; GFX900-NEXT: ; def s[4:5] 19447; GFX900-NEXT: ;;#ASMEND 19448; GFX900-NEXT: ;;#ASMSTART 19449; GFX900-NEXT: ; def s[6:7] 19450; GFX900-NEXT: ;;#ASMEND 19451; GFX900-NEXT: s_pack_ll_b32_b16 s9, s4, s6 19452; GFX900-NEXT: s_pack_hh_b32_b16 s8, s7, s7 19453; GFX900-NEXT: ;;#ASMSTART 19454; GFX900-NEXT: ; use s[8:9] 19455; GFX900-NEXT: ;;#ASMEND 19456; GFX900-NEXT: s_setpc_b64 s[30:31] 19457; 19458; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_0_4: 19459; GFX90A: ; %bb.0: 19460; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19461; GFX90A-NEXT: ;;#ASMSTART 19462; GFX90A-NEXT: ; def s[4:5] 19463; GFX90A-NEXT: ;;#ASMEND 19464; GFX90A-NEXT: ;;#ASMSTART 19465; GFX90A-NEXT: ; def s[6:7] 19466; GFX90A-NEXT: ;;#ASMEND 19467; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s4, s6 19468; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s7, s7 19469; GFX90A-NEXT: ;;#ASMSTART 19470; GFX90A-NEXT: ; use s[8:9] 19471; GFX90A-NEXT: ;;#ASMEND 19472; GFX90A-NEXT: s_setpc_b64 s[30:31] 19473; 19474; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_0_4: 19475; GFX940: ; %bb.0: 19476; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 
19477; GFX940-NEXT: ;;#ASMSTART 19478; GFX940-NEXT: ; def s[0:1] 19479; GFX940-NEXT: ;;#ASMEND 19480; GFX940-NEXT: ;;#ASMSTART 19481; GFX940-NEXT: ; def s[2:3] 19482; GFX940-NEXT: ;;#ASMEND 19483; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s2 19484; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3 19485; GFX940-NEXT: ;;#ASMSTART 19486; GFX940-NEXT: ; use s[8:9] 19487; GFX940-NEXT: ;;#ASMEND 19488; GFX940-NEXT: s_setpc_b64 s[30:31] 19489 %vec0 = call <4 x i16> asm "; def $0", "=s"() 19490 %vec1 = call <4 x i16> asm "; def $0", "=s"() 19491 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 0, i32 4> 19492 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 19493 ret void 19494} 19495 19496define void @s_shuffle_v4i16_v4i16__7_7_1_4() { 19497; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_1_4: 19498; GFX900: ; %bb.0: 19499; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19500; GFX900-NEXT: ;;#ASMSTART 19501; GFX900-NEXT: ; def s[4:5] 19502; GFX900-NEXT: ;;#ASMEND 19503; GFX900-NEXT: s_lshr_b32 s4, s4, 16 19504; GFX900-NEXT: ;;#ASMSTART 19505; GFX900-NEXT: ; def s[6:7] 19506; GFX900-NEXT: ;;#ASMEND 19507; GFX900-NEXT: s_pack_ll_b32_b16 s9, s4, s6 19508; GFX900-NEXT: s_pack_hh_b32_b16 s8, s7, s7 19509; GFX900-NEXT: ;;#ASMSTART 19510; GFX900-NEXT: ; use s[8:9] 19511; GFX900-NEXT: ;;#ASMEND 19512; GFX900-NEXT: s_setpc_b64 s[30:31] 19513; 19514; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_1_4: 19515; GFX90A: ; %bb.0: 19516; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19517; GFX90A-NEXT: ;;#ASMSTART 19518; GFX90A-NEXT: ; def s[4:5] 19519; GFX90A-NEXT: ;;#ASMEND 19520; GFX90A-NEXT: s_lshr_b32 s4, s4, 16 19521; GFX90A-NEXT: ;;#ASMSTART 19522; GFX90A-NEXT: ; def s[6:7] 19523; GFX90A-NEXT: ;;#ASMEND 19524; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s4, s6 19525; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s7, s7 19526; GFX90A-NEXT: ;;#ASMSTART 19527; GFX90A-NEXT: ; use s[8:9] 19528; GFX90A-NEXT: ;;#ASMEND 19529; GFX90A-NEXT: s_setpc_b64 s[30:31] 
19530; 19531; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_1_4: 19532; GFX940: ; %bb.0: 19533; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19534; GFX940-NEXT: ;;#ASMSTART 19535; GFX940-NEXT: ; def s[0:1] 19536; GFX940-NEXT: ;;#ASMEND 19537; GFX940-NEXT: s_lshr_b32 s0, s0, 16 19538; GFX940-NEXT: ;;#ASMSTART 19539; GFX940-NEXT: ; def s[2:3] 19540; GFX940-NEXT: ;;#ASMEND 19541; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s2 19542; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3 19543; GFX940-NEXT: ;;#ASMSTART 19544; GFX940-NEXT: ; use s[8:9] 19545; GFX940-NEXT: ;;#ASMEND 19546; GFX940-NEXT: s_setpc_b64 s[30:31] 19547 %vec0 = call <4 x i16> asm "; def $0", "=s"() 19548 %vec1 = call <4 x i16> asm "; def $0", "=s"() 19549 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 1, i32 4> 19550 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 19551 ret void 19552} 19553 19554define void @s_shuffle_v4i16_v4i16__7_7_2_4() { 19555; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_2_4: 19556; GFX900: ; %bb.0: 19557; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19558; GFX900-NEXT: ;;#ASMSTART 19559; GFX900-NEXT: ; def s[4:5] 19560; GFX900-NEXT: ;;#ASMEND 19561; GFX900-NEXT: ;;#ASMSTART 19562; GFX900-NEXT: ; def s[6:7] 19563; GFX900-NEXT: ;;#ASMEND 19564; GFX900-NEXT: s_pack_ll_b32_b16 s9, s5, s6 19565; GFX900-NEXT: s_pack_hh_b32_b16 s8, s7, s7 19566; GFX900-NEXT: ;;#ASMSTART 19567; GFX900-NEXT: ; use s[8:9] 19568; GFX900-NEXT: ;;#ASMEND 19569; GFX900-NEXT: s_setpc_b64 s[30:31] 19570; 19571; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_2_4: 19572; GFX90A: ; %bb.0: 19573; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19574; GFX90A-NEXT: ;;#ASMSTART 19575; GFX90A-NEXT: ; def s[4:5] 19576; GFX90A-NEXT: ;;#ASMEND 19577; GFX90A-NEXT: ;;#ASMSTART 19578; GFX90A-NEXT: ; def s[6:7] 19579; GFX90A-NEXT: ;;#ASMEND 19580; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s5, s6 19581; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s7, s7 19582; GFX90A-NEXT: ;;#ASMSTART 19583; 
GFX90A-NEXT: ; use s[8:9] 19584; GFX90A-NEXT: ;;#ASMEND 19585; GFX90A-NEXT: s_setpc_b64 s[30:31] 19586; 19587; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_2_4: 19588; GFX940: ; %bb.0: 19589; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19590; GFX940-NEXT: ;;#ASMSTART 19591; GFX940-NEXT: ; def s[0:1] 19592; GFX940-NEXT: ;;#ASMEND 19593; GFX940-NEXT: ;;#ASMSTART 19594; GFX940-NEXT: ; def s[2:3] 19595; GFX940-NEXT: ;;#ASMEND 19596; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s2 19597; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3 19598; GFX940-NEXT: ;;#ASMSTART 19599; GFX940-NEXT: ; use s[8:9] 19600; GFX940-NEXT: ;;#ASMEND 19601; GFX940-NEXT: s_setpc_b64 s[30:31] 19602 %vec0 = call <4 x i16> asm "; def $0", "=s"() 19603 %vec1 = call <4 x i16> asm "; def $0", "=s"() 19604 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 2, i32 4> 19605 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 19606 ret void 19607} 19608 19609define void @s_shuffle_v4i16_v4i16__7_7_3_4() { 19610; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_3_4: 19611; GFX900: ; %bb.0: 19612; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19613; GFX900-NEXT: ;;#ASMSTART 19614; GFX900-NEXT: ; def s[4:5] 19615; GFX900-NEXT: ;;#ASMEND 19616; GFX900-NEXT: s_lshr_b32 s4, s5, 16 19617; GFX900-NEXT: ;;#ASMSTART 19618; GFX900-NEXT: ; def s[6:7] 19619; GFX900-NEXT: ;;#ASMEND 19620; GFX900-NEXT: s_pack_ll_b32_b16 s9, s4, s6 19621; GFX900-NEXT: s_pack_hh_b32_b16 s8, s7, s7 19622; GFX900-NEXT: ;;#ASMSTART 19623; GFX900-NEXT: ; use s[8:9] 19624; GFX900-NEXT: ;;#ASMEND 19625; GFX900-NEXT: s_setpc_b64 s[30:31] 19626; 19627; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_3_4: 19628; GFX90A: ; %bb.0: 19629; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19630; GFX90A-NEXT: ;;#ASMSTART 19631; GFX90A-NEXT: ; def s[4:5] 19632; GFX90A-NEXT: ;;#ASMEND 19633; GFX90A-NEXT: s_lshr_b32 s4, s5, 16 19634; GFX90A-NEXT: ;;#ASMSTART 19635; GFX90A-NEXT: ; def s[6:7] 19636; GFX90A-NEXT: 
;;#ASMEND 19637; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s4, s6 19638; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s7, s7 19639; GFX90A-NEXT: ;;#ASMSTART 19640; GFX90A-NEXT: ; use s[8:9] 19641; GFX90A-NEXT: ;;#ASMEND 19642; GFX90A-NEXT: s_setpc_b64 s[30:31] 19643; 19644; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_3_4: 19645; GFX940: ; %bb.0: 19646; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19647; GFX940-NEXT: ;;#ASMSTART 19648; GFX940-NEXT: ; def s[0:1] 19649; GFX940-NEXT: ;;#ASMEND 19650; GFX940-NEXT: s_lshr_b32 s0, s1, 16 19651; GFX940-NEXT: ;;#ASMSTART 19652; GFX940-NEXT: ; def s[2:3] 19653; GFX940-NEXT: ;;#ASMEND 19654; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s2 19655; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3 19656; GFX940-NEXT: ;;#ASMSTART 19657; GFX940-NEXT: ; use s[8:9] 19658; GFX940-NEXT: ;;#ASMEND 19659; GFX940-NEXT: s_setpc_b64 s[30:31] 19660 %vec0 = call <4 x i16> asm "; def $0", "=s"() 19661 %vec1 = call <4 x i16> asm "; def $0", "=s"() 19662 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 3, i32 4> 19663 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 19664 ret void 19665} 19666 19667define void @s_shuffle_v4i16_v4i16__7_7_5_4() { 19668; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_5_4: 19669; GFX900: ; %bb.0: 19670; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19671; GFX900-NEXT: ;;#ASMSTART 19672; GFX900-NEXT: ; def s[4:5] 19673; GFX900-NEXT: ;;#ASMEND 19674; GFX900-NEXT: s_lshr_b32 s6, s4, 16 19675; GFX900-NEXT: s_pack_ll_b32_b16 s9, s6, s4 19676; GFX900-NEXT: s_pack_hh_b32_b16 s8, s5, s5 19677; GFX900-NEXT: ;;#ASMSTART 19678; GFX900-NEXT: ; use s[8:9] 19679; GFX900-NEXT: ;;#ASMEND 19680; GFX900-NEXT: s_setpc_b64 s[30:31] 19681; 19682; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_5_4: 19683; GFX90A: ; %bb.0: 19684; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19685; GFX90A-NEXT: ;;#ASMSTART 19686; GFX90A-NEXT: ; def s[4:5] 19687; GFX90A-NEXT: ;;#ASMEND 19688; GFX90A-NEXT: s_lshr_b32 s6, s4, 16 
19689; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s6, s4 19690; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s5, s5 19691; GFX90A-NEXT: ;;#ASMSTART 19692; GFX90A-NEXT: ; use s[8:9] 19693; GFX90A-NEXT: ;;#ASMEND 19694; GFX90A-NEXT: s_setpc_b64 s[30:31] 19695; 19696; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_5_4: 19697; GFX940: ; %bb.0: 19698; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19699; GFX940-NEXT: ;;#ASMSTART 19700; GFX940-NEXT: ; def s[0:1] 19701; GFX940-NEXT: ;;#ASMEND 19702; GFX940-NEXT: s_lshr_b32 s2, s0, 16 19703; GFX940-NEXT: s_pack_ll_b32_b16 s9, s2, s0 19704; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s1 19705; GFX940-NEXT: ;;#ASMSTART 19706; GFX940-NEXT: ; use s[8:9] 19707; GFX940-NEXT: ;;#ASMEND 19708; GFX940-NEXT: s_setpc_b64 s[30:31] 19709 %vec0 = call <4 x i16> asm "; def $0", "=s"() 19710 %vec1 = call <4 x i16> asm "; def $0", "=s"() 19711 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 5, i32 4> 19712 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 19713 ret void 19714} 19715 19716define void @s_shuffle_v4i16_v4i16__7_7_6_4() { 19717; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_6_4: 19718; GFX900: ; %bb.0: 19719; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19720; GFX900-NEXT: ;;#ASMSTART 19721; GFX900-NEXT: ; def s[4:5] 19722; GFX900-NEXT: ;;#ASMEND 19723; GFX900-NEXT: s_pack_ll_b32_b16 s9, s5, s4 19724; GFX900-NEXT: s_pack_hh_b32_b16 s8, s5, s5 19725; GFX900-NEXT: ;;#ASMSTART 19726; GFX900-NEXT: ; use s[8:9] 19727; GFX900-NEXT: ;;#ASMEND 19728; GFX900-NEXT: s_setpc_b64 s[30:31] 19729; 19730; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_6_4: 19731; GFX90A: ; %bb.0: 19732; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19733; GFX90A-NEXT: ;;#ASMSTART 19734; GFX90A-NEXT: ; def s[4:5] 19735; GFX90A-NEXT: ;;#ASMEND 19736; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s5, s4 19737; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s5, s5 19738; GFX90A-NEXT: ;;#ASMSTART 19739; GFX90A-NEXT: ; use s[8:9] 19740; GFX90A-NEXT: 
;;#ASMEND 19741; GFX90A-NEXT: s_setpc_b64 s[30:31] 19742; 19743; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_6_4: 19744; GFX940: ; %bb.0: 19745; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19746; GFX940-NEXT: ;;#ASMSTART 19747; GFX940-NEXT: ; def s[0:1] 19748; GFX940-NEXT: ;;#ASMEND 19749; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s0 19750; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s1 19751; GFX940-NEXT: ;;#ASMSTART 19752; GFX940-NEXT: ; use s[8:9] 19753; GFX940-NEXT: ;;#ASMEND 19754; GFX940-NEXT: s_setpc_b64 s[30:31] 19755 %vec0 = call <4 x i16> asm "; def $0", "=s"() 19756 %vec1 = call <4 x i16> asm "; def $0", "=s"() 19757 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 4> 19758 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 19759 ret void 19760} 19761 19762define void @s_shuffle_v4i16_v4i16__u_5_5_5() { 19763; GFX9-LABEL: s_shuffle_v4i16_v4i16__u_5_5_5: 19764; GFX9: ; %bb.0: 19765; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19766; GFX9-NEXT: ;;#ASMSTART 19767; GFX9-NEXT: ; def s[8:9] 19768; GFX9-NEXT: ;;#ASMEND 19769; GFX9-NEXT: s_pack_hh_b32_b16 s9, s8, s8 19770; GFX9-NEXT: ;;#ASMSTART 19771; GFX9-NEXT: ; use s[8:9] 19772; GFX9-NEXT: ;;#ASMEND 19773; GFX9-NEXT: s_setpc_b64 s[30:31] 19774 %vec0 = call <4 x i16> asm "; def $0", "=s"() 19775 %vec1 = call <4 x i16> asm "; def $0", "=s"() 19776 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 poison, i32 5, i32 5, i32 5> 19777 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 19778 ret void 19779} 19780 19781define void @s_shuffle_v4i16_v4i16__0_5_5_5() { 19782; GFX900-LABEL: s_shuffle_v4i16_v4i16__0_5_5_5: 19783; GFX900: ; %bb.0: 19784; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19785; GFX900-NEXT: ;;#ASMSTART 19786; GFX900-NEXT: ; def s[4:5] 19787; GFX900-NEXT: ;;#ASMEND 19788; GFX900-NEXT: ;;#ASMSTART 19789; GFX900-NEXT: ; def s[6:7] 19790; GFX900-NEXT: ;;#ASMEND 19791; GFX900-NEXT: 
s_pack_lh_b32_b16 s8, s4, s6 19792; GFX900-NEXT: s_pack_hh_b32_b16 s9, s6, s6 19793; GFX900-NEXT: ;;#ASMSTART 19794; GFX900-NEXT: ; use s[8:9] 19795; GFX900-NEXT: ;;#ASMEND 19796; GFX900-NEXT: s_setpc_b64 s[30:31] 19797; 19798; GFX90A-LABEL: s_shuffle_v4i16_v4i16__0_5_5_5: 19799; GFX90A: ; %bb.0: 19800; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19801; GFX90A-NEXT: ;;#ASMSTART 19802; GFX90A-NEXT: ; def s[4:5] 19803; GFX90A-NEXT: ;;#ASMEND 19804; GFX90A-NEXT: ;;#ASMSTART 19805; GFX90A-NEXT: ; def s[6:7] 19806; GFX90A-NEXT: ;;#ASMEND 19807; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s4, s6 19808; GFX90A-NEXT: s_pack_hh_b32_b16 s9, s6, s6 19809; GFX90A-NEXT: ;;#ASMSTART 19810; GFX90A-NEXT: ; use s[8:9] 19811; GFX90A-NEXT: ;;#ASMEND 19812; GFX90A-NEXT: s_setpc_b64 s[30:31] 19813; 19814; GFX940-LABEL: s_shuffle_v4i16_v4i16__0_5_5_5: 19815; GFX940: ; %bb.0: 19816; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19817; GFX940-NEXT: ;;#ASMSTART 19818; GFX940-NEXT: ; def s[0:1] 19819; GFX940-NEXT: ;;#ASMEND 19820; GFX940-NEXT: ;;#ASMSTART 19821; GFX940-NEXT: ; def s[2:3] 19822; GFX940-NEXT: ;;#ASMEND 19823; GFX940-NEXT: s_pack_lh_b32_b16 s8, s0, s2 19824; GFX940-NEXT: s_pack_hh_b32_b16 s9, s2, s2 19825; GFX940-NEXT: ;;#ASMSTART 19826; GFX940-NEXT: ; use s[8:9] 19827; GFX940-NEXT: ;;#ASMEND 19828; GFX940-NEXT: s_setpc_b64 s[30:31] 19829 %vec0 = call <4 x i16> asm "; def $0", "=s"() 19830 %vec1 = call <4 x i16> asm "; def $0", "=s"() 19831 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 0, i32 5, i32 5, i32 5> 19832 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 19833 ret void 19834} 19835 19836define void @s_shuffle_v4i16_v4i16__1_5_5_5() { 19837; GFX900-LABEL: s_shuffle_v4i16_v4i16__1_5_5_5: 19838; GFX900: ; %bb.0: 19839; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19840; GFX900-NEXT: ;;#ASMSTART 19841; GFX900-NEXT: ; def s[4:5] 19842; GFX900-NEXT: ;;#ASMEND 19843; GFX900-NEXT: ;;#ASMSTART 19844; GFX900-NEXT: ; def 
s[6:7] 19845; GFX900-NEXT: ;;#ASMEND 19846; GFX900-NEXT: s_pack_hh_b32_b16 s8, s4, s6 19847; GFX900-NEXT: s_pack_hh_b32_b16 s9, s6, s6 19848; GFX900-NEXT: ;;#ASMSTART 19849; GFX900-NEXT: ; use s[8:9] 19850; GFX900-NEXT: ;;#ASMEND 19851; GFX900-NEXT: s_setpc_b64 s[30:31] 19852; 19853; GFX90A-LABEL: s_shuffle_v4i16_v4i16__1_5_5_5: 19854; GFX90A: ; %bb.0: 19855; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19856; GFX90A-NEXT: ;;#ASMSTART 19857; GFX90A-NEXT: ; def s[4:5] 19858; GFX90A-NEXT: ;;#ASMEND 19859; GFX90A-NEXT: ;;#ASMSTART 19860; GFX90A-NEXT: ; def s[6:7] 19861; GFX90A-NEXT: ;;#ASMEND 19862; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s4, s6 19863; GFX90A-NEXT: s_pack_hh_b32_b16 s9, s6, s6 19864; GFX90A-NEXT: ;;#ASMSTART 19865; GFX90A-NEXT: ; use s[8:9] 19866; GFX90A-NEXT: ;;#ASMEND 19867; GFX90A-NEXT: s_setpc_b64 s[30:31] 19868; 19869; GFX940-LABEL: s_shuffle_v4i16_v4i16__1_5_5_5: 19870; GFX940: ; %bb.0: 19871; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19872; GFX940-NEXT: ;;#ASMSTART 19873; GFX940-NEXT: ; def s[0:1] 19874; GFX940-NEXT: ;;#ASMEND 19875; GFX940-NEXT: ;;#ASMSTART 19876; GFX940-NEXT: ; def s[2:3] 19877; GFX940-NEXT: ;;#ASMEND 19878; GFX940-NEXT: s_pack_hh_b32_b16 s8, s0, s2 19879; GFX940-NEXT: s_pack_hh_b32_b16 s9, s2, s2 19880; GFX940-NEXT: ;;#ASMSTART 19881; GFX940-NEXT: ; use s[8:9] 19882; GFX940-NEXT: ;;#ASMEND 19883; GFX940-NEXT: s_setpc_b64 s[30:31] 19884 %vec0 = call <4 x i16> asm "; def $0", "=s"() 19885 %vec1 = call <4 x i16> asm "; def $0", "=s"() 19886 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 1, i32 5, i32 5, i32 5> 19887 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 19888 ret void 19889} 19890 19891define void @s_shuffle_v4i16_v4i16__2_5_5_5() { 19892; GFX900-LABEL: s_shuffle_v4i16_v4i16__2_5_5_5: 19893; GFX900: ; %bb.0: 19894; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19895; GFX900-NEXT: ;;#ASMSTART 19896; GFX900-NEXT: ; def s[4:5] 19897; GFX900-NEXT: ;;#ASMEND 
19898; GFX900-NEXT: ;;#ASMSTART 19899; GFX900-NEXT: ; def s[6:7] 19900; GFX900-NEXT: ;;#ASMEND 19901; GFX900-NEXT: s_pack_lh_b32_b16 s8, s5, s6 19902; GFX900-NEXT: s_pack_hh_b32_b16 s9, s6, s6 19903; GFX900-NEXT: ;;#ASMSTART 19904; GFX900-NEXT: ; use s[8:9] 19905; GFX900-NEXT: ;;#ASMEND 19906; GFX900-NEXT: s_setpc_b64 s[30:31] 19907; 19908; GFX90A-LABEL: s_shuffle_v4i16_v4i16__2_5_5_5: 19909; GFX90A: ; %bb.0: 19910; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19911; GFX90A-NEXT: ;;#ASMSTART 19912; GFX90A-NEXT: ; def s[4:5] 19913; GFX90A-NEXT: ;;#ASMEND 19914; GFX90A-NEXT: ;;#ASMSTART 19915; GFX90A-NEXT: ; def s[6:7] 19916; GFX90A-NEXT: ;;#ASMEND 19917; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s5, s6 19918; GFX90A-NEXT: s_pack_hh_b32_b16 s9, s6, s6 19919; GFX90A-NEXT: ;;#ASMSTART 19920; GFX90A-NEXT: ; use s[8:9] 19921; GFX90A-NEXT: ;;#ASMEND 19922; GFX90A-NEXT: s_setpc_b64 s[30:31] 19923; 19924; GFX940-LABEL: s_shuffle_v4i16_v4i16__2_5_5_5: 19925; GFX940: ; %bb.0: 19926; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19927; GFX940-NEXT: ;;#ASMSTART 19928; GFX940-NEXT: ; def s[0:1] 19929; GFX940-NEXT: ;;#ASMEND 19930; GFX940-NEXT: ;;#ASMSTART 19931; GFX940-NEXT: ; def s[2:3] 19932; GFX940-NEXT: ;;#ASMEND 19933; GFX940-NEXT: s_pack_lh_b32_b16 s8, s1, s2 19934; GFX940-NEXT: s_pack_hh_b32_b16 s9, s2, s2 19935; GFX940-NEXT: ;;#ASMSTART 19936; GFX940-NEXT: ; use s[8:9] 19937; GFX940-NEXT: ;;#ASMEND 19938; GFX940-NEXT: s_setpc_b64 s[30:31] 19939 %vec0 = call <4 x i16> asm "; def $0", "=s"() 19940 %vec1 = call <4 x i16> asm "; def $0", "=s"() 19941 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 2, i32 5, i32 5, i32 5> 19942 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 19943 ret void 19944} 19945 19946define void @s_shuffle_v4i16_v4i16__3_5_5_5() { 19947; GFX900-LABEL: s_shuffle_v4i16_v4i16__3_5_5_5: 19948; GFX900: ; %bb.0: 19949; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19950; GFX900-NEXT: ;;#ASMSTART 19951; 
GFX900-NEXT: ; def s[4:5] 19952; GFX900-NEXT: ;;#ASMEND 19953; GFX900-NEXT: ;;#ASMSTART 19954; GFX900-NEXT: ; def s[6:7] 19955; GFX900-NEXT: ;;#ASMEND 19956; GFX900-NEXT: s_pack_hh_b32_b16 s8, s5, s6 19957; GFX900-NEXT: s_pack_hh_b32_b16 s9, s6, s6 19958; GFX900-NEXT: ;;#ASMSTART 19959; GFX900-NEXT: ; use s[8:9] 19960; GFX900-NEXT: ;;#ASMEND 19961; GFX900-NEXT: s_setpc_b64 s[30:31] 19962; 19963; GFX90A-LABEL: s_shuffle_v4i16_v4i16__3_5_5_5: 19964; GFX90A: ; %bb.0: 19965; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19966; GFX90A-NEXT: ;;#ASMSTART 19967; GFX90A-NEXT: ; def s[4:5] 19968; GFX90A-NEXT: ;;#ASMEND 19969; GFX90A-NEXT: ;;#ASMSTART 19970; GFX90A-NEXT: ; def s[6:7] 19971; GFX90A-NEXT: ;;#ASMEND 19972; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s5, s6 19973; GFX90A-NEXT: s_pack_hh_b32_b16 s9, s6, s6 19974; GFX90A-NEXT: ;;#ASMSTART 19975; GFX90A-NEXT: ; use s[8:9] 19976; GFX90A-NEXT: ;;#ASMEND 19977; GFX90A-NEXT: s_setpc_b64 s[30:31] 19978; 19979; GFX940-LABEL: s_shuffle_v4i16_v4i16__3_5_5_5: 19980; GFX940: ; %bb.0: 19981; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19982; GFX940-NEXT: ;;#ASMSTART 19983; GFX940-NEXT: ; def s[0:1] 19984; GFX940-NEXT: ;;#ASMEND 19985; GFX940-NEXT: ;;#ASMSTART 19986; GFX940-NEXT: ; def s[2:3] 19987; GFX940-NEXT: ;;#ASMEND 19988; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s2 19989; GFX940-NEXT: s_pack_hh_b32_b16 s9, s2, s2 19990; GFX940-NEXT: ;;#ASMSTART 19991; GFX940-NEXT: ; use s[8:9] 19992; GFX940-NEXT: ;;#ASMEND 19993; GFX940-NEXT: s_setpc_b64 s[30:31] 19994 %vec0 = call <4 x i16> asm "; def $0", "=s"() 19995 %vec1 = call <4 x i16> asm "; def $0", "=s"() 19996 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 3, i32 5, i32 5, i32 5> 19997 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 19998 ret void 19999} 20000 20001define void @s_shuffle_v4i16_v4i16__4_5_5_5() { 20002; GFX9-LABEL: s_shuffle_v4i16_v4i16__4_5_5_5: 20003; GFX9: ; %bb.0: 20004; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) 20005; GFX9-NEXT: ;;#ASMSTART 20006; GFX9-NEXT: ; def s[8:9] 20007; GFX9-NEXT: ;;#ASMEND 20008; GFX9-NEXT: s_pack_hh_b32_b16 s9, s8, s8 20009; GFX9-NEXT: ;;#ASMSTART 20010; GFX9-NEXT: ; use s[8:9] 20011; GFX9-NEXT: ;;#ASMEND 20012; GFX9-NEXT: s_setpc_b64 s[30:31] 20013 %vec0 = call <4 x i16> asm "; def $0", "=s"() 20014 %vec1 = call <4 x i16> asm "; def $0", "=s"() 20015 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 4, i32 5, i32 5, i32 5> 20016 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 20017 ret void 20018} 20019 20020define void @s_shuffle_v4i16_v4i16__5_5_5_5() { 20021; GFX900-LABEL: s_shuffle_v4i16_v4i16__5_5_5_5: 20022; GFX900: ; %bb.0: 20023; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20024; GFX900-NEXT: ;;#ASMSTART 20025; GFX900-NEXT: ; def s[4:5] 20026; GFX900-NEXT: ;;#ASMEND 20027; GFX900-NEXT: s_pack_hh_b32_b16 s8, s4, s4 20028; GFX900-NEXT: s_mov_b32 s9, s8 20029; GFX900-NEXT: ;;#ASMSTART 20030; GFX900-NEXT: ; use s[8:9] 20031; GFX900-NEXT: ;;#ASMEND 20032; GFX900-NEXT: s_setpc_b64 s[30:31] 20033; 20034; GFX90A-LABEL: s_shuffle_v4i16_v4i16__5_5_5_5: 20035; GFX90A: ; %bb.0: 20036; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20037; GFX90A-NEXT: ;;#ASMSTART 20038; GFX90A-NEXT: ; def s[4:5] 20039; GFX90A-NEXT: ;;#ASMEND 20040; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s4, s4 20041; GFX90A-NEXT: s_mov_b32 s9, s8 20042; GFX90A-NEXT: ;;#ASMSTART 20043; GFX90A-NEXT: ; use s[8:9] 20044; GFX90A-NEXT: ;;#ASMEND 20045; GFX90A-NEXT: s_setpc_b64 s[30:31] 20046; 20047; GFX940-LABEL: s_shuffle_v4i16_v4i16__5_5_5_5: 20048; GFX940: ; %bb.0: 20049; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20050; GFX940-NEXT: ;;#ASMSTART 20051; GFX940-NEXT: ; def s[0:1] 20052; GFX940-NEXT: ;;#ASMEND 20053; GFX940-NEXT: s_pack_hh_b32_b16 s8, s0, s0 20054; GFX940-NEXT: s_mov_b32 s9, s8 20055; GFX940-NEXT: ;;#ASMSTART 20056; GFX940-NEXT: ; use s[8:9] 20057; GFX940-NEXT: ;;#ASMEND 20058; GFX940-NEXT: s_setpc_b64 
s[30:31] 20059 %vec0 = call <4 x i16> asm "; def $0", "=s"() 20060 %vec1 = call <4 x i16> asm "; def $0", "=s"() 20061 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 5, i32 5, i32 5, i32 5> 20062 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 20063 ret void 20064} 20065 20066define void @s_shuffle_v4i16_v4i16__6_5_5_5() { 20067; GFX900-LABEL: s_shuffle_v4i16_v4i16__6_5_5_5: 20068; GFX900: ; %bb.0: 20069; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20070; GFX900-NEXT: ;;#ASMSTART 20071; GFX900-NEXT: ; def s[4:5] 20072; GFX900-NEXT: ;;#ASMEND 20073; GFX900-NEXT: s_pack_lh_b32_b16 s8, s5, s4 20074; GFX900-NEXT: s_pack_hh_b32_b16 s9, s4, s4 20075; GFX900-NEXT: ;;#ASMSTART 20076; GFX900-NEXT: ; use s[8:9] 20077; GFX900-NEXT: ;;#ASMEND 20078; GFX900-NEXT: s_setpc_b64 s[30:31] 20079; 20080; GFX90A-LABEL: s_shuffle_v4i16_v4i16__6_5_5_5: 20081; GFX90A: ; %bb.0: 20082; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20083; GFX90A-NEXT: ;;#ASMSTART 20084; GFX90A-NEXT: ; def s[4:5] 20085; GFX90A-NEXT: ;;#ASMEND 20086; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s5, s4 20087; GFX90A-NEXT: s_pack_hh_b32_b16 s9, s4, s4 20088; GFX90A-NEXT: ;;#ASMSTART 20089; GFX90A-NEXT: ; use s[8:9] 20090; GFX90A-NEXT: ;;#ASMEND 20091; GFX90A-NEXT: s_setpc_b64 s[30:31] 20092; 20093; GFX940-LABEL: s_shuffle_v4i16_v4i16__6_5_5_5: 20094; GFX940: ; %bb.0: 20095; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20096; GFX940-NEXT: ;;#ASMSTART 20097; GFX940-NEXT: ; def s[0:1] 20098; GFX940-NEXT: ;;#ASMEND 20099; GFX940-NEXT: s_pack_lh_b32_b16 s8, s1, s0 20100; GFX940-NEXT: s_pack_hh_b32_b16 s9, s0, s0 20101; GFX940-NEXT: ;;#ASMSTART 20102; GFX940-NEXT: ; use s[8:9] 20103; GFX940-NEXT: ;;#ASMEND 20104; GFX940-NEXT: s_setpc_b64 s[30:31] 20105 %vec0 = call <4 x i16> asm "; def $0", "=s"() 20106 %vec1 = call <4 x i16> asm "; def $0", "=s"() 20107 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 6, i32 5, i32 5, i32 5> 20108 call void 
asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 20109 ret void 20110} 20111 20112define void @s_shuffle_v4i16_v4i16__7_5_5_5() { 20113; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_5_5_5: 20114; GFX900: ; %bb.0: 20115; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20116; GFX900-NEXT: ;;#ASMSTART 20117; GFX900-NEXT: ; def s[4:5] 20118; GFX900-NEXT: ;;#ASMEND 20119; GFX900-NEXT: s_pack_hh_b32_b16 s8, s5, s4 20120; GFX900-NEXT: s_pack_hh_b32_b16 s9, s4, s4 20121; GFX900-NEXT: ;;#ASMSTART 20122; GFX900-NEXT: ; use s[8:9] 20123; GFX900-NEXT: ;;#ASMEND 20124; GFX900-NEXT: s_setpc_b64 s[30:31] 20125; 20126; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_5_5_5: 20127; GFX90A: ; %bb.0: 20128; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20129; GFX90A-NEXT: ;;#ASMSTART 20130; GFX90A-NEXT: ; def s[4:5] 20131; GFX90A-NEXT: ;;#ASMEND 20132; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s5, s4 20133; GFX90A-NEXT: s_pack_hh_b32_b16 s9, s4, s4 20134; GFX90A-NEXT: ;;#ASMSTART 20135; GFX90A-NEXT: ; use s[8:9] 20136; GFX90A-NEXT: ;;#ASMEND 20137; GFX90A-NEXT: s_setpc_b64 s[30:31] 20138; 20139; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_5_5_5: 20140; GFX940: ; %bb.0: 20141; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20142; GFX940-NEXT: ;;#ASMSTART 20143; GFX940-NEXT: ; def s[0:1] 20144; GFX940-NEXT: ;;#ASMEND 20145; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s0 20146; GFX940-NEXT: s_pack_hh_b32_b16 s9, s0, s0 20147; GFX940-NEXT: ;;#ASMSTART 20148; GFX940-NEXT: ; use s[8:9] 20149; GFX940-NEXT: ;;#ASMEND 20150; GFX940-NEXT: s_setpc_b64 s[30:31] 20151 %vec0 = call <4 x i16> asm "; def $0", "=s"() 20152 %vec1 = call <4 x i16> asm "; def $0", "=s"() 20153 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 5, i32 5, i32 5> 20154 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 20155 ret void 20156} 20157 20158define void @s_shuffle_v4i16_v4i16__7_u_5_5() { 20159; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_u_5_5: 20160; GFX900: ; %bb.0: 20161; 
GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20162; GFX900-NEXT: ;;#ASMSTART 20163; GFX900-NEXT: ; def s[4:5] 20164; GFX900-NEXT: ;;#ASMEND 20165; GFX900-NEXT: s_lshr_b32 s8, s5, 16 20166; GFX900-NEXT: s_pack_hh_b32_b16 s9, s4, s4 20167; GFX900-NEXT: ;;#ASMSTART 20168; GFX900-NEXT: ; use s[8:9] 20169; GFX900-NEXT: ;;#ASMEND 20170; GFX900-NEXT: s_setpc_b64 s[30:31] 20171; 20172; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_u_5_5: 20173; GFX90A: ; %bb.0: 20174; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20175; GFX90A-NEXT: ;;#ASMSTART 20176; GFX90A-NEXT: ; def s[4:5] 20177; GFX90A-NEXT: ;;#ASMEND 20178; GFX90A-NEXT: s_lshr_b32 s8, s5, 16 20179; GFX90A-NEXT: s_pack_hh_b32_b16 s9, s4, s4 20180; GFX90A-NEXT: ;;#ASMSTART 20181; GFX90A-NEXT: ; use s[8:9] 20182; GFX90A-NEXT: ;;#ASMEND 20183; GFX90A-NEXT: s_setpc_b64 s[30:31] 20184; 20185; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_u_5_5: 20186; GFX940: ; %bb.0: 20187; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20188; GFX940-NEXT: ;;#ASMSTART 20189; GFX940-NEXT: ; def s[0:1] 20190; GFX940-NEXT: ;;#ASMEND 20191; GFX940-NEXT: s_lshr_b32 s8, s1, 16 20192; GFX940-NEXT: s_pack_hh_b32_b16 s9, s0, s0 20193; GFX940-NEXT: ;;#ASMSTART 20194; GFX940-NEXT: ; use s[8:9] 20195; GFX940-NEXT: ;;#ASMEND 20196; GFX940-NEXT: s_setpc_b64 s[30:31] 20197 %vec0 = call <4 x i16> asm "; def $0", "=s"() 20198 %vec1 = call <4 x i16> asm "; def $0", "=s"() 20199 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 poison, i32 5, i32 5> 20200 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 20201 ret void 20202} 20203 20204define void @s_shuffle_v4i16_v4i16__7_0_5_5() { 20205; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_0_5_5: 20206; GFX900: ; %bb.0: 20207; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20208; GFX900-NEXT: ;;#ASMSTART 20209; GFX900-NEXT: ; def s[4:5] 20210; GFX900-NEXT: ;;#ASMEND 20211; GFX900-NEXT: ;;#ASMSTART 20212; GFX900-NEXT: ; def s[6:7] 20213; GFX900-NEXT: ;;#ASMEND 
20214; GFX900-NEXT: s_lshr_b32 s5, s7, 16 20215; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4 20216; GFX900-NEXT: s_pack_hh_b32_b16 s9, s6, s6 20217; GFX900-NEXT: ;;#ASMSTART 20218; GFX900-NEXT: ; use s[8:9] 20219; GFX900-NEXT: ;;#ASMEND 20220; GFX900-NEXT: s_setpc_b64 s[30:31] 20221; 20222; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_0_5_5: 20223; GFX90A: ; %bb.0: 20224; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20225; GFX90A-NEXT: ;;#ASMSTART 20226; GFX90A-NEXT: ; def s[4:5] 20227; GFX90A-NEXT: ;;#ASMEND 20228; GFX90A-NEXT: ;;#ASMSTART 20229; GFX90A-NEXT: ; def s[6:7] 20230; GFX90A-NEXT: ;;#ASMEND 20231; GFX90A-NEXT: s_lshr_b32 s5, s7, 16 20232; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4 20233; GFX90A-NEXT: s_pack_hh_b32_b16 s9, s6, s6 20234; GFX90A-NEXT: ;;#ASMSTART 20235; GFX90A-NEXT: ; use s[8:9] 20236; GFX90A-NEXT: ;;#ASMEND 20237; GFX90A-NEXT: s_setpc_b64 s[30:31] 20238; 20239; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_0_5_5: 20240; GFX940: ; %bb.0: 20241; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20242; GFX940-NEXT: ;;#ASMSTART 20243; GFX940-NEXT: ; def s[0:1] 20244; GFX940-NEXT: ;;#ASMEND 20245; GFX940-NEXT: ;;#ASMSTART 20246; GFX940-NEXT: ; def s[2:3] 20247; GFX940-NEXT: ;;#ASMEND 20248; GFX940-NEXT: s_lshr_b32 s1, s3, 16 20249; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 20250; GFX940-NEXT: s_pack_hh_b32_b16 s9, s2, s2 20251; GFX940-NEXT: ;;#ASMSTART 20252; GFX940-NEXT: ; use s[8:9] 20253; GFX940-NEXT: ;;#ASMEND 20254; GFX940-NEXT: s_setpc_b64 s[30:31] 20255 %vec0 = call <4 x i16> asm "; def $0", "=s"() 20256 %vec1 = call <4 x i16> asm "; def $0", "=s"() 20257 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 0, i32 5, i32 5> 20258 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 20259 ret void 20260} 20261 20262define void @s_shuffle_v4i16_v4i16__7_1_5_5() { 20263; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_1_5_5: 20264; GFX900: ; %bb.0: 20265; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20266; 
GFX900-NEXT: ;;#ASMSTART 20267; GFX900-NEXT: ; def s[4:5] 20268; GFX900-NEXT: ;;#ASMEND 20269; GFX900-NEXT: ;;#ASMSTART 20270; GFX900-NEXT: ; def s[6:7] 20271; GFX900-NEXT: ;;#ASMEND 20272; GFX900-NEXT: s_pack_hh_b32_b16 s8, s7, s4 20273; GFX900-NEXT: s_pack_hh_b32_b16 s9, s6, s6 20274; GFX900-NEXT: ;;#ASMSTART 20275; GFX900-NEXT: ; use s[8:9] 20276; GFX900-NEXT: ;;#ASMEND 20277; GFX900-NEXT: s_setpc_b64 s[30:31] 20278; 20279; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_1_5_5: 20280; GFX90A: ; %bb.0: 20281; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20282; GFX90A-NEXT: ;;#ASMSTART 20283; GFX90A-NEXT: ; def s[4:5] 20284; GFX90A-NEXT: ;;#ASMEND 20285; GFX90A-NEXT: ;;#ASMSTART 20286; GFX90A-NEXT: ; def s[6:7] 20287; GFX90A-NEXT: ;;#ASMEND 20288; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s7, s4 20289; GFX90A-NEXT: s_pack_hh_b32_b16 s9, s6, s6 20290; GFX90A-NEXT: ;;#ASMSTART 20291; GFX90A-NEXT: ; use s[8:9] 20292; GFX90A-NEXT: ;;#ASMEND 20293; GFX90A-NEXT: s_setpc_b64 s[30:31] 20294; 20295; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_1_5_5: 20296; GFX940: ; %bb.0: 20297; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20298; GFX940-NEXT: ;;#ASMSTART 20299; GFX940-NEXT: ; def s[0:1] 20300; GFX940-NEXT: ;;#ASMEND 20301; GFX940-NEXT: ;;#ASMSTART 20302; GFX940-NEXT: ; def s[2:3] 20303; GFX940-NEXT: ;;#ASMEND 20304; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s0 20305; GFX940-NEXT: s_pack_hh_b32_b16 s9, s2, s2 20306; GFX940-NEXT: ;;#ASMSTART 20307; GFX940-NEXT: ; use s[8:9] 20308; GFX940-NEXT: ;;#ASMEND 20309; GFX940-NEXT: s_setpc_b64 s[30:31] 20310 %vec0 = call <4 x i16> asm "; def $0", "=s"() 20311 %vec1 = call <4 x i16> asm "; def $0", "=s"() 20312 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 1, i32 5, i32 5> 20313 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 20314 ret void 20315} 20316 20317define void @s_shuffle_v4i16_v4i16__7_2_5_5() { 20318; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_2_5_5: 20319; GFX900: ; %bb.0: 20320; 
GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20321; GFX900-NEXT: ;;#ASMSTART 20322; GFX900-NEXT: ; def s[4:5] 20323; GFX900-NEXT: ;;#ASMEND 20324; GFX900-NEXT: ;;#ASMSTART 20325; GFX900-NEXT: ; def s[6:7] 20326; GFX900-NEXT: ;;#ASMEND 20327; GFX900-NEXT: s_lshr_b32 s4, s7, 16 20328; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s5 20329; GFX900-NEXT: s_pack_hh_b32_b16 s9, s6, s6 20330; GFX900-NEXT: ;;#ASMSTART 20331; GFX900-NEXT: ; use s[8:9] 20332; GFX900-NEXT: ;;#ASMEND 20333; GFX900-NEXT: s_setpc_b64 s[30:31] 20334; 20335; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_2_5_5: 20336; GFX90A: ; %bb.0: 20337; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20338; GFX90A-NEXT: ;;#ASMSTART 20339; GFX90A-NEXT: ; def s[4:5] 20340; GFX90A-NEXT: ;;#ASMEND 20341; GFX90A-NEXT: ;;#ASMSTART 20342; GFX90A-NEXT: ; def s[6:7] 20343; GFX90A-NEXT: ;;#ASMEND 20344; GFX90A-NEXT: s_lshr_b32 s4, s7, 16 20345; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s5 20346; GFX90A-NEXT: s_pack_hh_b32_b16 s9, s6, s6 20347; GFX90A-NEXT: ;;#ASMSTART 20348; GFX90A-NEXT: ; use s[8:9] 20349; GFX90A-NEXT: ;;#ASMEND 20350; GFX90A-NEXT: s_setpc_b64 s[30:31] 20351; 20352; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_2_5_5: 20353; GFX940: ; %bb.0: 20354; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20355; GFX940-NEXT: ;;#ASMSTART 20356; GFX940-NEXT: ; def s[0:1] 20357; GFX940-NEXT: ;;#ASMEND 20358; GFX940-NEXT: ;;#ASMSTART 20359; GFX940-NEXT: ; def s[2:3] 20360; GFX940-NEXT: ;;#ASMEND 20361; GFX940-NEXT: s_lshr_b32 s0, s3, 16 20362; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 20363; GFX940-NEXT: s_pack_hh_b32_b16 s9, s2, s2 20364; GFX940-NEXT: ;;#ASMSTART 20365; GFX940-NEXT: ; use s[8:9] 20366; GFX940-NEXT: ;;#ASMEND 20367; GFX940-NEXT: s_setpc_b64 s[30:31] 20368 %vec0 = call <4 x i16> asm "; def $0", "=s"() 20369 %vec1 = call <4 x i16> asm "; def $0", "=s"() 20370 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 2, i32 5, i32 5> 20371 call void asm sideeffect "; use $0", 
"{s[8:9]}"(<4 x i16> %shuf) 20372 ret void 20373} 20374 20375define void @s_shuffle_v4i16_v4i16__7_3_5_5() { 20376; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_3_5_5: 20377; GFX900: ; %bb.0: 20378; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20379; GFX900-NEXT: ;;#ASMSTART 20380; GFX900-NEXT: ; def s[4:5] 20381; GFX900-NEXT: ;;#ASMEND 20382; GFX900-NEXT: ;;#ASMSTART 20383; GFX900-NEXT: ; def s[6:7] 20384; GFX900-NEXT: ;;#ASMEND 20385; GFX900-NEXT: s_pack_hh_b32_b16 s8, s7, s5 20386; GFX900-NEXT: s_pack_hh_b32_b16 s9, s6, s6 20387; GFX900-NEXT: ;;#ASMSTART 20388; GFX900-NEXT: ; use s[8:9] 20389; GFX900-NEXT: ;;#ASMEND 20390; GFX900-NEXT: s_setpc_b64 s[30:31] 20391; 20392; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_3_5_5: 20393; GFX90A: ; %bb.0: 20394; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20395; GFX90A-NEXT: ;;#ASMSTART 20396; GFX90A-NEXT: ; def s[4:5] 20397; GFX90A-NEXT: ;;#ASMEND 20398; GFX90A-NEXT: ;;#ASMSTART 20399; GFX90A-NEXT: ; def s[6:7] 20400; GFX90A-NEXT: ;;#ASMEND 20401; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s7, s5 20402; GFX90A-NEXT: s_pack_hh_b32_b16 s9, s6, s6 20403; GFX90A-NEXT: ;;#ASMSTART 20404; GFX90A-NEXT: ; use s[8:9] 20405; GFX90A-NEXT: ;;#ASMEND 20406; GFX90A-NEXT: s_setpc_b64 s[30:31] 20407; 20408; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_3_5_5: 20409; GFX940: ; %bb.0: 20410; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20411; GFX940-NEXT: ;;#ASMSTART 20412; GFX940-NEXT: ; def s[0:1] 20413; GFX940-NEXT: ;;#ASMEND 20414; GFX940-NEXT: ;;#ASMSTART 20415; GFX940-NEXT: ; def s[2:3] 20416; GFX940-NEXT: ;;#ASMEND 20417; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s1 20418; GFX940-NEXT: s_pack_hh_b32_b16 s9, s2, s2 20419; GFX940-NEXT: ;;#ASMSTART 20420; GFX940-NEXT: ; use s[8:9] 20421; GFX940-NEXT: ;;#ASMEND 20422; GFX940-NEXT: s_setpc_b64 s[30:31] 20423 %vec0 = call <4 x i16> asm "; def $0", "=s"() 20424 %vec1 = call <4 x i16> asm "; def $0", "=s"() 20425 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 3, 
i32 5, i32 5> 20426 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 20427 ret void 20428} 20429 20430define void @s_shuffle_v4i16_v4i16__7_4_5_5() { 20431; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_4_5_5: 20432; GFX900: ; %bb.0: 20433; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20434; GFX900-NEXT: ;;#ASMSTART 20435; GFX900-NEXT: ; def s[4:5] 20436; GFX900-NEXT: ;;#ASMEND 20437; GFX900-NEXT: s_lshr_b32 s5, s5, 16 20438; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4 20439; GFX900-NEXT: s_pack_hh_b32_b16 s9, s4, s4 20440; GFX900-NEXT: ;;#ASMSTART 20441; GFX900-NEXT: ; use s[8:9] 20442; GFX900-NEXT: ;;#ASMEND 20443; GFX900-NEXT: s_setpc_b64 s[30:31] 20444; 20445; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_4_5_5: 20446; GFX90A: ; %bb.0: 20447; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20448; GFX90A-NEXT: ;;#ASMSTART 20449; GFX90A-NEXT: ; def s[4:5] 20450; GFX90A-NEXT: ;;#ASMEND 20451; GFX90A-NEXT: s_lshr_b32 s5, s5, 16 20452; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4 20453; GFX90A-NEXT: s_pack_hh_b32_b16 s9, s4, s4 20454; GFX90A-NEXT: ;;#ASMSTART 20455; GFX90A-NEXT: ; use s[8:9] 20456; GFX90A-NEXT: ;;#ASMEND 20457; GFX90A-NEXT: s_setpc_b64 s[30:31] 20458; 20459; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_4_5_5: 20460; GFX940: ; %bb.0: 20461; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20462; GFX940-NEXT: ;;#ASMSTART 20463; GFX940-NEXT: ; def s[0:1] 20464; GFX940-NEXT: ;;#ASMEND 20465; GFX940-NEXT: s_lshr_b32 s1, s1, 16 20466; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 20467; GFX940-NEXT: s_pack_hh_b32_b16 s9, s0, s0 20468; GFX940-NEXT: ;;#ASMSTART 20469; GFX940-NEXT: ; use s[8:9] 20470; GFX940-NEXT: ;;#ASMEND 20471; GFX940-NEXT: s_setpc_b64 s[30:31] 20472 %vec0 = call <4 x i16> asm "; def $0", "=s"() 20473 %vec1 = call <4 x i16> asm "; def $0", "=s"() 20474 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 4, i32 5, i32 5> 20475 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 20476 ret void 
20477} 20478 20479define void @s_shuffle_v4i16_v4i16__7_6_5_5() { 20480; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_6_5_5: 20481; GFX900: ; %bb.0: 20482; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20483; GFX900-NEXT: ;;#ASMSTART 20484; GFX900-NEXT: ; def s[4:5] 20485; GFX900-NEXT: ;;#ASMEND 20486; GFX900-NEXT: s_lshr_b32 s6, s5, 16 20487; GFX900-NEXT: s_pack_ll_b32_b16 s8, s6, s5 20488; GFX900-NEXT: s_pack_hh_b32_b16 s9, s4, s4 20489; GFX900-NEXT: ;;#ASMSTART 20490; GFX900-NEXT: ; use s[8:9] 20491; GFX900-NEXT: ;;#ASMEND 20492; GFX900-NEXT: s_setpc_b64 s[30:31] 20493; 20494; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_6_5_5: 20495; GFX90A: ; %bb.0: 20496; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20497; GFX90A-NEXT: ;;#ASMSTART 20498; GFX90A-NEXT: ; def s[4:5] 20499; GFX90A-NEXT: ;;#ASMEND 20500; GFX90A-NEXT: s_lshr_b32 s6, s5, 16 20501; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s6, s5 20502; GFX90A-NEXT: s_pack_hh_b32_b16 s9, s4, s4 20503; GFX90A-NEXT: ;;#ASMSTART 20504; GFX90A-NEXT: ; use s[8:9] 20505; GFX90A-NEXT: ;;#ASMEND 20506; GFX90A-NEXT: s_setpc_b64 s[30:31] 20507; 20508; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_6_5_5: 20509; GFX940: ; %bb.0: 20510; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20511; GFX940-NEXT: ;;#ASMSTART 20512; GFX940-NEXT: ; def s[0:1] 20513; GFX940-NEXT: ;;#ASMEND 20514; GFX940-NEXT: s_lshr_b32 s2, s1, 16 20515; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s1 20516; GFX940-NEXT: s_pack_hh_b32_b16 s9, s0, s0 20517; GFX940-NEXT: ;;#ASMSTART 20518; GFX940-NEXT: ; use s[8:9] 20519; GFX940-NEXT: ;;#ASMEND 20520; GFX940-NEXT: s_setpc_b64 s[30:31] 20521 %vec0 = call <4 x i16> asm "; def $0", "=s"() 20522 %vec1 = call <4 x i16> asm "; def $0", "=s"() 20523 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 6, i32 5, i32 5> 20524 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 20525 ret void 20526} 20527 20528define void @s_shuffle_v4i16_v4i16__7_7_5_5() { 20529; GFX900-LABEL: 
s_shuffle_v4i16_v4i16__7_7_5_5: 20530; GFX900: ; %bb.0: 20531; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20532; GFX900-NEXT: ;;#ASMSTART 20533; GFX900-NEXT: ; def s[4:5] 20534; GFX900-NEXT: ;;#ASMEND 20535; GFX900-NEXT: s_pack_hh_b32_b16 s9, s4, s4 20536; GFX900-NEXT: s_pack_hh_b32_b16 s8, s5, s5 20537; GFX900-NEXT: ;;#ASMSTART 20538; GFX900-NEXT: ; use s[8:9] 20539; GFX900-NEXT: ;;#ASMEND 20540; GFX900-NEXT: s_setpc_b64 s[30:31] 20541; 20542; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_5_5: 20543; GFX90A: ; %bb.0: 20544; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20545; GFX90A-NEXT: ;;#ASMSTART 20546; GFX90A-NEXT: ; def s[4:5] 20547; GFX90A-NEXT: ;;#ASMEND 20548; GFX90A-NEXT: s_pack_hh_b32_b16 s9, s4, s4 20549; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s5, s5 20550; GFX90A-NEXT: ;;#ASMSTART 20551; GFX90A-NEXT: ; use s[8:9] 20552; GFX90A-NEXT: ;;#ASMEND 20553; GFX90A-NEXT: s_setpc_b64 s[30:31] 20554; 20555; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_5_5: 20556; GFX940: ; %bb.0: 20557; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20558; GFX940-NEXT: ;;#ASMSTART 20559; GFX940-NEXT: ; def s[0:1] 20560; GFX940-NEXT: ;;#ASMEND 20561; GFX940-NEXT: s_pack_hh_b32_b16 s9, s0, s0 20562; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s1 20563; GFX940-NEXT: ;;#ASMSTART 20564; GFX940-NEXT: ; use s[8:9] 20565; GFX940-NEXT: ;;#ASMEND 20566; GFX940-NEXT: s_setpc_b64 s[30:31] 20567 %vec0 = call <4 x i16> asm "; def $0", "=s"() 20568 %vec1 = call <4 x i16> asm "; def $0", "=s"() 20569 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 5, i32 5> 20570 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 20571 ret void 20572} 20573 20574define void @s_shuffle_v4i16_v4i16__7_7_u_5() { 20575; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_u_5: 20576; GFX900: ; %bb.0: 20577; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20578; GFX900-NEXT: ;;#ASMSTART 20579; GFX900-NEXT: ; def s[4:5] 20580; GFX900-NEXT: ;;#ASMEND 20581; GFX900-NEXT: 
s_pack_hh_b32_b16 s8, s5, s5 20582; GFX900-NEXT: s_mov_b32 s9, s4 20583; GFX900-NEXT: ;;#ASMSTART 20584; GFX900-NEXT: ; use s[8:9] 20585; GFX900-NEXT: ;;#ASMEND 20586; GFX900-NEXT: s_setpc_b64 s[30:31] 20587; 20588; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_u_5: 20589; GFX90A: ; %bb.0: 20590; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20591; GFX90A-NEXT: ;;#ASMSTART 20592; GFX90A-NEXT: ; def s[4:5] 20593; GFX90A-NEXT: ;;#ASMEND 20594; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s5, s5 20595; GFX90A-NEXT: s_mov_b32 s9, s4 20596; GFX90A-NEXT: ;;#ASMSTART 20597; GFX90A-NEXT: ; use s[8:9] 20598; GFX90A-NEXT: ;;#ASMEND 20599; GFX90A-NEXT: s_setpc_b64 s[30:31] 20600; 20601; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_u_5: 20602; GFX940: ; %bb.0: 20603; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20604; GFX940-NEXT: ;;#ASMSTART 20605; GFX940-NEXT: ; def s[0:1] 20606; GFX940-NEXT: ;;#ASMEND 20607; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s1 20608; GFX940-NEXT: s_mov_b32 s9, s0 20609; GFX940-NEXT: ;;#ASMSTART 20610; GFX940-NEXT: ; use s[8:9] 20611; GFX940-NEXT: ;;#ASMEND 20612; GFX940-NEXT: s_setpc_b64 s[30:31] 20613 %vec0 = call <4 x i16> asm "; def $0", "=s"() 20614 %vec1 = call <4 x i16> asm "; def $0", "=s"() 20615 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 poison, i32 5> 20616 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 20617 ret void 20618} 20619 20620define void @s_shuffle_v4i16_v4i16__7_7_0_5() { 20621; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_0_5: 20622; GFX900: ; %bb.0: 20623; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20624; GFX900-NEXT: ;;#ASMSTART 20625; GFX900-NEXT: ; def s[4:5] 20626; GFX900-NEXT: ;;#ASMEND 20627; GFX900-NEXT: ;;#ASMSTART 20628; GFX900-NEXT: ; def s[6:7] 20629; GFX900-NEXT: ;;#ASMEND 20630; GFX900-NEXT: s_pack_lh_b32_b16 s9, s4, s6 20631; GFX900-NEXT: s_pack_hh_b32_b16 s8, s7, s7 20632; GFX900-NEXT: ;;#ASMSTART 20633; GFX900-NEXT: ; use s[8:9] 20634; GFX900-NEXT: 
;;#ASMEND 20635; GFX900-NEXT: s_setpc_b64 s[30:31] 20636; 20637; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_0_5: 20638; GFX90A: ; %bb.0: 20639; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20640; GFX90A-NEXT: ;;#ASMSTART 20641; GFX90A-NEXT: ; def s[4:5] 20642; GFX90A-NEXT: ;;#ASMEND 20643; GFX90A-NEXT: ;;#ASMSTART 20644; GFX90A-NEXT: ; def s[6:7] 20645; GFX90A-NEXT: ;;#ASMEND 20646; GFX90A-NEXT: s_pack_lh_b32_b16 s9, s4, s6 20647; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s7, s7 20648; GFX90A-NEXT: ;;#ASMSTART 20649; GFX90A-NEXT: ; use s[8:9] 20650; GFX90A-NEXT: ;;#ASMEND 20651; GFX90A-NEXT: s_setpc_b64 s[30:31] 20652; 20653; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_0_5: 20654; GFX940: ; %bb.0: 20655; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20656; GFX940-NEXT: ;;#ASMSTART 20657; GFX940-NEXT: ; def s[0:1] 20658; GFX940-NEXT: ;;#ASMEND 20659; GFX940-NEXT: ;;#ASMSTART 20660; GFX940-NEXT: ; def s[2:3] 20661; GFX940-NEXT: ;;#ASMEND 20662; GFX940-NEXT: s_pack_lh_b32_b16 s9, s0, s2 20663; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3 20664; GFX940-NEXT: ;;#ASMSTART 20665; GFX940-NEXT: ; use s[8:9] 20666; GFX940-NEXT: ;;#ASMEND 20667; GFX940-NEXT: s_setpc_b64 s[30:31] 20668 %vec0 = call <4 x i16> asm "; def $0", "=s"() 20669 %vec1 = call <4 x i16> asm "; def $0", "=s"() 20670 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 0, i32 5> 20671 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 20672 ret void 20673} 20674 20675define void @s_shuffle_v4i16_v4i16__7_7_1_5() { 20676; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_1_5: 20677; GFX900: ; %bb.0: 20678; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20679; GFX900-NEXT: ;;#ASMSTART 20680; GFX900-NEXT: ; def s[4:5] 20681; GFX900-NEXT: ;;#ASMEND 20682; GFX900-NEXT: ;;#ASMSTART 20683; GFX900-NEXT: ; def s[6:7] 20684; GFX900-NEXT: ;;#ASMEND 20685; GFX900-NEXT: s_pack_hh_b32_b16 s9, s4, s6 20686; GFX900-NEXT: s_pack_hh_b32_b16 s8, s7, s7 20687; GFX900-NEXT: 
;;#ASMSTART 20688; GFX900-NEXT: ; use s[8:9] 20689; GFX900-NEXT: ;;#ASMEND 20690; GFX900-NEXT: s_setpc_b64 s[30:31] 20691; 20692; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_1_5: 20693; GFX90A: ; %bb.0: 20694; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20695; GFX90A-NEXT: ;;#ASMSTART 20696; GFX90A-NEXT: ; def s[4:5] 20697; GFX90A-NEXT: ;;#ASMEND 20698; GFX90A-NEXT: ;;#ASMSTART 20699; GFX90A-NEXT: ; def s[6:7] 20700; GFX90A-NEXT: ;;#ASMEND 20701; GFX90A-NEXT: s_pack_hh_b32_b16 s9, s4, s6 20702; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s7, s7 20703; GFX90A-NEXT: ;;#ASMSTART 20704; GFX90A-NEXT: ; use s[8:9] 20705; GFX90A-NEXT: ;;#ASMEND 20706; GFX90A-NEXT: s_setpc_b64 s[30:31] 20707; 20708; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_1_5: 20709; GFX940: ; %bb.0: 20710; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20711; GFX940-NEXT: ;;#ASMSTART 20712; GFX940-NEXT: ; def s[0:1] 20713; GFX940-NEXT: ;;#ASMEND 20714; GFX940-NEXT: ;;#ASMSTART 20715; GFX940-NEXT: ; def s[2:3] 20716; GFX940-NEXT: ;;#ASMEND 20717; GFX940-NEXT: s_pack_hh_b32_b16 s9, s0, s2 20718; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3 20719; GFX940-NEXT: ;;#ASMSTART 20720; GFX940-NEXT: ; use s[8:9] 20721; GFX940-NEXT: ;;#ASMEND 20722; GFX940-NEXT: s_setpc_b64 s[30:31] 20723 %vec0 = call <4 x i16> asm "; def $0", "=s"() 20724 %vec1 = call <4 x i16> asm "; def $0", "=s"() 20725 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 1, i32 5> 20726 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 20727 ret void 20728} 20729 20730define void @s_shuffle_v4i16_v4i16__7_7_2_5() { 20731; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_2_5: 20732; GFX900: ; %bb.0: 20733; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20734; GFX900-NEXT: ;;#ASMSTART 20735; GFX900-NEXT: ; def s[4:5] 20736; GFX900-NEXT: ;;#ASMEND 20737; GFX900-NEXT: ;;#ASMSTART 20738; GFX900-NEXT: ; def s[6:7] 20739; GFX900-NEXT: ;;#ASMEND 20740; GFX900-NEXT: s_pack_lh_b32_b16 s9, s5, s6 20741; 
GFX900-NEXT: s_pack_hh_b32_b16 s8, s7, s7 20742; GFX900-NEXT: ;;#ASMSTART 20743; GFX900-NEXT: ; use s[8:9] 20744; GFX900-NEXT: ;;#ASMEND 20745; GFX900-NEXT: s_setpc_b64 s[30:31] 20746; 20747; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_2_5: 20748; GFX90A: ; %bb.0: 20749; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20750; GFX90A-NEXT: ;;#ASMSTART 20751; GFX90A-NEXT: ; def s[4:5] 20752; GFX90A-NEXT: ;;#ASMEND 20753; GFX90A-NEXT: ;;#ASMSTART 20754; GFX90A-NEXT: ; def s[6:7] 20755; GFX90A-NEXT: ;;#ASMEND 20756; GFX90A-NEXT: s_pack_lh_b32_b16 s9, s5, s6 20757; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s7, s7 20758; GFX90A-NEXT: ;;#ASMSTART 20759; GFX90A-NEXT: ; use s[8:9] 20760; GFX90A-NEXT: ;;#ASMEND 20761; GFX90A-NEXT: s_setpc_b64 s[30:31] 20762; 20763; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_2_5: 20764; GFX940: ; %bb.0: 20765; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20766; GFX940-NEXT: ;;#ASMSTART 20767; GFX940-NEXT: ; def s[0:1] 20768; GFX940-NEXT: ;;#ASMEND 20769; GFX940-NEXT: ;;#ASMSTART 20770; GFX940-NEXT: ; def s[2:3] 20771; GFX940-NEXT: ;;#ASMEND 20772; GFX940-NEXT: s_pack_lh_b32_b16 s9, s1, s2 20773; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3 20774; GFX940-NEXT: ;;#ASMSTART 20775; GFX940-NEXT: ; use s[8:9] 20776; GFX940-NEXT: ;;#ASMEND 20777; GFX940-NEXT: s_setpc_b64 s[30:31] 20778 %vec0 = call <4 x i16> asm "; def $0", "=s"() 20779 %vec1 = call <4 x i16> asm "; def $0", "=s"() 20780 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 2, i32 5> 20781 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 20782 ret void 20783} 20784 20785define void @s_shuffle_v4i16_v4i16__7_7_3_5() { 20786; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_3_5: 20787; GFX900: ; %bb.0: 20788; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20789; GFX900-NEXT: ;;#ASMSTART 20790; GFX900-NEXT: ; def s[4:5] 20791; GFX900-NEXT: ;;#ASMEND 20792; GFX900-NEXT: ;;#ASMSTART 20793; GFX900-NEXT: ; def s[6:7] 20794; GFX900-NEXT: ;;#ASMEND 
20795; GFX900-NEXT: s_pack_hh_b32_b16 s9, s5, s6 20796; GFX900-NEXT: s_pack_hh_b32_b16 s8, s7, s7 20797; GFX900-NEXT: ;;#ASMSTART 20798; GFX900-NEXT: ; use s[8:9] 20799; GFX900-NEXT: ;;#ASMEND 20800; GFX900-NEXT: s_setpc_b64 s[30:31] 20801; 20802; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_3_5: 20803; GFX90A: ; %bb.0: 20804; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20805; GFX90A-NEXT: ;;#ASMSTART 20806; GFX90A-NEXT: ; def s[4:5] 20807; GFX90A-NEXT: ;;#ASMEND 20808; GFX90A-NEXT: ;;#ASMSTART 20809; GFX90A-NEXT: ; def s[6:7] 20810; GFX90A-NEXT: ;;#ASMEND 20811; GFX90A-NEXT: s_pack_hh_b32_b16 s9, s5, s6 20812; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s7, s7 20813; GFX90A-NEXT: ;;#ASMSTART 20814; GFX90A-NEXT: ; use s[8:9] 20815; GFX90A-NEXT: ;;#ASMEND 20816; GFX90A-NEXT: s_setpc_b64 s[30:31] 20817; 20818; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_3_5: 20819; GFX940: ; %bb.0: 20820; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20821; GFX940-NEXT: ;;#ASMSTART 20822; GFX940-NEXT: ; def s[0:1] 20823; GFX940-NEXT: ;;#ASMEND 20824; GFX940-NEXT: ;;#ASMSTART 20825; GFX940-NEXT: ; def s[2:3] 20826; GFX940-NEXT: ;;#ASMEND 20827; GFX940-NEXT: s_pack_hh_b32_b16 s9, s1, s2 20828; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3 20829; GFX940-NEXT: ;;#ASMSTART 20830; GFX940-NEXT: ; use s[8:9] 20831; GFX940-NEXT: ;;#ASMEND 20832; GFX940-NEXT: s_setpc_b64 s[30:31] 20833 %vec0 = call <4 x i16> asm "; def $0", "=s"() 20834 %vec1 = call <4 x i16> asm "; def $0", "=s"() 20835 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 3, i32 5> 20836 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 20837 ret void 20838} 20839 20840define void @s_shuffle_v4i16_v4i16__7_7_4_5() { 20841; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_4_5: 20842; GFX900: ; %bb.0: 20843; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20844; GFX900-NEXT: ;;#ASMSTART 20845; GFX900-NEXT: ; def s[4:5] 20846; GFX900-NEXT: ;;#ASMEND 20847; GFX900-NEXT: s_pack_hh_b32_b16 
s8, s5, s5 20848; GFX900-NEXT: s_mov_b32 s9, s4 20849; GFX900-NEXT: ;;#ASMSTART 20850; GFX900-NEXT: ; use s[8:9] 20851; GFX900-NEXT: ;;#ASMEND 20852; GFX900-NEXT: s_setpc_b64 s[30:31] 20853; 20854; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_4_5: 20855; GFX90A: ; %bb.0: 20856; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20857; GFX90A-NEXT: ;;#ASMSTART 20858; GFX90A-NEXT: ; def s[4:5] 20859; GFX90A-NEXT: ;;#ASMEND 20860; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s5, s5 20861; GFX90A-NEXT: s_mov_b32 s9, s4 20862; GFX90A-NEXT: ;;#ASMSTART 20863; GFX90A-NEXT: ; use s[8:9] 20864; GFX90A-NEXT: ;;#ASMEND 20865; GFX90A-NEXT: s_setpc_b64 s[30:31] 20866; 20867; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_4_5: 20868; GFX940: ; %bb.0: 20869; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20870; GFX940-NEXT: ;;#ASMSTART 20871; GFX940-NEXT: ; def s[0:1] 20872; GFX940-NEXT: ;;#ASMEND 20873; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s1 20874; GFX940-NEXT: s_mov_b32 s9, s0 20875; GFX940-NEXT: ;;#ASMSTART 20876; GFX940-NEXT: ; use s[8:9] 20877; GFX940-NEXT: ;;#ASMEND 20878; GFX940-NEXT: s_setpc_b64 s[30:31] 20879 %vec0 = call <4 x i16> asm "; def $0", "=s"() 20880 %vec1 = call <4 x i16> asm "; def $0", "=s"() 20881 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 4, i32 5> 20882 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 20883 ret void 20884} 20885 20886define void @s_shuffle_v4i16_v4i16__7_7_6_5() { 20887; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_6_5: 20888; GFX900: ; %bb.0: 20889; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20890; GFX900-NEXT: ;;#ASMSTART 20891; GFX900-NEXT: ; def s[4:5] 20892; GFX900-NEXT: ;;#ASMEND 20893; GFX900-NEXT: s_pack_lh_b32_b16 s9, s5, s4 20894; GFX900-NEXT: s_pack_hh_b32_b16 s8, s5, s5 20895; GFX900-NEXT: ;;#ASMSTART 20896; GFX900-NEXT: ; use s[8:9] 20897; GFX900-NEXT: ;;#ASMEND 20898; GFX900-NEXT: s_setpc_b64 s[30:31] 20899; 20900; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_6_5: 20901; 
GFX90A: ; %bb.0: 20902; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20903; GFX90A-NEXT: ;;#ASMSTART 20904; GFX90A-NEXT: ; def s[4:5] 20905; GFX90A-NEXT: ;;#ASMEND 20906; GFX90A-NEXT: s_pack_lh_b32_b16 s9, s5, s4 20907; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s5, s5 20908; GFX90A-NEXT: ;;#ASMSTART 20909; GFX90A-NEXT: ; use s[8:9] 20910; GFX90A-NEXT: ;;#ASMEND 20911; GFX90A-NEXT: s_setpc_b64 s[30:31] 20912; 20913; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_6_5: 20914; GFX940: ; %bb.0: 20915; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20916; GFX940-NEXT: ;;#ASMSTART 20917; GFX940-NEXT: ; def s[0:1] 20918; GFX940-NEXT: ;;#ASMEND 20919; GFX940-NEXT: s_pack_lh_b32_b16 s9, s1, s0 20920; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s1 20921; GFX940-NEXT: ;;#ASMSTART 20922; GFX940-NEXT: ; use s[8:9] 20923; GFX940-NEXT: ;;#ASMEND 20924; GFX940-NEXT: s_setpc_b64 s[30:31] 20925 %vec0 = call <4 x i16> asm "; def $0", "=s"() 20926 %vec1 = call <4 x i16> asm "; def $0", "=s"() 20927 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 5> 20928 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 20929 ret void 20930} 20931 20932define void @s_shuffle_v4i16_v4i16__u_6_6_6() { 20933; GFX900-LABEL: s_shuffle_v4i16_v4i16__u_6_6_6: 20934; GFX900: ; %bb.0: 20935; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20936; GFX900-NEXT: ;;#ASMSTART 20937; GFX900-NEXT: ; def s[4:5] 20938; GFX900-NEXT: ;;#ASMEND 20939; GFX900-NEXT: s_pack_ll_b32_b16 s9, s5, s5 20940; GFX900-NEXT: s_lshl_b32 s8, s5, 16 20941; GFX900-NEXT: ;;#ASMSTART 20942; GFX900-NEXT: ; use s[8:9] 20943; GFX900-NEXT: ;;#ASMEND 20944; GFX900-NEXT: s_setpc_b64 s[30:31] 20945; 20946; GFX90A-LABEL: s_shuffle_v4i16_v4i16__u_6_6_6: 20947; GFX90A: ; %bb.0: 20948; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20949; GFX90A-NEXT: ;;#ASMSTART 20950; GFX90A-NEXT: ; def s[4:5] 20951; GFX90A-NEXT: ;;#ASMEND 20952; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s5, s5 20953; 
GFX90A-NEXT: s_lshl_b32 s8, s5, 16 20954; GFX90A-NEXT: ;;#ASMSTART 20955; GFX90A-NEXT: ; use s[8:9] 20956; GFX90A-NEXT: ;;#ASMEND 20957; GFX90A-NEXT: s_setpc_b64 s[30:31] 20958; 20959; GFX940-LABEL: s_shuffle_v4i16_v4i16__u_6_6_6: 20960; GFX940: ; %bb.0: 20961; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20962; GFX940-NEXT: ;;#ASMSTART 20963; GFX940-NEXT: ; def s[0:1] 20964; GFX940-NEXT: ;;#ASMEND 20965; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 20966; GFX940-NEXT: s_lshl_b32 s8, s1, 16 20967; GFX940-NEXT: ;;#ASMSTART 20968; GFX940-NEXT: ; use s[8:9] 20969; GFX940-NEXT: ;;#ASMEND 20970; GFX940-NEXT: s_setpc_b64 s[30:31] 20971 %vec0 = call <4 x i16> asm "; def $0", "=s"() 20972 %vec1 = call <4 x i16> asm "; def $0", "=s"() 20973 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 poison, i32 6, i32 6, i32 6> 20974 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 20975 ret void 20976} 20977 20978define void @s_shuffle_v4i16_v4i16__0_6_6_6() { 20979; GFX900-LABEL: s_shuffle_v4i16_v4i16__0_6_6_6: 20980; GFX900: ; %bb.0: 20981; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20982; GFX900-NEXT: ;;#ASMSTART 20983; GFX900-NEXT: ; def s[4:5] 20984; GFX900-NEXT: ;;#ASMEND 20985; GFX900-NEXT: ;;#ASMSTART 20986; GFX900-NEXT: ; def s[6:7] 20987; GFX900-NEXT: ;;#ASMEND 20988; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s7 20989; GFX900-NEXT: s_pack_ll_b32_b16 s9, s7, s7 20990; GFX900-NEXT: ;;#ASMSTART 20991; GFX900-NEXT: ; use s[8:9] 20992; GFX900-NEXT: ;;#ASMEND 20993; GFX900-NEXT: s_setpc_b64 s[30:31] 20994; 20995; GFX90A-LABEL: s_shuffle_v4i16_v4i16__0_6_6_6: 20996; GFX90A: ; %bb.0: 20997; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20998; GFX90A-NEXT: ;;#ASMSTART 20999; GFX90A-NEXT: ; def s[4:5] 21000; GFX90A-NEXT: ;;#ASMEND 21001; GFX90A-NEXT: ;;#ASMSTART 21002; GFX90A-NEXT: ; def s[6:7] 21003; GFX90A-NEXT: ;;#ASMEND 21004; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s7 21005; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s7, s7 21006; 
GFX90A-NEXT: ;;#ASMSTART 21007; GFX90A-NEXT: ; use s[8:9] 21008; GFX90A-NEXT: ;;#ASMEND 21009; GFX90A-NEXT: s_setpc_b64 s[30:31] 21010; 21011; GFX940-LABEL: s_shuffle_v4i16_v4i16__0_6_6_6: 21012; GFX940: ; %bb.0: 21013; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21014; GFX940-NEXT: ;;#ASMSTART 21015; GFX940-NEXT: ; def s[0:1] 21016; GFX940-NEXT: ;;#ASMEND 21017; GFX940-NEXT: ;;#ASMSTART 21018; GFX940-NEXT: ; def s[2:3] 21019; GFX940-NEXT: ;;#ASMEND 21020; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3 21021; GFX940-NEXT: s_pack_ll_b32_b16 s9, s3, s3 21022; GFX940-NEXT: ;;#ASMSTART 21023; GFX940-NEXT: ; use s[8:9] 21024; GFX940-NEXT: ;;#ASMEND 21025; GFX940-NEXT: s_setpc_b64 s[30:31] 21026 %vec0 = call <4 x i16> asm "; def $0", "=s"() 21027 %vec1 = call <4 x i16> asm "; def $0", "=s"() 21028 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 0, i32 6, i32 6, i32 6> 21029 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 21030 ret void 21031} 21032 21033define void @s_shuffle_v4i16_v4i16__1_6_6_6() { 21034; GFX900-LABEL: s_shuffle_v4i16_v4i16__1_6_6_6: 21035; GFX900: ; %bb.0: 21036; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21037; GFX900-NEXT: ;;#ASMSTART 21038; GFX900-NEXT: ; def s[4:5] 21039; GFX900-NEXT: ;;#ASMEND 21040; GFX900-NEXT: s_lshr_b32 s4, s4, 16 21041; GFX900-NEXT: ;;#ASMSTART 21042; GFX900-NEXT: ; def s[6:7] 21043; GFX900-NEXT: ;;#ASMEND 21044; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s7 21045; GFX900-NEXT: s_pack_ll_b32_b16 s9, s7, s7 21046; GFX900-NEXT: ;;#ASMSTART 21047; GFX900-NEXT: ; use s[8:9] 21048; GFX900-NEXT: ;;#ASMEND 21049; GFX900-NEXT: s_setpc_b64 s[30:31] 21050; 21051; GFX90A-LABEL: s_shuffle_v4i16_v4i16__1_6_6_6: 21052; GFX90A: ; %bb.0: 21053; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21054; GFX90A-NEXT: ;;#ASMSTART 21055; GFX90A-NEXT: ; def s[4:5] 21056; GFX90A-NEXT: ;;#ASMEND 21057; GFX90A-NEXT: s_lshr_b32 s4, s4, 16 21058; GFX90A-NEXT: ;;#ASMSTART 21059; GFX90A-NEXT: ; def 
s[6:7] 21060; GFX90A-NEXT: ;;#ASMEND 21061; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s7 21062; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s7, s7 21063; GFX90A-NEXT: ;;#ASMSTART 21064; GFX90A-NEXT: ; use s[8:9] 21065; GFX90A-NEXT: ;;#ASMEND 21066; GFX90A-NEXT: s_setpc_b64 s[30:31] 21067; 21068; GFX940-LABEL: s_shuffle_v4i16_v4i16__1_6_6_6: 21069; GFX940: ; %bb.0: 21070; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21071; GFX940-NEXT: ;;#ASMSTART 21072; GFX940-NEXT: ; def s[0:1] 21073; GFX940-NEXT: ;;#ASMEND 21074; GFX940-NEXT: s_lshr_b32 s0, s0, 16 21075; GFX940-NEXT: ;;#ASMSTART 21076; GFX940-NEXT: ; def s[2:3] 21077; GFX940-NEXT: ;;#ASMEND 21078; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3 21079; GFX940-NEXT: s_pack_ll_b32_b16 s9, s3, s3 21080; GFX940-NEXT: ;;#ASMSTART 21081; GFX940-NEXT: ; use s[8:9] 21082; GFX940-NEXT: ;;#ASMEND 21083; GFX940-NEXT: s_setpc_b64 s[30:31] 21084 %vec0 = call <4 x i16> asm "; def $0", "=s"() 21085 %vec1 = call <4 x i16> asm "; def $0", "=s"() 21086 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 1, i32 6, i32 6, i32 6> 21087 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 21088 ret void 21089} 21090 21091define void @s_shuffle_v4i16_v4i16__2_6_6_6() { 21092; GFX900-LABEL: s_shuffle_v4i16_v4i16__2_6_6_6: 21093; GFX900: ; %bb.0: 21094; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21095; GFX900-NEXT: ;;#ASMSTART 21096; GFX900-NEXT: ; def s[4:5] 21097; GFX900-NEXT: ;;#ASMEND 21098; GFX900-NEXT: ;;#ASMSTART 21099; GFX900-NEXT: ; def s[6:7] 21100; GFX900-NEXT: ;;#ASMEND 21101; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s7 21102; GFX900-NEXT: s_pack_ll_b32_b16 s9, s7, s7 21103; GFX900-NEXT: ;;#ASMSTART 21104; GFX900-NEXT: ; use s[8:9] 21105; GFX900-NEXT: ;;#ASMEND 21106; GFX900-NEXT: s_setpc_b64 s[30:31] 21107; 21108; GFX90A-LABEL: s_shuffle_v4i16_v4i16__2_6_6_6: 21109; GFX90A: ; %bb.0: 21110; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21111; GFX90A-NEXT: ;;#ASMSTART 21112; GFX90A-NEXT: ; 
def s[4:5] 21113; GFX90A-NEXT: ;;#ASMEND 21114; GFX90A-NEXT: ;;#ASMSTART 21115; GFX90A-NEXT: ; def s[6:7] 21116; GFX90A-NEXT: ;;#ASMEND 21117; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s7 21118; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s7, s7 21119; GFX90A-NEXT: ;;#ASMSTART 21120; GFX90A-NEXT: ; use s[8:9] 21121; GFX90A-NEXT: ;;#ASMEND 21122; GFX90A-NEXT: s_setpc_b64 s[30:31] 21123; 21124; GFX940-LABEL: s_shuffle_v4i16_v4i16__2_6_6_6: 21125; GFX940: ; %bb.0: 21126; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21127; GFX940-NEXT: ;;#ASMSTART 21128; GFX940-NEXT: ; def s[0:1] 21129; GFX940-NEXT: ;;#ASMEND 21130; GFX940-NEXT: ;;#ASMSTART 21131; GFX940-NEXT: ; def s[2:3] 21132; GFX940-NEXT: ;;#ASMEND 21133; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s3 21134; GFX940-NEXT: s_pack_ll_b32_b16 s9, s3, s3 21135; GFX940-NEXT: ;;#ASMSTART 21136; GFX940-NEXT: ; use s[8:9] 21137; GFX940-NEXT: ;;#ASMEND 21138; GFX940-NEXT: s_setpc_b64 s[30:31] 21139 %vec0 = call <4 x i16> asm "; def $0", "=s"() 21140 %vec1 = call <4 x i16> asm "; def $0", "=s"() 21141 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 2, i32 6, i32 6, i32 6> 21142 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 21143 ret void 21144} 21145 21146define void @s_shuffle_v4i16_v4i16__3_6_6_6() { 21147; GFX900-LABEL: s_shuffle_v4i16_v4i16__3_6_6_6: 21148; GFX900: ; %bb.0: 21149; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21150; GFX900-NEXT: ;;#ASMSTART 21151; GFX900-NEXT: ; def s[4:5] 21152; GFX900-NEXT: ;;#ASMEND 21153; GFX900-NEXT: s_lshr_b32 s4, s5, 16 21154; GFX900-NEXT: ;;#ASMSTART 21155; GFX900-NEXT: ; def s[6:7] 21156; GFX900-NEXT: ;;#ASMEND 21157; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s7 21158; GFX900-NEXT: s_pack_ll_b32_b16 s9, s7, s7 21159; GFX900-NEXT: ;;#ASMSTART 21160; GFX900-NEXT: ; use s[8:9] 21161; GFX900-NEXT: ;;#ASMEND 21162; GFX900-NEXT: s_setpc_b64 s[30:31] 21163; 21164; GFX90A-LABEL: s_shuffle_v4i16_v4i16__3_6_6_6: 21165; GFX90A: ; %bb.0: 21166; 
GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21167; GFX90A-NEXT: ;;#ASMSTART 21168; GFX90A-NEXT: ; def s[4:5] 21169; GFX90A-NEXT: ;;#ASMEND 21170; GFX90A-NEXT: s_lshr_b32 s4, s5, 16 21171; GFX90A-NEXT: ;;#ASMSTART 21172; GFX90A-NEXT: ; def s[6:7] 21173; GFX90A-NEXT: ;;#ASMEND 21174; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s7 21175; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s7, s7 21176; GFX90A-NEXT: ;;#ASMSTART 21177; GFX90A-NEXT: ; use s[8:9] 21178; GFX90A-NEXT: ;;#ASMEND 21179; GFX90A-NEXT: s_setpc_b64 s[30:31] 21180; 21181; GFX940-LABEL: s_shuffle_v4i16_v4i16__3_6_6_6: 21182; GFX940: ; %bb.0: 21183; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21184; GFX940-NEXT: ;;#ASMSTART 21185; GFX940-NEXT: ; def s[0:1] 21186; GFX940-NEXT: ;;#ASMEND 21187; GFX940-NEXT: s_lshr_b32 s0, s1, 16 21188; GFX940-NEXT: ;;#ASMSTART 21189; GFX940-NEXT: ; def s[2:3] 21190; GFX940-NEXT: ;;#ASMEND 21191; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3 21192; GFX940-NEXT: s_pack_ll_b32_b16 s9, s3, s3 21193; GFX940-NEXT: ;;#ASMSTART 21194; GFX940-NEXT: ; use s[8:9] 21195; GFX940-NEXT: ;;#ASMEND 21196; GFX940-NEXT: s_setpc_b64 s[30:31] 21197 %vec0 = call <4 x i16> asm "; def $0", "=s"() 21198 %vec1 = call <4 x i16> asm "; def $0", "=s"() 21199 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 3, i32 6, i32 6, i32 6> 21200 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 21201 ret void 21202} 21203 21204define void @s_shuffle_v4i16_v4i16__4_6_6_6() { 21205; GFX900-LABEL: s_shuffle_v4i16_v4i16__4_6_6_6: 21206; GFX900: ; %bb.0: 21207; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21208; GFX900-NEXT: ;;#ASMSTART 21209; GFX900-NEXT: ; def s[4:5] 21210; GFX900-NEXT: ;;#ASMEND 21211; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s5 21212; GFX900-NEXT: s_pack_ll_b32_b16 s9, s5, s5 21213; GFX900-NEXT: ;;#ASMSTART 21214; GFX900-NEXT: ; use s[8:9] 21215; GFX900-NEXT: ;;#ASMEND 21216; GFX900-NEXT: s_setpc_b64 s[30:31] 21217; 21218; GFX90A-LABEL: 
s_shuffle_v4i16_v4i16__4_6_6_6: 21219; GFX90A: ; %bb.0: 21220; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21221; GFX90A-NEXT: ;;#ASMSTART 21222; GFX90A-NEXT: ; def s[4:5] 21223; GFX90A-NEXT: ;;#ASMEND 21224; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s5 21225; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s5, s5 21226; GFX90A-NEXT: ;;#ASMSTART 21227; GFX90A-NEXT: ; use s[8:9] 21228; GFX90A-NEXT: ;;#ASMEND 21229; GFX90A-NEXT: s_setpc_b64 s[30:31] 21230; 21231; GFX940-LABEL: s_shuffle_v4i16_v4i16__4_6_6_6: 21232; GFX940: ; %bb.0: 21233; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21234; GFX940-NEXT: ;;#ASMSTART 21235; GFX940-NEXT: ; def s[0:1] 21236; GFX940-NEXT: ;;#ASMEND 21237; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 21238; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 21239; GFX940-NEXT: ;;#ASMSTART 21240; GFX940-NEXT: ; use s[8:9] 21241; GFX940-NEXT: ;;#ASMEND 21242; GFX940-NEXT: s_setpc_b64 s[30:31] 21243 %vec0 = call <4 x i16> asm "; def $0", "=s"() 21244 %vec1 = call <4 x i16> asm "; def $0", "=s"() 21245 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 4, i32 6, i32 6, i32 6> 21246 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 21247 ret void 21248} 21249 21250define void @s_shuffle_v4i16_v4i16__5_6_6_6() { 21251; GFX900-LABEL: s_shuffle_v4i16_v4i16__5_6_6_6: 21252; GFX900: ; %bb.0: 21253; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21254; GFX900-NEXT: ;;#ASMSTART 21255; GFX900-NEXT: ; def s[4:5] 21256; GFX900-NEXT: ;;#ASMEND 21257; GFX900-NEXT: s_lshr_b32 s4, s4, 16 21258; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s5 21259; GFX900-NEXT: s_pack_ll_b32_b16 s9, s5, s5 21260; GFX900-NEXT: ;;#ASMSTART 21261; GFX900-NEXT: ; use s[8:9] 21262; GFX900-NEXT: ;;#ASMEND 21263; GFX900-NEXT: s_setpc_b64 s[30:31] 21264; 21265; GFX90A-LABEL: s_shuffle_v4i16_v4i16__5_6_6_6: 21266; GFX90A: ; %bb.0: 21267; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21268; GFX90A-NEXT: ;;#ASMSTART 21269; GFX90A-NEXT: ; def s[4:5] 21270; 
GFX90A-NEXT: ;;#ASMEND 21271; GFX90A-NEXT: s_lshr_b32 s4, s4, 16 21272; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s5 21273; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s5, s5 21274; GFX90A-NEXT: ;;#ASMSTART 21275; GFX90A-NEXT: ; use s[8:9] 21276; GFX90A-NEXT: ;;#ASMEND 21277; GFX90A-NEXT: s_setpc_b64 s[30:31] 21278; 21279; GFX940-LABEL: s_shuffle_v4i16_v4i16__5_6_6_6: 21280; GFX940: ; %bb.0: 21281; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21282; GFX940-NEXT: ;;#ASMSTART 21283; GFX940-NEXT: ; def s[0:1] 21284; GFX940-NEXT: ;;#ASMEND 21285; GFX940-NEXT: s_lshr_b32 s0, s0, 16 21286; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 21287; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 21288; GFX940-NEXT: ;;#ASMSTART 21289; GFX940-NEXT: ; use s[8:9] 21290; GFX940-NEXT: ;;#ASMEND 21291; GFX940-NEXT: s_setpc_b64 s[30:31] 21292 %vec0 = call <4 x i16> asm "; def $0", "=s"() 21293 %vec1 = call <4 x i16> asm "; def $0", "=s"() 21294 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 5, i32 6, i32 6, i32 6> 21295 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 21296 ret void 21297} 21298 21299define void @s_shuffle_v4i16_v4i16__6_6_6_6() { 21300; GFX900-LABEL: s_shuffle_v4i16_v4i16__6_6_6_6: 21301; GFX900: ; %bb.0: 21302; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21303; GFX900-NEXT: ;;#ASMSTART 21304; GFX900-NEXT: ; def s[4:5] 21305; GFX900-NEXT: ;;#ASMEND 21306; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s5 21307; GFX900-NEXT: s_mov_b32 s9, s8 21308; GFX900-NEXT: ;;#ASMSTART 21309; GFX900-NEXT: ; use s[8:9] 21310; GFX900-NEXT: ;;#ASMEND 21311; GFX900-NEXT: s_setpc_b64 s[30:31] 21312; 21313; GFX90A-LABEL: s_shuffle_v4i16_v4i16__6_6_6_6: 21314; GFX90A: ; %bb.0: 21315; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21316; GFX90A-NEXT: ;;#ASMSTART 21317; GFX90A-NEXT: ; def s[4:5] 21318; GFX90A-NEXT: ;;#ASMEND 21319; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s5 21320; GFX90A-NEXT: s_mov_b32 s9, s8 21321; GFX90A-NEXT: ;;#ASMSTART 21322; 
GFX90A-NEXT: ; use s[8:9] 21323; GFX90A-NEXT: ;;#ASMEND 21324; GFX90A-NEXT: s_setpc_b64 s[30:31] 21325; 21326; GFX940-LABEL: s_shuffle_v4i16_v4i16__6_6_6_6: 21327; GFX940: ; %bb.0: 21328; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21329; GFX940-NEXT: ;;#ASMSTART 21330; GFX940-NEXT: ; def s[0:1] 21331; GFX940-NEXT: ;;#ASMEND 21332; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 21333; GFX940-NEXT: s_mov_b32 s9, s8 21334; GFX940-NEXT: ;;#ASMSTART 21335; GFX940-NEXT: ; use s[8:9] 21336; GFX940-NEXT: ;;#ASMEND 21337; GFX940-NEXT: s_setpc_b64 s[30:31] 21338 %vec0 = call <4 x i16> asm "; def $0", "=s"() 21339 %vec1 = call <4 x i16> asm "; def $0", "=s"() 21340 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 6, i32 6, i32 6, i32 6> 21341 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 21342 ret void 21343} 21344 21345define void @s_shuffle_v4i16_v4i16__7_6_6_6() { 21346; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_6_6_6: 21347; GFX900: ; %bb.0: 21348; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21349; GFX900-NEXT: ;;#ASMSTART 21350; GFX900-NEXT: ; def s[4:5] 21351; GFX900-NEXT: ;;#ASMEND 21352; GFX900-NEXT: s_lshr_b32 s4, s5, 16 21353; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s5 21354; GFX900-NEXT: s_pack_ll_b32_b16 s9, s5, s5 21355; GFX900-NEXT: ;;#ASMSTART 21356; GFX900-NEXT: ; use s[8:9] 21357; GFX900-NEXT: ;;#ASMEND 21358; GFX900-NEXT: s_setpc_b64 s[30:31] 21359; 21360; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_6_6_6: 21361; GFX90A: ; %bb.0: 21362; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21363; GFX90A-NEXT: ;;#ASMSTART 21364; GFX90A-NEXT: ; def s[4:5] 21365; GFX90A-NEXT: ;;#ASMEND 21366; GFX90A-NEXT: s_lshr_b32 s4, s5, 16 21367; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s5 21368; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s5, s5 21369; GFX90A-NEXT: ;;#ASMSTART 21370; GFX90A-NEXT: ; use s[8:9] 21371; GFX90A-NEXT: ;;#ASMEND 21372; GFX90A-NEXT: s_setpc_b64 s[30:31] 21373; 21374; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_6_6_6: 
21375; GFX940: ; %bb.0: 21376; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21377; GFX940-NEXT: ;;#ASMSTART 21378; GFX940-NEXT: ; def s[0:1] 21379; GFX940-NEXT: ;;#ASMEND 21380; GFX940-NEXT: s_lshr_b32 s0, s1, 16 21381; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 21382; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 21383; GFX940-NEXT: ;;#ASMSTART 21384; GFX940-NEXT: ; use s[8:9] 21385; GFX940-NEXT: ;;#ASMEND 21386; GFX940-NEXT: s_setpc_b64 s[30:31] 21387 %vec0 = call <4 x i16> asm "; def $0", "=s"() 21388 %vec1 = call <4 x i16> asm "; def $0", "=s"() 21389 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 6, i32 6, i32 6> 21390 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 21391 ret void 21392} 21393 21394define void @s_shuffle_v4i16_v4i16__7_u_6_6() { 21395; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_u_6_6: 21396; GFX900: ; %bb.0: 21397; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21398; GFX900-NEXT: ;;#ASMSTART 21399; GFX900-NEXT: ; def s[4:5] 21400; GFX900-NEXT: ;;#ASMEND 21401; GFX900-NEXT: s_lshr_b32 s8, s5, 16 21402; GFX900-NEXT: s_pack_ll_b32_b16 s9, s5, s5 21403; GFX900-NEXT: ;;#ASMSTART 21404; GFX900-NEXT: ; use s[8:9] 21405; GFX900-NEXT: ;;#ASMEND 21406; GFX900-NEXT: s_setpc_b64 s[30:31] 21407; 21408; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_u_6_6: 21409; GFX90A: ; %bb.0: 21410; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21411; GFX90A-NEXT: ;;#ASMSTART 21412; GFX90A-NEXT: ; def s[4:5] 21413; GFX90A-NEXT: ;;#ASMEND 21414; GFX90A-NEXT: s_lshr_b32 s8, s5, 16 21415; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s5, s5 21416; GFX90A-NEXT: ;;#ASMSTART 21417; GFX90A-NEXT: ; use s[8:9] 21418; GFX90A-NEXT: ;;#ASMEND 21419; GFX90A-NEXT: s_setpc_b64 s[30:31] 21420; 21421; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_u_6_6: 21422; GFX940: ; %bb.0: 21423; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21424; GFX940-NEXT: ;;#ASMSTART 21425; GFX940-NEXT: ; def s[0:1] 21426; GFX940-NEXT: ;;#ASMEND 21427; GFX940-NEXT: 
s_lshr_b32 s8, s1, 16 21428; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 21429; GFX940-NEXT: ;;#ASMSTART 21430; GFX940-NEXT: ; use s[8:9] 21431; GFX940-NEXT: ;;#ASMEND 21432; GFX940-NEXT: s_setpc_b64 s[30:31] 21433 %vec0 = call <4 x i16> asm "; def $0", "=s"() 21434 %vec1 = call <4 x i16> asm "; def $0", "=s"() 21435 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 poison, i32 6, i32 6> 21436 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 21437 ret void 21438} 21439 21440define void @s_shuffle_v4i16_v4i16__7_0_6_6() { 21441; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_0_6_6: 21442; GFX900: ; %bb.0: 21443; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21444; GFX900-NEXT: ;;#ASMSTART 21445; GFX900-NEXT: ; def s[4:5] 21446; GFX900-NEXT: ;;#ASMEND 21447; GFX900-NEXT: ;;#ASMSTART 21448; GFX900-NEXT: ; def s[6:7] 21449; GFX900-NEXT: ;;#ASMEND 21450; GFX900-NEXT: s_lshr_b32 s5, s7, 16 21451; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4 21452; GFX900-NEXT: s_pack_ll_b32_b16 s9, s7, s7 21453; GFX900-NEXT: ;;#ASMSTART 21454; GFX900-NEXT: ; use s[8:9] 21455; GFX900-NEXT: ;;#ASMEND 21456; GFX900-NEXT: s_setpc_b64 s[30:31] 21457; 21458; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_0_6_6: 21459; GFX90A: ; %bb.0: 21460; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21461; GFX90A-NEXT: ;;#ASMSTART 21462; GFX90A-NEXT: ; def s[4:5] 21463; GFX90A-NEXT: ;;#ASMEND 21464; GFX90A-NEXT: ;;#ASMSTART 21465; GFX90A-NEXT: ; def s[6:7] 21466; GFX90A-NEXT: ;;#ASMEND 21467; GFX90A-NEXT: s_lshr_b32 s5, s7, 16 21468; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4 21469; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s7, s7 21470; GFX90A-NEXT: ;;#ASMSTART 21471; GFX90A-NEXT: ; use s[8:9] 21472; GFX90A-NEXT: ;;#ASMEND 21473; GFX90A-NEXT: s_setpc_b64 s[30:31] 21474; 21475; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_0_6_6: 21476; GFX940: ; %bb.0: 21477; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21478; GFX940-NEXT: ;;#ASMSTART 21479; GFX940-NEXT: ; def s[0:1] 21480; 
GFX940-NEXT: ;;#ASMEND 21481; GFX940-NEXT: ;;#ASMSTART 21482; GFX940-NEXT: ; def s[2:3] 21483; GFX940-NEXT: ;;#ASMEND 21484; GFX940-NEXT: s_lshr_b32 s1, s3, 16 21485; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 21486; GFX940-NEXT: s_pack_ll_b32_b16 s9, s3, s3 21487; GFX940-NEXT: ;;#ASMSTART 21488; GFX940-NEXT: ; use s[8:9] 21489; GFX940-NEXT: ;;#ASMEND 21490; GFX940-NEXT: s_setpc_b64 s[30:31] 21491 %vec0 = call <4 x i16> asm "; def $0", "=s"() 21492 %vec1 = call <4 x i16> asm "; def $0", "=s"() 21493 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 0, i32 6, i32 6> 21494 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 21495 ret void 21496} 21497 21498define void @s_shuffle_v4i16_v4i16__7_1_6_6() { 21499; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_1_6_6: 21500; GFX900: ; %bb.0: 21501; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21502; GFX900-NEXT: ;;#ASMSTART 21503; GFX900-NEXT: ; def s[4:5] 21504; GFX900-NEXT: ;;#ASMEND 21505; GFX900-NEXT: ;;#ASMSTART 21506; GFX900-NEXT: ; def s[6:7] 21507; GFX900-NEXT: ;;#ASMEND 21508; GFX900-NEXT: s_pack_hh_b32_b16 s8, s7, s4 21509; GFX900-NEXT: s_pack_ll_b32_b16 s9, s7, s7 21510; GFX900-NEXT: ;;#ASMSTART 21511; GFX900-NEXT: ; use s[8:9] 21512; GFX900-NEXT: ;;#ASMEND 21513; GFX900-NEXT: s_setpc_b64 s[30:31] 21514; 21515; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_1_6_6: 21516; GFX90A: ; %bb.0: 21517; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21518; GFX90A-NEXT: ;;#ASMSTART 21519; GFX90A-NEXT: ; def s[4:5] 21520; GFX90A-NEXT: ;;#ASMEND 21521; GFX90A-NEXT: ;;#ASMSTART 21522; GFX90A-NEXT: ; def s[6:7] 21523; GFX90A-NEXT: ;;#ASMEND 21524; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s7, s4 21525; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s7, s7 21526; GFX90A-NEXT: ;;#ASMSTART 21527; GFX90A-NEXT: ; use s[8:9] 21528; GFX90A-NEXT: ;;#ASMEND 21529; GFX90A-NEXT: s_setpc_b64 s[30:31] 21530; 21531; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_1_6_6: 21532; GFX940: ; %bb.0: 21533; GFX940-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) 21534; GFX940-NEXT: ;;#ASMSTART 21535; GFX940-NEXT: ; def s[0:1] 21536; GFX940-NEXT: ;;#ASMEND 21537; GFX940-NEXT: ;;#ASMSTART 21538; GFX940-NEXT: ; def s[2:3] 21539; GFX940-NEXT: ;;#ASMEND 21540; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s0 21541; GFX940-NEXT: s_pack_ll_b32_b16 s9, s3, s3 21542; GFX940-NEXT: ;;#ASMSTART 21543; GFX940-NEXT: ; use s[8:9] 21544; GFX940-NEXT: ;;#ASMEND 21545; GFX940-NEXT: s_setpc_b64 s[30:31] 21546 %vec0 = call <4 x i16> asm "; def $0", "=s"() 21547 %vec1 = call <4 x i16> asm "; def $0", "=s"() 21548 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 1, i32 6, i32 6> 21549 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 21550 ret void 21551} 21552 21553define void @s_shuffle_v4i16_v4i16__7_2_6_6() { 21554; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_2_6_6: 21555; GFX900: ; %bb.0: 21556; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21557; GFX900-NEXT: ;;#ASMSTART 21558; GFX900-NEXT: ; def s[4:5] 21559; GFX900-NEXT: ;;#ASMEND 21560; GFX900-NEXT: ;;#ASMSTART 21561; GFX900-NEXT: ; def s[6:7] 21562; GFX900-NEXT: ;;#ASMEND 21563; GFX900-NEXT: s_lshr_b32 s4, s7, 16 21564; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s5 21565; GFX900-NEXT: s_pack_ll_b32_b16 s9, s7, s7 21566; GFX900-NEXT: ;;#ASMSTART 21567; GFX900-NEXT: ; use s[8:9] 21568; GFX900-NEXT: ;;#ASMEND 21569; GFX900-NEXT: s_setpc_b64 s[30:31] 21570; 21571; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_2_6_6: 21572; GFX90A: ; %bb.0: 21573; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21574; GFX90A-NEXT: ;;#ASMSTART 21575; GFX90A-NEXT: ; def s[4:5] 21576; GFX90A-NEXT: ;;#ASMEND 21577; GFX90A-NEXT: ;;#ASMSTART 21578; GFX90A-NEXT: ; def s[6:7] 21579; GFX90A-NEXT: ;;#ASMEND 21580; GFX90A-NEXT: s_lshr_b32 s4, s7, 16 21581; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s5 21582; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s7, s7 21583; GFX90A-NEXT: ;;#ASMSTART 21584; GFX90A-NEXT: ; use s[8:9] 21585; GFX90A-NEXT: ;;#ASMEND 21586; 
GFX90A-NEXT: s_setpc_b64 s[30:31] 21587; 21588; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_2_6_6: 21589; GFX940: ; %bb.0: 21590; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21591; GFX940-NEXT: ;;#ASMSTART 21592; GFX940-NEXT: ; def s[0:1] 21593; GFX940-NEXT: ;;#ASMEND 21594; GFX940-NEXT: ;;#ASMSTART 21595; GFX940-NEXT: ; def s[2:3] 21596; GFX940-NEXT: ;;#ASMEND 21597; GFX940-NEXT: s_lshr_b32 s0, s3, 16 21598; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 21599; GFX940-NEXT: s_pack_ll_b32_b16 s9, s3, s3 21600; GFX940-NEXT: ;;#ASMSTART 21601; GFX940-NEXT: ; use s[8:9] 21602; GFX940-NEXT: ;;#ASMEND 21603; GFX940-NEXT: s_setpc_b64 s[30:31] 21604 %vec0 = call <4 x i16> asm "; def $0", "=s"() 21605 %vec1 = call <4 x i16> asm "; def $0", "=s"() 21606 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 2, i32 6, i32 6> 21607 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 21608 ret void 21609} 21610 21611define void @s_shuffle_v4i16_v4i16__7_3_6_6() { 21612; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_3_6_6: 21613; GFX900: ; %bb.0: 21614; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21615; GFX900-NEXT: ;;#ASMSTART 21616; GFX900-NEXT: ; def s[4:5] 21617; GFX900-NEXT: ;;#ASMEND 21618; GFX900-NEXT: ;;#ASMSTART 21619; GFX900-NEXT: ; def s[6:7] 21620; GFX900-NEXT: ;;#ASMEND 21621; GFX900-NEXT: s_pack_hh_b32_b16 s8, s7, s5 21622; GFX900-NEXT: s_pack_ll_b32_b16 s9, s7, s7 21623; GFX900-NEXT: ;;#ASMSTART 21624; GFX900-NEXT: ; use s[8:9] 21625; GFX900-NEXT: ;;#ASMEND 21626; GFX900-NEXT: s_setpc_b64 s[30:31] 21627; 21628; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_3_6_6: 21629; GFX90A: ; %bb.0: 21630; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21631; GFX90A-NEXT: ;;#ASMSTART 21632; GFX90A-NEXT: ; def s[4:5] 21633; GFX90A-NEXT: ;;#ASMEND 21634; GFX90A-NEXT: ;;#ASMSTART 21635; GFX90A-NEXT: ; def s[6:7] 21636; GFX90A-NEXT: ;;#ASMEND 21637; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s7, s5 21638; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s7, s7 
21639; GFX90A-NEXT: ;;#ASMSTART 21640; GFX90A-NEXT: ; use s[8:9] 21641; GFX90A-NEXT: ;;#ASMEND 21642; GFX90A-NEXT: s_setpc_b64 s[30:31] 21643; 21644; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_3_6_6: 21645; GFX940: ; %bb.0: 21646; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21647; GFX940-NEXT: ;;#ASMSTART 21648; GFX940-NEXT: ; def s[0:1] 21649; GFX940-NEXT: ;;#ASMEND 21650; GFX940-NEXT: ;;#ASMSTART 21651; GFX940-NEXT: ; def s[2:3] 21652; GFX940-NEXT: ;;#ASMEND 21653; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s1 21654; GFX940-NEXT: s_pack_ll_b32_b16 s9, s3, s3 21655; GFX940-NEXT: ;;#ASMSTART 21656; GFX940-NEXT: ; use s[8:9] 21657; GFX940-NEXT: ;;#ASMEND 21658; GFX940-NEXT: s_setpc_b64 s[30:31] 21659 %vec0 = call <4 x i16> asm "; def $0", "=s"() 21660 %vec1 = call <4 x i16> asm "; def $0", "=s"() 21661 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 3, i32 6, i32 6> 21662 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 21663 ret void 21664} 21665 21666define void @s_shuffle_v4i16_v4i16__7_4_6_6() { 21667; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_4_6_6: 21668; GFX900: ; %bb.0: 21669; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21670; GFX900-NEXT: ;;#ASMSTART 21671; GFX900-NEXT: ; def s[4:5] 21672; GFX900-NEXT: ;;#ASMEND 21673; GFX900-NEXT: s_lshr_b32 s6, s5, 16 21674; GFX900-NEXT: s_pack_ll_b32_b16 s8, s6, s4 21675; GFX900-NEXT: s_pack_ll_b32_b16 s9, s5, s5 21676; GFX900-NEXT: ;;#ASMSTART 21677; GFX900-NEXT: ; use s[8:9] 21678; GFX900-NEXT: ;;#ASMEND 21679; GFX900-NEXT: s_setpc_b64 s[30:31] 21680; 21681; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_4_6_6: 21682; GFX90A: ; %bb.0: 21683; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21684; GFX90A-NEXT: ;;#ASMSTART 21685; GFX90A-NEXT: ; def s[4:5] 21686; GFX90A-NEXT: ;;#ASMEND 21687; GFX90A-NEXT: s_lshr_b32 s6, s5, 16 21688; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s6, s4 21689; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s5, s5 21690; GFX90A-NEXT: ;;#ASMSTART 21691; GFX90A-NEXT: 
; use s[8:9] 21692; GFX90A-NEXT: ;;#ASMEND 21693; GFX90A-NEXT: s_setpc_b64 s[30:31] 21694; 21695; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_4_6_6: 21696; GFX940: ; %bb.0: 21697; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21698; GFX940-NEXT: ;;#ASMSTART 21699; GFX940-NEXT: ; def s[0:1] 21700; GFX940-NEXT: ;;#ASMEND 21701; GFX940-NEXT: s_lshr_b32 s2, s1, 16 21702; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s0 21703; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 21704; GFX940-NEXT: ;;#ASMSTART 21705; GFX940-NEXT: ; use s[8:9] 21706; GFX940-NEXT: ;;#ASMEND 21707; GFX940-NEXT: s_setpc_b64 s[30:31] 21708 %vec0 = call <4 x i16> asm "; def $0", "=s"() 21709 %vec1 = call <4 x i16> asm "; def $0", "=s"() 21710 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 4, i32 6, i32 6> 21711 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 21712 ret void 21713} 21714 21715define void @s_shuffle_v4i16_v4i16__7_5_6_6() { 21716; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_5_6_6: 21717; GFX900: ; %bb.0: 21718; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21719; GFX900-NEXT: ;;#ASMSTART 21720; GFX900-NEXT: ; def s[4:5] 21721; GFX900-NEXT: ;;#ASMEND 21722; GFX900-NEXT: s_pack_hh_b32_b16 s8, s5, s4 21723; GFX900-NEXT: s_pack_ll_b32_b16 s9, s5, s5 21724; GFX900-NEXT: ;;#ASMSTART 21725; GFX900-NEXT: ; use s[8:9] 21726; GFX900-NEXT: ;;#ASMEND 21727; GFX900-NEXT: s_setpc_b64 s[30:31] 21728; 21729; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_5_6_6: 21730; GFX90A: ; %bb.0: 21731; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21732; GFX90A-NEXT: ;;#ASMSTART 21733; GFX90A-NEXT: ; def s[4:5] 21734; GFX90A-NEXT: ;;#ASMEND 21735; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s5, s4 21736; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s5, s5 21737; GFX90A-NEXT: ;;#ASMSTART 21738; GFX90A-NEXT: ; use s[8:9] 21739; GFX90A-NEXT: ;;#ASMEND 21740; GFX90A-NEXT: s_setpc_b64 s[30:31] 21741; 21742; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_5_6_6: 21743; GFX940: ; %bb.0: 21744; GFX940-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21745; GFX940-NEXT: ;;#ASMSTART 21746; GFX940-NEXT: ; def s[0:1] 21747; GFX940-NEXT: ;;#ASMEND 21748; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s0 21749; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, s1 21750; GFX940-NEXT: ;;#ASMSTART 21751; GFX940-NEXT: ; use s[8:9] 21752; GFX940-NEXT: ;;#ASMEND 21753; GFX940-NEXT: s_setpc_b64 s[30:31] 21754 %vec0 = call <4 x i16> asm "; def $0", "=s"() 21755 %vec1 = call <4 x i16> asm "; def $0", "=s"() 21756 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 5, i32 6, i32 6> 21757 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 21758 ret void 21759} 21760 21761define void @s_shuffle_v4i16_v4i16__7_7_6_6() { 21762; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_6_6: 21763; GFX900: ; %bb.0: 21764; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21765; GFX900-NEXT: ;;#ASMSTART 21766; GFX900-NEXT: ; def s[4:5] 21767; GFX900-NEXT: ;;#ASMEND 21768; GFX900-NEXT: s_pack_hh_b32_b16 s8, s5, s5 21769; GFX900-NEXT: s_pack_ll_b32_b16 s9, s5, s5 21770; GFX900-NEXT: ;;#ASMSTART 21771; GFX900-NEXT: ; use s[8:9] 21772; GFX900-NEXT: ;;#ASMEND 21773; GFX900-NEXT: s_setpc_b64 s[30:31] 21774; 21775; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_6_6: 21776; GFX90A: ; %bb.0: 21777; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21778; GFX90A-NEXT: ;;#ASMSTART 21779; GFX90A-NEXT: ; def s[4:5] 21780; GFX90A-NEXT: ;;#ASMEND 21781; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s5, s5 21782; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s5, s5 21783; GFX90A-NEXT: ;;#ASMSTART 21784; GFX90A-NEXT: ; use s[8:9] 21785; GFX90A-NEXT: ;;#ASMEND 21786; GFX90A-NEXT: s_setpc_b64 s[30:31] 21787; 21788; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_6_6: 21789; GFX940: ; %bb.0: 21790; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21791; GFX940-NEXT: ;;#ASMSTART 21792; GFX940-NEXT: ; def s[0:1] 21793; GFX940-NEXT: ;;#ASMEND 21794; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s1 21795; GFX940-NEXT: s_pack_ll_b32_b16 s9, s1, 
s1 21796; GFX940-NEXT: ;;#ASMSTART 21797; GFX940-NEXT: ; use s[8:9] 21798; GFX940-NEXT: ;;#ASMEND 21799; GFX940-NEXT: s_setpc_b64 s[30:31] 21800 %vec0 = call <4 x i16> asm "; def $0", "=s"() 21801 %vec1 = call <4 x i16> asm "; def $0", "=s"() 21802 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 6> 21803 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 21804 ret void 21805} 21806 21807define void @s_shuffle_v4i16_v4i16__7_7_u_6() { 21808; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_u_6: 21809; GFX900: ; %bb.0: 21810; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21811; GFX900-NEXT: ;;#ASMSTART 21812; GFX900-NEXT: ; def s[4:5] 21813; GFX900-NEXT: ;;#ASMEND 21814; GFX900-NEXT: s_pack_hh_b32_b16 s8, s5, s5 21815; GFX900-NEXT: s_lshl_b32 s9, s5, 16 21816; GFX900-NEXT: ;;#ASMSTART 21817; GFX900-NEXT: ; use s[8:9] 21818; GFX900-NEXT: ;;#ASMEND 21819; GFX900-NEXT: s_setpc_b64 s[30:31] 21820; 21821; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_u_6: 21822; GFX90A: ; %bb.0: 21823; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21824; GFX90A-NEXT: ;;#ASMSTART 21825; GFX90A-NEXT: ; def s[4:5] 21826; GFX90A-NEXT: ;;#ASMEND 21827; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s5, s5 21828; GFX90A-NEXT: s_lshl_b32 s9, s5, 16 21829; GFX90A-NEXT: ;;#ASMSTART 21830; GFX90A-NEXT: ; use s[8:9] 21831; GFX90A-NEXT: ;;#ASMEND 21832; GFX90A-NEXT: s_setpc_b64 s[30:31] 21833; 21834; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_u_6: 21835; GFX940: ; %bb.0: 21836; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21837; GFX940-NEXT: ;;#ASMSTART 21838; GFX940-NEXT: ; def s[0:1] 21839; GFX940-NEXT: ;;#ASMEND 21840; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s1 21841; GFX940-NEXT: s_lshl_b32 s9, s1, 16 21842; GFX940-NEXT: ;;#ASMSTART 21843; GFX940-NEXT: ; use s[8:9] 21844; GFX940-NEXT: ;;#ASMEND 21845; GFX940-NEXT: s_setpc_b64 s[30:31] 21846 %vec0 = call <4 x i16> asm "; def $0", "=s"() 21847 %vec1 = call <4 x i16> asm "; def $0", "=s"() 21848 
%shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 poison, i32 6> 21849 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 21850 ret void 21851} 21852 21853define void @s_shuffle_v4i16_v4i16__7_7_0_6() { 21854; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_0_6: 21855; GFX900: ; %bb.0: 21856; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21857; GFX900-NEXT: ;;#ASMSTART 21858; GFX900-NEXT: ; def s[4:5] 21859; GFX900-NEXT: ;;#ASMEND 21860; GFX900-NEXT: ;;#ASMSTART 21861; GFX900-NEXT: ; def s[6:7] 21862; GFX900-NEXT: ;;#ASMEND 21863; GFX900-NEXT: s_pack_ll_b32_b16 s9, s4, s7 21864; GFX900-NEXT: s_pack_hh_b32_b16 s8, s7, s7 21865; GFX900-NEXT: ;;#ASMSTART 21866; GFX900-NEXT: ; use s[8:9] 21867; GFX900-NEXT: ;;#ASMEND 21868; GFX900-NEXT: s_setpc_b64 s[30:31] 21869; 21870; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_0_6: 21871; GFX90A: ; %bb.0: 21872; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21873; GFX90A-NEXT: ;;#ASMSTART 21874; GFX90A-NEXT: ; def s[4:5] 21875; GFX90A-NEXT: ;;#ASMEND 21876; GFX90A-NEXT: ;;#ASMSTART 21877; GFX90A-NEXT: ; def s[6:7] 21878; GFX90A-NEXT: ;;#ASMEND 21879; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s4, s7 21880; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s7, s7 21881; GFX90A-NEXT: ;;#ASMSTART 21882; GFX90A-NEXT: ; use s[8:9] 21883; GFX90A-NEXT: ;;#ASMEND 21884; GFX90A-NEXT: s_setpc_b64 s[30:31] 21885; 21886; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_0_6: 21887; GFX940: ; %bb.0: 21888; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21889; GFX940-NEXT: ;;#ASMSTART 21890; GFX940-NEXT: ; def s[0:1] 21891; GFX940-NEXT: ;;#ASMEND 21892; GFX940-NEXT: ;;#ASMSTART 21893; GFX940-NEXT: ; def s[2:3] 21894; GFX940-NEXT: ;;#ASMEND 21895; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s3 21896; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3 21897; GFX940-NEXT: ;;#ASMSTART 21898; GFX940-NEXT: ; use s[8:9] 21899; GFX940-NEXT: ;;#ASMEND 21900; GFX940-NEXT: s_setpc_b64 s[30:31] 21901 %vec0 = call <4 x i16> asm "; def $0", 
"=s"() 21902 %vec1 = call <4 x i16> asm "; def $0", "=s"() 21903 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 0, i32 6> 21904 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 21905 ret void 21906} 21907 21908define void @s_shuffle_v4i16_v4i16__7_7_1_6() { 21909; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_1_6: 21910; GFX900: ; %bb.0: 21911; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21912; GFX900-NEXT: ;;#ASMSTART 21913; GFX900-NEXT: ; def s[4:5] 21914; GFX900-NEXT: ;;#ASMEND 21915; GFX900-NEXT: s_lshr_b32 s4, s4, 16 21916; GFX900-NEXT: ;;#ASMSTART 21917; GFX900-NEXT: ; def s[6:7] 21918; GFX900-NEXT: ;;#ASMEND 21919; GFX900-NEXT: s_pack_ll_b32_b16 s9, s4, s7 21920; GFX900-NEXT: s_pack_hh_b32_b16 s8, s7, s7 21921; GFX900-NEXT: ;;#ASMSTART 21922; GFX900-NEXT: ; use s[8:9] 21923; GFX900-NEXT: ;;#ASMEND 21924; GFX900-NEXT: s_setpc_b64 s[30:31] 21925; 21926; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_1_6: 21927; GFX90A: ; %bb.0: 21928; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21929; GFX90A-NEXT: ;;#ASMSTART 21930; GFX90A-NEXT: ; def s[4:5] 21931; GFX90A-NEXT: ;;#ASMEND 21932; GFX90A-NEXT: s_lshr_b32 s4, s4, 16 21933; GFX90A-NEXT: ;;#ASMSTART 21934; GFX90A-NEXT: ; def s[6:7] 21935; GFX90A-NEXT: ;;#ASMEND 21936; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s4, s7 21937; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s7, s7 21938; GFX90A-NEXT: ;;#ASMSTART 21939; GFX90A-NEXT: ; use s[8:9] 21940; GFX90A-NEXT: ;;#ASMEND 21941; GFX90A-NEXT: s_setpc_b64 s[30:31] 21942; 21943; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_1_6: 21944; GFX940: ; %bb.0: 21945; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21946; GFX940-NEXT: ;;#ASMSTART 21947; GFX940-NEXT: ; def s[0:1] 21948; GFX940-NEXT: ;;#ASMEND 21949; GFX940-NEXT: s_lshr_b32 s0, s0, 16 21950; GFX940-NEXT: ;;#ASMSTART 21951; GFX940-NEXT: ; def s[2:3] 21952; GFX940-NEXT: ;;#ASMEND 21953; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s3 21954; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3 
21955; GFX940-NEXT: ;;#ASMSTART 21956; GFX940-NEXT: ; use s[8:9] 21957; GFX940-NEXT: ;;#ASMEND 21958; GFX940-NEXT: s_setpc_b64 s[30:31] 21959 %vec0 = call <4 x i16> asm "; def $0", "=s"() 21960 %vec1 = call <4 x i16> asm "; def $0", "=s"() 21961 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 1, i32 6> 21962 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 21963 ret void 21964} 21965 21966define void @s_shuffle_v4i16_v4i16__7_7_2_6() { 21967; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_2_6: 21968; GFX900: ; %bb.0: 21969; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21970; GFX900-NEXT: ;;#ASMSTART 21971; GFX900-NEXT: ; def s[4:5] 21972; GFX900-NEXT: ;;#ASMEND 21973; GFX900-NEXT: ;;#ASMSTART 21974; GFX900-NEXT: ; def s[6:7] 21975; GFX900-NEXT: ;;#ASMEND 21976; GFX900-NEXT: s_pack_ll_b32_b16 s9, s5, s7 21977; GFX900-NEXT: s_pack_hh_b32_b16 s8, s7, s7 21978; GFX900-NEXT: ;;#ASMSTART 21979; GFX900-NEXT: ; use s[8:9] 21980; GFX900-NEXT: ;;#ASMEND 21981; GFX900-NEXT: s_setpc_b64 s[30:31] 21982; 21983; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_2_6: 21984; GFX90A: ; %bb.0: 21985; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21986; GFX90A-NEXT: ;;#ASMSTART 21987; GFX90A-NEXT: ; def s[4:5] 21988; GFX90A-NEXT: ;;#ASMEND 21989; GFX90A-NEXT: ;;#ASMSTART 21990; GFX90A-NEXT: ; def s[6:7] 21991; GFX90A-NEXT: ;;#ASMEND 21992; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s5, s7 21993; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s7, s7 21994; GFX90A-NEXT: ;;#ASMSTART 21995; GFX90A-NEXT: ; use s[8:9] 21996; GFX90A-NEXT: ;;#ASMEND 21997; GFX90A-NEXT: s_setpc_b64 s[30:31] 21998; 21999; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_2_6: 22000; GFX940: ; %bb.0: 22001; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22002; GFX940-NEXT: ;;#ASMSTART 22003; GFX940-NEXT: ; def s[0:1] 22004; GFX940-NEXT: ;;#ASMEND 22005; GFX940-NEXT: ;;#ASMSTART 22006; GFX940-NEXT: ; def s[2:3] 22007; GFX940-NEXT: ;;#ASMEND 22008; GFX940-NEXT: s_pack_ll_b32_b16 s9, 
s1, s3 22009; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3 22010; GFX940-NEXT: ;;#ASMSTART 22011; GFX940-NEXT: ; use s[8:9] 22012; GFX940-NEXT: ;;#ASMEND 22013; GFX940-NEXT: s_setpc_b64 s[30:31] 22014 %vec0 = call <4 x i16> asm "; def $0", "=s"() 22015 %vec1 = call <4 x i16> asm "; def $0", "=s"() 22016 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 2, i32 6> 22017 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 22018 ret void 22019} 22020 22021define void @s_shuffle_v4i16_v4i16__7_7_3_6() { 22022; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_3_6: 22023; GFX900: ; %bb.0: 22024; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22025; GFX900-NEXT: ;;#ASMSTART 22026; GFX900-NEXT: ; def s[4:5] 22027; GFX900-NEXT: ;;#ASMEND 22028; GFX900-NEXT: s_lshr_b32 s4, s5, 16 22029; GFX900-NEXT: ;;#ASMSTART 22030; GFX900-NEXT: ; def s[6:7] 22031; GFX900-NEXT: ;;#ASMEND 22032; GFX900-NEXT: s_pack_ll_b32_b16 s9, s4, s7 22033; GFX900-NEXT: s_pack_hh_b32_b16 s8, s7, s7 22034; GFX900-NEXT: ;;#ASMSTART 22035; GFX900-NEXT: ; use s[8:9] 22036; GFX900-NEXT: ;;#ASMEND 22037; GFX900-NEXT: s_setpc_b64 s[30:31] 22038; 22039; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_3_6: 22040; GFX90A: ; %bb.0: 22041; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22042; GFX90A-NEXT: ;;#ASMSTART 22043; GFX90A-NEXT: ; def s[4:5] 22044; GFX90A-NEXT: ;;#ASMEND 22045; GFX90A-NEXT: s_lshr_b32 s4, s5, 16 22046; GFX90A-NEXT: ;;#ASMSTART 22047; GFX90A-NEXT: ; def s[6:7] 22048; GFX90A-NEXT: ;;#ASMEND 22049; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s4, s7 22050; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s7, s7 22051; GFX90A-NEXT: ;;#ASMSTART 22052; GFX90A-NEXT: ; use s[8:9] 22053; GFX90A-NEXT: ;;#ASMEND 22054; GFX90A-NEXT: s_setpc_b64 s[30:31] 22055; 22056; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_3_6: 22057; GFX940: ; %bb.0: 22058; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22059; GFX940-NEXT: ;;#ASMSTART 22060; GFX940-NEXT: ; def s[0:1] 22061; GFX940-NEXT: 
;;#ASMEND 22062; GFX940-NEXT: s_lshr_b32 s0, s1, 16 22063; GFX940-NEXT: ;;#ASMSTART 22064; GFX940-NEXT: ; def s[2:3] 22065; GFX940-NEXT: ;;#ASMEND 22066; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s3 22067; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3 22068; GFX940-NEXT: ;;#ASMSTART 22069; GFX940-NEXT: ; use s[8:9] 22070; GFX940-NEXT: ;;#ASMEND 22071; GFX940-NEXT: s_setpc_b64 s[30:31] 22072 %vec0 = call <4 x i16> asm "; def $0", "=s"() 22073 %vec1 = call <4 x i16> asm "; def $0", "=s"() 22074 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 3, i32 6> 22075 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 22076 ret void 22077} 22078 22079define void @s_shuffle_v4i16_v4i16__7_7_4_6() { 22080; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_4_6: 22081; GFX900: ; %bb.0: 22082; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22083; GFX900-NEXT: ;;#ASMSTART 22084; GFX900-NEXT: ; def s[4:5] 22085; GFX900-NEXT: ;;#ASMEND 22086; GFX900-NEXT: s_pack_ll_b32_b16 s9, s4, s5 22087; GFX900-NEXT: s_pack_hh_b32_b16 s8, s5, s5 22088; GFX900-NEXT: ;;#ASMSTART 22089; GFX900-NEXT: ; use s[8:9] 22090; GFX900-NEXT: ;;#ASMEND 22091; GFX900-NEXT: s_setpc_b64 s[30:31] 22092; 22093; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_4_6: 22094; GFX90A: ; %bb.0: 22095; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22096; GFX90A-NEXT: ;;#ASMSTART 22097; GFX90A-NEXT: ; def s[4:5] 22098; GFX90A-NEXT: ;;#ASMEND 22099; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s4, s5 22100; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s5, s5 22101; GFX90A-NEXT: ;;#ASMSTART 22102; GFX90A-NEXT: ; use s[8:9] 22103; GFX90A-NEXT: ;;#ASMEND 22104; GFX90A-NEXT: s_setpc_b64 s[30:31] 22105; 22106; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_4_6: 22107; GFX940: ; %bb.0: 22108; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22109; GFX940-NEXT: ;;#ASMSTART 22110; GFX940-NEXT: ; def s[0:1] 22111; GFX940-NEXT: ;;#ASMEND 22112; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s1 22113; GFX940-NEXT: 
s_pack_hh_b32_b16 s8, s1, s1 22114; GFX940-NEXT: ;;#ASMSTART 22115; GFX940-NEXT: ; use s[8:9] 22116; GFX940-NEXT: ;;#ASMEND 22117; GFX940-NEXT: s_setpc_b64 s[30:31] 22118 %vec0 = call <4 x i16> asm "; def $0", "=s"() 22119 %vec1 = call <4 x i16> asm "; def $0", "=s"() 22120 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 4, i32 6> 22121 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 22122 ret void 22123} 22124 22125define void @s_shuffle_v4i16_v4i16__7_7_5_6() { 22126; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_5_6: 22127; GFX900: ; %bb.0: 22128; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22129; GFX900-NEXT: ;;#ASMSTART 22130; GFX900-NEXT: ; def s[4:5] 22131; GFX900-NEXT: ;;#ASMEND 22132; GFX900-NEXT: s_lshr_b32 s4, s4, 16 22133; GFX900-NEXT: s_pack_ll_b32_b16 s9, s4, s5 22134; GFX900-NEXT: s_pack_hh_b32_b16 s8, s5, s5 22135; GFX900-NEXT: ;;#ASMSTART 22136; GFX900-NEXT: ; use s[8:9] 22137; GFX900-NEXT: ;;#ASMEND 22138; GFX900-NEXT: s_setpc_b64 s[30:31] 22139; 22140; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_5_6: 22141; GFX90A: ; %bb.0: 22142; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22143; GFX90A-NEXT: ;;#ASMSTART 22144; GFX90A-NEXT: ; def s[4:5] 22145; GFX90A-NEXT: ;;#ASMEND 22146; GFX90A-NEXT: s_lshr_b32 s4, s4, 16 22147; GFX90A-NEXT: s_pack_ll_b32_b16 s9, s4, s5 22148; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s5, s5 22149; GFX90A-NEXT: ;;#ASMSTART 22150; GFX90A-NEXT: ; use s[8:9] 22151; GFX90A-NEXT: ;;#ASMEND 22152; GFX90A-NEXT: s_setpc_b64 s[30:31] 22153; 22154; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_5_6: 22155; GFX940: ; %bb.0: 22156; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22157; GFX940-NEXT: ;;#ASMSTART 22158; GFX940-NEXT: ; def s[0:1] 22159; GFX940-NEXT: ;;#ASMEND 22160; GFX940-NEXT: s_lshr_b32 s0, s0, 16 22161; GFX940-NEXT: s_pack_ll_b32_b16 s9, s0, s1 22162; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s1 22163; GFX940-NEXT: ;;#ASMSTART 22164; GFX940-NEXT: ; use s[8:9] 22165; 
GFX940-NEXT: ;;#ASMEND 22166; GFX940-NEXT: s_setpc_b64 s[30:31] 22167 %vec0 = call <4 x i16> asm "; def $0", "=s"() 22168 %vec1 = call <4 x i16> asm "; def $0", "=s"() 22169 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 5, i32 6> 22170 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 22171 ret void 22172} 22173 22174define void @s_shuffle_v4i16_v4i16__u_7_7_7() { 22175; GFX900-LABEL: s_shuffle_v4i16_v4i16__u_7_7_7: 22176; GFX900: ; %bb.0: 22177; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22178; GFX900-NEXT: ;;#ASMSTART 22179; GFX900-NEXT: ; def s[4:5] 22180; GFX900-NEXT: ;;#ASMEND 22181; GFX900-NEXT: s_pack_hh_b32_b16 s9, s5, s5 22182; GFX900-NEXT: s_mov_b32 s8, s5 22183; GFX900-NEXT: ;;#ASMSTART 22184; GFX900-NEXT: ; use s[8:9] 22185; GFX900-NEXT: ;;#ASMEND 22186; GFX900-NEXT: s_setpc_b64 s[30:31] 22187; 22188; GFX90A-LABEL: s_shuffle_v4i16_v4i16__u_7_7_7: 22189; GFX90A: ; %bb.0: 22190; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22191; GFX90A-NEXT: ;;#ASMSTART 22192; GFX90A-NEXT: ; def s[4:5] 22193; GFX90A-NEXT: ;;#ASMEND 22194; GFX90A-NEXT: s_pack_hh_b32_b16 s9, s5, s5 22195; GFX90A-NEXT: s_mov_b32 s8, s5 22196; GFX90A-NEXT: ;;#ASMSTART 22197; GFX90A-NEXT: ; use s[8:9] 22198; GFX90A-NEXT: ;;#ASMEND 22199; GFX90A-NEXT: s_setpc_b64 s[30:31] 22200; 22201; GFX940-LABEL: s_shuffle_v4i16_v4i16__u_7_7_7: 22202; GFX940: ; %bb.0: 22203; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22204; GFX940-NEXT: ;;#ASMSTART 22205; GFX940-NEXT: ; def s[0:1] 22206; GFX940-NEXT: ;;#ASMEND 22207; GFX940-NEXT: s_pack_hh_b32_b16 s9, s1, s1 22208; GFX940-NEXT: s_mov_b32 s8, s1 22209; GFX940-NEXT: ;;#ASMSTART 22210; GFX940-NEXT: ; use s[8:9] 22211; GFX940-NEXT: ;;#ASMEND 22212; GFX940-NEXT: s_setpc_b64 s[30:31] 22213 %vec0 = call <4 x i16> asm "; def $0", "=s"() 22214 %vec1 = call <4 x i16> asm "; def $0", "=s"() 22215 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 poison, i32 7, i32 7, 
i32 7> 22216 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 22217 ret void 22218} 22219 22220define void @s_shuffle_v4i16_v4i16__0_7_7_7() { 22221; GFX900-LABEL: s_shuffle_v4i16_v4i16__0_7_7_7: 22222; GFX900: ; %bb.0: 22223; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22224; GFX900-NEXT: ;;#ASMSTART 22225; GFX900-NEXT: ; def s[4:5] 22226; GFX900-NEXT: ;;#ASMEND 22227; GFX900-NEXT: ;;#ASMSTART 22228; GFX900-NEXT: ; def s[6:7] 22229; GFX900-NEXT: ;;#ASMEND 22230; GFX900-NEXT: s_pack_lh_b32_b16 s8, s4, s7 22231; GFX900-NEXT: s_pack_hh_b32_b16 s9, s7, s7 22232; GFX900-NEXT: ;;#ASMSTART 22233; GFX900-NEXT: ; use s[8:9] 22234; GFX900-NEXT: ;;#ASMEND 22235; GFX900-NEXT: s_setpc_b64 s[30:31] 22236; 22237; GFX90A-LABEL: s_shuffle_v4i16_v4i16__0_7_7_7: 22238; GFX90A: ; %bb.0: 22239; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22240; GFX90A-NEXT: ;;#ASMSTART 22241; GFX90A-NEXT: ; def s[4:5] 22242; GFX90A-NEXT: ;;#ASMEND 22243; GFX90A-NEXT: ;;#ASMSTART 22244; GFX90A-NEXT: ; def s[6:7] 22245; GFX90A-NEXT: ;;#ASMEND 22246; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s4, s7 22247; GFX90A-NEXT: s_pack_hh_b32_b16 s9, s7, s7 22248; GFX90A-NEXT: ;;#ASMSTART 22249; GFX90A-NEXT: ; use s[8:9] 22250; GFX90A-NEXT: ;;#ASMEND 22251; GFX90A-NEXT: s_setpc_b64 s[30:31] 22252; 22253; GFX940-LABEL: s_shuffle_v4i16_v4i16__0_7_7_7: 22254; GFX940: ; %bb.0: 22255; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22256; GFX940-NEXT: ;;#ASMSTART 22257; GFX940-NEXT: ; def s[0:1] 22258; GFX940-NEXT: ;;#ASMEND 22259; GFX940-NEXT: ;;#ASMSTART 22260; GFX940-NEXT: ; def s[2:3] 22261; GFX940-NEXT: ;;#ASMEND 22262; GFX940-NEXT: s_pack_lh_b32_b16 s8, s0, s3 22263; GFX940-NEXT: s_pack_hh_b32_b16 s9, s3, s3 22264; GFX940-NEXT: ;;#ASMSTART 22265; GFX940-NEXT: ; use s[8:9] 22266; GFX940-NEXT: ;;#ASMEND 22267; GFX940-NEXT: s_setpc_b64 s[30:31] 22268 %vec0 = call <4 x i16> asm "; def $0", "=s"() 22269 %vec1 = call <4 x i16> asm "; def $0", "=s"() 22270 %shuf = shufflevector <4 x i16> 
%vec0, <4 x i16> %vec1, <4 x i32> <i32 0, i32 7, i32 7, i32 7> 22271 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 22272 ret void 22273} 22274 22275define void @s_shuffle_v4i16_v4i16__1_7_7_7() { 22276; GFX900-LABEL: s_shuffle_v4i16_v4i16__1_7_7_7: 22277; GFX900: ; %bb.0: 22278; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22279; GFX900-NEXT: ;;#ASMSTART 22280; GFX900-NEXT: ; def s[4:5] 22281; GFX900-NEXT: ;;#ASMEND 22282; GFX900-NEXT: ;;#ASMSTART 22283; GFX900-NEXT: ; def s[6:7] 22284; GFX900-NEXT: ;;#ASMEND 22285; GFX900-NEXT: s_pack_hh_b32_b16 s8, s4, s7 22286; GFX900-NEXT: s_pack_hh_b32_b16 s9, s7, s7 22287; GFX900-NEXT: ;;#ASMSTART 22288; GFX900-NEXT: ; use s[8:9] 22289; GFX900-NEXT: ;;#ASMEND 22290; GFX900-NEXT: s_setpc_b64 s[30:31] 22291; 22292; GFX90A-LABEL: s_shuffle_v4i16_v4i16__1_7_7_7: 22293; GFX90A: ; %bb.0: 22294; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22295; GFX90A-NEXT: ;;#ASMSTART 22296; GFX90A-NEXT: ; def s[4:5] 22297; GFX90A-NEXT: ;;#ASMEND 22298; GFX90A-NEXT: ;;#ASMSTART 22299; GFX90A-NEXT: ; def s[6:7] 22300; GFX90A-NEXT: ;;#ASMEND 22301; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s4, s7 22302; GFX90A-NEXT: s_pack_hh_b32_b16 s9, s7, s7 22303; GFX90A-NEXT: ;;#ASMSTART 22304; GFX90A-NEXT: ; use s[8:9] 22305; GFX90A-NEXT: ;;#ASMEND 22306; GFX90A-NEXT: s_setpc_b64 s[30:31] 22307; 22308; GFX940-LABEL: s_shuffle_v4i16_v4i16__1_7_7_7: 22309; GFX940: ; %bb.0: 22310; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22311; GFX940-NEXT: ;;#ASMSTART 22312; GFX940-NEXT: ; def s[0:1] 22313; GFX940-NEXT: ;;#ASMEND 22314; GFX940-NEXT: ;;#ASMSTART 22315; GFX940-NEXT: ; def s[2:3] 22316; GFX940-NEXT: ;;#ASMEND 22317; GFX940-NEXT: s_pack_hh_b32_b16 s8, s0, s3 22318; GFX940-NEXT: s_pack_hh_b32_b16 s9, s3, s3 22319; GFX940-NEXT: ;;#ASMSTART 22320; GFX940-NEXT: ; use s[8:9] 22321; GFX940-NEXT: ;;#ASMEND 22322; GFX940-NEXT: s_setpc_b64 s[30:31] 22323 %vec0 = call <4 x i16> asm "; def $0", "=s"() 22324 %vec1 = call <4 x i16> asm "; 
def $0", "=s"() 22325 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 1, i32 7, i32 7, i32 7> 22326 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 22327 ret void 22328} 22329 22330define void @s_shuffle_v4i16_v4i16__2_7_7_7() { 22331; GFX900-LABEL: s_shuffle_v4i16_v4i16__2_7_7_7: 22332; GFX900: ; %bb.0: 22333; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22334; GFX900-NEXT: ;;#ASMSTART 22335; GFX900-NEXT: ; def s[4:5] 22336; GFX900-NEXT: ;;#ASMEND 22337; GFX900-NEXT: ;;#ASMSTART 22338; GFX900-NEXT: ; def s[6:7] 22339; GFX900-NEXT: ;;#ASMEND 22340; GFX900-NEXT: s_pack_lh_b32_b16 s8, s5, s7 22341; GFX900-NEXT: s_pack_hh_b32_b16 s9, s7, s7 22342; GFX900-NEXT: ;;#ASMSTART 22343; GFX900-NEXT: ; use s[8:9] 22344; GFX900-NEXT: ;;#ASMEND 22345; GFX900-NEXT: s_setpc_b64 s[30:31] 22346; 22347; GFX90A-LABEL: s_shuffle_v4i16_v4i16__2_7_7_7: 22348; GFX90A: ; %bb.0: 22349; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22350; GFX90A-NEXT: ;;#ASMSTART 22351; GFX90A-NEXT: ; def s[4:5] 22352; GFX90A-NEXT: ;;#ASMEND 22353; GFX90A-NEXT: ;;#ASMSTART 22354; GFX90A-NEXT: ; def s[6:7] 22355; GFX90A-NEXT: ;;#ASMEND 22356; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s5, s7 22357; GFX90A-NEXT: s_pack_hh_b32_b16 s9, s7, s7 22358; GFX90A-NEXT: ;;#ASMSTART 22359; GFX90A-NEXT: ; use s[8:9] 22360; GFX90A-NEXT: ;;#ASMEND 22361; GFX90A-NEXT: s_setpc_b64 s[30:31] 22362; 22363; GFX940-LABEL: s_shuffle_v4i16_v4i16__2_7_7_7: 22364; GFX940: ; %bb.0: 22365; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22366; GFX940-NEXT: ;;#ASMSTART 22367; GFX940-NEXT: ; def s[0:1] 22368; GFX940-NEXT: ;;#ASMEND 22369; GFX940-NEXT: ;;#ASMSTART 22370; GFX940-NEXT: ; def s[2:3] 22371; GFX940-NEXT: ;;#ASMEND 22372; GFX940-NEXT: s_pack_lh_b32_b16 s8, s1, s3 22373; GFX940-NEXT: s_pack_hh_b32_b16 s9, s3, s3 22374; GFX940-NEXT: ;;#ASMSTART 22375; GFX940-NEXT: ; use s[8:9] 22376; GFX940-NEXT: ;;#ASMEND 22377; GFX940-NEXT: s_setpc_b64 s[30:31] 22378 %vec0 = call <4 x i16> asm 
"; def $0", "=s"() 22379 %vec1 = call <4 x i16> asm "; def $0", "=s"() 22380 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 2, i32 7, i32 7, i32 7> 22381 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 22382 ret void 22383} 22384 22385define void @s_shuffle_v4i16_v4i16__3_7_7_7() { 22386; GFX900-LABEL: s_shuffle_v4i16_v4i16__3_7_7_7: 22387; GFX900: ; %bb.0: 22388; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22389; GFX900-NEXT: ;;#ASMSTART 22390; GFX900-NEXT: ; def s[4:5] 22391; GFX900-NEXT: ;;#ASMEND 22392; GFX900-NEXT: ;;#ASMSTART 22393; GFX900-NEXT: ; def s[6:7] 22394; GFX900-NEXT: ;;#ASMEND 22395; GFX900-NEXT: s_pack_hh_b32_b16 s8, s5, s7 22396; GFX900-NEXT: s_pack_hh_b32_b16 s9, s7, s7 22397; GFX900-NEXT: ;;#ASMSTART 22398; GFX900-NEXT: ; use s[8:9] 22399; GFX900-NEXT: ;;#ASMEND 22400; GFX900-NEXT: s_setpc_b64 s[30:31] 22401; 22402; GFX90A-LABEL: s_shuffle_v4i16_v4i16__3_7_7_7: 22403; GFX90A: ; %bb.0: 22404; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22405; GFX90A-NEXT: ;;#ASMSTART 22406; GFX90A-NEXT: ; def s[4:5] 22407; GFX90A-NEXT: ;;#ASMEND 22408; GFX90A-NEXT: ;;#ASMSTART 22409; GFX90A-NEXT: ; def s[6:7] 22410; GFX90A-NEXT: ;;#ASMEND 22411; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s5, s7 22412; GFX90A-NEXT: s_pack_hh_b32_b16 s9, s7, s7 22413; GFX90A-NEXT: ;;#ASMSTART 22414; GFX90A-NEXT: ; use s[8:9] 22415; GFX90A-NEXT: ;;#ASMEND 22416; GFX90A-NEXT: s_setpc_b64 s[30:31] 22417; 22418; GFX940-LABEL: s_shuffle_v4i16_v4i16__3_7_7_7: 22419; GFX940: ; %bb.0: 22420; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22421; GFX940-NEXT: ;;#ASMSTART 22422; GFX940-NEXT: ; def s[0:1] 22423; GFX940-NEXT: ;;#ASMEND 22424; GFX940-NEXT: ;;#ASMSTART 22425; GFX940-NEXT: ; def s[2:3] 22426; GFX940-NEXT: ;;#ASMEND 22427; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s3 22428; GFX940-NEXT: s_pack_hh_b32_b16 s9, s3, s3 22429; GFX940-NEXT: ;;#ASMSTART 22430; GFX940-NEXT: ; use s[8:9] 22431; GFX940-NEXT: ;;#ASMEND 22432; GFX940-NEXT: 
s_setpc_b64 s[30:31] 22433 %vec0 = call <4 x i16> asm "; def $0", "=s"() 22434 %vec1 = call <4 x i16> asm "; def $0", "=s"() 22435 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 3, i32 7, i32 7, i32 7> 22436 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 22437 ret void 22438} 22439 22440define void @s_shuffle_v4i16_v4i16__4_7_7_7() { 22441; GFX900-LABEL: s_shuffle_v4i16_v4i16__4_7_7_7: 22442; GFX900: ; %bb.0: 22443; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22444; GFX900-NEXT: ;;#ASMSTART 22445; GFX900-NEXT: ; def s[4:5] 22446; GFX900-NEXT: ;;#ASMEND 22447; GFX900-NEXT: s_pack_lh_b32_b16 s8, s4, s5 22448; GFX900-NEXT: s_pack_hh_b32_b16 s9, s5, s5 22449; GFX900-NEXT: ;;#ASMSTART 22450; GFX900-NEXT: ; use s[8:9] 22451; GFX900-NEXT: ;;#ASMEND 22452; GFX900-NEXT: s_setpc_b64 s[30:31] 22453; 22454; GFX90A-LABEL: s_shuffle_v4i16_v4i16__4_7_7_7: 22455; GFX90A: ; %bb.0: 22456; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22457; GFX90A-NEXT: ;;#ASMSTART 22458; GFX90A-NEXT: ; def s[4:5] 22459; GFX90A-NEXT: ;;#ASMEND 22460; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s4, s5 22461; GFX90A-NEXT: s_pack_hh_b32_b16 s9, s5, s5 22462; GFX90A-NEXT: ;;#ASMSTART 22463; GFX90A-NEXT: ; use s[8:9] 22464; GFX90A-NEXT: ;;#ASMEND 22465; GFX90A-NEXT: s_setpc_b64 s[30:31] 22466; 22467; GFX940-LABEL: s_shuffle_v4i16_v4i16__4_7_7_7: 22468; GFX940: ; %bb.0: 22469; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22470; GFX940-NEXT: ;;#ASMSTART 22471; GFX940-NEXT: ; def s[0:1] 22472; GFX940-NEXT: ;;#ASMEND 22473; GFX940-NEXT: s_pack_lh_b32_b16 s8, s0, s1 22474; GFX940-NEXT: s_pack_hh_b32_b16 s9, s1, s1 22475; GFX940-NEXT: ;;#ASMSTART 22476; GFX940-NEXT: ; use s[8:9] 22477; GFX940-NEXT: ;;#ASMEND 22478; GFX940-NEXT: s_setpc_b64 s[30:31] 22479 %vec0 = call <4 x i16> asm "; def $0", "=s"() 22480 %vec1 = call <4 x i16> asm "; def $0", "=s"() 22481 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 4, i32 7, i32 7, i32 7> 
22482 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 22483 ret void 22484} 22485 22486define void @s_shuffle_v4i16_v4i16__5_7_7_7() { 22487; GFX900-LABEL: s_shuffle_v4i16_v4i16__5_7_7_7: 22488; GFX900: ; %bb.0: 22489; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22490; GFX900-NEXT: ;;#ASMSTART 22491; GFX900-NEXT: ; def s[4:5] 22492; GFX900-NEXT: ;;#ASMEND 22493; GFX900-NEXT: s_pack_hh_b32_b16 s8, s4, s5 22494; GFX900-NEXT: s_pack_hh_b32_b16 s9, s5, s5 22495; GFX900-NEXT: ;;#ASMSTART 22496; GFX900-NEXT: ; use s[8:9] 22497; GFX900-NEXT: ;;#ASMEND 22498; GFX900-NEXT: s_setpc_b64 s[30:31] 22499; 22500; GFX90A-LABEL: s_shuffle_v4i16_v4i16__5_7_7_7: 22501; GFX90A: ; %bb.0: 22502; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22503; GFX90A-NEXT: ;;#ASMSTART 22504; GFX90A-NEXT: ; def s[4:5] 22505; GFX90A-NEXT: ;;#ASMEND 22506; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s4, s5 22507; GFX90A-NEXT: s_pack_hh_b32_b16 s9, s5, s5 22508; GFX90A-NEXT: ;;#ASMSTART 22509; GFX90A-NEXT: ; use s[8:9] 22510; GFX90A-NEXT: ;;#ASMEND 22511; GFX90A-NEXT: s_setpc_b64 s[30:31] 22512; 22513; GFX940-LABEL: s_shuffle_v4i16_v4i16__5_7_7_7: 22514; GFX940: ; %bb.0: 22515; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22516; GFX940-NEXT: ;;#ASMSTART 22517; GFX940-NEXT: ; def s[0:1] 22518; GFX940-NEXT: ;;#ASMEND 22519; GFX940-NEXT: s_pack_hh_b32_b16 s8, s0, s1 22520; GFX940-NEXT: s_pack_hh_b32_b16 s9, s1, s1 22521; GFX940-NEXT: ;;#ASMSTART 22522; GFX940-NEXT: ; use s[8:9] 22523; GFX940-NEXT: ;;#ASMEND 22524; GFX940-NEXT: s_setpc_b64 s[30:31] 22525 %vec0 = call <4 x i16> asm "; def $0", "=s"() 22526 %vec1 = call <4 x i16> asm "; def $0", "=s"() 22527 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 5, i32 7, i32 7, i32 7> 22528 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 22529 ret void 22530} 22531 22532define void @s_shuffle_v4i16_v4i16__6_7_7_7() { 22533; GFX900-LABEL: s_shuffle_v4i16_v4i16__6_7_7_7: 22534; GFX900: ; %bb.0: 
22535; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22536; GFX900-NEXT: ;;#ASMSTART 22537; GFX900-NEXT: ; def s[4:5] 22538; GFX900-NEXT: ;;#ASMEND 22539; GFX900-NEXT: s_pack_hh_b32_b16 s9, s5, s5 22540; GFX900-NEXT: s_mov_b32 s8, s5 22541; GFX900-NEXT: ;;#ASMSTART 22542; GFX900-NEXT: ; use s[8:9] 22543; GFX900-NEXT: ;;#ASMEND 22544; GFX900-NEXT: s_setpc_b64 s[30:31] 22545; 22546; GFX90A-LABEL: s_shuffle_v4i16_v4i16__6_7_7_7: 22547; GFX90A: ; %bb.0: 22548; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22549; GFX90A-NEXT: ;;#ASMSTART 22550; GFX90A-NEXT: ; def s[4:5] 22551; GFX90A-NEXT: ;;#ASMEND 22552; GFX90A-NEXT: s_pack_hh_b32_b16 s9, s5, s5 22553; GFX90A-NEXT: s_mov_b32 s8, s5 22554; GFX90A-NEXT: ;;#ASMSTART 22555; GFX90A-NEXT: ; use s[8:9] 22556; GFX90A-NEXT: ;;#ASMEND 22557; GFX90A-NEXT: s_setpc_b64 s[30:31] 22558; 22559; GFX940-LABEL: s_shuffle_v4i16_v4i16__6_7_7_7: 22560; GFX940: ; %bb.0: 22561; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22562; GFX940-NEXT: ;;#ASMSTART 22563; GFX940-NEXT: ; def s[0:1] 22564; GFX940-NEXT: ;;#ASMEND 22565; GFX940-NEXT: s_pack_hh_b32_b16 s9, s1, s1 22566; GFX940-NEXT: s_mov_b32 s8, s1 22567; GFX940-NEXT: ;;#ASMSTART 22568; GFX940-NEXT: ; use s[8:9] 22569; GFX940-NEXT: ;;#ASMEND 22570; GFX940-NEXT: s_setpc_b64 s[30:31] 22571 %vec0 = call <4 x i16> asm "; def $0", "=s"() 22572 %vec1 = call <4 x i16> asm "; def $0", "=s"() 22573 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 6, i32 7, i32 7, i32 7> 22574 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 22575 ret void 22576} 22577 22578define void @s_shuffle_v4i16_v4i16__7_u_7_7() { 22579; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_u_7_7: 22580; GFX900: ; %bb.0: 22581; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22582; GFX900-NEXT: ;;#ASMSTART 22583; GFX900-NEXT: ; def s[4:5] 22584; GFX900-NEXT: ;;#ASMEND 22585; GFX900-NEXT: s_lshr_b32 s8, s5, 16 22586; GFX900-NEXT: s_pack_hh_b32_b16 s9, s5, s5 22587; GFX900-NEXT: 
;;#ASMSTART 22588; GFX900-NEXT: ; use s[8:9] 22589; GFX900-NEXT: ;;#ASMEND 22590; GFX900-NEXT: s_setpc_b64 s[30:31] 22591; 22592; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_u_7_7: 22593; GFX90A: ; %bb.0: 22594; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22595; GFX90A-NEXT: ;;#ASMSTART 22596; GFX90A-NEXT: ; def s[4:5] 22597; GFX90A-NEXT: ;;#ASMEND 22598; GFX90A-NEXT: s_lshr_b32 s8, s5, 16 22599; GFX90A-NEXT: s_pack_hh_b32_b16 s9, s5, s5 22600; GFX90A-NEXT: ;;#ASMSTART 22601; GFX90A-NEXT: ; use s[8:9] 22602; GFX90A-NEXT: ;;#ASMEND 22603; GFX90A-NEXT: s_setpc_b64 s[30:31] 22604; 22605; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_u_7_7: 22606; GFX940: ; %bb.0: 22607; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22608; GFX940-NEXT: ;;#ASMSTART 22609; GFX940-NEXT: ; def s[0:1] 22610; GFX940-NEXT: ;;#ASMEND 22611; GFX940-NEXT: s_lshr_b32 s8, s1, 16 22612; GFX940-NEXT: s_pack_hh_b32_b16 s9, s1, s1 22613; GFX940-NEXT: ;;#ASMSTART 22614; GFX940-NEXT: ; use s[8:9] 22615; GFX940-NEXT: ;;#ASMEND 22616; GFX940-NEXT: s_setpc_b64 s[30:31] 22617 %vec0 = call <4 x i16> asm "; def $0", "=s"() 22618 %vec1 = call <4 x i16> asm "; def $0", "=s"() 22619 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 poison, i32 7, i32 7> 22620 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 22621 ret void 22622} 22623 22624define void @s_shuffle_v4i16_v4i16__7_0_7_7() { 22625; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_0_7_7: 22626; GFX900: ; %bb.0: 22627; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22628; GFX900-NEXT: ;;#ASMSTART 22629; GFX900-NEXT: ; def s[4:5] 22630; GFX900-NEXT: ;;#ASMEND 22631; GFX900-NEXT: ;;#ASMSTART 22632; GFX900-NEXT: ; def s[6:7] 22633; GFX900-NEXT: ;;#ASMEND 22634; GFX900-NEXT: s_lshr_b32 s5, s7, 16 22635; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4 22636; GFX900-NEXT: s_pack_hh_b32_b16 s9, s7, s7 22637; GFX900-NEXT: ;;#ASMSTART 22638; GFX900-NEXT: ; use s[8:9] 22639; GFX900-NEXT: ;;#ASMEND 22640; GFX900-NEXT: 
s_setpc_b64 s[30:31] 22641; 22642; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_0_7_7: 22643; GFX90A: ; %bb.0: 22644; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22645; GFX90A-NEXT: ;;#ASMSTART 22646; GFX90A-NEXT: ; def s[4:5] 22647; GFX90A-NEXT: ;;#ASMEND 22648; GFX90A-NEXT: ;;#ASMSTART 22649; GFX90A-NEXT: ; def s[6:7] 22650; GFX90A-NEXT: ;;#ASMEND 22651; GFX90A-NEXT: s_lshr_b32 s5, s7, 16 22652; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4 22653; GFX90A-NEXT: s_pack_hh_b32_b16 s9, s7, s7 22654; GFX90A-NEXT: ;;#ASMSTART 22655; GFX90A-NEXT: ; use s[8:9] 22656; GFX90A-NEXT: ;;#ASMEND 22657; GFX90A-NEXT: s_setpc_b64 s[30:31] 22658; 22659; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_0_7_7: 22660; GFX940: ; %bb.0: 22661; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22662; GFX940-NEXT: ;;#ASMSTART 22663; GFX940-NEXT: ; def s[0:1] 22664; GFX940-NEXT: ;;#ASMEND 22665; GFX940-NEXT: ;;#ASMSTART 22666; GFX940-NEXT: ; def s[2:3] 22667; GFX940-NEXT: ;;#ASMEND 22668; GFX940-NEXT: s_lshr_b32 s1, s3, 16 22669; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 22670; GFX940-NEXT: s_pack_hh_b32_b16 s9, s3, s3 22671; GFX940-NEXT: ;;#ASMSTART 22672; GFX940-NEXT: ; use s[8:9] 22673; GFX940-NEXT: ;;#ASMEND 22674; GFX940-NEXT: s_setpc_b64 s[30:31] 22675 %vec0 = call <4 x i16> asm "; def $0", "=s"() 22676 %vec1 = call <4 x i16> asm "; def $0", "=s"() 22677 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 0, i32 7, i32 7> 22678 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 22679 ret void 22680} 22681 22682define void @s_shuffle_v4i16_v4i16__7_1_7_7() { 22683; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_1_7_7: 22684; GFX900: ; %bb.0: 22685; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22686; GFX900-NEXT: ;;#ASMSTART 22687; GFX900-NEXT: ; def s[4:5] 22688; GFX900-NEXT: ;;#ASMEND 22689; GFX900-NEXT: ;;#ASMSTART 22690; GFX900-NEXT: ; def s[6:7] 22691; GFX900-NEXT: ;;#ASMEND 22692; GFX900-NEXT: s_pack_hh_b32_b16 s8, s7, s4 22693; GFX900-NEXT: 
s_pack_hh_b32_b16 s9, s7, s7 22694; GFX900-NEXT: ;;#ASMSTART 22695; GFX900-NEXT: ; use s[8:9] 22696; GFX900-NEXT: ;;#ASMEND 22697; GFX900-NEXT: s_setpc_b64 s[30:31] 22698; 22699; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_1_7_7: 22700; GFX90A: ; %bb.0: 22701; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22702; GFX90A-NEXT: ;;#ASMSTART 22703; GFX90A-NEXT: ; def s[4:5] 22704; GFX90A-NEXT: ;;#ASMEND 22705; GFX90A-NEXT: ;;#ASMSTART 22706; GFX90A-NEXT: ; def s[6:7] 22707; GFX90A-NEXT: ;;#ASMEND 22708; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s7, s4 22709; GFX90A-NEXT: s_pack_hh_b32_b16 s9, s7, s7 22710; GFX90A-NEXT: ;;#ASMSTART 22711; GFX90A-NEXT: ; use s[8:9] 22712; GFX90A-NEXT: ;;#ASMEND 22713; GFX90A-NEXT: s_setpc_b64 s[30:31] 22714; 22715; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_1_7_7: 22716; GFX940: ; %bb.0: 22717; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22718; GFX940-NEXT: ;;#ASMSTART 22719; GFX940-NEXT: ; def s[0:1] 22720; GFX940-NEXT: ;;#ASMEND 22721; GFX940-NEXT: ;;#ASMSTART 22722; GFX940-NEXT: ; def s[2:3] 22723; GFX940-NEXT: ;;#ASMEND 22724; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s0 22725; GFX940-NEXT: s_pack_hh_b32_b16 s9, s3, s3 22726; GFX940-NEXT: ;;#ASMSTART 22727; GFX940-NEXT: ; use s[8:9] 22728; GFX940-NEXT: ;;#ASMEND 22729; GFX940-NEXT: s_setpc_b64 s[30:31] 22730 %vec0 = call <4 x i16> asm "; def $0", "=s"() 22731 %vec1 = call <4 x i16> asm "; def $0", "=s"() 22732 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 1, i32 7, i32 7> 22733 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 22734 ret void 22735} 22736 22737define void @s_shuffle_v4i16_v4i16__7_2_7_7() { 22738; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_2_7_7: 22739; GFX900: ; %bb.0: 22740; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22741; GFX900-NEXT: ;;#ASMSTART 22742; GFX900-NEXT: ; def s[4:5] 22743; GFX900-NEXT: ;;#ASMEND 22744; GFX900-NEXT: ;;#ASMSTART 22745; GFX900-NEXT: ; def s[6:7] 22746; GFX900-NEXT: ;;#ASMEND 22747; 
GFX900-NEXT: s_lshr_b32 s4, s7, 16 22748; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s5 22749; GFX900-NEXT: s_pack_hh_b32_b16 s9, s7, s7 22750; GFX900-NEXT: ;;#ASMSTART 22751; GFX900-NEXT: ; use s[8:9] 22752; GFX900-NEXT: ;;#ASMEND 22753; GFX900-NEXT: s_setpc_b64 s[30:31] 22754; 22755; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_2_7_7: 22756; GFX90A: ; %bb.0: 22757; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22758; GFX90A-NEXT: ;;#ASMSTART 22759; GFX90A-NEXT: ; def s[4:5] 22760; GFX90A-NEXT: ;;#ASMEND 22761; GFX90A-NEXT: ;;#ASMSTART 22762; GFX90A-NEXT: ; def s[6:7] 22763; GFX90A-NEXT: ;;#ASMEND 22764; GFX90A-NEXT: s_lshr_b32 s4, s7, 16 22765; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s5 22766; GFX90A-NEXT: s_pack_hh_b32_b16 s9, s7, s7 22767; GFX90A-NEXT: ;;#ASMSTART 22768; GFX90A-NEXT: ; use s[8:9] 22769; GFX90A-NEXT: ;;#ASMEND 22770; GFX90A-NEXT: s_setpc_b64 s[30:31] 22771; 22772; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_2_7_7: 22773; GFX940: ; %bb.0: 22774; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22775; GFX940-NEXT: ;;#ASMSTART 22776; GFX940-NEXT: ; def s[0:1] 22777; GFX940-NEXT: ;;#ASMEND 22778; GFX940-NEXT: ;;#ASMSTART 22779; GFX940-NEXT: ; def s[2:3] 22780; GFX940-NEXT: ;;#ASMEND 22781; GFX940-NEXT: s_lshr_b32 s0, s3, 16 22782; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 22783; GFX940-NEXT: s_pack_hh_b32_b16 s9, s3, s3 22784; GFX940-NEXT: ;;#ASMSTART 22785; GFX940-NEXT: ; use s[8:9] 22786; GFX940-NEXT: ;;#ASMEND 22787; GFX940-NEXT: s_setpc_b64 s[30:31] 22788 %vec0 = call <4 x i16> asm "; def $0", "=s"() 22789 %vec1 = call <4 x i16> asm "; def $0", "=s"() 22790 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 2, i32 7, i32 7> 22791 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 22792 ret void 22793} 22794 22795define void @s_shuffle_v4i16_v4i16__7_3_7_7() { 22796; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_3_7_7: 22797; GFX900: ; %bb.0: 22798; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22799; 
GFX900-NEXT: ;;#ASMSTART 22800; GFX900-NEXT: ; def s[4:5] 22801; GFX900-NEXT: ;;#ASMEND 22802; GFX900-NEXT: ;;#ASMSTART 22803; GFX900-NEXT: ; def s[6:7] 22804; GFX900-NEXT: ;;#ASMEND 22805; GFX900-NEXT: s_pack_hh_b32_b16 s8, s7, s5 22806; GFX900-NEXT: s_pack_hh_b32_b16 s9, s7, s7 22807; GFX900-NEXT: ;;#ASMSTART 22808; GFX900-NEXT: ; use s[8:9] 22809; GFX900-NEXT: ;;#ASMEND 22810; GFX900-NEXT: s_setpc_b64 s[30:31] 22811; 22812; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_3_7_7: 22813; GFX90A: ; %bb.0: 22814; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22815; GFX90A-NEXT: ;;#ASMSTART 22816; GFX90A-NEXT: ; def s[4:5] 22817; GFX90A-NEXT: ;;#ASMEND 22818; GFX90A-NEXT: ;;#ASMSTART 22819; GFX90A-NEXT: ; def s[6:7] 22820; GFX90A-NEXT: ;;#ASMEND 22821; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s7, s5 22822; GFX90A-NEXT: s_pack_hh_b32_b16 s9, s7, s7 22823; GFX90A-NEXT: ;;#ASMSTART 22824; GFX90A-NEXT: ; use s[8:9] 22825; GFX90A-NEXT: ;;#ASMEND 22826; GFX90A-NEXT: s_setpc_b64 s[30:31] 22827; 22828; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_3_7_7: 22829; GFX940: ; %bb.0: 22830; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22831; GFX940-NEXT: ;;#ASMSTART 22832; GFX940-NEXT: ; def s[0:1] 22833; GFX940-NEXT: ;;#ASMEND 22834; GFX940-NEXT: ;;#ASMSTART 22835; GFX940-NEXT: ; def s[2:3] 22836; GFX940-NEXT: ;;#ASMEND 22837; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s1 22838; GFX940-NEXT: s_pack_hh_b32_b16 s9, s3, s3 22839; GFX940-NEXT: ;;#ASMSTART 22840; GFX940-NEXT: ; use s[8:9] 22841; GFX940-NEXT: ;;#ASMEND 22842; GFX940-NEXT: s_setpc_b64 s[30:31] 22843 %vec0 = call <4 x i16> asm "; def $0", "=s"() 22844 %vec1 = call <4 x i16> asm "; def $0", "=s"() 22845 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 3, i32 7, i32 7> 22846 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 22847 ret void 22848} 22849 22850define void @s_shuffle_v4i16_v4i16__7_4_7_7() { 22851; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_4_7_7: 22852; GFX900: ; %bb.0: 22853; 
GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22854; GFX900-NEXT: ;;#ASMSTART 22855; GFX900-NEXT: ; def s[4:5] 22856; GFX900-NEXT: ;;#ASMEND 22857; GFX900-NEXT: s_lshr_b32 s6, s5, 16 22858; GFX900-NEXT: s_pack_ll_b32_b16 s8, s6, s4 22859; GFX900-NEXT: s_pack_hh_b32_b16 s9, s5, s5 22860; GFX900-NEXT: ;;#ASMSTART 22861; GFX900-NEXT: ; use s[8:9] 22862; GFX900-NEXT: ;;#ASMEND 22863; GFX900-NEXT: s_setpc_b64 s[30:31] 22864; 22865; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_4_7_7: 22866; GFX90A: ; %bb.0: 22867; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22868; GFX90A-NEXT: ;;#ASMSTART 22869; GFX90A-NEXT: ; def s[4:5] 22870; GFX90A-NEXT: ;;#ASMEND 22871; GFX90A-NEXT: s_lshr_b32 s6, s5, 16 22872; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s6, s4 22873; GFX90A-NEXT: s_pack_hh_b32_b16 s9, s5, s5 22874; GFX90A-NEXT: ;;#ASMSTART 22875; GFX90A-NEXT: ; use s[8:9] 22876; GFX90A-NEXT: ;;#ASMEND 22877; GFX90A-NEXT: s_setpc_b64 s[30:31] 22878; 22879; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_4_7_7: 22880; GFX940: ; %bb.0: 22881; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22882; GFX940-NEXT: ;;#ASMSTART 22883; GFX940-NEXT: ; def s[0:1] 22884; GFX940-NEXT: ;;#ASMEND 22885; GFX940-NEXT: s_lshr_b32 s2, s1, 16 22886; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s0 22887; GFX940-NEXT: s_pack_hh_b32_b16 s9, s1, s1 22888; GFX940-NEXT: ;;#ASMSTART 22889; GFX940-NEXT: ; use s[8:9] 22890; GFX940-NEXT: ;;#ASMEND 22891; GFX940-NEXT: s_setpc_b64 s[30:31] 22892 %vec0 = call <4 x i16> asm "; def $0", "=s"() 22893 %vec1 = call <4 x i16> asm "; def $0", "=s"() 22894 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 4, i32 7, i32 7> 22895 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 22896 ret void 22897} 22898 22899define void @s_shuffle_v4i16_v4i16__7_5_7_7() { 22900; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_5_7_7: 22901; GFX900: ; %bb.0: 22902; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22903; GFX900-NEXT: ;;#ASMSTART 22904; GFX900-NEXT: 
; def s[4:5] 22905; GFX900-NEXT: ;;#ASMEND 22906; GFX900-NEXT: s_pack_hh_b32_b16 s8, s5, s4 22907; GFX900-NEXT: s_pack_hh_b32_b16 s9, s5, s5 22908; GFX900-NEXT: ;;#ASMSTART 22909; GFX900-NEXT: ; use s[8:9] 22910; GFX900-NEXT: ;;#ASMEND 22911; GFX900-NEXT: s_setpc_b64 s[30:31] 22912; 22913; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_5_7_7: 22914; GFX90A: ; %bb.0: 22915; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22916; GFX90A-NEXT: ;;#ASMSTART 22917; GFX90A-NEXT: ; def s[4:5] 22918; GFX90A-NEXT: ;;#ASMEND 22919; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s5, s4 22920; GFX90A-NEXT: s_pack_hh_b32_b16 s9, s5, s5 22921; GFX90A-NEXT: ;;#ASMSTART 22922; GFX90A-NEXT: ; use s[8:9] 22923; GFX90A-NEXT: ;;#ASMEND 22924; GFX90A-NEXT: s_setpc_b64 s[30:31] 22925; 22926; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_5_7_7: 22927; GFX940: ; %bb.0: 22928; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22929; GFX940-NEXT: ;;#ASMSTART 22930; GFX940-NEXT: ; def s[0:1] 22931; GFX940-NEXT: ;;#ASMEND 22932; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s0 22933; GFX940-NEXT: s_pack_hh_b32_b16 s9, s1, s1 22934; GFX940-NEXT: ;;#ASMSTART 22935; GFX940-NEXT: ; use s[8:9] 22936; GFX940-NEXT: ;;#ASMEND 22937; GFX940-NEXT: s_setpc_b64 s[30:31] 22938 %vec0 = call <4 x i16> asm "; def $0", "=s"() 22939 %vec1 = call <4 x i16> asm "; def $0", "=s"() 22940 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 5, i32 7, i32 7> 22941 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 22942 ret void 22943} 22944 22945define void @s_shuffle_v4i16_v4i16__7_6_7_7() { 22946; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_6_7_7: 22947; GFX900: ; %bb.0: 22948; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22949; GFX900-NEXT: ;;#ASMSTART 22950; GFX900-NEXT: ; def s[4:5] 22951; GFX900-NEXT: ;;#ASMEND 22952; GFX900-NEXT: s_lshr_b32 s4, s5, 16 22953; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s5 22954; GFX900-NEXT: s_pack_hh_b32_b16 s9, s5, s5 22955; GFX900-NEXT: ;;#ASMSTART 22956; 
GFX900-NEXT: ; use s[8:9] 22957; GFX900-NEXT: ;;#ASMEND 22958; GFX900-NEXT: s_setpc_b64 s[30:31] 22959; 22960; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_6_7_7: 22961; GFX90A: ; %bb.0: 22962; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22963; GFX90A-NEXT: ;;#ASMSTART 22964; GFX90A-NEXT: ; def s[4:5] 22965; GFX90A-NEXT: ;;#ASMEND 22966; GFX90A-NEXT: s_lshr_b32 s4, s5, 16 22967; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s5 22968; GFX90A-NEXT: s_pack_hh_b32_b16 s9, s5, s5 22969; GFX90A-NEXT: ;;#ASMSTART 22970; GFX90A-NEXT: ; use s[8:9] 22971; GFX90A-NEXT: ;;#ASMEND 22972; GFX90A-NEXT: s_setpc_b64 s[30:31] 22973; 22974; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_6_7_7: 22975; GFX940: ; %bb.0: 22976; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22977; GFX940-NEXT: ;;#ASMSTART 22978; GFX940-NEXT: ; def s[0:1] 22979; GFX940-NEXT: ;;#ASMEND 22980; GFX940-NEXT: s_lshr_b32 s0, s1, 16 22981; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 22982; GFX940-NEXT: s_pack_hh_b32_b16 s9, s1, s1 22983; GFX940-NEXT: ;;#ASMSTART 22984; GFX940-NEXT: ; use s[8:9] 22985; GFX940-NEXT: ;;#ASMEND 22986; GFX940-NEXT: s_setpc_b64 s[30:31] 22987 %vec0 = call <4 x i16> asm "; def $0", "=s"() 22988 %vec1 = call <4 x i16> asm "; def $0", "=s"() 22989 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 6, i32 7, i32 7> 22990 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 22991 ret void 22992} 22993 22994define void @s_shuffle_v4i16_v4i16__7_7_u_7() { 22995; GFX9-LABEL: s_shuffle_v4i16_v4i16__7_7_u_7: 22996; GFX9: ; %bb.0: 22997; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22998; GFX9-NEXT: ;;#ASMSTART 22999; GFX9-NEXT: ; def s[8:9] 23000; GFX9-NEXT: ;;#ASMEND 23001; GFX9-NEXT: s_pack_hh_b32_b16 s8, s9, s9 23002; GFX9-NEXT: ;;#ASMSTART 23003; GFX9-NEXT: ; use s[8:9] 23004; GFX9-NEXT: ;;#ASMEND 23005; GFX9-NEXT: s_setpc_b64 s[30:31] 23006 %vec0 = call <4 x i16> asm "; def $0", "=s"() 23007 %vec1 = call <4 x i16> asm "; def $0", "=s"() 23008 %shuf = 
shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 poison, i32 7> 23009 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 23010 ret void 23011} 23012 23013define void @s_shuffle_v4i16_v4i16__7_7_0_7() { 23014; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_0_7: 23015; GFX900: ; %bb.0: 23016; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 23017; GFX900-NEXT: ;;#ASMSTART 23018; GFX900-NEXT: ; def s[4:5] 23019; GFX900-NEXT: ;;#ASMEND 23020; GFX900-NEXT: ;;#ASMSTART 23021; GFX900-NEXT: ; def s[6:7] 23022; GFX900-NEXT: ;;#ASMEND 23023; GFX900-NEXT: s_pack_lh_b32_b16 s9, s4, s7 23024; GFX900-NEXT: s_pack_hh_b32_b16 s8, s7, s7 23025; GFX900-NEXT: ;;#ASMSTART 23026; GFX900-NEXT: ; use s[8:9] 23027; GFX900-NEXT: ;;#ASMEND 23028; GFX900-NEXT: s_setpc_b64 s[30:31] 23029; 23030; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_0_7: 23031; GFX90A: ; %bb.0: 23032; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 23033; GFX90A-NEXT: ;;#ASMSTART 23034; GFX90A-NEXT: ; def s[4:5] 23035; GFX90A-NEXT: ;;#ASMEND 23036; GFX90A-NEXT: ;;#ASMSTART 23037; GFX90A-NEXT: ; def s[6:7] 23038; GFX90A-NEXT: ;;#ASMEND 23039; GFX90A-NEXT: s_pack_lh_b32_b16 s9, s4, s7 23040; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s7, s7 23041; GFX90A-NEXT: ;;#ASMSTART 23042; GFX90A-NEXT: ; use s[8:9] 23043; GFX90A-NEXT: ;;#ASMEND 23044; GFX90A-NEXT: s_setpc_b64 s[30:31] 23045; 23046; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_0_7: 23047; GFX940: ; %bb.0: 23048; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 23049; GFX940-NEXT: ;;#ASMSTART 23050; GFX940-NEXT: ; def s[0:1] 23051; GFX940-NEXT: ;;#ASMEND 23052; GFX940-NEXT: ;;#ASMSTART 23053; GFX940-NEXT: ; def s[2:3] 23054; GFX940-NEXT: ;;#ASMEND 23055; GFX940-NEXT: s_pack_lh_b32_b16 s9, s0, s3 23056; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3 23057; GFX940-NEXT: ;;#ASMSTART 23058; GFX940-NEXT: ; use s[8:9] 23059; GFX940-NEXT: ;;#ASMEND 23060; GFX940-NEXT: s_setpc_b64 s[30:31] 23061 %vec0 = call <4 x i16> asm "; def $0", "=s"() 23062 
%vec1 = call <4 x i16> asm "; def $0", "=s"() 23063 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 0, i32 7> 23064 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 23065 ret void 23066} 23067 23068define void @s_shuffle_v4i16_v4i16__7_7_1_7() { 23069; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_1_7: 23070; GFX900: ; %bb.0: 23071; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 23072; GFX900-NEXT: ;;#ASMSTART 23073; GFX900-NEXT: ; def s[4:5] 23074; GFX900-NEXT: ;;#ASMEND 23075; GFX900-NEXT: ;;#ASMSTART 23076; GFX900-NEXT: ; def s[6:7] 23077; GFX900-NEXT: ;;#ASMEND 23078; GFX900-NEXT: s_pack_hh_b32_b16 s9, s4, s7 23079; GFX900-NEXT: s_pack_hh_b32_b16 s8, s7, s7 23080; GFX900-NEXT: ;;#ASMSTART 23081; GFX900-NEXT: ; use s[8:9] 23082; GFX900-NEXT: ;;#ASMEND 23083; GFX900-NEXT: s_setpc_b64 s[30:31] 23084; 23085; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_1_7: 23086; GFX90A: ; %bb.0: 23087; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 23088; GFX90A-NEXT: ;;#ASMSTART 23089; GFX90A-NEXT: ; def s[4:5] 23090; GFX90A-NEXT: ;;#ASMEND 23091; GFX90A-NEXT: ;;#ASMSTART 23092; GFX90A-NEXT: ; def s[6:7] 23093; GFX90A-NEXT: ;;#ASMEND 23094; GFX90A-NEXT: s_pack_hh_b32_b16 s9, s4, s7 23095; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s7, s7 23096; GFX90A-NEXT: ;;#ASMSTART 23097; GFX90A-NEXT: ; use s[8:9] 23098; GFX90A-NEXT: ;;#ASMEND 23099; GFX90A-NEXT: s_setpc_b64 s[30:31] 23100; 23101; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_1_7: 23102; GFX940: ; %bb.0: 23103; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 23104; GFX940-NEXT: ;;#ASMSTART 23105; GFX940-NEXT: ; def s[0:1] 23106; GFX940-NEXT: ;;#ASMEND 23107; GFX940-NEXT: ;;#ASMSTART 23108; GFX940-NEXT: ; def s[2:3] 23109; GFX940-NEXT: ;;#ASMEND 23110; GFX940-NEXT: s_pack_hh_b32_b16 s9, s0, s3 23111; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3 23112; GFX940-NEXT: ;;#ASMSTART 23113; GFX940-NEXT: ; use s[8:9] 23114; GFX940-NEXT: ;;#ASMEND 23115; GFX940-NEXT: s_setpc_b64 s[30:31] 
; ---------------------------------------------------------------------------
; Reviewer notes for the scalar shuffle tests s_shuffle_v4i16_v4i16__7_7_N_7
; (the tail of N = 1 and all of N = 2 .. 6 follow).
;
; Every test in this group has the same shape:
;   * %vec0 and %vec1 are <4 x i16> values produced by inline asm with an
;     "=s" output constraint.
;   * %shuf is a shufflevector of the two with mask <7, 7, N, 7>. Per the
;     shufflevector rules, mask values 0-3 pick lanes of %vec0 and 4-7 pick
;     lanes of %vec1, so only result lane 2 varies from test to test.
;   * The result is passed to a side-effecting inline asm "use" whose operand
;     carries the "{s[8:9]}" register constraint.
;
; For N = 2 and 3 the expected output contains two "; def" asm blocks. For
; N = 4, 5 and 6 the mask references only %vec1 lanes and the expected output
; contains a single "; def" block, although the IR still has both asm calls.
; Of the tests visible here, N = 6 is the only one whose expectations are
; shared under the common GFX9 prefix; the others carry separate per-CPU
; expectations.
;
; The assertion comments are autogenerated (the note at the top of the file
; names utils/update_llc_test_checks.py); regenerate them with that script
; rather than editing them by hand.
; ---------------------------------------------------------------------------
23116 %vec0 = call <4 x i16> asm "; def $0", "=s"() 23117 %vec1 = call <4 x i16> asm "; def $0", "=s"() 23118 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 1, i32 7> 23119 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 23120 ret void 23121} 23122 23123define void @s_shuffle_v4i16_v4i16__7_7_2_7() { 23124; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_2_7: 23125; GFX900: ; %bb.0: 23126; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 23127; GFX900-NEXT: ;;#ASMSTART 23128; GFX900-NEXT: ; def s[4:5] 23129; GFX900-NEXT: ;;#ASMEND 23130; GFX900-NEXT: ;;#ASMSTART 23131; GFX900-NEXT: ; def s[6:7] 23132; GFX900-NEXT: ;;#ASMEND 23133; GFX900-NEXT: s_pack_lh_b32_b16 s9, s5, s7 23134; GFX900-NEXT: s_pack_hh_b32_b16 s8, s7, s7 23135; GFX900-NEXT: ;;#ASMSTART 23136; GFX900-NEXT: ; use s[8:9] 23137; GFX900-NEXT: ;;#ASMEND 23138; GFX900-NEXT: s_setpc_b64 s[30:31] 23139; 23140; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_2_7: 23141; GFX90A: ; %bb.0: 23142; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 23143; GFX90A-NEXT: ;;#ASMSTART 23144; GFX90A-NEXT: ; def s[4:5] 23145; GFX90A-NEXT: ;;#ASMEND 23146; GFX90A-NEXT: ;;#ASMSTART 23147; GFX90A-NEXT: ; def s[6:7] 23148; GFX90A-NEXT: ;;#ASMEND 23149; GFX90A-NEXT: s_pack_lh_b32_b16 s9, s5, s7 23150; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s7, s7 23151; GFX90A-NEXT: ;;#ASMSTART 23152; GFX90A-NEXT: ; use s[8:9] 23153; GFX90A-NEXT: ;;#ASMEND 23154; GFX90A-NEXT: s_setpc_b64 s[30:31] 23155; 23156; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_2_7: 23157; GFX940: ; %bb.0: 23158; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 23159; GFX940-NEXT: ;;#ASMSTART 23160; GFX940-NEXT: ; def s[0:1] 23161; GFX940-NEXT: ;;#ASMEND 23162; GFX940-NEXT: ;;#ASMSTART 23163; GFX940-NEXT: ; def s[2:3] 23164; GFX940-NEXT: ;;#ASMEND 23165; GFX940-NEXT: s_pack_lh_b32_b16 s9, s1, s3 23166; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3 23167; GFX940-NEXT: ;;#ASMSTART 23168; GFX940-NEXT: ; use s[8:9] 23169; 
GFX940-NEXT: ;;#ASMEND 23170; GFX940-NEXT: s_setpc_b64 s[30:31] 23171 %vec0 = call <4 x i16> asm "; def $0", "=s"() 23172 %vec1 = call <4 x i16> asm "; def $0", "=s"() 23173 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 2, i32 7> 23174 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 23175 ret void 23176} 23177 23178define void @s_shuffle_v4i16_v4i16__7_7_3_7() { 23179; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_3_7: 23180; GFX900: ; %bb.0: 23181; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 23182; GFX900-NEXT: ;;#ASMSTART 23183; GFX900-NEXT: ; def s[4:5] 23184; GFX900-NEXT: ;;#ASMEND 23185; GFX900-NEXT: ;;#ASMSTART 23186; GFX900-NEXT: ; def s[6:7] 23187; GFX900-NEXT: ;;#ASMEND 23188; GFX900-NEXT: s_pack_hh_b32_b16 s9, s5, s7 23189; GFX900-NEXT: s_pack_hh_b32_b16 s8, s7, s7 23190; GFX900-NEXT: ;;#ASMSTART 23191; GFX900-NEXT: ; use s[8:9] 23192; GFX900-NEXT: ;;#ASMEND 23193; GFX900-NEXT: s_setpc_b64 s[30:31] 23194; 23195; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_3_7: 23196; GFX90A: ; %bb.0: 23197; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 23198; GFX90A-NEXT: ;;#ASMSTART 23199; GFX90A-NEXT: ; def s[4:5] 23200; GFX90A-NEXT: ;;#ASMEND 23201; GFX90A-NEXT: ;;#ASMSTART 23202; GFX90A-NEXT: ; def s[6:7] 23203; GFX90A-NEXT: ;;#ASMEND 23204; GFX90A-NEXT: s_pack_hh_b32_b16 s9, s5, s7 23205; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s7, s7 23206; GFX90A-NEXT: ;;#ASMSTART 23207; GFX90A-NEXT: ; use s[8:9] 23208; GFX90A-NEXT: ;;#ASMEND 23209; GFX90A-NEXT: s_setpc_b64 s[30:31] 23210; 23211; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_3_7: 23212; GFX940: ; %bb.0: 23213; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 23214; GFX940-NEXT: ;;#ASMSTART 23215; GFX940-NEXT: ; def s[0:1] 23216; GFX940-NEXT: ;;#ASMEND 23217; GFX940-NEXT: ;;#ASMSTART 23218; GFX940-NEXT: ; def s[2:3] 23219; GFX940-NEXT: ;;#ASMEND 23220; GFX940-NEXT: s_pack_hh_b32_b16 s9, s1, s3 23221; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3 23222; 
GFX940-NEXT: ;;#ASMSTART 23223; GFX940-NEXT: ; use s[8:9] 23224; GFX940-NEXT: ;;#ASMEND 23225; GFX940-NEXT: s_setpc_b64 s[30:31] 23226 %vec0 = call <4 x i16> asm "; def $0", "=s"() 23227 %vec1 = call <4 x i16> asm "; def $0", "=s"() 23228 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 3, i32 7> 23229 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 23230 ret void 23231} 23232 23233define void @s_shuffle_v4i16_v4i16__7_7_4_7() { 23234; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_4_7: 23235; GFX900: ; %bb.0: 23236; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 23237; GFX900-NEXT: ;;#ASMSTART 23238; GFX900-NEXT: ; def s[4:5] 23239; GFX900-NEXT: ;;#ASMEND 23240; GFX900-NEXT: s_pack_lh_b32_b16 s9, s4, s5 23241; GFX900-NEXT: s_pack_hh_b32_b16 s8, s5, s5 23242; GFX900-NEXT: ;;#ASMSTART 23243; GFX900-NEXT: ; use s[8:9] 23244; GFX900-NEXT: ;;#ASMEND 23245; GFX900-NEXT: s_setpc_b64 s[30:31] 23246; 23247; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_4_7: 23248; GFX90A: ; %bb.0: 23249; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 23250; GFX90A-NEXT: ;;#ASMSTART 23251; GFX90A-NEXT: ; def s[4:5] 23252; GFX90A-NEXT: ;;#ASMEND 23253; GFX90A-NEXT: s_pack_lh_b32_b16 s9, s4, s5 23254; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s5, s5 23255; GFX90A-NEXT: ;;#ASMSTART 23256; GFX90A-NEXT: ; use s[8:9] 23257; GFX90A-NEXT: ;;#ASMEND 23258; GFX90A-NEXT: s_setpc_b64 s[30:31] 23259; 23260; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_4_7: 23261; GFX940: ; %bb.0: 23262; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 23263; GFX940-NEXT: ;;#ASMSTART 23264; GFX940-NEXT: ; def s[0:1] 23265; GFX940-NEXT: ;;#ASMEND 23266; GFX940-NEXT: s_pack_lh_b32_b16 s9, s0, s1 23267; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s1 23268; GFX940-NEXT: ;;#ASMSTART 23269; GFX940-NEXT: ; use s[8:9] 23270; GFX940-NEXT: ;;#ASMEND 23271; GFX940-NEXT: s_setpc_b64 s[30:31] 23272 %vec0 = call <4 x i16> asm "; def $0", "=s"() 23273 %vec1 = call <4 x i16> asm "; def $0", 
"=s"() 23274 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 4, i32 7> 23275 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 23276 ret void 23277} 23278 23279define void @s_shuffle_v4i16_v4i16__7_7_5_7() { 23280; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_5_7: 23281; GFX900: ; %bb.0: 23282; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 23283; GFX900-NEXT: ;;#ASMSTART 23284; GFX900-NEXT: ; def s[4:5] 23285; GFX900-NEXT: ;;#ASMEND 23286; GFX900-NEXT: s_pack_hh_b32_b16 s9, s4, s5 23287; GFX900-NEXT: s_pack_hh_b32_b16 s8, s5, s5 23288; GFX900-NEXT: ;;#ASMSTART 23289; GFX900-NEXT: ; use s[8:9] 23290; GFX900-NEXT: ;;#ASMEND 23291; GFX900-NEXT: s_setpc_b64 s[30:31] 23292; 23293; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_5_7: 23294; GFX90A: ; %bb.0: 23295; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 23296; GFX90A-NEXT: ;;#ASMSTART 23297; GFX90A-NEXT: ; def s[4:5] 23298; GFX90A-NEXT: ;;#ASMEND 23299; GFX90A-NEXT: s_pack_hh_b32_b16 s9, s4, s5 23300; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s5, s5 23301; GFX90A-NEXT: ;;#ASMSTART 23302; GFX90A-NEXT: ; use s[8:9] 23303; GFX90A-NEXT: ;;#ASMEND 23304; GFX90A-NEXT: s_setpc_b64 s[30:31] 23305; 23306; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_5_7: 23307; GFX940: ; %bb.0: 23308; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 23309; GFX940-NEXT: ;;#ASMSTART 23310; GFX940-NEXT: ; def s[0:1] 23311; GFX940-NEXT: ;;#ASMEND 23312; GFX940-NEXT: s_pack_hh_b32_b16 s9, s0, s1 23313; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s1 23314; GFX940-NEXT: ;;#ASMSTART 23315; GFX940-NEXT: ; use s[8:9] 23316; GFX940-NEXT: ;;#ASMEND 23317; GFX940-NEXT: s_setpc_b64 s[30:31] 23318 %vec0 = call <4 x i16> asm "; def $0", "=s"() 23319 %vec1 = call <4 x i16> asm "; def $0", "=s"() 23320 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 5, i32 7> 23321 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 23322 ret void 23323} 23324 23325define void 
@s_shuffle_v4i16_v4i16__7_7_6_7() { 23326; GFX9-LABEL: s_shuffle_v4i16_v4i16__7_7_6_7: 23327; GFX9: ; %bb.0: 23328; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 23329; GFX9-NEXT: ;;#ASMSTART 23330; GFX9-NEXT: ; def s[8:9] 23331; GFX9-NEXT: ;;#ASMEND 23332; GFX9-NEXT: s_pack_hh_b32_b16 s8, s9, s9 23333; GFX9-NEXT: ;;#ASMSTART 23334; GFX9-NEXT: ; use s[8:9] 23335; GFX9-NEXT: ;;#ASMEND 23336; GFX9-NEXT: s_setpc_b64 s[30:31] 23337 %vec0 = call <4 x i16> asm "; def $0", "=s"() 23338 %vec1 = call <4 x i16> asm "; def $0", "=s"() 23339 %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 7> 23340 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf) 23341 ret void 23342} 23343;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: 23344; GFX90APLUS: {{.*}} 23345