; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX90A %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX940 %s

; Codegen checks for shufflevector with <8 x i16> operands producing a
; <2 x i16> result that is stored through the inreg global pointer argument.
; The function name suffix __A_B spells the shuffle mask: A is the mask index
; for result element 0 and B the index for result element 1, where "u" stands
; for a poison mask element. Mask indices 0-7 select from the first shuffle
; operand and 8-15 from the second. The operand vectors are produced by
; "; def $0" inline asm calls with a VGPR output constraint.

define void @v_shuffle_v2i16_v8i16__u_u(ptr addrspace(1) inreg %ptr) {
; GFX9-LABEL: v_shuffle_v2i16_v8i16__u_u:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <8 x i16> asm "; def $0", "=v"()
  %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> poison
  store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
  ret void
}

define void @v_shuffle_v2i16_v8i16__0_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i16_v8i16__0_u:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    v_mov_b32_e32 v4, 0
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:3]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    global_store_dword v4, v0, s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2i16_v8i16__0_u:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:3]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v2i16_v8i16__0_u:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    v_mov_b32_e32 v4, 0
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:3]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <8 x i16> asm "; def $0", "=v"()
  %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 0, i32 poison>
  store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
  ret void
}

define void @v_shuffle_v2i16_v8i16__1_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i16_v8i16__1_u:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:3]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_mov_b32_e32 v4, 0
; GFX900-NEXT:    v_alignbit_b32 v0, s4, v0, 16
; GFX900-NEXT:    global_store_dword v4, v0, s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2i16_v8i16__1_u:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:3]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
; GFX90A-NEXT:    v_alignbit_b32 v0, s4, v0, 16
; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v2i16_v8i16__1_u:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:3]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    v_mov_b32_e32 v4, 0
; GFX940-NEXT:    v_alignbit_b32 v0, s0, v0, 16
; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <8 x i16> asm "; def $0", "=v"()
  %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 1, i32 poison>
  store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
  ret void
}

define void @v_shuffle_v2i16_v8i16__2_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i16_v8i16__2_u:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    v_mov_b32_e32 v4, 0
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:3]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    global_store_dword v4, v1, s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2i16_v8i16__2_u:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:3]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    global_store_dword v4, v1, s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v2i16_v8i16__2_u:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    v_mov_b32_e32 v4, 0
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:3]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    global_store_dword v4, v1, s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <8 x i16> asm "; def $0", "=v"()
  %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 2, i32 poison>
  store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
  ret void
}

define void @v_shuffle_v2i16_v8i16__3_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i16_v8i16__3_u:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:3]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_mov_b32_e32 v4, 0
; GFX900-NEXT:    v_alignbit_b32 v0, s4, v1, 16
; GFX900-NEXT:    global_store_dword v4, v0, s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2i16_v8i16__3_u:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:3]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
; GFX90A-NEXT:    v_alignbit_b32 v0, s4, v1, 16
; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v2i16_v8i16__3_u:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:3]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    v_mov_b32_e32 v4, 0
; GFX940-NEXT:    v_alignbit_b32 v0, s0, v1, 16
; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <8 x i16> asm "; def $0", "=v"()
  %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 3, i32 poison>
  store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
  ret void
}

define void @v_shuffle_v2i16_v8i16__4_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i16_v8i16__4_u:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    v_mov_b32_e32 v4, 0
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:3]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    global_store_dword v4, v2, s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2i16_v8i16__4_u:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:3]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    global_store_dword v4, v2, s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v2i16_v8i16__4_u:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    v_mov_b32_e32 v4, 0
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:3]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    global_store_dword v4, v2, s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <8 x i16> asm "; def $0", "=v"()
  %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 4, i32 poison>
  store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
  ret void
}

define void @v_shuffle_v2i16_v8i16__5_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i16_v8i16__5_u:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:3]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_mov_b32_e32 v4, 0
; GFX900-NEXT:    v_alignbit_b32 v0, s4, v2, 16
; GFX900-NEXT:    global_store_dword v4, v0, s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2i16_v8i16__5_u:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:3]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
; GFX90A-NEXT:    v_alignbit_b32 v0, s4, v2, 16
; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v2i16_v8i16__5_u:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:3]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    v_mov_b32_e32 v4, 0
; GFX940-NEXT:    v_alignbit_b32 v0, s0, v2, 16
; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <8 x i16> asm "; def $0", "=v"()
  %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 5, i32 poison>
  store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
  ret void
}

define void @v_shuffle_v2i16_v8i16__6_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i16_v8i16__6_u:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    v_mov_b32_e32 v4, 0
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:3]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    global_store_dword v4, v3, s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2i16_v8i16__6_u:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:3]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    global_store_dword v4, v3, s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v2i16_v8i16__6_u:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    v_mov_b32_e32 v4, 0
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:3]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    global_store_dword v4, v3, s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <8 x i16> asm "; def $0", "=v"()
  %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 6, i32 poison>
  store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
  ret void
}

define void @v_shuffle_v2i16_v8i16__7_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i16_v8i16__7_u:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:3]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_mov_b32_e32 v4, 0
; GFX900-NEXT:    v_alignbit_b32 v0, s4, v3, 16
; GFX900-NEXT:    global_store_dword v4, v0, s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2i16_v8i16__7_u:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:3]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
; GFX90A-NEXT:    v_alignbit_b32 v0, s4, v3, 16
; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v2i16_v8i16__7_u:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:3]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    v_mov_b32_e32 v4, 0
; GFX940-NEXT:    v_alignbit_b32 v0, s0, v3, 16
; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <8 x i16> asm "; def $0", "=v"()
  %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 7, i32 poison>
  store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
  ret void
}

; Mask index 8 selects from the second shuffle operand, which is poison in
; this function; the expected output contains neither the inline asm def nor
; a store.
define void @v_shuffle_v2i16_v8i16__8_u(ptr addrspace(1) inreg %ptr) {
; GFX9-LABEL: v_shuffle_v2i16_v8i16__8_u:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <8 x i16> asm "; def $0", "=v"()
  %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 8, i32 poison>
  store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
  ret void
}

define void @v_shuffle_v2i16_v8i16__9_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i16_v8i16__9_u:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:3]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_mov_b32_e32 v4, 0
; GFX900-NEXT:    v_alignbit_b32 v0, s4, v0, 16
; GFX900-NEXT:    global_store_dword v4, v0, s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2i16_v8i16__9_u:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:3]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
; GFX90A-NEXT:    v_alignbit_b32 v0, s4, v0, 16
; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v2i16_v8i16__9_u:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:3]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    v_mov_b32_e32 v4, 0
; GFX940-NEXT:    v_alignbit_b32 v0, s0, v0, 16
; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <8 x i16> asm "; def $0", "=v"()
  %vec1 = call <8 x i16> asm "; def $0", "=v"()
  %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 9, i32 poison>
  store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
  ret void
}

define void @v_shuffle_v2i16_v8i16__10_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i16_v8i16__10_u:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    v_mov_b32_e32 v4, 0
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:3]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    global_store_dword v4, v1, s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2i16_v8i16__10_u:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:3]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    global_store_dword v4, v1, s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v2i16_v8i16__10_u:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    v_mov_b32_e32 v4, 0
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:3]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    global_store_dword v4, v1, s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <8 x i16> asm "; def $0", "=v"()
  %vec1 = call <8 x i16> asm "; def $0", "=v"()
  %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 10, i32 poison>
  store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
  ret void
}

define void @v_shuffle_v2i16_v8i16__11_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i16_v8i16__11_u:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:3]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_mov_b32_e32 v4, 0
; GFX900-NEXT:    v_alignbit_b32 v0, s4, v1, 16
; GFX900-NEXT:    global_store_dword v4, v0, s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2i16_v8i16__11_u:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:3]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
; GFX90A-NEXT:    v_alignbit_b32 v0, s4, v1, 16
; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v2i16_v8i16__11_u:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:3]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    v_mov_b32_e32 v4, 0
; GFX940-NEXT:    v_alignbit_b32 v0, s0, v1, 16
; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <8 x i16> asm "; def $0", "=v"()
  %vec1 = call <8 x i16> asm "; def $0", "=v"()
  %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 11, i32 poison>
  store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
  ret void
}

define void @v_shuffle_v2i16_v8i16__12_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i16_v8i16__12_u:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    v_mov_b32_e32 v4, 0
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:3]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    global_store_dword v4, v2, s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2i16_v8i16__12_u:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:3]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    global_store_dword v4, v2, s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v2i16_v8i16__12_u:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    v_mov_b32_e32 v4, 0
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:3]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    global_store_dword v4, v2, s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <8 x i16> asm "; def $0", "=v"()
  %vec1 = call <8 x i16> asm "; def $0", "=v"()
  %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 12, i32 poison>
  store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
  ret void
}

define void @v_shuffle_v2i16_v8i16__13_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i16_v8i16__13_u:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:3]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_mov_b32_e32 v4, 0
; GFX900-NEXT:    v_alignbit_b32 v0, s4, v2, 16
; GFX900-NEXT:    global_store_dword v4, v0, s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2i16_v8i16__13_u:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:3]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
; GFX90A-NEXT:    v_alignbit_b32 v0, s4, v2, 16
; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v2i16_v8i16__13_u:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:3]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    v_mov_b32_e32 v4, 0
; GFX940-NEXT:    v_alignbit_b32 v0, s0, v2, 16
; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <8 x i16> asm "; def $0", "=v"()
  %vec1 = call <8 x i16> asm "; def $0", "=v"()
  %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 13, i32 poison>
  store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
  ret void
}

define void @v_shuffle_v2i16_v8i16__14_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i16_v8i16__14_u:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    v_mov_b32_e32 v4, 0
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:3]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    global_store_dword v4, v3, s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2i16_v8i16__14_u:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:3]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    global_store_dword v4, v3, s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v2i16_v8i16__14_u:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    v_mov_b32_e32 v4, 0
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:3]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    global_store_dword v4, v3, s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <8 x i16> asm "; def $0", "=v"()
  %vec1 = call <8 x i16> asm "; def $0", "=v"()
  %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 14, i32 poison>
  store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
  ret void
}

define void @v_shuffle_v2i16_v8i16__15_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i16_v8i16__15_u:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:3]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_mov_b32_e32 v4, 0
; GFX900-NEXT:    v_alignbit_b32 v0, s4, v3, 16
; GFX900-NEXT:    global_store_dword v4, v0, s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2i16_v8i16__15_u:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:3]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
; GFX90A-NEXT:    v_alignbit_b32 v0, s4, v3, 16
; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v2i16_v8i16__15_u:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:3]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    v_mov_b32_e32 v4, 0
; GFX940-NEXT:    v_alignbit_b32 v0, s0, v3, 16
; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <8 x i16> asm "; def $0", "=v"()
  %vec1 = call <8 x i16> asm "; def $0", "=v"()
  %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 15, i32 poison>
  store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
  ret void
}

define void @v_shuffle_v2i16_v8i16__15_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i16_v8i16__15_0:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:3]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_mov_b32_e32 v5, 0
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[1:4]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_alignbit_b32 v0, v0, v4, 16
; GFX900-NEXT:    global_store_dword v5, v0, s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2i16_v8i16__15_0:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:3]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[2:5]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_alignbit_b32 v0, v0, v5, 16
; GFX90A-NEXT:    global_store_dword v6, v0, s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v2i16_v8i16__15_0:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:3]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    v_mov_b32_e32 v6, 0
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[2:5]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_nop 0
; GFX940-NEXT:    v_alignbit_b32 v0, v0, v5, 16
; GFX940-NEXT:    global_store_dword v6, v0, s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <8 x i16> asm "; def $0", "=v"()
  %vec1 = call <8 x i16> asm "; def $0", "=v"()
  %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 15, i32 0>
  store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
  ret void
}

define void @v_shuffle_v2i16_v8i16__15_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i16_v8i16__15_1:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:3]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
; GFX900-NEXT:    v_mov_b32_e32 v5, 0
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[1:4]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_perm_b32 v0, v0, v4, s4
; GFX900-NEXT:    global_store_dword v5, v0, s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2i16_v8i16__15_1:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:3]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[2:5]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_perm_b32 v0, v0, v5, s4
; GFX90A-NEXT:    global_store_dword v6, v0, s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v2i16_v8i16__15_1:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:3]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
; GFX940-NEXT:    v_mov_b32_e32 v6, 0
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[2:5]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_nop 0
; GFX940-NEXT:    v_perm_b32 v0, v0, v5, s2
; GFX940-NEXT:    global_store_dword v6, v0, s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <8 x i16> asm "; def $0", "=v"()
  %vec1 = call <8 x i16> asm "; def $0", "=v"()
  %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 15, i32 1>
  store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
  ret void
}

define void @v_shuffle_v2i16_v8i16__15_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i16_v8i16__15_2:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:3]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_mov_b32_e32 v6, 0
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[2:5]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_alignbit_b32 v0, v1, v5, 16
; GFX900-NEXT:    global_store_dword v6, v0, s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2i16_v8i16__15_2:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:3]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[2:5]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_alignbit_b32 v0, v1, v5, 16
; GFX90A-NEXT:    global_store_dword v6, v0, s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v2i16_v8i16__15_2:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:3]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    v_mov_b32_e32 v6, 0
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[2:5]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_nop 0
; GFX940-NEXT:    v_alignbit_b32 v0, v1, v5, 16
; GFX940-NEXT:    global_store_dword v6, v0, s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <8 x i16> asm "; def $0", "=v"()
  %vec1 = call <8 x i16> asm "; def $0", "=v"()
  %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 15, i32 2>
  store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
  ret void
}

define void @v_shuffle_v2i16_v8i16__15_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i16_v8i16__15_3:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:3]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
; GFX900-NEXT:    v_mov_b32_e32 v6, 0
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[2:5]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_perm_b32 v0, v1, v5, s4
; GFX900-NEXT:    global_store_dword v6, v0, s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2i16_v8i16__15_3:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:3]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[2:5]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_perm_b32 v0, v1, v5, s4
; GFX90A-NEXT:    global_store_dword v6, v0, s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v2i16_v8i16__15_3:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:3]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
; GFX940-NEXT:    v_mov_b32_e32 v6, 0
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[2:5]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_nop 0
; GFX940-NEXT:    v_perm_b32 v0, v1, v5, s2
; GFX940-NEXT:    global_store_dword v6, v0, s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <8 x i16> asm "; def $0", "=v"()
  %vec1 = call <8 x i16> asm "; def $0", "=v"()
  %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 15, i32 3>
  store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
  ret void
}

define void @v_shuffle_v2i16_v8i16__15_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i16_v8i16__15_4:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:3]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_mov_b32_e32 v7, 0
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[3:6]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_alignbit_b32 v0, v2, v6, 16
; GFX900-NEXT:    global_store_dword v7, v0, s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2i16_v8i16__15_4:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:3]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_mov_b32_e32 v8, 0
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[4:7]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_alignbit_b32 v0, v2, v7, 16
; GFX90A-NEXT:    global_store_dword v8, v0, s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v2i16_v8i16__15_4:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:3]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    v_mov_b32_e32 v8, 0
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[4:7]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_nop 0
; GFX940-NEXT:    v_alignbit_b32 v0, v2, v7, 16
; GFX940-NEXT:    global_store_dword v8, v0, s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <8 x i16> asm "; def $0", "=v"()
  %vec1 = call <8 x i16> asm "; def $0", "=v"()
  %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 15, i32 4>
  store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
  ret void
}

define void @v_shuffle_v2i16_v8i16__15_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i16_v8i16__15_5:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:3]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
; GFX900-NEXT:    v_mov_b32_e32 v7, 0
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[3:6]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_perm_b32 v0, v2, v6, s4
; GFX900-NEXT:    global_store_dword v7, v0, s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2i16_v8i16__15_5:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:3]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
; GFX90A-NEXT:    v_mov_b32_e32 v8, 0
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[4:7]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_perm_b32 v0, v2, v7, s4
; GFX90A-NEXT:    global_store_dword v8, v0, s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v2i16_v8i16__15_5:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:3]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
; GFX940-NEXT:    v_mov_b32_e32 v8, 0
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[4:7]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_nop 0
; GFX940-NEXT:    v_perm_b32 v0, v2, v7, s2
; GFX940-NEXT:    global_store_dword v8, v0, s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <8 x i16> asm "; def $0", "=v"()
  %vec1 = call <8 x i16> asm "; def $0", "=v"()
  %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 15, i32 5>
  store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4
  ret void
}

define void @v_shuffle_v2i16_v8i16__15_6(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i16_v8i16__15_6:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:3]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_mov_b32_e32 v8, 0
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[4:7]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_alignbit_b32 v0, v3, v7, 16
; GFX900-NEXT:    global_store_dword v8, v0, s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2i16_v8i16__15_6:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:3]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_mov_b32_e32 v8, 0
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[4:7]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_alignbit_b32 v0, v3, v7, 16
; GFX90A-NEXT:    global_store_dword v8, v0, s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v2i16_v8i16__15_6:
;
GFX940: ; %bb.0: 1005; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1006; GFX940-NEXT: ;;#ASMSTART 1007; GFX940-NEXT: ; def v[0:3] 1008; GFX940-NEXT: ;;#ASMEND 1009; GFX940-NEXT: v_mov_b32_e32 v8, 0 1010; GFX940-NEXT: ;;#ASMSTART 1011; GFX940-NEXT: ; def v[4:7] 1012; GFX940-NEXT: ;;#ASMEND 1013; GFX940-NEXT: s_nop 0 1014; GFX940-NEXT: v_alignbit_b32 v0, v3, v7, 16 1015; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 1016; GFX940-NEXT: s_waitcnt vmcnt(0) 1017; GFX940-NEXT: s_setpc_b64 s[30:31] 1018 %vec0 = call <8 x i16> asm "; def $0", "=v"() 1019 %vec1 = call <8 x i16> asm "; def $0", "=v"() 1020 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 15, i32 6> 1021 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 1022 ret void 1023} 1024 1025define void @v_shuffle_v2i16_v8i16__15_7(ptr addrspace(1) inreg %ptr) { 1026; GFX900-LABEL: v_shuffle_v2i16_v8i16__15_7: 1027; GFX900: ; %bb.0: 1028; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1029; GFX900-NEXT: ;;#ASMSTART 1030; GFX900-NEXT: ; def v[0:3] 1031; GFX900-NEXT: ;;#ASMEND 1032; GFX900-NEXT: s_mov_b32 s4, 0x7060302 1033; GFX900-NEXT: v_mov_b32_e32 v8, 0 1034; GFX900-NEXT: ;;#ASMSTART 1035; GFX900-NEXT: ; def v[4:7] 1036; GFX900-NEXT: ;;#ASMEND 1037; GFX900-NEXT: v_perm_b32 v0, v3, v7, s4 1038; GFX900-NEXT: global_store_dword v8, v0, s[16:17] 1039; GFX900-NEXT: s_waitcnt vmcnt(0) 1040; GFX900-NEXT: s_setpc_b64 s[30:31] 1041; 1042; GFX90A-LABEL: v_shuffle_v2i16_v8i16__15_7: 1043; GFX90A: ; %bb.0: 1044; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1045; GFX90A-NEXT: ;;#ASMSTART 1046; GFX90A-NEXT: ; def v[0:3] 1047; GFX90A-NEXT: ;;#ASMEND 1048; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 1049; GFX90A-NEXT: v_mov_b32_e32 v8, 0 1050; GFX90A-NEXT: ;;#ASMSTART 1051; GFX90A-NEXT: ; def v[4:7] 1052; GFX90A-NEXT: ;;#ASMEND 1053; GFX90A-NEXT: v_perm_b32 v0, v3, v7, s4 1054; GFX90A-NEXT: global_store_dword v8, v0, s[16:17] 1055; GFX90A-NEXT: s_waitcnt vmcnt(0) 1056; GFX90A-NEXT: 
s_setpc_b64 s[30:31] 1057; 1058; GFX940-LABEL: v_shuffle_v2i16_v8i16__15_7: 1059; GFX940: ; %bb.0: 1060; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1061; GFX940-NEXT: ;;#ASMSTART 1062; GFX940-NEXT: ; def v[0:3] 1063; GFX940-NEXT: ;;#ASMEND 1064; GFX940-NEXT: s_mov_b32 s2, 0x7060302 1065; GFX940-NEXT: v_mov_b32_e32 v8, 0 1066; GFX940-NEXT: ;;#ASMSTART 1067; GFX940-NEXT: ; def v[4:7] 1068; GFX940-NEXT: ;;#ASMEND 1069; GFX940-NEXT: s_nop 0 1070; GFX940-NEXT: v_perm_b32 v0, v3, v7, s2 1071; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 1072; GFX940-NEXT: s_waitcnt vmcnt(0) 1073; GFX940-NEXT: s_setpc_b64 s[30:31] 1074 %vec0 = call <8 x i16> asm "; def $0", "=v"() 1075 %vec1 = call <8 x i16> asm "; def $0", "=v"() 1076 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 15, i32 7> 1077 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 1078 ret void 1079} 1080 1081define void @v_shuffle_v2i16_v8i16__15_8(ptr addrspace(1) inreg %ptr) { 1082; GFX900-LABEL: v_shuffle_v2i16_v8i16__15_8: 1083; GFX900: ; %bb.0: 1084; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1085; GFX900-NEXT: ;;#ASMSTART 1086; GFX900-NEXT: ; def v[0:3] 1087; GFX900-NEXT: ;;#ASMEND 1088; GFX900-NEXT: v_mov_b32_e32 v4, 0 1089; GFX900-NEXT: v_alignbit_b32 v0, v0, v3, 16 1090; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 1091; GFX900-NEXT: s_waitcnt vmcnt(0) 1092; GFX900-NEXT: s_setpc_b64 s[30:31] 1093; 1094; GFX90A-LABEL: v_shuffle_v2i16_v8i16__15_8: 1095; GFX90A: ; %bb.0: 1096; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1097; GFX90A-NEXT: ;;#ASMSTART 1098; GFX90A-NEXT: ; def v[0:3] 1099; GFX90A-NEXT: ;;#ASMEND 1100; GFX90A-NEXT: v_mov_b32_e32 v4, 0 1101; GFX90A-NEXT: v_alignbit_b32 v0, v0, v3, 16 1102; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 1103; GFX90A-NEXT: s_waitcnt vmcnt(0) 1104; GFX90A-NEXT: s_setpc_b64 s[30:31] 1105; 1106; GFX940-LABEL: v_shuffle_v2i16_v8i16__15_8: 1107; GFX940: ; %bb.0: 1108; GFX940-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) 1109; GFX940-NEXT: ;;#ASMSTART 1110; GFX940-NEXT: ; def v[0:3] 1111; GFX940-NEXT: ;;#ASMEND 1112; GFX940-NEXT: v_mov_b32_e32 v4, 0 1113; GFX940-NEXT: v_alignbit_b32 v0, v0, v3, 16 1114; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 1115; GFX940-NEXT: s_waitcnt vmcnt(0) 1116; GFX940-NEXT: s_setpc_b64 s[30:31] 1117 %vec0 = call <8 x i16> asm "; def $0", "=v"() 1118 %vec1 = call <8 x i16> asm "; def $0", "=v"() 1119 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 15, i32 8> 1120 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 1121 ret void 1122} 1123 1124define void @v_shuffle_v2i16_v8i16__15_9(ptr addrspace(1) inreg %ptr) { 1125; GFX900-LABEL: v_shuffle_v2i16_v8i16__15_9: 1126; GFX900: ; %bb.0: 1127; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1128; GFX900-NEXT: ;;#ASMSTART 1129; GFX900-NEXT: ; def v[0:3] 1130; GFX900-NEXT: ;;#ASMEND 1131; GFX900-NEXT: s_mov_b32 s4, 0x7060302 1132; GFX900-NEXT: v_mov_b32_e32 v4, 0 1133; GFX900-NEXT: v_perm_b32 v0, v0, v3, s4 1134; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 1135; GFX900-NEXT: s_waitcnt vmcnt(0) 1136; GFX900-NEXT: s_setpc_b64 s[30:31] 1137; 1138; GFX90A-LABEL: v_shuffle_v2i16_v8i16__15_9: 1139; GFX90A: ; %bb.0: 1140; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1141; GFX90A-NEXT: ;;#ASMSTART 1142; GFX90A-NEXT: ; def v[0:3] 1143; GFX90A-NEXT: ;;#ASMEND 1144; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 1145; GFX90A-NEXT: v_mov_b32_e32 v4, 0 1146; GFX90A-NEXT: v_perm_b32 v0, v0, v3, s4 1147; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 1148; GFX90A-NEXT: s_waitcnt vmcnt(0) 1149; GFX90A-NEXT: s_setpc_b64 s[30:31] 1150; 1151; GFX940-LABEL: v_shuffle_v2i16_v8i16__15_9: 1152; GFX940: ; %bb.0: 1153; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1154; GFX940-NEXT: ;;#ASMSTART 1155; GFX940-NEXT: ; def v[0:3] 1156; GFX940-NEXT: ;;#ASMEND 1157; GFX940-NEXT: s_mov_b32 s2, 0x7060302 1158; GFX940-NEXT: v_mov_b32_e32 v4, 0 1159; GFX940-NEXT: 
v_perm_b32 v0, v0, v3, s2 1160; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 1161; GFX940-NEXT: s_waitcnt vmcnt(0) 1162; GFX940-NEXT: s_setpc_b64 s[30:31] 1163 %vec0 = call <8 x i16> asm "; def $0", "=v"() 1164 %vec1 = call <8 x i16> asm "; def $0", "=v"() 1165 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 15, i32 9> 1166 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 1167 ret void 1168} 1169 1170define void @v_shuffle_v2i16_v8i16__15_10(ptr addrspace(1) inreg %ptr) { 1171; GFX900-LABEL: v_shuffle_v2i16_v8i16__15_10: 1172; GFX900: ; %bb.0: 1173; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1174; GFX900-NEXT: ;;#ASMSTART 1175; GFX900-NEXT: ; def v[0:3] 1176; GFX900-NEXT: ;;#ASMEND 1177; GFX900-NEXT: v_mov_b32_e32 v4, 0 1178; GFX900-NEXT: v_alignbit_b32 v0, v1, v3, 16 1179; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 1180; GFX900-NEXT: s_waitcnt vmcnt(0) 1181; GFX900-NEXT: s_setpc_b64 s[30:31] 1182; 1183; GFX90A-LABEL: v_shuffle_v2i16_v8i16__15_10: 1184; GFX90A: ; %bb.0: 1185; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1186; GFX90A-NEXT: ;;#ASMSTART 1187; GFX90A-NEXT: ; def v[0:3] 1188; GFX90A-NEXT: ;;#ASMEND 1189; GFX90A-NEXT: v_mov_b32_e32 v4, 0 1190; GFX90A-NEXT: v_alignbit_b32 v0, v1, v3, 16 1191; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 1192; GFX90A-NEXT: s_waitcnt vmcnt(0) 1193; GFX90A-NEXT: s_setpc_b64 s[30:31] 1194; 1195; GFX940-LABEL: v_shuffle_v2i16_v8i16__15_10: 1196; GFX940: ; %bb.0: 1197; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1198; GFX940-NEXT: ;;#ASMSTART 1199; GFX940-NEXT: ; def v[0:3] 1200; GFX940-NEXT: ;;#ASMEND 1201; GFX940-NEXT: v_mov_b32_e32 v4, 0 1202; GFX940-NEXT: v_alignbit_b32 v0, v1, v3, 16 1203; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 1204; GFX940-NEXT: s_waitcnt vmcnt(0) 1205; GFX940-NEXT: s_setpc_b64 s[30:31] 1206 %vec0 = call <8 x i16> asm "; def $0", "=v"() 1207 %vec1 = call <8 x i16> asm "; def $0", "=v"() 1208 %shuf = shufflevector <8 
x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 15, i32 10> 1209 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 1210 ret void 1211} 1212 1213define void @v_shuffle_v2i16_v8i16__15_11(ptr addrspace(1) inreg %ptr) { 1214; GFX900-LABEL: v_shuffle_v2i16_v8i16__15_11: 1215; GFX900: ; %bb.0: 1216; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1217; GFX900-NEXT: ;;#ASMSTART 1218; GFX900-NEXT: ; def v[0:3] 1219; GFX900-NEXT: ;;#ASMEND 1220; GFX900-NEXT: s_mov_b32 s4, 0x7060302 1221; GFX900-NEXT: v_mov_b32_e32 v4, 0 1222; GFX900-NEXT: v_perm_b32 v0, v1, v3, s4 1223; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 1224; GFX900-NEXT: s_waitcnt vmcnt(0) 1225; GFX900-NEXT: s_setpc_b64 s[30:31] 1226; 1227; GFX90A-LABEL: v_shuffle_v2i16_v8i16__15_11: 1228; GFX90A: ; %bb.0: 1229; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1230; GFX90A-NEXT: ;;#ASMSTART 1231; GFX90A-NEXT: ; def v[0:3] 1232; GFX90A-NEXT: ;;#ASMEND 1233; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 1234; GFX90A-NEXT: v_mov_b32_e32 v4, 0 1235; GFX90A-NEXT: v_perm_b32 v0, v1, v3, s4 1236; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 1237; GFX90A-NEXT: s_waitcnt vmcnt(0) 1238; GFX90A-NEXT: s_setpc_b64 s[30:31] 1239; 1240; GFX940-LABEL: v_shuffle_v2i16_v8i16__15_11: 1241; GFX940: ; %bb.0: 1242; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1243; GFX940-NEXT: ;;#ASMSTART 1244; GFX940-NEXT: ; def v[0:3] 1245; GFX940-NEXT: ;;#ASMEND 1246; GFX940-NEXT: s_mov_b32 s2, 0x7060302 1247; GFX940-NEXT: v_mov_b32_e32 v4, 0 1248; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 1249; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 1250; GFX940-NEXT: s_waitcnt vmcnt(0) 1251; GFX940-NEXT: s_setpc_b64 s[30:31] 1252 %vec0 = call <8 x i16> asm "; def $0", "=v"() 1253 %vec1 = call <8 x i16> asm "; def $0", "=v"() 1254 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 15, i32 11> 1255 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 1256 ret void 1257} 1258 1259define void 
@v_shuffle_v2i16_v8i16__15_12(ptr addrspace(1) inreg %ptr) { 1260; GFX900-LABEL: v_shuffle_v2i16_v8i16__15_12: 1261; GFX900: ; %bb.0: 1262; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1263; GFX900-NEXT: ;;#ASMSTART 1264; GFX900-NEXT: ; def v[0:3] 1265; GFX900-NEXT: ;;#ASMEND 1266; GFX900-NEXT: v_mov_b32_e32 v4, 0 1267; GFX900-NEXT: v_alignbit_b32 v0, v2, v3, 16 1268; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 1269; GFX900-NEXT: s_waitcnt vmcnt(0) 1270; GFX900-NEXT: s_setpc_b64 s[30:31] 1271; 1272; GFX90A-LABEL: v_shuffle_v2i16_v8i16__15_12: 1273; GFX90A: ; %bb.0: 1274; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1275; GFX90A-NEXT: ;;#ASMSTART 1276; GFX90A-NEXT: ; def v[0:3] 1277; GFX90A-NEXT: ;;#ASMEND 1278; GFX90A-NEXT: v_mov_b32_e32 v4, 0 1279; GFX90A-NEXT: v_alignbit_b32 v0, v2, v3, 16 1280; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 1281; GFX90A-NEXT: s_waitcnt vmcnt(0) 1282; GFX90A-NEXT: s_setpc_b64 s[30:31] 1283; 1284; GFX940-LABEL: v_shuffle_v2i16_v8i16__15_12: 1285; GFX940: ; %bb.0: 1286; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1287; GFX940-NEXT: ;;#ASMSTART 1288; GFX940-NEXT: ; def v[0:3] 1289; GFX940-NEXT: ;;#ASMEND 1290; GFX940-NEXT: v_mov_b32_e32 v4, 0 1291; GFX940-NEXT: v_alignbit_b32 v0, v2, v3, 16 1292; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 1293; GFX940-NEXT: s_waitcnt vmcnt(0) 1294; GFX940-NEXT: s_setpc_b64 s[30:31] 1295 %vec0 = call <8 x i16> asm "; def $0", "=v"() 1296 %vec1 = call <8 x i16> asm "; def $0", "=v"() 1297 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 15, i32 12> 1298 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 1299 ret void 1300} 1301 1302define void @v_shuffle_v2i16_v8i16__15_13(ptr addrspace(1) inreg %ptr) { 1303; GFX900-LABEL: v_shuffle_v2i16_v8i16__15_13: 1304; GFX900: ; %bb.0: 1305; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1306; GFX900-NEXT: ;;#ASMSTART 1307; GFX900-NEXT: ; def v[0:3] 1308; GFX900-NEXT: ;;#ASMEND 1309; 
GFX900-NEXT: s_mov_b32 s4, 0x7060302 1310; GFX900-NEXT: v_mov_b32_e32 v4, 0 1311; GFX900-NEXT: v_perm_b32 v0, v2, v3, s4 1312; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 1313; GFX900-NEXT: s_waitcnt vmcnt(0) 1314; GFX900-NEXT: s_setpc_b64 s[30:31] 1315; 1316; GFX90A-LABEL: v_shuffle_v2i16_v8i16__15_13: 1317; GFX90A: ; %bb.0: 1318; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1319; GFX90A-NEXT: ;;#ASMSTART 1320; GFX90A-NEXT: ; def v[0:3] 1321; GFX90A-NEXT: ;;#ASMEND 1322; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 1323; GFX90A-NEXT: v_mov_b32_e32 v4, 0 1324; GFX90A-NEXT: v_perm_b32 v0, v2, v3, s4 1325; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 1326; GFX90A-NEXT: s_waitcnt vmcnt(0) 1327; GFX90A-NEXT: s_setpc_b64 s[30:31] 1328; 1329; GFX940-LABEL: v_shuffle_v2i16_v8i16__15_13: 1330; GFX940: ; %bb.0: 1331; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1332; GFX940-NEXT: ;;#ASMSTART 1333; GFX940-NEXT: ; def v[0:3] 1334; GFX940-NEXT: ;;#ASMEND 1335; GFX940-NEXT: s_mov_b32 s2, 0x7060302 1336; GFX940-NEXT: v_mov_b32_e32 v4, 0 1337; GFX940-NEXT: v_perm_b32 v0, v2, v3, s2 1338; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 1339; GFX940-NEXT: s_waitcnt vmcnt(0) 1340; GFX940-NEXT: s_setpc_b64 s[30:31] 1341 %vec0 = call <8 x i16> asm "; def $0", "=v"() 1342 %vec1 = call <8 x i16> asm "; def $0", "=v"() 1343 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 15, i32 13> 1344 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 1345 ret void 1346} 1347 1348define void @v_shuffle_v2i16_v8i16__15_14(ptr addrspace(1) inreg %ptr) { 1349; GFX900-LABEL: v_shuffle_v2i16_v8i16__15_14: 1350; GFX900: ; %bb.0: 1351; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1352; GFX900-NEXT: ;;#ASMSTART 1353; GFX900-NEXT: ; def v[0:3] 1354; GFX900-NEXT: ;;#ASMEND 1355; GFX900-NEXT: v_mov_b32_e32 v4, 0 1356; GFX900-NEXT: v_alignbit_b32 v0, v3, v3, 16 1357; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 1358; GFX900-NEXT: s_waitcnt vmcnt(0) 
1359; GFX900-NEXT: s_setpc_b64 s[30:31] 1360; 1361; GFX90A-LABEL: v_shuffle_v2i16_v8i16__15_14: 1362; GFX90A: ; %bb.0: 1363; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1364; GFX90A-NEXT: ;;#ASMSTART 1365; GFX90A-NEXT: ; def v[0:3] 1366; GFX90A-NEXT: ;;#ASMEND 1367; GFX90A-NEXT: v_mov_b32_e32 v4, 0 1368; GFX90A-NEXT: v_alignbit_b32 v0, v3, v3, 16 1369; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 1370; GFX90A-NEXT: s_waitcnt vmcnt(0) 1371; GFX90A-NEXT: s_setpc_b64 s[30:31] 1372; 1373; GFX940-LABEL: v_shuffle_v2i16_v8i16__15_14: 1374; GFX940: ; %bb.0: 1375; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1376; GFX940-NEXT: ;;#ASMSTART 1377; GFX940-NEXT: ; def v[0:3] 1378; GFX940-NEXT: ;;#ASMEND 1379; GFX940-NEXT: v_mov_b32_e32 v4, 0 1380; GFX940-NEXT: v_alignbit_b32 v0, v3, v3, 16 1381; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 1382; GFX940-NEXT: s_waitcnt vmcnt(0) 1383; GFX940-NEXT: s_setpc_b64 s[30:31] 1384 %vec0 = call <8 x i16> asm "; def $0", "=v"() 1385 %vec1 = call <8 x i16> asm "; def $0", "=v"() 1386 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 15, i32 14> 1387 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 1388 ret void 1389} 1390 1391define void @v_shuffle_v2i16_v8i16__15_15(ptr addrspace(1) inreg %ptr) { 1392; GFX900-LABEL: v_shuffle_v2i16_v8i16__15_15: 1393; GFX900: ; %bb.0: 1394; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1395; GFX900-NEXT: ;;#ASMSTART 1396; GFX900-NEXT: ; def v[0:3] 1397; GFX900-NEXT: ;;#ASMEND 1398; GFX900-NEXT: s_mov_b32 s4, 0x7060302 1399; GFX900-NEXT: v_mov_b32_e32 v4, 0 1400; GFX900-NEXT: v_perm_b32 v0, v3, v3, s4 1401; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 1402; GFX900-NEXT: s_waitcnt vmcnt(0) 1403; GFX900-NEXT: s_setpc_b64 s[30:31] 1404; 1405; GFX90A-LABEL: v_shuffle_v2i16_v8i16__15_15: 1406; GFX90A: ; %bb.0: 1407; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1408; GFX90A-NEXT: ;;#ASMSTART 1409; GFX90A-NEXT: ; def v[0:3] 1410; 
GFX90A-NEXT: ;;#ASMEND 1411; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 1412; GFX90A-NEXT: v_mov_b32_e32 v4, 0 1413; GFX90A-NEXT: v_perm_b32 v0, v3, v3, s4 1414; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 1415; GFX90A-NEXT: s_waitcnt vmcnt(0) 1416; GFX90A-NEXT: s_setpc_b64 s[30:31] 1417; 1418; GFX940-LABEL: v_shuffle_v2i16_v8i16__15_15: 1419; GFX940: ; %bb.0: 1420; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1421; GFX940-NEXT: ;;#ASMSTART 1422; GFX940-NEXT: ; def v[0:3] 1423; GFX940-NEXT: ;;#ASMEND 1424; GFX940-NEXT: s_mov_b32 s2, 0x7060302 1425; GFX940-NEXT: v_mov_b32_e32 v4, 0 1426; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 1427; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 1428; GFX940-NEXT: s_waitcnt vmcnt(0) 1429; GFX940-NEXT: s_setpc_b64 s[30:31] 1430 %vec0 = call <8 x i16> asm "; def $0", "=v"() 1431 %vec1 = call <8 x i16> asm "; def $0", "=v"() 1432 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 15, i32 15> 1433 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 1434 ret void 1435} 1436 1437define void @v_shuffle_v2i16_v8i16__u_0(ptr addrspace(1) inreg %ptr) { 1438; GFX900-LABEL: v_shuffle_v2i16_v8i16__u_0: 1439; GFX900: ; %bb.0: 1440; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1441; GFX900-NEXT: ;;#ASMSTART 1442; GFX900-NEXT: ; def v[0:3] 1443; GFX900-NEXT: ;;#ASMEND 1444; GFX900-NEXT: v_mov_b32_e32 v4, 0 1445; GFX900-NEXT: v_lshlrev_b32_e32 v0, 16, v0 1446; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 1447; GFX900-NEXT: s_waitcnt vmcnt(0) 1448; GFX900-NEXT: s_setpc_b64 s[30:31] 1449; 1450; GFX90A-LABEL: v_shuffle_v2i16_v8i16__u_0: 1451; GFX90A: ; %bb.0: 1452; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1453; GFX90A-NEXT: ;;#ASMSTART 1454; GFX90A-NEXT: ; def v[0:3] 1455; GFX90A-NEXT: ;;#ASMEND 1456; GFX90A-NEXT: v_mov_b32_e32 v4, 0 1457; GFX90A-NEXT: v_lshlrev_b32_e32 v0, 16, v0 1458; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 1459; GFX90A-NEXT: s_waitcnt vmcnt(0) 1460; GFX90A-NEXT: 
s_setpc_b64 s[30:31] 1461; 1462; GFX940-LABEL: v_shuffle_v2i16_v8i16__u_0: 1463; GFX940: ; %bb.0: 1464; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1465; GFX940-NEXT: ;;#ASMSTART 1466; GFX940-NEXT: ; def v[0:3] 1467; GFX940-NEXT: ;;#ASMEND 1468; GFX940-NEXT: v_mov_b32_e32 v4, 0 1469; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v0 1470; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 1471; GFX940-NEXT: s_waitcnt vmcnt(0) 1472; GFX940-NEXT: s_setpc_b64 s[30:31] 1473 %vec0 = call <8 x i16> asm "; def $0", "=v"() 1474 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 poison, i32 0> 1475 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 1476 ret void 1477} 1478 1479define void @v_shuffle_v2i16_v8i16__0_0(ptr addrspace(1) inreg %ptr) { 1480; GFX900-LABEL: v_shuffle_v2i16_v8i16__0_0: 1481; GFX900: ; %bb.0: 1482; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1483; GFX900-NEXT: ;;#ASMSTART 1484; GFX900-NEXT: ; def v[0:3] 1485; GFX900-NEXT: ;;#ASMEND 1486; GFX900-NEXT: s_mov_b32 s4, 0x5040100 1487; GFX900-NEXT: v_mov_b32_e32 v4, 0 1488; GFX900-NEXT: v_perm_b32 v0, v0, v0, s4 1489; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 1490; GFX900-NEXT: s_waitcnt vmcnt(0) 1491; GFX900-NEXT: s_setpc_b64 s[30:31] 1492; 1493; GFX90A-LABEL: v_shuffle_v2i16_v8i16__0_0: 1494; GFX90A: ; %bb.0: 1495; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1496; GFX90A-NEXT: ;;#ASMSTART 1497; GFX90A-NEXT: ; def v[0:3] 1498; GFX90A-NEXT: ;;#ASMEND 1499; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 1500; GFX90A-NEXT: v_mov_b32_e32 v4, 0 1501; GFX90A-NEXT: v_perm_b32 v0, v0, v0, s4 1502; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 1503; GFX90A-NEXT: s_waitcnt vmcnt(0) 1504; GFX90A-NEXT: s_setpc_b64 s[30:31] 1505; 1506; GFX940-LABEL: v_shuffle_v2i16_v8i16__0_0: 1507; GFX940: ; %bb.0: 1508; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1509; GFX940-NEXT: ;;#ASMSTART 1510; GFX940-NEXT: ; def v[0:3] 1511; GFX940-NEXT: ;;#ASMEND 1512; GFX940-NEXT: 
s_mov_b32 s2, 0x5040100 1513; GFX940-NEXT: v_mov_b32_e32 v4, 0 1514; GFX940-NEXT: v_perm_b32 v0, v0, v0, s2 1515; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 1516; GFX940-NEXT: s_waitcnt vmcnt(0) 1517; GFX940-NEXT: s_setpc_b64 s[30:31] 1518 %vec0 = call <8 x i16> asm "; def $0", "=v"() 1519 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> zeroinitializer 1520 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 1521 ret void 1522} 1523 1524define void @v_shuffle_v2i16_v8i16__1_0(ptr addrspace(1) inreg %ptr) { 1525; GFX900-LABEL: v_shuffle_v2i16_v8i16__1_0: 1526; GFX900: ; %bb.0: 1527; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1528; GFX900-NEXT: ;;#ASMSTART 1529; GFX900-NEXT: ; def v[0:3] 1530; GFX900-NEXT: ;;#ASMEND 1531; GFX900-NEXT: v_mov_b32_e32 v4, 0 1532; GFX900-NEXT: v_alignbit_b32 v0, v0, v0, 16 1533; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 1534; GFX900-NEXT: s_waitcnt vmcnt(0) 1535; GFX900-NEXT: s_setpc_b64 s[30:31] 1536; 1537; GFX90A-LABEL: v_shuffle_v2i16_v8i16__1_0: 1538; GFX90A: ; %bb.0: 1539; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1540; GFX90A-NEXT: ;;#ASMSTART 1541; GFX90A-NEXT: ; def v[0:3] 1542; GFX90A-NEXT: ;;#ASMEND 1543; GFX90A-NEXT: v_mov_b32_e32 v4, 0 1544; GFX90A-NEXT: v_alignbit_b32 v0, v0, v0, 16 1545; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 1546; GFX90A-NEXT: s_waitcnt vmcnt(0) 1547; GFX90A-NEXT: s_setpc_b64 s[30:31] 1548; 1549; GFX940-LABEL: v_shuffle_v2i16_v8i16__1_0: 1550; GFX940: ; %bb.0: 1551; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1552; GFX940-NEXT: ;;#ASMSTART 1553; GFX940-NEXT: ; def v[0:3] 1554; GFX940-NEXT: ;;#ASMEND 1555; GFX940-NEXT: v_mov_b32_e32 v4, 0 1556; GFX940-NEXT: v_alignbit_b32 v0, v0, v0, 16 1557; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 1558; GFX940-NEXT: s_waitcnt vmcnt(0) 1559; GFX940-NEXT: s_setpc_b64 s[30:31] 1560 %vec0 = call <8 x i16> asm "; def $0", "=v"() 1561 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> 
poison, <2 x i32> <i32 1, i32 0> 1562 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 1563 ret void 1564} 1565 1566define void @v_shuffle_v2i16_v8i16__2_0(ptr addrspace(1) inreg %ptr) { 1567; GFX900-LABEL: v_shuffle_v2i16_v8i16__2_0: 1568; GFX900: ; %bb.0: 1569; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1570; GFX900-NEXT: ;;#ASMSTART 1571; GFX900-NEXT: ; def v[0:3] 1572; GFX900-NEXT: ;;#ASMEND 1573; GFX900-NEXT: s_mov_b32 s4, 0x5040100 1574; GFX900-NEXT: v_mov_b32_e32 v4, 0 1575; GFX900-NEXT: v_perm_b32 v0, v0, v1, s4 1576; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 1577; GFX900-NEXT: s_waitcnt vmcnt(0) 1578; GFX900-NEXT: s_setpc_b64 s[30:31] 1579; 1580; GFX90A-LABEL: v_shuffle_v2i16_v8i16__2_0: 1581; GFX90A: ; %bb.0: 1582; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1583; GFX90A-NEXT: ;;#ASMSTART 1584; GFX90A-NEXT: ; def v[0:3] 1585; GFX90A-NEXT: ;;#ASMEND 1586; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 1587; GFX90A-NEXT: v_mov_b32_e32 v4, 0 1588; GFX90A-NEXT: v_perm_b32 v0, v0, v1, s4 1589; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 1590; GFX90A-NEXT: s_waitcnt vmcnt(0) 1591; GFX90A-NEXT: s_setpc_b64 s[30:31] 1592; 1593; GFX940-LABEL: v_shuffle_v2i16_v8i16__2_0: 1594; GFX940: ; %bb.0: 1595; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1596; GFX940-NEXT: ;;#ASMSTART 1597; GFX940-NEXT: ; def v[0:3] 1598; GFX940-NEXT: ;;#ASMEND 1599; GFX940-NEXT: s_mov_b32 s2, 0x5040100 1600; GFX940-NEXT: v_mov_b32_e32 v4, 0 1601; GFX940-NEXT: v_perm_b32 v0, v0, v1, s2 1602; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 1603; GFX940-NEXT: s_waitcnt vmcnt(0) 1604; GFX940-NEXT: s_setpc_b64 s[30:31] 1605 %vec0 = call <8 x i16> asm "; def $0", "=v"() 1606 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 2, i32 0> 1607 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 1608 ret void 1609} 1610 1611define void @v_shuffle_v2i16_v8i16__3_0(ptr addrspace(1) inreg %ptr) { 1612; GFX900-LABEL: 
v_shuffle_v2i16_v8i16__3_0: 1613; GFX900: ; %bb.0: 1614; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1615; GFX900-NEXT: ;;#ASMSTART 1616; GFX900-NEXT: ; def v[0:3] 1617; GFX900-NEXT: ;;#ASMEND 1618; GFX900-NEXT: v_mov_b32_e32 v4, 0 1619; GFX900-NEXT: v_alignbit_b32 v0, v0, v1, 16 1620; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 1621; GFX900-NEXT: s_waitcnt vmcnt(0) 1622; GFX900-NEXT: s_setpc_b64 s[30:31] 1623; 1624; GFX90A-LABEL: v_shuffle_v2i16_v8i16__3_0: 1625; GFX90A: ; %bb.0: 1626; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1627; GFX90A-NEXT: ;;#ASMSTART 1628; GFX90A-NEXT: ; def v[0:3] 1629; GFX90A-NEXT: ;;#ASMEND 1630; GFX90A-NEXT: v_mov_b32_e32 v4, 0 1631; GFX90A-NEXT: v_alignbit_b32 v0, v0, v1, 16 1632; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 1633; GFX90A-NEXT: s_waitcnt vmcnt(0) 1634; GFX90A-NEXT: s_setpc_b64 s[30:31] 1635; 1636; GFX940-LABEL: v_shuffle_v2i16_v8i16__3_0: 1637; GFX940: ; %bb.0: 1638; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1639; GFX940-NEXT: ;;#ASMSTART 1640; GFX940-NEXT: ; def v[0:3] 1641; GFX940-NEXT: ;;#ASMEND 1642; GFX940-NEXT: v_mov_b32_e32 v4, 0 1643; GFX940-NEXT: v_alignbit_b32 v0, v0, v1, 16 1644; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 1645; GFX940-NEXT: s_waitcnt vmcnt(0) 1646; GFX940-NEXT: s_setpc_b64 s[30:31] 1647 %vec0 = call <8 x i16> asm "; def $0", "=v"() 1648 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 3, i32 0> 1649 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 1650 ret void 1651} 1652 1653define void @v_shuffle_v2i16_v8i16__4_0(ptr addrspace(1) inreg %ptr) { 1654; GFX900-LABEL: v_shuffle_v2i16_v8i16__4_0: 1655; GFX900: ; %bb.0: 1656; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1657; GFX900-NEXT: ;;#ASMSTART 1658; GFX900-NEXT: ; def v[0:3] 1659; GFX900-NEXT: ;;#ASMEND 1660; GFX900-NEXT: s_mov_b32 s4, 0x5040100 1661; GFX900-NEXT: v_mov_b32_e32 v4, 0 1662; GFX900-NEXT: v_perm_b32 v0, v0, v2, s4 1663; GFX900-NEXT: 
global_store_dword v4, v0, s[16:17] 1664; GFX900-NEXT: s_waitcnt vmcnt(0) 1665; GFX900-NEXT: s_setpc_b64 s[30:31] 1666; 1667; GFX90A-LABEL: v_shuffle_v2i16_v8i16__4_0: 1668; GFX90A: ; %bb.0: 1669; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1670; GFX90A-NEXT: ;;#ASMSTART 1671; GFX90A-NEXT: ; def v[0:3] 1672; GFX90A-NEXT: ;;#ASMEND 1673; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 1674; GFX90A-NEXT: v_mov_b32_e32 v4, 0 1675; GFX90A-NEXT: v_perm_b32 v0, v0, v2, s4 1676; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 1677; GFX90A-NEXT: s_waitcnt vmcnt(0) 1678; GFX90A-NEXT: s_setpc_b64 s[30:31] 1679; 1680; GFX940-LABEL: v_shuffle_v2i16_v8i16__4_0: 1681; GFX940: ; %bb.0: 1682; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1683; GFX940-NEXT: ;;#ASMSTART 1684; GFX940-NEXT: ; def v[0:3] 1685; GFX940-NEXT: ;;#ASMEND 1686; GFX940-NEXT: s_mov_b32 s2, 0x5040100 1687; GFX940-NEXT: v_mov_b32_e32 v4, 0 1688; GFX940-NEXT: v_perm_b32 v0, v0, v2, s2 1689; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 1690; GFX940-NEXT: s_waitcnt vmcnt(0) 1691; GFX940-NEXT: s_setpc_b64 s[30:31] 1692 %vec0 = call <8 x i16> asm "; def $0", "=v"() 1693 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 4, i32 0> 1694 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 1695 ret void 1696} 1697 1698define void @v_shuffle_v2i16_v8i16__5_0(ptr addrspace(1) inreg %ptr) { 1699; GFX900-LABEL: v_shuffle_v2i16_v8i16__5_0: 1700; GFX900: ; %bb.0: 1701; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1702; GFX900-NEXT: ;;#ASMSTART 1703; GFX900-NEXT: ; def v[0:3] 1704; GFX900-NEXT: ;;#ASMEND 1705; GFX900-NEXT: v_mov_b32_e32 v4, 0 1706; GFX900-NEXT: v_alignbit_b32 v0, v0, v2, 16 1707; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 1708; GFX900-NEXT: s_waitcnt vmcnt(0) 1709; GFX900-NEXT: s_setpc_b64 s[30:31] 1710; 1711; GFX90A-LABEL: v_shuffle_v2i16_v8i16__5_0: 1712; GFX90A: ; %bb.0: 1713; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1714; GFX90A-NEXT: 
;;#ASMSTART 1715; GFX90A-NEXT: ; def v[0:3] 1716; GFX90A-NEXT: ;;#ASMEND 1717; GFX90A-NEXT: v_mov_b32_e32 v4, 0 1718; GFX90A-NEXT: v_alignbit_b32 v0, v0, v2, 16 1719; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 1720; GFX90A-NEXT: s_waitcnt vmcnt(0) 1721; GFX90A-NEXT: s_setpc_b64 s[30:31] 1722; 1723; GFX940-LABEL: v_shuffle_v2i16_v8i16__5_0: 1724; GFX940: ; %bb.0: 1725; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1726; GFX940-NEXT: ;;#ASMSTART 1727; GFX940-NEXT: ; def v[0:3] 1728; GFX940-NEXT: ;;#ASMEND 1729; GFX940-NEXT: v_mov_b32_e32 v4, 0 1730; GFX940-NEXT: v_alignbit_b32 v0, v0, v2, 16 1731; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 1732; GFX940-NEXT: s_waitcnt vmcnt(0) 1733; GFX940-NEXT: s_setpc_b64 s[30:31] 1734 %vec0 = call <8 x i16> asm "; def $0", "=v"() 1735 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 5, i32 0> 1736 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 1737 ret void 1738} 1739 1740define void @v_shuffle_v2i16_v8i16__6_0(ptr addrspace(1) inreg %ptr) { 1741; GFX900-LABEL: v_shuffle_v2i16_v8i16__6_0: 1742; GFX900: ; %bb.0: 1743; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1744; GFX900-NEXT: ;;#ASMSTART 1745; GFX900-NEXT: ; def v[0:3] 1746; GFX900-NEXT: ;;#ASMEND 1747; GFX900-NEXT: s_mov_b32 s4, 0x5040100 1748; GFX900-NEXT: v_mov_b32_e32 v4, 0 1749; GFX900-NEXT: v_perm_b32 v0, v0, v3, s4 1750; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 1751; GFX900-NEXT: s_waitcnt vmcnt(0) 1752; GFX900-NEXT: s_setpc_b64 s[30:31] 1753; 1754; GFX90A-LABEL: v_shuffle_v2i16_v8i16__6_0: 1755; GFX90A: ; %bb.0: 1756; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1757; GFX90A-NEXT: ;;#ASMSTART 1758; GFX90A-NEXT: ; def v[0:3] 1759; GFX90A-NEXT: ;;#ASMEND 1760; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 1761; GFX90A-NEXT: v_mov_b32_e32 v4, 0 1762; GFX90A-NEXT: v_perm_b32 v0, v0, v3, s4 1763; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 1764; GFX90A-NEXT: s_waitcnt vmcnt(0) 1765; GFX90A-NEXT: 
s_setpc_b64 s[30:31] 1766; 1767; GFX940-LABEL: v_shuffle_v2i16_v8i16__6_0: 1768; GFX940: ; %bb.0: 1769; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1770; GFX940-NEXT: ;;#ASMSTART 1771; GFX940-NEXT: ; def v[0:3] 1772; GFX940-NEXT: ;;#ASMEND 1773; GFX940-NEXT: s_mov_b32 s2, 0x5040100 1774; GFX940-NEXT: v_mov_b32_e32 v4, 0 1775; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 1776; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 1777; GFX940-NEXT: s_waitcnt vmcnt(0) 1778; GFX940-NEXT: s_setpc_b64 s[30:31] 1779 %vec0 = call <8 x i16> asm "; def $0", "=v"() 1780 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 6, i32 0> 1781 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 1782 ret void 1783} 1784 1785define void @v_shuffle_v2i16_v8i16__7_0(ptr addrspace(1) inreg %ptr) { 1786; GFX900-LABEL: v_shuffle_v2i16_v8i16__7_0: 1787; GFX900: ; %bb.0: 1788; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1789; GFX900-NEXT: ;;#ASMSTART 1790; GFX900-NEXT: ; def v[0:3] 1791; GFX900-NEXT: ;;#ASMEND 1792; GFX900-NEXT: v_mov_b32_e32 v4, 0 1793; GFX900-NEXT: v_alignbit_b32 v0, v0, v3, 16 1794; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 1795; GFX900-NEXT: s_waitcnt vmcnt(0) 1796; GFX900-NEXT: s_setpc_b64 s[30:31] 1797; 1798; GFX90A-LABEL: v_shuffle_v2i16_v8i16__7_0: 1799; GFX90A: ; %bb.0: 1800; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1801; GFX90A-NEXT: ;;#ASMSTART 1802; GFX90A-NEXT: ; def v[0:3] 1803; GFX90A-NEXT: ;;#ASMEND 1804; GFX90A-NEXT: v_mov_b32_e32 v4, 0 1805; GFX90A-NEXT: v_alignbit_b32 v0, v0, v3, 16 1806; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 1807; GFX90A-NEXT: s_waitcnt vmcnt(0) 1808; GFX90A-NEXT: s_setpc_b64 s[30:31] 1809; 1810; GFX940-LABEL: v_shuffle_v2i16_v8i16__7_0: 1811; GFX940: ; %bb.0: 1812; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1813; GFX940-NEXT: ;;#ASMSTART 1814; GFX940-NEXT: ; def v[0:3] 1815; GFX940-NEXT: ;;#ASMEND 1816; GFX940-NEXT: v_mov_b32_e32 v4, 0 1817; GFX940-NEXT: 
v_alignbit_b32 v0, v0, v3, 16 1818; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 1819; GFX940-NEXT: s_waitcnt vmcnt(0) 1820; GFX940-NEXT: s_setpc_b64 s[30:31] 1821 %vec0 = call <8 x i16> asm "; def $0", "=v"() 1822 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 7, i32 0> 1823 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 1824 ret void 1825} 1826 1827define void @v_shuffle_v2i16_v8i16__8_0(ptr addrspace(1) inreg %ptr) { 1828; GFX900-LABEL: v_shuffle_v2i16_v8i16__8_0: 1829; GFX900: ; %bb.0: 1830; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1831; GFX900-NEXT: ;;#ASMSTART 1832; GFX900-NEXT: ; def v[0:3] 1833; GFX900-NEXT: ;;#ASMEND 1834; GFX900-NEXT: v_mov_b32_e32 v4, 0 1835; GFX900-NEXT: v_lshlrev_b32_e32 v0, 16, v0 1836; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 1837; GFX900-NEXT: s_waitcnt vmcnt(0) 1838; GFX900-NEXT: s_setpc_b64 s[30:31] 1839; 1840; GFX90A-LABEL: v_shuffle_v2i16_v8i16__8_0: 1841; GFX90A: ; %bb.0: 1842; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1843; GFX90A-NEXT: ;;#ASMSTART 1844; GFX90A-NEXT: ; def v[0:3] 1845; GFX90A-NEXT: ;;#ASMEND 1846; GFX90A-NEXT: v_mov_b32_e32 v4, 0 1847; GFX90A-NEXT: v_lshlrev_b32_e32 v0, 16, v0 1848; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 1849; GFX90A-NEXT: s_waitcnt vmcnt(0) 1850; GFX90A-NEXT: s_setpc_b64 s[30:31] 1851; 1852; GFX940-LABEL: v_shuffle_v2i16_v8i16__8_0: 1853; GFX940: ; %bb.0: 1854; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1855; GFX940-NEXT: ;;#ASMSTART 1856; GFX940-NEXT: ; def v[0:3] 1857; GFX940-NEXT: ;;#ASMEND 1858; GFX940-NEXT: v_mov_b32_e32 v4, 0 1859; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v0 1860; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 1861; GFX940-NEXT: s_waitcnt vmcnt(0) 1862; GFX940-NEXT: s_setpc_b64 s[30:31] 1863 %vec0 = call <8 x i16> asm "; def $0", "=v"() 1864 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 8, i32 0> 1865 store <2 x i16> %shuf, ptr addrspace(1) %ptr, 
align 4 1866 ret void 1867} 1868 1869define void @v_shuffle_v2i16_v8i16__9_0(ptr addrspace(1) inreg %ptr) { 1870; GFX900-LABEL: v_shuffle_v2i16_v8i16__9_0: 1871; GFX900: ; %bb.0: 1872; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1873; GFX900-NEXT: ;;#ASMSTART 1874; GFX900-NEXT: ; def v[0:3] 1875; GFX900-NEXT: ;;#ASMEND 1876; GFX900-NEXT: v_mov_b32_e32 v5, 0 1877; GFX900-NEXT: ;;#ASMSTART 1878; GFX900-NEXT: ; def v[1:4] 1879; GFX900-NEXT: ;;#ASMEND 1880; GFX900-NEXT: v_alignbit_b32 v0, v0, v1, 16 1881; GFX900-NEXT: global_store_dword v5, v0, s[16:17] 1882; GFX900-NEXT: s_waitcnt vmcnt(0) 1883; GFX900-NEXT: s_setpc_b64 s[30:31] 1884; 1885; GFX90A-LABEL: v_shuffle_v2i16_v8i16__9_0: 1886; GFX90A: ; %bb.0: 1887; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1888; GFX90A-NEXT: ;;#ASMSTART 1889; GFX90A-NEXT: ; def v[0:3] 1890; GFX90A-NEXT: ;;#ASMEND 1891; GFX90A-NEXT: v_mov_b32_e32 v6, 0 1892; GFX90A-NEXT: ;;#ASMSTART 1893; GFX90A-NEXT: ; def v[2:5] 1894; GFX90A-NEXT: ;;#ASMEND 1895; GFX90A-NEXT: v_alignbit_b32 v0, v0, v2, 16 1896; GFX90A-NEXT: global_store_dword v6, v0, s[16:17] 1897; GFX90A-NEXT: s_waitcnt vmcnt(0) 1898; GFX90A-NEXT: s_setpc_b64 s[30:31] 1899; 1900; GFX940-LABEL: v_shuffle_v2i16_v8i16__9_0: 1901; GFX940: ; %bb.0: 1902; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1903; GFX940-NEXT: ;;#ASMSTART 1904; GFX940-NEXT: ; def v[0:3] 1905; GFX940-NEXT: ;;#ASMEND 1906; GFX940-NEXT: v_mov_b32_e32 v6, 0 1907; GFX940-NEXT: ;;#ASMSTART 1908; GFX940-NEXT: ; def v[2:5] 1909; GFX940-NEXT: ;;#ASMEND 1910; GFX940-NEXT: s_nop 0 1911; GFX940-NEXT: v_alignbit_b32 v0, v0, v2, 16 1912; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 1913; GFX940-NEXT: s_waitcnt vmcnt(0) 1914; GFX940-NEXT: s_setpc_b64 s[30:31] 1915 %vec0 = call <8 x i16> asm "; def $0", "=v"() 1916 %vec1 = call <8 x i16> asm "; def $0", "=v"() 1917 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 9, i32 0> 1918 store <2 x i16> %shuf, ptr addrspace(1) %ptr, 
align 4 1919 ret void 1920} 1921 1922define void @v_shuffle_v2i16_v8i16__10_0(ptr addrspace(1) inreg %ptr) { 1923; GFX900-LABEL: v_shuffle_v2i16_v8i16__10_0: 1924; GFX900: ; %bb.0: 1925; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1926; GFX900-NEXT: ;;#ASMSTART 1927; GFX900-NEXT: ; def v[0:3] 1928; GFX900-NEXT: ;;#ASMEND 1929; GFX900-NEXT: s_mov_b32 s4, 0x5040100 1930; GFX900-NEXT: v_mov_b32_e32 v5, 0 1931; GFX900-NEXT: ;;#ASMSTART 1932; GFX900-NEXT: ; def v[1:4] 1933; GFX900-NEXT: ;;#ASMEND 1934; GFX900-NEXT: v_perm_b32 v0, v0, v2, s4 1935; GFX900-NEXT: global_store_dword v5, v0, s[16:17] 1936; GFX900-NEXT: s_waitcnt vmcnt(0) 1937; GFX900-NEXT: s_setpc_b64 s[30:31] 1938; 1939; GFX90A-LABEL: v_shuffle_v2i16_v8i16__10_0: 1940; GFX90A: ; %bb.0: 1941; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1942; GFX90A-NEXT: ;;#ASMSTART 1943; GFX90A-NEXT: ; def v[0:3] 1944; GFX90A-NEXT: ;;#ASMEND 1945; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 1946; GFX90A-NEXT: v_mov_b32_e32 v6, 0 1947; GFX90A-NEXT: ;;#ASMSTART 1948; GFX90A-NEXT: ; def v[2:5] 1949; GFX90A-NEXT: ;;#ASMEND 1950; GFX90A-NEXT: v_perm_b32 v0, v0, v3, s4 1951; GFX90A-NEXT: global_store_dword v6, v0, s[16:17] 1952; GFX90A-NEXT: s_waitcnt vmcnt(0) 1953; GFX90A-NEXT: s_setpc_b64 s[30:31] 1954; 1955; GFX940-LABEL: v_shuffle_v2i16_v8i16__10_0: 1956; GFX940: ; %bb.0: 1957; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1958; GFX940-NEXT: ;;#ASMSTART 1959; GFX940-NEXT: ; def v[0:3] 1960; GFX940-NEXT: ;;#ASMEND 1961; GFX940-NEXT: s_mov_b32 s2, 0x5040100 1962; GFX940-NEXT: v_mov_b32_e32 v6, 0 1963; GFX940-NEXT: ;;#ASMSTART 1964; GFX940-NEXT: ; def v[2:5] 1965; GFX940-NEXT: ;;#ASMEND 1966; GFX940-NEXT: s_nop 0 1967; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 1968; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 1969; GFX940-NEXT: s_waitcnt vmcnt(0) 1970; GFX940-NEXT: s_setpc_b64 s[30:31] 1971 %vec0 = call <8 x i16> asm "; def $0", "=v"() 1972 %vec1 = call <8 x i16> asm "; def $0", "=v"() 1973 %shuf = 
shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 10, i32 0> 1974 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 1975 ret void 1976} 1977 1978define void @v_shuffle_v2i16_v8i16__11_0(ptr addrspace(1) inreg %ptr) { 1979; GFX900-LABEL: v_shuffle_v2i16_v8i16__11_0: 1980; GFX900: ; %bb.0: 1981; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1982; GFX900-NEXT: ;;#ASMSTART 1983; GFX900-NEXT: ; def v[0:3] 1984; GFX900-NEXT: ;;#ASMEND 1985; GFX900-NEXT: v_mov_b32_e32 v5, 0 1986; GFX900-NEXT: ;;#ASMSTART 1987; GFX900-NEXT: ; def v[1:4] 1988; GFX900-NEXT: ;;#ASMEND 1989; GFX900-NEXT: v_alignbit_b32 v0, v0, v2, 16 1990; GFX900-NEXT: global_store_dword v5, v0, s[16:17] 1991; GFX900-NEXT: s_waitcnt vmcnt(0) 1992; GFX900-NEXT: s_setpc_b64 s[30:31] 1993; 1994; GFX90A-LABEL: v_shuffle_v2i16_v8i16__11_0: 1995; GFX90A: ; %bb.0: 1996; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1997; GFX90A-NEXT: ;;#ASMSTART 1998; GFX90A-NEXT: ; def v[0:3] 1999; GFX90A-NEXT: ;;#ASMEND 2000; GFX90A-NEXT: v_mov_b32_e32 v6, 0 2001; GFX90A-NEXT: ;;#ASMSTART 2002; GFX90A-NEXT: ; def v[2:5] 2003; GFX90A-NEXT: ;;#ASMEND 2004; GFX90A-NEXT: v_alignbit_b32 v0, v0, v3, 16 2005; GFX90A-NEXT: global_store_dword v6, v0, s[16:17] 2006; GFX90A-NEXT: s_waitcnt vmcnt(0) 2007; GFX90A-NEXT: s_setpc_b64 s[30:31] 2008; 2009; GFX940-LABEL: v_shuffle_v2i16_v8i16__11_0: 2010; GFX940: ; %bb.0: 2011; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2012; GFX940-NEXT: ;;#ASMSTART 2013; GFX940-NEXT: ; def v[0:3] 2014; GFX940-NEXT: ;;#ASMEND 2015; GFX940-NEXT: v_mov_b32_e32 v6, 0 2016; GFX940-NEXT: ;;#ASMSTART 2017; GFX940-NEXT: ; def v[2:5] 2018; GFX940-NEXT: ;;#ASMEND 2019; GFX940-NEXT: s_nop 0 2020; GFX940-NEXT: v_alignbit_b32 v0, v0, v3, 16 2021; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 2022; GFX940-NEXT: s_waitcnt vmcnt(0) 2023; GFX940-NEXT: s_setpc_b64 s[30:31] 2024 %vec0 = call <8 x i16> asm "; def $0", "=v"() 2025 %vec1 = call <8 x i16> asm "; def $0", "=v"() 2026 %shuf 
= shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 11, i32 0> 2027 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 2028 ret void 2029} 2030 2031define void @v_shuffle_v2i16_v8i16__12_0(ptr addrspace(1) inreg %ptr) { 2032; GFX900-LABEL: v_shuffle_v2i16_v8i16__12_0: 2033; GFX900: ; %bb.0: 2034; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2035; GFX900-NEXT: ;;#ASMSTART 2036; GFX900-NEXT: ; def v[0:3] 2037; GFX900-NEXT: ;;#ASMEND 2038; GFX900-NEXT: s_mov_b32 s4, 0x5040100 2039; GFX900-NEXT: v_mov_b32_e32 v5, 0 2040; GFX900-NEXT: ;;#ASMSTART 2041; GFX900-NEXT: ; def v[1:4] 2042; GFX900-NEXT: ;;#ASMEND 2043; GFX900-NEXT: v_perm_b32 v0, v0, v3, s4 2044; GFX900-NEXT: global_store_dword v5, v0, s[16:17] 2045; GFX900-NEXT: s_waitcnt vmcnt(0) 2046; GFX900-NEXT: s_setpc_b64 s[30:31] 2047; 2048; GFX90A-LABEL: v_shuffle_v2i16_v8i16__12_0: 2049; GFX90A: ; %bb.0: 2050; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2051; GFX90A-NEXT: ;;#ASMSTART 2052; GFX90A-NEXT: ; def v[0:3] 2053; GFX90A-NEXT: ;;#ASMEND 2054; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 2055; GFX90A-NEXT: v_mov_b32_e32 v6, 0 2056; GFX90A-NEXT: ;;#ASMSTART 2057; GFX90A-NEXT: ; def v[2:5] 2058; GFX90A-NEXT: ;;#ASMEND 2059; GFX90A-NEXT: v_perm_b32 v0, v0, v4, s4 2060; GFX90A-NEXT: global_store_dword v6, v0, s[16:17] 2061; GFX90A-NEXT: s_waitcnt vmcnt(0) 2062; GFX90A-NEXT: s_setpc_b64 s[30:31] 2063; 2064; GFX940-LABEL: v_shuffle_v2i16_v8i16__12_0: 2065; GFX940: ; %bb.0: 2066; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2067; GFX940-NEXT: ;;#ASMSTART 2068; GFX940-NEXT: ; def v[0:3] 2069; GFX940-NEXT: ;;#ASMEND 2070; GFX940-NEXT: s_mov_b32 s2, 0x5040100 2071; GFX940-NEXT: v_mov_b32_e32 v6, 0 2072; GFX940-NEXT: ;;#ASMSTART 2073; GFX940-NEXT: ; def v[2:5] 2074; GFX940-NEXT: ;;#ASMEND 2075; GFX940-NEXT: s_nop 0 2076; GFX940-NEXT: v_perm_b32 v0, v0, v4, s2 2077; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 2078; GFX940-NEXT: s_waitcnt vmcnt(0) 2079; GFX940-NEXT: s_setpc_b64 
s[30:31] 2080 %vec0 = call <8 x i16> asm "; def $0", "=v"() 2081 %vec1 = call <8 x i16> asm "; def $0", "=v"() 2082 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 12, i32 0> 2083 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 2084 ret void 2085} 2086 2087define void @v_shuffle_v2i16_v8i16__13_0(ptr addrspace(1) inreg %ptr) { 2088; GFX900-LABEL: v_shuffle_v2i16_v8i16__13_0: 2089; GFX900: ; %bb.0: 2090; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2091; GFX900-NEXT: ;;#ASMSTART 2092; GFX900-NEXT: ; def v[0:3] 2093; GFX900-NEXT: ;;#ASMEND 2094; GFX900-NEXT: v_mov_b32_e32 v5, 0 2095; GFX900-NEXT: ;;#ASMSTART 2096; GFX900-NEXT: ; def v[1:4] 2097; GFX900-NEXT: ;;#ASMEND 2098; GFX900-NEXT: v_alignbit_b32 v0, v0, v3, 16 2099; GFX900-NEXT: global_store_dword v5, v0, s[16:17] 2100; GFX900-NEXT: s_waitcnt vmcnt(0) 2101; GFX900-NEXT: s_setpc_b64 s[30:31] 2102; 2103; GFX90A-LABEL: v_shuffle_v2i16_v8i16__13_0: 2104; GFX90A: ; %bb.0: 2105; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2106; GFX90A-NEXT: ;;#ASMSTART 2107; GFX90A-NEXT: ; def v[0:3] 2108; GFX90A-NEXT: ;;#ASMEND 2109; GFX90A-NEXT: v_mov_b32_e32 v6, 0 2110; GFX90A-NEXT: ;;#ASMSTART 2111; GFX90A-NEXT: ; def v[2:5] 2112; GFX90A-NEXT: ;;#ASMEND 2113; GFX90A-NEXT: v_alignbit_b32 v0, v0, v4, 16 2114; GFX90A-NEXT: global_store_dword v6, v0, s[16:17] 2115; GFX90A-NEXT: s_waitcnt vmcnt(0) 2116; GFX90A-NEXT: s_setpc_b64 s[30:31] 2117; 2118; GFX940-LABEL: v_shuffle_v2i16_v8i16__13_0: 2119; GFX940: ; %bb.0: 2120; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2121; GFX940-NEXT: ;;#ASMSTART 2122; GFX940-NEXT: ; def v[0:3] 2123; GFX940-NEXT: ;;#ASMEND 2124; GFX940-NEXT: v_mov_b32_e32 v6, 0 2125; GFX940-NEXT: ;;#ASMSTART 2126; GFX940-NEXT: ; def v[2:5] 2127; GFX940-NEXT: ;;#ASMEND 2128; GFX940-NEXT: s_nop 0 2129; GFX940-NEXT: v_alignbit_b32 v0, v0, v4, 16 2130; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 2131; GFX940-NEXT: s_waitcnt vmcnt(0) 2132; GFX940-NEXT: 
s_setpc_b64 s[30:31] 2133 %vec0 = call <8 x i16> asm "; def $0", "=v"() 2134 %vec1 = call <8 x i16> asm "; def $0", "=v"() 2135 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 13, i32 0> 2136 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 2137 ret void 2138} 2139 2140define void @v_shuffle_v2i16_v8i16__14_0(ptr addrspace(1) inreg %ptr) { 2141; GFX900-LABEL: v_shuffle_v2i16_v8i16__14_0: 2142; GFX900: ; %bb.0: 2143; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2144; GFX900-NEXT: ;;#ASMSTART 2145; GFX900-NEXT: ; def v[0:3] 2146; GFX900-NEXT: ;;#ASMEND 2147; GFX900-NEXT: s_mov_b32 s4, 0x5040100 2148; GFX900-NEXT: v_mov_b32_e32 v5, 0 2149; GFX900-NEXT: ;;#ASMSTART 2150; GFX900-NEXT: ; def v[1:4] 2151; GFX900-NEXT: ;;#ASMEND 2152; GFX900-NEXT: v_perm_b32 v0, v0, v4, s4 2153; GFX900-NEXT: global_store_dword v5, v0, s[16:17] 2154; GFX900-NEXT: s_waitcnt vmcnt(0) 2155; GFX900-NEXT: s_setpc_b64 s[30:31] 2156; 2157; GFX90A-LABEL: v_shuffle_v2i16_v8i16__14_0: 2158; GFX90A: ; %bb.0: 2159; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2160; GFX90A-NEXT: ;;#ASMSTART 2161; GFX90A-NEXT: ; def v[0:3] 2162; GFX90A-NEXT: ;;#ASMEND 2163; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 2164; GFX90A-NEXT: v_mov_b32_e32 v6, 0 2165; GFX90A-NEXT: ;;#ASMSTART 2166; GFX90A-NEXT: ; def v[2:5] 2167; GFX90A-NEXT: ;;#ASMEND 2168; GFX90A-NEXT: v_perm_b32 v0, v0, v5, s4 2169; GFX90A-NEXT: global_store_dword v6, v0, s[16:17] 2170; GFX90A-NEXT: s_waitcnt vmcnt(0) 2171; GFX90A-NEXT: s_setpc_b64 s[30:31] 2172; 2173; GFX940-LABEL: v_shuffle_v2i16_v8i16__14_0: 2174; GFX940: ; %bb.0: 2175; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2176; GFX940-NEXT: ;;#ASMSTART 2177; GFX940-NEXT: ; def v[0:3] 2178; GFX940-NEXT: ;;#ASMEND 2179; GFX940-NEXT: s_mov_b32 s2, 0x5040100 2180; GFX940-NEXT: v_mov_b32_e32 v6, 0 2181; GFX940-NEXT: ;;#ASMSTART 2182; GFX940-NEXT: ; def v[2:5] 2183; GFX940-NEXT: ;;#ASMEND 2184; GFX940-NEXT: s_nop 0 2185; GFX940-NEXT: v_perm_b32 v0, v0, v5, 
s2 2186; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 2187; GFX940-NEXT: s_waitcnt vmcnt(0) 2188; GFX940-NEXT: s_setpc_b64 s[30:31] 2189 %vec0 = call <8 x i16> asm "; def $0", "=v"() 2190 %vec1 = call <8 x i16> asm "; def $0", "=v"() 2191 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 14, i32 0> 2192 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 2193 ret void 2194} 2195; Mask <poison, 1>: result lane 0 is poison and lane 1 is element 1 of %vec0; all three check blocks expect v0 to be stored with no permute instruction. 2196define void @v_shuffle_v2i16_v8i16__u_1(ptr addrspace(1) inreg %ptr) { 2197; GFX900-LABEL: v_shuffle_v2i16_v8i16__u_1: 2198; GFX900: ; %bb.0: 2199; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2200; GFX900-NEXT: v_mov_b32_e32 v4, 0 2201; GFX900-NEXT: ;;#ASMSTART 2202; GFX900-NEXT: ; def v[0:3] 2203; GFX900-NEXT: ;;#ASMEND 2204; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 2205; GFX900-NEXT: s_waitcnt vmcnt(0) 2206; GFX900-NEXT: s_setpc_b64 s[30:31] 2207; 2208; GFX90A-LABEL: v_shuffle_v2i16_v8i16__u_1: 2209; GFX90A: ; %bb.0: 2210; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2211; GFX90A-NEXT: v_mov_b32_e32 v4, 0 2212; GFX90A-NEXT: ;;#ASMSTART 2213; GFX90A-NEXT: ; def v[0:3] 2214; GFX90A-NEXT: ;;#ASMEND 2215; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 2216; GFX90A-NEXT: s_waitcnt vmcnt(0) 2217; GFX90A-NEXT: s_setpc_b64 s[30:31] 2218; 2219; GFX940-LABEL: v_shuffle_v2i16_v8i16__u_1: 2220; GFX940: ; %bb.0: 2221; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2222; GFX940-NEXT: v_mov_b32_e32 v4, 0 2223; GFX940-NEXT: ;;#ASMSTART 2224; GFX940-NEXT: ; def v[0:3] 2225; GFX940-NEXT: ;;#ASMEND 2226; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 2227; GFX940-NEXT: s_waitcnt vmcnt(0) 2228; GFX940-NEXT: s_setpc_b64 s[30:31] 2229 %vec0 = call <8 x i16> asm "; def $0", "=v"() 2230 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 poison, i32 1> 2231 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 2232 ret void 2233} 2234 2235define void @v_shuffle_v2i16_v8i16__0_1(ptr addrspace(1) inreg %ptr) { 2236;
GFX900-LABEL: v_shuffle_v2i16_v8i16__0_1: 2237; GFX900: ; %bb.0: 2238; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2239; GFX900-NEXT: v_mov_b32_e32 v4, 0 2240; GFX900-NEXT: ;;#ASMSTART 2241; GFX900-NEXT: ; def v[0:3] 2242; GFX900-NEXT: ;;#ASMEND 2243; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 2244; GFX900-NEXT: s_waitcnt vmcnt(0) 2245; GFX900-NEXT: s_setpc_b64 s[30:31] 2246; 2247; GFX90A-LABEL: v_shuffle_v2i16_v8i16__0_1: 2248; GFX90A: ; %bb.0: 2249; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2250; GFX90A-NEXT: v_mov_b32_e32 v4, 0 2251; GFX90A-NEXT: ;;#ASMSTART 2252; GFX90A-NEXT: ; def v[0:3] 2253; GFX90A-NEXT: ;;#ASMEND 2254; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 2255; GFX90A-NEXT: s_waitcnt vmcnt(0) 2256; GFX90A-NEXT: s_setpc_b64 s[30:31] 2257; 2258; GFX940-LABEL: v_shuffle_v2i16_v8i16__0_1: 2259; GFX940: ; %bb.0: 2260; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2261; GFX940-NEXT: v_mov_b32_e32 v4, 0 2262; GFX940-NEXT: ;;#ASMSTART 2263; GFX940-NEXT: ; def v[0:3] 2264; GFX940-NEXT: ;;#ASMEND 2265; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 2266; GFX940-NEXT: s_waitcnt vmcnt(0) 2267; GFX940-NEXT: s_setpc_b64 s[30:31] 2268 %vec0 = call <8 x i16> asm "; def $0", "=v"() 2269 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 0, i32 1> 2270 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 2271 ret void 2272} 2273 2274define void @v_shuffle_v2i16_v8i16__1_1(ptr addrspace(1) inreg %ptr) { 2275; GFX900-LABEL: v_shuffle_v2i16_v8i16__1_1: 2276; GFX900: ; %bb.0: 2277; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2278; GFX900-NEXT: ;;#ASMSTART 2279; GFX900-NEXT: ; def v[0:3] 2280; GFX900-NEXT: ;;#ASMEND 2281; GFX900-NEXT: s_mov_b32 s4, 0x7060302 2282; GFX900-NEXT: v_mov_b32_e32 v4, 0 2283; GFX900-NEXT: v_perm_b32 v0, v0, v0, s4 2284; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 2285; GFX900-NEXT: s_waitcnt vmcnt(0) 2286; GFX900-NEXT: s_setpc_b64 s[30:31] 2287; 2288; GFX90A-LABEL: 
v_shuffle_v2i16_v8i16__1_1: 2289; GFX90A: ; %bb.0: 2290; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2291; GFX90A-NEXT: ;;#ASMSTART 2292; GFX90A-NEXT: ; def v[0:3] 2293; GFX90A-NEXT: ;;#ASMEND 2294; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 2295; GFX90A-NEXT: v_mov_b32_e32 v4, 0 2296; GFX90A-NEXT: v_perm_b32 v0, v0, v0, s4 2297; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 2298; GFX90A-NEXT: s_waitcnt vmcnt(0) 2299; GFX90A-NEXT: s_setpc_b64 s[30:31] 2300; 2301; GFX940-LABEL: v_shuffle_v2i16_v8i16__1_1: 2302; GFX940: ; %bb.0: 2303; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2304; GFX940-NEXT: ;;#ASMSTART 2305; GFX940-NEXT: ; def v[0:3] 2306; GFX940-NEXT: ;;#ASMEND 2307; GFX940-NEXT: s_mov_b32 s2, 0x7060302 2308; GFX940-NEXT: v_mov_b32_e32 v4, 0 2309; GFX940-NEXT: v_perm_b32 v0, v0, v0, s2 2310; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 2311; GFX940-NEXT: s_waitcnt vmcnt(0) 2312; GFX940-NEXT: s_setpc_b64 s[30:31] 2313 %vec0 = call <8 x i16> asm "; def $0", "=v"() 2314 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 1, i32 1> 2315 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 2316 ret void 2317} 2318 2319define void @v_shuffle_v2i16_v8i16__2_1(ptr addrspace(1) inreg %ptr) { 2320; GFX900-LABEL: v_shuffle_v2i16_v8i16__2_1: 2321; GFX900: ; %bb.0: 2322; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2323; GFX900-NEXT: ;;#ASMSTART 2324; GFX900-NEXT: ; def v[0:3] 2325; GFX900-NEXT: ;;#ASMEND 2326; GFX900-NEXT: s_mov_b32 s4, 0xffff 2327; GFX900-NEXT: v_mov_b32_e32 v4, 0 2328; GFX900-NEXT: v_bfi_b32 v0, s4, v1, v0 2329; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 2330; GFX900-NEXT: s_waitcnt vmcnt(0) 2331; GFX900-NEXT: s_setpc_b64 s[30:31] 2332; 2333; GFX90A-LABEL: v_shuffle_v2i16_v8i16__2_1: 2334; GFX90A: ; %bb.0: 2335; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2336; GFX90A-NEXT: ;;#ASMSTART 2337; GFX90A-NEXT: ; def v[0:3] 2338; GFX90A-NEXT: ;;#ASMEND 2339; GFX90A-NEXT: s_mov_b32 s4, 0xffff 
2340; GFX90A-NEXT: v_mov_b32_e32 v4, 0 2341; GFX90A-NEXT: v_bfi_b32 v0, s4, v1, v0 2342; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 2343; GFX90A-NEXT: s_waitcnt vmcnt(0) 2344; GFX90A-NEXT: s_setpc_b64 s[30:31] 2345; 2346; GFX940-LABEL: v_shuffle_v2i16_v8i16__2_1: 2347; GFX940: ; %bb.0: 2348; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2349; GFX940-NEXT: ;;#ASMSTART 2350; GFX940-NEXT: ; def v[0:3] 2351; GFX940-NEXT: ;;#ASMEND 2352; GFX940-NEXT: s_mov_b32 s2, 0xffff 2353; GFX940-NEXT: v_mov_b32_e32 v4, 0 2354; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v0 2355; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 2356; GFX940-NEXT: s_waitcnt vmcnt(0) 2357; GFX940-NEXT: s_setpc_b64 s[30:31] 2358 %vec0 = call <8 x i16> asm "; def $0", "=v"() 2359 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 2, i32 1> 2360 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 2361 ret void 2362} 2363 2364define void @v_shuffle_v2i16_v8i16__3_1(ptr addrspace(1) inreg %ptr) { 2365; GFX900-LABEL: v_shuffle_v2i16_v8i16__3_1: 2366; GFX900: ; %bb.0: 2367; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2368; GFX900-NEXT: ;;#ASMSTART 2369; GFX900-NEXT: ; def v[0:3] 2370; GFX900-NEXT: ;;#ASMEND 2371; GFX900-NEXT: s_mov_b32 s4, 0x7060302 2372; GFX900-NEXT: v_mov_b32_e32 v4, 0 2373; GFX900-NEXT: v_perm_b32 v0, v0, v1, s4 2374; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 2375; GFX900-NEXT: s_waitcnt vmcnt(0) 2376; GFX900-NEXT: s_setpc_b64 s[30:31] 2377; 2378; GFX90A-LABEL: v_shuffle_v2i16_v8i16__3_1: 2379; GFX90A: ; %bb.0: 2380; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2381; GFX90A-NEXT: ;;#ASMSTART 2382; GFX90A-NEXT: ; def v[0:3] 2383; GFX90A-NEXT: ;;#ASMEND 2384; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 2385; GFX90A-NEXT: v_mov_b32_e32 v4, 0 2386; GFX90A-NEXT: v_perm_b32 v0, v0, v1, s4 2387; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 2388; GFX90A-NEXT: s_waitcnt vmcnt(0) 2389; GFX90A-NEXT: s_setpc_b64 s[30:31] 2390; 2391; GFX940-LABEL: 
v_shuffle_v2i16_v8i16__3_1: 2392; GFX940: ; %bb.0: 2393; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2394; GFX940-NEXT: ;;#ASMSTART 2395; GFX940-NEXT: ; def v[0:3] 2396; GFX940-NEXT: ;;#ASMEND 2397; GFX940-NEXT: s_mov_b32 s2, 0x7060302 2398; GFX940-NEXT: v_mov_b32_e32 v4, 0 2399; GFX940-NEXT: v_perm_b32 v0, v0, v1, s2 2400; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 2401; GFX940-NEXT: s_waitcnt vmcnt(0) 2402; GFX940-NEXT: s_setpc_b64 s[30:31] 2403 %vec0 = call <8 x i16> asm "; def $0", "=v"() 2404 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 3, i32 1> 2405 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 2406 ret void 2407} 2408 2409define void @v_shuffle_v2i16_v8i16__4_1(ptr addrspace(1) inreg %ptr) { 2410; GFX900-LABEL: v_shuffle_v2i16_v8i16__4_1: 2411; GFX900: ; %bb.0: 2412; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2413; GFX900-NEXT: ;;#ASMSTART 2414; GFX900-NEXT: ; def v[0:3] 2415; GFX900-NEXT: ;;#ASMEND 2416; GFX900-NEXT: s_mov_b32 s4, 0xffff 2417; GFX900-NEXT: v_mov_b32_e32 v4, 0 2418; GFX900-NEXT: v_bfi_b32 v0, s4, v2, v0 2419; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 2420; GFX900-NEXT: s_waitcnt vmcnt(0) 2421; GFX900-NEXT: s_setpc_b64 s[30:31] 2422; 2423; GFX90A-LABEL: v_shuffle_v2i16_v8i16__4_1: 2424; GFX90A: ; %bb.0: 2425; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2426; GFX90A-NEXT: ;;#ASMSTART 2427; GFX90A-NEXT: ; def v[0:3] 2428; GFX90A-NEXT: ;;#ASMEND 2429; GFX90A-NEXT: s_mov_b32 s4, 0xffff 2430; GFX90A-NEXT: v_mov_b32_e32 v4, 0 2431; GFX90A-NEXT: v_bfi_b32 v0, s4, v2, v0 2432; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 2433; GFX90A-NEXT: s_waitcnt vmcnt(0) 2434; GFX90A-NEXT: s_setpc_b64 s[30:31] 2435; 2436; GFX940-LABEL: v_shuffle_v2i16_v8i16__4_1: 2437; GFX940: ; %bb.0: 2438; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2439; GFX940-NEXT: ;;#ASMSTART 2440; GFX940-NEXT: ; def v[0:3] 2441; GFX940-NEXT: ;;#ASMEND 2442; GFX940-NEXT: s_mov_b32 s2, 0xffff 
2443; GFX940-NEXT: v_mov_b32_e32 v4, 0 2444; GFX940-NEXT: v_bfi_b32 v0, s2, v2, v0 2445; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 2446; GFX940-NEXT: s_waitcnt vmcnt(0) 2447; GFX940-NEXT: s_setpc_b64 s[30:31] 2448 %vec0 = call <8 x i16> asm "; def $0", "=v"() 2449 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 4, i32 1> 2450 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 2451 ret void 2452} 2453 2454define void @v_shuffle_v2i16_v8i16__5_1(ptr addrspace(1) inreg %ptr) { 2455; GFX900-LABEL: v_shuffle_v2i16_v8i16__5_1: 2456; GFX900: ; %bb.0: 2457; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2458; GFX900-NEXT: ;;#ASMSTART 2459; GFX900-NEXT: ; def v[0:3] 2460; GFX900-NEXT: ;;#ASMEND 2461; GFX900-NEXT: s_mov_b32 s4, 0x7060302 2462; GFX900-NEXT: v_mov_b32_e32 v4, 0 2463; GFX900-NEXT: v_perm_b32 v0, v0, v2, s4 2464; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 2465; GFX900-NEXT: s_waitcnt vmcnt(0) 2466; GFX900-NEXT: s_setpc_b64 s[30:31] 2467; 2468; GFX90A-LABEL: v_shuffle_v2i16_v8i16__5_1: 2469; GFX90A: ; %bb.0: 2470; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2471; GFX90A-NEXT: ;;#ASMSTART 2472; GFX90A-NEXT: ; def v[0:3] 2473; GFX90A-NEXT: ;;#ASMEND 2474; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 2475; GFX90A-NEXT: v_mov_b32_e32 v4, 0 2476; GFX90A-NEXT: v_perm_b32 v0, v0, v2, s4 2477; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 2478; GFX90A-NEXT: s_waitcnt vmcnt(0) 2479; GFX90A-NEXT: s_setpc_b64 s[30:31] 2480; 2481; GFX940-LABEL: v_shuffle_v2i16_v8i16__5_1: 2482; GFX940: ; %bb.0: 2483; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2484; GFX940-NEXT: ;;#ASMSTART 2485; GFX940-NEXT: ; def v[0:3] 2486; GFX940-NEXT: ;;#ASMEND 2487; GFX940-NEXT: s_mov_b32 s2, 0x7060302 2488; GFX940-NEXT: v_mov_b32_e32 v4, 0 2489; GFX940-NEXT: v_perm_b32 v0, v0, v2, s2 2490; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 2491; GFX940-NEXT: s_waitcnt vmcnt(0) 2492; GFX940-NEXT: s_setpc_b64 s[30:31] 2493 %vec0 = 
call <8 x i16> asm "; def $0", "=v"() 2494 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 5, i32 1> 2495 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 2496 ret void 2497} 2498 2499define void @v_shuffle_v2i16_v8i16__6_1(ptr addrspace(1) inreg %ptr) { 2500; GFX900-LABEL: v_shuffle_v2i16_v8i16__6_1: 2501; GFX900: ; %bb.0: 2502; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2503; GFX900-NEXT: ;;#ASMSTART 2504; GFX900-NEXT: ; def v[0:3] 2505; GFX900-NEXT: ;;#ASMEND 2506; GFX900-NEXT: s_mov_b32 s4, 0xffff 2507; GFX900-NEXT: v_mov_b32_e32 v4, 0 2508; GFX900-NEXT: v_bfi_b32 v0, s4, v3, v0 2509; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 2510; GFX900-NEXT: s_waitcnt vmcnt(0) 2511; GFX900-NEXT: s_setpc_b64 s[30:31] 2512; 2513; GFX90A-LABEL: v_shuffle_v2i16_v8i16__6_1: 2514; GFX90A: ; %bb.0: 2515; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2516; GFX90A-NEXT: ;;#ASMSTART 2517; GFX90A-NEXT: ; def v[0:3] 2518; GFX90A-NEXT: ;;#ASMEND 2519; GFX90A-NEXT: s_mov_b32 s4, 0xffff 2520; GFX90A-NEXT: v_mov_b32_e32 v4, 0 2521; GFX90A-NEXT: v_bfi_b32 v0, s4, v3, v0 2522; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 2523; GFX90A-NEXT: s_waitcnt vmcnt(0) 2524; GFX90A-NEXT: s_setpc_b64 s[30:31] 2525; 2526; GFX940-LABEL: v_shuffle_v2i16_v8i16__6_1: 2527; GFX940: ; %bb.0: 2528; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2529; GFX940-NEXT: ;;#ASMSTART 2530; GFX940-NEXT: ; def v[0:3] 2531; GFX940-NEXT: ;;#ASMEND 2532; GFX940-NEXT: s_mov_b32 s2, 0xffff 2533; GFX940-NEXT: v_mov_b32_e32 v4, 0 2534; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v0 2535; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 2536; GFX940-NEXT: s_waitcnt vmcnt(0) 2537; GFX940-NEXT: s_setpc_b64 s[30:31] 2538 %vec0 = call <8 x i16> asm "; def $0", "=v"() 2539 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 6, i32 1> ; lane 0 = element 6, lane 1 = element 1 of %vec0; all three check blocks expect one v_bfi_b32 (mask 0xffff) merging v3 and v0 2540 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 2541 ret void 2542} 2543 2544define void
@v_shuffle_v2i16_v8i16__7_1(ptr addrspace(1) inreg %ptr) { 2545; GFX900-LABEL: v_shuffle_v2i16_v8i16__7_1: 2546; GFX900: ; %bb.0: 2547; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2548; GFX900-NEXT: ;;#ASMSTART 2549; GFX900-NEXT: ; def v[0:3] 2550; GFX900-NEXT: ;;#ASMEND 2551; GFX900-NEXT: s_mov_b32 s4, 0x7060302 2552; GFX900-NEXT: v_mov_b32_e32 v4, 0 2553; GFX900-NEXT: v_perm_b32 v0, v0, v3, s4 2554; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 2555; GFX900-NEXT: s_waitcnt vmcnt(0) 2556; GFX900-NEXT: s_setpc_b64 s[30:31] 2557; 2558; GFX90A-LABEL: v_shuffle_v2i16_v8i16__7_1: 2559; GFX90A: ; %bb.0: 2560; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2561; GFX90A-NEXT: ;;#ASMSTART 2562; GFX90A-NEXT: ; def v[0:3] 2563; GFX90A-NEXT: ;;#ASMEND 2564; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 2565; GFX90A-NEXT: v_mov_b32_e32 v4, 0 2566; GFX90A-NEXT: v_perm_b32 v0, v0, v3, s4 2567; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 2568; GFX90A-NEXT: s_waitcnt vmcnt(0) 2569; GFX90A-NEXT: s_setpc_b64 s[30:31] 2570; 2571; GFX940-LABEL: v_shuffle_v2i16_v8i16__7_1: 2572; GFX940: ; %bb.0: 2573; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2574; GFX940-NEXT: ;;#ASMSTART 2575; GFX940-NEXT: ; def v[0:3] 2576; GFX940-NEXT: ;;#ASMEND 2577; GFX940-NEXT: s_mov_b32 s2, 0x7060302 2578; GFX940-NEXT: v_mov_b32_e32 v4, 0 2579; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 2580; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 2581; GFX940-NEXT: s_waitcnt vmcnt(0) 2582; GFX940-NEXT: s_setpc_b64 s[30:31] 2583 %vec0 = call <8 x i16> asm "; def $0", "=v"() 2584 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 7, i32 1> 2585 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 2586 ret void 2587} 2588 2589define void @v_shuffle_v2i16_v8i16__8_1(ptr addrspace(1) inreg %ptr) { 2590; GFX900-LABEL: v_shuffle_v2i16_v8i16__8_1: 2591; GFX900: ; %bb.0: 2592; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2593; GFX900-NEXT: v_mov_b32_e32 v4, 0 2594; 
GFX900-NEXT: ;;#ASMSTART 2595; GFX900-NEXT: ; def v[0:3] 2596; GFX900-NEXT: ;;#ASMEND 2597; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 2598; GFX900-NEXT: s_waitcnt vmcnt(0) 2599; GFX900-NEXT: s_setpc_b64 s[30:31] 2600; 2601; GFX90A-LABEL: v_shuffle_v2i16_v8i16__8_1: 2602; GFX90A: ; %bb.0: 2603; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2604; GFX90A-NEXT: v_mov_b32_e32 v4, 0 2605; GFX90A-NEXT: ;;#ASMSTART 2606; GFX90A-NEXT: ; def v[0:3] 2607; GFX90A-NEXT: ;;#ASMEND 2608; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 2609; GFX90A-NEXT: s_waitcnt vmcnt(0) 2610; GFX90A-NEXT: s_setpc_b64 s[30:31] 2611; 2612; GFX940-LABEL: v_shuffle_v2i16_v8i16__8_1: 2613; GFX940: ; %bb.0: 2614; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2615; GFX940-NEXT: v_mov_b32_e32 v4, 0 2616; GFX940-NEXT: ;;#ASMSTART 2617; GFX940-NEXT: ; def v[0:3] 2618; GFX940-NEXT: ;;#ASMEND 2619; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 2620; GFX940-NEXT: s_waitcnt vmcnt(0) 2621; GFX940-NEXT: s_setpc_b64 s[30:31] 2622 %vec0 = call <8 x i16> asm "; def $0", "=v"() 2623 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 8, i32 1> 2624 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 2625 ret void 2626} 2627 2628define void @v_shuffle_v2i16_v8i16__9_1(ptr addrspace(1) inreg %ptr) { 2629; GFX900-LABEL: v_shuffle_v2i16_v8i16__9_1: 2630; GFX900: ; %bb.0: 2631; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2632; GFX900-NEXT: ;;#ASMSTART 2633; GFX900-NEXT: ; def v[0:3] 2634; GFX900-NEXT: ;;#ASMEND 2635; GFX900-NEXT: s_mov_b32 s4, 0x7060302 2636; GFX900-NEXT: v_mov_b32_e32 v5, 0 2637; GFX900-NEXT: ;;#ASMSTART 2638; GFX900-NEXT: ; def v[1:4] 2639; GFX900-NEXT: ;;#ASMEND 2640; GFX900-NEXT: v_perm_b32 v0, v0, v1, s4 2641; GFX900-NEXT: global_store_dword v5, v0, s[16:17] 2642; GFX900-NEXT: s_waitcnt vmcnt(0) 2643; GFX900-NEXT: s_setpc_b64 s[30:31] 2644; 2645; GFX90A-LABEL: v_shuffle_v2i16_v8i16__9_1: 2646; GFX90A: ; %bb.0: 2647; GFX90A-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2648; GFX90A-NEXT: ;;#ASMSTART 2649; GFX90A-NEXT: ; def v[0:3] 2650; GFX90A-NEXT: ;;#ASMEND 2651; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 2652; GFX90A-NEXT: v_mov_b32_e32 v6, 0 2653; GFX90A-NEXT: ;;#ASMSTART 2654; GFX90A-NEXT: ; def v[2:5] 2655; GFX90A-NEXT: ;;#ASMEND 2656; GFX90A-NEXT: v_perm_b32 v0, v0, v2, s4 2657; GFX90A-NEXT: global_store_dword v6, v0, s[16:17] 2658; GFX90A-NEXT: s_waitcnt vmcnt(0) 2659; GFX90A-NEXT: s_setpc_b64 s[30:31] 2660; 2661; GFX940-LABEL: v_shuffle_v2i16_v8i16__9_1: 2662; GFX940: ; %bb.0: 2663; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2664; GFX940-NEXT: ;;#ASMSTART 2665; GFX940-NEXT: ; def v[0:3] 2666; GFX940-NEXT: ;;#ASMEND 2667; GFX940-NEXT: s_mov_b32 s2, 0x7060302 2668; GFX940-NEXT: v_mov_b32_e32 v6, 0 2669; GFX940-NEXT: ;;#ASMSTART 2670; GFX940-NEXT: ; def v[2:5] 2671; GFX940-NEXT: ;;#ASMEND 2672; GFX940-NEXT: s_nop 0 2673; GFX940-NEXT: v_perm_b32 v0, v0, v2, s2 2674; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 2675; GFX940-NEXT: s_waitcnt vmcnt(0) 2676; GFX940-NEXT: s_setpc_b64 s[30:31] 2677 %vec0 = call <8 x i16> asm "; def $0", "=v"() 2678 %vec1 = call <8 x i16> asm "; def $0", "=v"() 2679 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 9, i32 1> 2680 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 2681 ret void 2682} 2683 2684define void @v_shuffle_v2i16_v8i16__10_1(ptr addrspace(1) inreg %ptr) { 2685; GFX900-LABEL: v_shuffle_v2i16_v8i16__10_1: 2686; GFX900: ; %bb.0: 2687; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2688; GFX900-NEXT: ;;#ASMSTART 2689; GFX900-NEXT: ; def v[0:3] 2690; GFX900-NEXT: ;;#ASMEND 2691; GFX900-NEXT: s_mov_b32 s4, 0xffff 2692; GFX900-NEXT: v_mov_b32_e32 v5, 0 2693; GFX900-NEXT: ;;#ASMSTART 2694; GFX900-NEXT: ; def v[1:4] 2695; GFX900-NEXT: ;;#ASMEND 2696; GFX900-NEXT: v_bfi_b32 v0, s4, v2, v0 2697; GFX900-NEXT: global_store_dword v5, v0, s[16:17] 2698; GFX900-NEXT: s_waitcnt vmcnt(0) 2699; GFX900-NEXT: 
s_setpc_b64 s[30:31] 2700; 2701; GFX90A-LABEL: v_shuffle_v2i16_v8i16__10_1: 2702; GFX90A: ; %bb.0: 2703; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2704; GFX90A-NEXT: ;;#ASMSTART 2705; GFX90A-NEXT: ; def v[0:3] 2706; GFX90A-NEXT: ;;#ASMEND 2707; GFX90A-NEXT: s_mov_b32 s4, 0xffff 2708; GFX90A-NEXT: v_mov_b32_e32 v6, 0 2709; GFX90A-NEXT: ;;#ASMSTART 2710; GFX90A-NEXT: ; def v[2:5] 2711; GFX90A-NEXT: ;;#ASMEND 2712; GFX90A-NEXT: v_bfi_b32 v0, s4, v3, v0 2713; GFX90A-NEXT: global_store_dword v6, v0, s[16:17] 2714; GFX90A-NEXT: s_waitcnt vmcnt(0) 2715; GFX90A-NEXT: s_setpc_b64 s[30:31] 2716; 2717; GFX940-LABEL: v_shuffle_v2i16_v8i16__10_1: 2718; GFX940: ; %bb.0: 2719; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2720; GFX940-NEXT: ;;#ASMSTART 2721; GFX940-NEXT: ; def v[0:3] 2722; GFX940-NEXT: ;;#ASMEND 2723; GFX940-NEXT: s_mov_b32 s2, 0xffff 2724; GFX940-NEXT: v_mov_b32_e32 v6, 0 2725; GFX940-NEXT: ;;#ASMSTART 2726; GFX940-NEXT: ; def v[2:5] 2727; GFX940-NEXT: ;;#ASMEND 2728; GFX940-NEXT: s_nop 0 2729; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v0 2730; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 2731; GFX940-NEXT: s_waitcnt vmcnt(0) 2732; GFX940-NEXT: s_setpc_b64 s[30:31] 2733 %vec0 = call <8 x i16> asm "; def $0", "=v"() 2734 %vec1 = call <8 x i16> asm "; def $0", "=v"() 2735 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 10, i32 1> 2736 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 2737 ret void 2738} 2739 2740define void @v_shuffle_v2i16_v8i16__11_1(ptr addrspace(1) inreg %ptr) { 2741; GFX900-LABEL: v_shuffle_v2i16_v8i16__11_1: 2742; GFX900: ; %bb.0: 2743; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2744; GFX900-NEXT: ;;#ASMSTART 2745; GFX900-NEXT: ; def v[0:3] 2746; GFX900-NEXT: ;;#ASMEND 2747; GFX900-NEXT: s_mov_b32 s4, 0x7060302 2748; GFX900-NEXT: v_mov_b32_e32 v5, 0 2749; GFX900-NEXT: ;;#ASMSTART 2750; GFX900-NEXT: ; def v[1:4] 2751; GFX900-NEXT: ;;#ASMEND 2752; GFX900-NEXT: v_perm_b32 v0, v0, v2, s4 
2753; GFX900-NEXT: global_store_dword v5, v0, s[16:17] 2754; GFX900-NEXT: s_waitcnt vmcnt(0) 2755; GFX900-NEXT: s_setpc_b64 s[30:31] 2756; 2757; GFX90A-LABEL: v_shuffle_v2i16_v8i16__11_1: 2758; GFX90A: ; %bb.0: 2759; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2760; GFX90A-NEXT: ;;#ASMSTART 2761; GFX90A-NEXT: ; def v[0:3] 2762; GFX90A-NEXT: ;;#ASMEND 2763; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 2764; GFX90A-NEXT: v_mov_b32_e32 v6, 0 2765; GFX90A-NEXT: ;;#ASMSTART 2766; GFX90A-NEXT: ; def v[2:5] 2767; GFX90A-NEXT: ;;#ASMEND 2768; GFX90A-NEXT: v_perm_b32 v0, v0, v3, s4 2769; GFX90A-NEXT: global_store_dword v6, v0, s[16:17] 2770; GFX90A-NEXT: s_waitcnt vmcnt(0) 2771; GFX90A-NEXT: s_setpc_b64 s[30:31] 2772; 2773; GFX940-LABEL: v_shuffle_v2i16_v8i16__11_1: 2774; GFX940: ; %bb.0: 2775; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2776; GFX940-NEXT: ;;#ASMSTART 2777; GFX940-NEXT: ; def v[0:3] 2778; GFX940-NEXT: ;;#ASMEND 2779; GFX940-NEXT: s_mov_b32 s2, 0x7060302 2780; GFX940-NEXT: v_mov_b32_e32 v6, 0 2781; GFX940-NEXT: ;;#ASMSTART 2782; GFX940-NEXT: ; def v[2:5] 2783; GFX940-NEXT: ;;#ASMEND 2784; GFX940-NEXT: s_nop 0 2785; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 2786; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 2787; GFX940-NEXT: s_waitcnt vmcnt(0) 2788; GFX940-NEXT: s_setpc_b64 s[30:31] 2789 %vec0 = call <8 x i16> asm "; def $0", "=v"() 2790 %vec1 = call <8 x i16> asm "; def $0", "=v"() 2791 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 11, i32 1> 2792 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 2793 ret void 2794} 2795 2796define void @v_shuffle_v2i16_v8i16__12_1(ptr addrspace(1) inreg %ptr) { 2797; GFX900-LABEL: v_shuffle_v2i16_v8i16__12_1: 2798; GFX900: ; %bb.0: 2799; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2800; GFX900-NEXT: ;;#ASMSTART 2801; GFX900-NEXT: ; def v[0:3] 2802; GFX900-NEXT: ;;#ASMEND 2803; GFX900-NEXT: s_mov_b32 s4, 0xffff 2804; GFX900-NEXT: v_mov_b32_e32 v5, 0 2805; GFX900-NEXT: 
;;#ASMSTART 2806; GFX900-NEXT: ; def v[1:4] 2807; GFX900-NEXT: ;;#ASMEND 2808; GFX900-NEXT: v_bfi_b32 v0, s4, v3, v0 2809; GFX900-NEXT: global_store_dword v5, v0, s[16:17] 2810; GFX900-NEXT: s_waitcnt vmcnt(0) 2811; GFX900-NEXT: s_setpc_b64 s[30:31] 2812; 2813; GFX90A-LABEL: v_shuffle_v2i16_v8i16__12_1: 2814; GFX90A: ; %bb.0: 2815; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2816; GFX90A-NEXT: ;;#ASMSTART 2817; GFX90A-NEXT: ; def v[0:3] 2818; GFX90A-NEXT: ;;#ASMEND 2819; GFX90A-NEXT: s_mov_b32 s4, 0xffff 2820; GFX90A-NEXT: v_mov_b32_e32 v6, 0 2821; GFX90A-NEXT: ;;#ASMSTART 2822; GFX90A-NEXT: ; def v[2:5] 2823; GFX90A-NEXT: ;;#ASMEND 2824; GFX90A-NEXT: v_bfi_b32 v0, s4, v4, v0 2825; GFX90A-NEXT: global_store_dword v6, v0, s[16:17] 2826; GFX90A-NEXT: s_waitcnt vmcnt(0) 2827; GFX90A-NEXT: s_setpc_b64 s[30:31] 2828; 2829; GFX940-LABEL: v_shuffle_v2i16_v8i16__12_1: 2830; GFX940: ; %bb.0: 2831; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2832; GFX940-NEXT: ;;#ASMSTART 2833; GFX940-NEXT: ; def v[0:3] 2834; GFX940-NEXT: ;;#ASMEND 2835; GFX940-NEXT: s_mov_b32 s2, 0xffff 2836; GFX940-NEXT: v_mov_b32_e32 v6, 0 2837; GFX940-NEXT: ;;#ASMSTART 2838; GFX940-NEXT: ; def v[2:5] 2839; GFX940-NEXT: ;;#ASMEND 2840; GFX940-NEXT: s_nop 0 2841; GFX940-NEXT: v_bfi_b32 v0, s2, v4, v0 2842; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 2843; GFX940-NEXT: s_waitcnt vmcnt(0) 2844; GFX940-NEXT: s_setpc_b64 s[30:31] 2845 %vec0 = call <8 x i16> asm "; def $0", "=v"() 2846 %vec1 = call <8 x i16> asm "; def $0", "=v"() 2847 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 12, i32 1> 2848 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 2849 ret void 2850} 2851 2852define void @v_shuffle_v2i16_v8i16__13_1(ptr addrspace(1) inreg %ptr) { 2853; GFX900-LABEL: v_shuffle_v2i16_v8i16__13_1: 2854; GFX900: ; %bb.0: 2855; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2856; GFX900-NEXT: ;;#ASMSTART 2857; GFX900-NEXT: ; def v[0:3] 2858; GFX900-NEXT: 
;;#ASMEND 2859; GFX900-NEXT: s_mov_b32 s4, 0x7060302 2860; GFX900-NEXT: v_mov_b32_e32 v5, 0 2861; GFX900-NEXT: ;;#ASMSTART 2862; GFX900-NEXT: ; def v[1:4] 2863; GFX900-NEXT: ;;#ASMEND 2864; GFX900-NEXT: v_perm_b32 v0, v0, v3, s4 2865; GFX900-NEXT: global_store_dword v5, v0, s[16:17] 2866; GFX900-NEXT: s_waitcnt vmcnt(0) 2867; GFX900-NEXT: s_setpc_b64 s[30:31] 2868; 2869; GFX90A-LABEL: v_shuffle_v2i16_v8i16__13_1: 2870; GFX90A: ; %bb.0: 2871; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2872; GFX90A-NEXT: ;;#ASMSTART 2873; GFX90A-NEXT: ; def v[0:3] 2874; GFX90A-NEXT: ;;#ASMEND 2875; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 2876; GFX90A-NEXT: v_mov_b32_e32 v6, 0 2877; GFX90A-NEXT: ;;#ASMSTART 2878; GFX90A-NEXT: ; def v[2:5] 2879; GFX90A-NEXT: ;;#ASMEND 2880; GFX90A-NEXT: v_perm_b32 v0, v0, v4, s4 2881; GFX90A-NEXT: global_store_dword v6, v0, s[16:17] 2882; GFX90A-NEXT: s_waitcnt vmcnt(0) 2883; GFX90A-NEXT: s_setpc_b64 s[30:31] 2884; 2885; GFX940-LABEL: v_shuffle_v2i16_v8i16__13_1: 2886; GFX940: ; %bb.0: 2887; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2888; GFX940-NEXT: ;;#ASMSTART 2889; GFX940-NEXT: ; def v[0:3] 2890; GFX940-NEXT: ;;#ASMEND 2891; GFX940-NEXT: s_mov_b32 s2, 0x7060302 2892; GFX940-NEXT: v_mov_b32_e32 v6, 0 2893; GFX940-NEXT: ;;#ASMSTART 2894; GFX940-NEXT: ; def v[2:5] 2895; GFX940-NEXT: ;;#ASMEND 2896; GFX940-NEXT: s_nop 0 2897; GFX940-NEXT: v_perm_b32 v0, v0, v4, s2 2898; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 2899; GFX940-NEXT: s_waitcnt vmcnt(0) 2900; GFX940-NEXT: s_setpc_b64 s[30:31] 2901 %vec0 = call <8 x i16> asm "; def $0", "=v"() 2902 %vec1 = call <8 x i16> asm "; def $0", "=v"() 2903 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 13, i32 1> 2904 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 2905 ret void 2906} 2907 2908define void @v_shuffle_v2i16_v8i16__14_1(ptr addrspace(1) inreg %ptr) { 2909; GFX900-LABEL: v_shuffle_v2i16_v8i16__14_1: 2910; GFX900: ; %bb.0: 2911; GFX900-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2912; GFX900-NEXT: ;;#ASMSTART 2913; GFX900-NEXT: ; def v[0:3] 2914; GFX900-NEXT: ;;#ASMEND 2915; GFX900-NEXT: s_mov_b32 s4, 0xffff 2916; GFX900-NEXT: v_mov_b32_e32 v5, 0 2917; GFX900-NEXT: ;;#ASMSTART 2918; GFX900-NEXT: ; def v[1:4] 2919; GFX900-NEXT: ;;#ASMEND 2920; GFX900-NEXT: v_bfi_b32 v0, s4, v4, v0 2921; GFX900-NEXT: global_store_dword v5, v0, s[16:17] 2922; GFX900-NEXT: s_waitcnt vmcnt(0) 2923; GFX900-NEXT: s_setpc_b64 s[30:31] 2924; 2925; GFX90A-LABEL: v_shuffle_v2i16_v8i16__14_1: 2926; GFX90A: ; %bb.0: 2927; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2928; GFX90A-NEXT: ;;#ASMSTART 2929; GFX90A-NEXT: ; def v[0:3] 2930; GFX90A-NEXT: ;;#ASMEND 2931; GFX90A-NEXT: s_mov_b32 s4, 0xffff 2932; GFX90A-NEXT: v_mov_b32_e32 v6, 0 2933; GFX90A-NEXT: ;;#ASMSTART 2934; GFX90A-NEXT: ; def v[2:5] 2935; GFX90A-NEXT: ;;#ASMEND 2936; GFX90A-NEXT: v_bfi_b32 v0, s4, v5, v0 2937; GFX90A-NEXT: global_store_dword v6, v0, s[16:17] 2938; GFX90A-NEXT: s_waitcnt vmcnt(0) 2939; GFX90A-NEXT: s_setpc_b64 s[30:31] 2940; 2941; GFX940-LABEL: v_shuffle_v2i16_v8i16__14_1: 2942; GFX940: ; %bb.0: 2943; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2944; GFX940-NEXT: ;;#ASMSTART 2945; GFX940-NEXT: ; def v[0:3] 2946; GFX940-NEXT: ;;#ASMEND 2947; GFX940-NEXT: s_mov_b32 s2, 0xffff 2948; GFX940-NEXT: v_mov_b32_e32 v6, 0 2949; GFX940-NEXT: ;;#ASMSTART 2950; GFX940-NEXT: ; def v[2:5] 2951; GFX940-NEXT: ;;#ASMEND 2952; GFX940-NEXT: s_nop 0 2953; GFX940-NEXT: v_bfi_b32 v0, s2, v5, v0 2954; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 2955; GFX940-NEXT: s_waitcnt vmcnt(0) 2956; GFX940-NEXT: s_setpc_b64 s[30:31] 2957 %vec0 = call <8 x i16> asm "; def $0", "=v"() 2958 %vec1 = call <8 x i16> asm "; def $0", "=v"() 2959 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 14, i32 1> 2960 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 2961 ret void 2962} 2963 2964define void @v_shuffle_v2i16_v8i16__u_2(ptr 
addrspace(1) inreg %ptr) { 2965; GFX900-LABEL: v_shuffle_v2i16_v8i16__u_2: 2966; GFX900: ; %bb.0: 2967; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2968; GFX900-NEXT: ;;#ASMSTART 2969; GFX900-NEXT: ; def v[0:3] 2970; GFX900-NEXT: ;;#ASMEND 2971; GFX900-NEXT: v_mov_b32_e32 v4, 0 2972; GFX900-NEXT: v_lshlrev_b32_e32 v0, 16, v1 2973; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 2974; GFX900-NEXT: s_waitcnt vmcnt(0) 2975; GFX900-NEXT: s_setpc_b64 s[30:31] 2976; 2977; GFX90A-LABEL: v_shuffle_v2i16_v8i16__u_2: 2978; GFX90A: ; %bb.0: 2979; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2980; GFX90A-NEXT: ;;#ASMSTART 2981; GFX90A-NEXT: ; def v[0:3] 2982; GFX90A-NEXT: ;;#ASMEND 2983; GFX90A-NEXT: v_mov_b32_e32 v4, 0 2984; GFX90A-NEXT: v_lshlrev_b32_e32 v0, 16, v1 2985; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 2986; GFX90A-NEXT: s_waitcnt vmcnt(0) 2987; GFX90A-NEXT: s_setpc_b64 s[30:31] 2988; 2989; GFX940-LABEL: v_shuffle_v2i16_v8i16__u_2: 2990; GFX940: ; %bb.0: 2991; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2992; GFX940-NEXT: ;;#ASMSTART 2993; GFX940-NEXT: ; def v[0:3] 2994; GFX940-NEXT: ;;#ASMEND 2995; GFX940-NEXT: v_mov_b32_e32 v4, 0 2996; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v1 2997; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 2998; GFX940-NEXT: s_waitcnt vmcnt(0) 2999; GFX940-NEXT: s_setpc_b64 s[30:31] 3000 %vec0 = call <8 x i16> asm "; def $0", "=v"() 3001 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 poison, i32 2> 3002 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 3003 ret void 3004} 3005 3006define void @v_shuffle_v2i16_v8i16__0_2(ptr addrspace(1) inreg %ptr) { 3007; GFX900-LABEL: v_shuffle_v2i16_v8i16__0_2: 3008; GFX900: ; %bb.0: 3009; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3010; GFX900-NEXT: ;;#ASMSTART 3011; GFX900-NEXT: ; def v[0:3] 3012; GFX900-NEXT: ;;#ASMEND 3013; GFX900-NEXT: s_mov_b32 s4, 0x5040100 3014; GFX900-NEXT: v_mov_b32_e32 v4, 0 3015; GFX900-NEXT: 
v_perm_b32 v0, v1, v0, s4 3016; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 3017; GFX900-NEXT: s_waitcnt vmcnt(0) 3018; GFX900-NEXT: s_setpc_b64 s[30:31] 3019; 3020; GFX90A-LABEL: v_shuffle_v2i16_v8i16__0_2: 3021; GFX90A: ; %bb.0: 3022; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3023; GFX90A-NEXT: ;;#ASMSTART 3024; GFX90A-NEXT: ; def v[0:3] 3025; GFX90A-NEXT: ;;#ASMEND 3026; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 3027; GFX90A-NEXT: v_mov_b32_e32 v4, 0 3028; GFX90A-NEXT: v_perm_b32 v0, v1, v0, s4 3029; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 3030; GFX90A-NEXT: s_waitcnt vmcnt(0) 3031; GFX90A-NEXT: s_setpc_b64 s[30:31] 3032; 3033; GFX940-LABEL: v_shuffle_v2i16_v8i16__0_2: 3034; GFX940: ; %bb.0: 3035; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3036; GFX940-NEXT: ;;#ASMSTART 3037; GFX940-NEXT: ; def v[0:3] 3038; GFX940-NEXT: ;;#ASMEND 3039; GFX940-NEXT: s_mov_b32 s2, 0x5040100 3040; GFX940-NEXT: v_mov_b32_e32 v4, 0 3041; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2 3042; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 3043; GFX940-NEXT: s_waitcnt vmcnt(0) 3044; GFX940-NEXT: s_setpc_b64 s[30:31] 3045 %vec0 = call <8 x i16> asm "; def $0", "=v"() 3046 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 0, i32 2> 3047 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 3048 ret void 3049} 3050 3051define void @v_shuffle_v2i16_v8i16__1_2(ptr addrspace(1) inreg %ptr) { 3052; GFX900-LABEL: v_shuffle_v2i16_v8i16__1_2: 3053; GFX900: ; %bb.0: 3054; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3055; GFX900-NEXT: ;;#ASMSTART 3056; GFX900-NEXT: ; def v[0:3] 3057; GFX900-NEXT: ;;#ASMEND 3058; GFX900-NEXT: v_mov_b32_e32 v4, 0 3059; GFX900-NEXT: v_alignbit_b32 v0, v1, v0, 16 3060; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 3061; GFX900-NEXT: s_waitcnt vmcnt(0) 3062; GFX900-NEXT: s_setpc_b64 s[30:31] 3063; 3064; GFX90A-LABEL: v_shuffle_v2i16_v8i16__1_2: 3065; GFX90A: ; %bb.0: 3066; GFX90A-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) 3067; GFX90A-NEXT: ;;#ASMSTART 3068; GFX90A-NEXT: ; def v[0:3] 3069; GFX90A-NEXT: ;;#ASMEND 3070; GFX90A-NEXT: v_mov_b32_e32 v4, 0 3071; GFX90A-NEXT: v_alignbit_b32 v0, v1, v0, 16 3072; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 3073; GFX90A-NEXT: s_waitcnt vmcnt(0) 3074; GFX90A-NEXT: s_setpc_b64 s[30:31] 3075; 3076; GFX940-LABEL: v_shuffle_v2i16_v8i16__1_2: 3077; GFX940: ; %bb.0: 3078; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3079; GFX940-NEXT: ;;#ASMSTART 3080; GFX940-NEXT: ; def v[0:3] 3081; GFX940-NEXT: ;;#ASMEND 3082; GFX940-NEXT: v_mov_b32_e32 v4, 0 3083; GFX940-NEXT: v_alignbit_b32 v0, v1, v0, 16 3084; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 3085; GFX940-NEXT: s_waitcnt vmcnt(0) 3086; GFX940-NEXT: s_setpc_b64 s[30:31] 3087 %vec0 = call <8 x i16> asm "; def $0", "=v"() 3088 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 1, i32 2> 3089 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 3090 ret void 3091} 3092 3093define void @v_shuffle_v2i16_v8i16__2_2(ptr addrspace(1) inreg %ptr) { 3094; GFX900-LABEL: v_shuffle_v2i16_v8i16__2_2: 3095; GFX900: ; %bb.0: 3096; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3097; GFX900-NEXT: ;;#ASMSTART 3098; GFX900-NEXT: ; def v[0:3] 3099; GFX900-NEXT: ;;#ASMEND 3100; GFX900-NEXT: s_mov_b32 s4, 0x5040100 3101; GFX900-NEXT: v_mov_b32_e32 v4, 0 3102; GFX900-NEXT: v_perm_b32 v0, v1, v1, s4 3103; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 3104; GFX900-NEXT: s_waitcnt vmcnt(0) 3105; GFX900-NEXT: s_setpc_b64 s[30:31] 3106; 3107; GFX90A-LABEL: v_shuffle_v2i16_v8i16__2_2: 3108; GFX90A: ; %bb.0: 3109; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3110; GFX90A-NEXT: ;;#ASMSTART 3111; GFX90A-NEXT: ; def v[0:3] 3112; GFX90A-NEXT: ;;#ASMEND 3113; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 3114; GFX90A-NEXT: v_mov_b32_e32 v4, 0 3115; GFX90A-NEXT: v_perm_b32 v0, v1, v1, s4 3116; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 3117; GFX90A-NEXT: 
s_waitcnt vmcnt(0) 3118; GFX90A-NEXT: s_setpc_b64 s[30:31] 3119; 3120; GFX940-LABEL: v_shuffle_v2i16_v8i16__2_2: 3121; GFX940: ; %bb.0: 3122; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3123; GFX940-NEXT: ;;#ASMSTART 3124; GFX940-NEXT: ; def v[0:3] 3125; GFX940-NEXT: ;;#ASMEND 3126; GFX940-NEXT: s_mov_b32 s2, 0x5040100 3127; GFX940-NEXT: v_mov_b32_e32 v4, 0 3128; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 3129; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 3130; GFX940-NEXT: s_waitcnt vmcnt(0) 3131; GFX940-NEXT: s_setpc_b64 s[30:31] 3132 %vec0 = call <8 x i16> asm "; def $0", "=v"() 3133 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 2, i32 2> 3134 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 3135 ret void 3136} 3137 3138define void @v_shuffle_v2i16_v8i16__3_2(ptr addrspace(1) inreg %ptr) { 3139; GFX900-LABEL: v_shuffle_v2i16_v8i16__3_2: 3140; GFX900: ; %bb.0: 3141; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3142; GFX900-NEXT: ;;#ASMSTART 3143; GFX900-NEXT: ; def v[0:3] 3144; GFX900-NEXT: ;;#ASMEND 3145; GFX900-NEXT: v_mov_b32_e32 v4, 0 3146; GFX900-NEXT: v_alignbit_b32 v0, v1, v1, 16 3147; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 3148; GFX900-NEXT: s_waitcnt vmcnt(0) 3149; GFX900-NEXT: s_setpc_b64 s[30:31] 3150; 3151; GFX90A-LABEL: v_shuffle_v2i16_v8i16__3_2: 3152; GFX90A: ; %bb.0: 3153; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3154; GFX90A-NEXT: ;;#ASMSTART 3155; GFX90A-NEXT: ; def v[0:3] 3156; GFX90A-NEXT: ;;#ASMEND 3157; GFX90A-NEXT: v_mov_b32_e32 v4, 0 3158; GFX90A-NEXT: v_alignbit_b32 v0, v1, v1, 16 3159; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 3160; GFX90A-NEXT: s_waitcnt vmcnt(0) 3161; GFX90A-NEXT: s_setpc_b64 s[30:31] 3162; 3163; GFX940-LABEL: v_shuffle_v2i16_v8i16__3_2: 3164; GFX940: ; %bb.0: 3165; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3166; GFX940-NEXT: ;;#ASMSTART 3167; GFX940-NEXT: ; def v[0:3] 3168; GFX940-NEXT: ;;#ASMEND 3169; GFX940-NEXT: 
v_mov_b32_e32 v4, 0 3170; GFX940-NEXT: v_alignbit_b32 v0, v1, v1, 16 3171; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 3172; GFX940-NEXT: s_waitcnt vmcnt(0) 3173; GFX940-NEXT: s_setpc_b64 s[30:31] 3174 %vec0 = call <8 x i16> asm "; def $0", "=v"() 3175 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 3, i32 2> 3176 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 3177 ret void 3178} 3179 3180define void @v_shuffle_v2i16_v8i16__4_2(ptr addrspace(1) inreg %ptr) { 3181; GFX900-LABEL: v_shuffle_v2i16_v8i16__4_2: 3182; GFX900: ; %bb.0: 3183; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3184; GFX900-NEXT: ;;#ASMSTART 3185; GFX900-NEXT: ; def v[0:3] 3186; GFX900-NEXT: ;;#ASMEND 3187; GFX900-NEXT: s_mov_b32 s4, 0x5040100 3188; GFX900-NEXT: v_mov_b32_e32 v4, 0 3189; GFX900-NEXT: v_perm_b32 v0, v1, v2, s4 3190; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 3191; GFX900-NEXT: s_waitcnt vmcnt(0) 3192; GFX900-NEXT: s_setpc_b64 s[30:31] 3193; 3194; GFX90A-LABEL: v_shuffle_v2i16_v8i16__4_2: 3195; GFX90A: ; %bb.0: 3196; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3197; GFX90A-NEXT: ;;#ASMSTART 3198; GFX90A-NEXT: ; def v[0:3] 3199; GFX90A-NEXT: ;;#ASMEND 3200; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 3201; GFX90A-NEXT: v_mov_b32_e32 v4, 0 3202; GFX90A-NEXT: v_perm_b32 v0, v1, v2, s4 3203; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 3204; GFX90A-NEXT: s_waitcnt vmcnt(0) 3205; GFX90A-NEXT: s_setpc_b64 s[30:31] 3206; 3207; GFX940-LABEL: v_shuffle_v2i16_v8i16__4_2: 3208; GFX940: ; %bb.0: 3209; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3210; GFX940-NEXT: ;;#ASMSTART 3211; GFX940-NEXT: ; def v[0:3] 3212; GFX940-NEXT: ;;#ASMEND 3213; GFX940-NEXT: s_mov_b32 s2, 0x5040100 3214; GFX940-NEXT: v_mov_b32_e32 v4, 0 3215; GFX940-NEXT: v_perm_b32 v0, v1, v2, s2 3216; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 3217; GFX940-NEXT: s_waitcnt vmcnt(0) 3218; GFX940-NEXT: s_setpc_b64 s[30:31] 3219 %vec0 = call <8 x i16> 
asm "; def $0", "=v"() 3220 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 4, i32 2> 3221 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 3222 ret void 3223} 3224 3225define void @v_shuffle_v2i16_v8i16__5_2(ptr addrspace(1) inreg %ptr) { 3226; GFX900-LABEL: v_shuffle_v2i16_v8i16__5_2: 3227; GFX900: ; %bb.0: 3228; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3229; GFX900-NEXT: ;;#ASMSTART 3230; GFX900-NEXT: ; def v[0:3] 3231; GFX900-NEXT: ;;#ASMEND 3232; GFX900-NEXT: v_mov_b32_e32 v4, 0 3233; GFX900-NEXT: v_alignbit_b32 v0, v1, v2, 16 3234; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 3235; GFX900-NEXT: s_waitcnt vmcnt(0) 3236; GFX900-NEXT: s_setpc_b64 s[30:31] 3237; 3238; GFX90A-LABEL: v_shuffle_v2i16_v8i16__5_2: 3239; GFX90A: ; %bb.0: 3240; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3241; GFX90A-NEXT: ;;#ASMSTART 3242; GFX90A-NEXT: ; def v[0:3] 3243; GFX90A-NEXT: ;;#ASMEND 3244; GFX90A-NEXT: v_mov_b32_e32 v4, 0 3245; GFX90A-NEXT: v_alignbit_b32 v0, v1, v2, 16 3246; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 3247; GFX90A-NEXT: s_waitcnt vmcnt(0) 3248; GFX90A-NEXT: s_setpc_b64 s[30:31] 3249; 3250; GFX940-LABEL: v_shuffle_v2i16_v8i16__5_2: 3251; GFX940: ; %bb.0: 3252; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3253; GFX940-NEXT: ;;#ASMSTART 3254; GFX940-NEXT: ; def v[0:3] 3255; GFX940-NEXT: ;;#ASMEND 3256; GFX940-NEXT: v_mov_b32_e32 v4, 0 3257; GFX940-NEXT: v_alignbit_b32 v0, v1, v2, 16 3258; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 3259; GFX940-NEXT: s_waitcnt vmcnt(0) 3260; GFX940-NEXT: s_setpc_b64 s[30:31] 3261 %vec0 = call <8 x i16> asm "; def $0", "=v"() 3262 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 5, i32 2> 3263 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 3264 ret void 3265} 3266 3267define void @v_shuffle_v2i16_v8i16__6_2(ptr addrspace(1) inreg %ptr) { 3268; GFX900-LABEL: v_shuffle_v2i16_v8i16__6_2: 3269; GFX900: ; %bb.0: 3270; 
GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3271; GFX900-NEXT: ;;#ASMSTART 3272; GFX900-NEXT: ; def v[0:3] 3273; GFX900-NEXT: ;;#ASMEND 3274; GFX900-NEXT: s_mov_b32 s4, 0x5040100 3275; GFX900-NEXT: v_mov_b32_e32 v4, 0 3276; GFX900-NEXT: v_perm_b32 v0, v1, v3, s4 3277; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 3278; GFX900-NEXT: s_waitcnt vmcnt(0) 3279; GFX900-NEXT: s_setpc_b64 s[30:31] 3280; 3281; GFX90A-LABEL: v_shuffle_v2i16_v8i16__6_2: 3282; GFX90A: ; %bb.0: 3283; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3284; GFX90A-NEXT: ;;#ASMSTART 3285; GFX90A-NEXT: ; def v[0:3] 3286; GFX90A-NEXT: ;;#ASMEND 3287; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 3288; GFX90A-NEXT: v_mov_b32_e32 v4, 0 3289; GFX90A-NEXT: v_perm_b32 v0, v1, v3, s4 3290; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 3291; GFX90A-NEXT: s_waitcnt vmcnt(0) 3292; GFX90A-NEXT: s_setpc_b64 s[30:31] 3293; 3294; GFX940-LABEL: v_shuffle_v2i16_v8i16__6_2: 3295; GFX940: ; %bb.0: 3296; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3297; GFX940-NEXT: ;;#ASMSTART 3298; GFX940-NEXT: ; def v[0:3] 3299; GFX940-NEXT: ;;#ASMEND 3300; GFX940-NEXT: s_mov_b32 s2, 0x5040100 3301; GFX940-NEXT: v_mov_b32_e32 v4, 0 3302; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 3303; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 3304; GFX940-NEXT: s_waitcnt vmcnt(0) 3305; GFX940-NEXT: s_setpc_b64 s[30:31] 3306 %vec0 = call <8 x i16> asm "; def $0", "=v"() 3307 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 6, i32 2> 3308 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 3309 ret void 3310} 3311 3312define void @v_shuffle_v2i16_v8i16__7_2(ptr addrspace(1) inreg %ptr) { 3313; GFX900-LABEL: v_shuffle_v2i16_v8i16__7_2: 3314; GFX900: ; %bb.0: 3315; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3316; GFX900-NEXT: ;;#ASMSTART 3317; GFX900-NEXT: ; def v[0:3] 3318; GFX900-NEXT: ;;#ASMEND 3319; GFX900-NEXT: v_mov_b32_e32 v4, 0 3320; GFX900-NEXT: v_alignbit_b32 v0, v1, v3, 16 3321; 
GFX900-NEXT: global_store_dword v4, v0, s[16:17] 3322; GFX900-NEXT: s_waitcnt vmcnt(0) 3323; GFX900-NEXT: s_setpc_b64 s[30:31] 3324; 3325; GFX90A-LABEL: v_shuffle_v2i16_v8i16__7_2: 3326; GFX90A: ; %bb.0: 3327; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3328; GFX90A-NEXT: ;;#ASMSTART 3329; GFX90A-NEXT: ; def v[0:3] 3330; GFX90A-NEXT: ;;#ASMEND 3331; GFX90A-NEXT: v_mov_b32_e32 v4, 0 3332; GFX90A-NEXT: v_alignbit_b32 v0, v1, v3, 16 3333; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 3334; GFX90A-NEXT: s_waitcnt vmcnt(0) 3335; GFX90A-NEXT: s_setpc_b64 s[30:31] 3336; 3337; GFX940-LABEL: v_shuffle_v2i16_v8i16__7_2: 3338; GFX940: ; %bb.0: 3339; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3340; GFX940-NEXT: ;;#ASMSTART 3341; GFX940-NEXT: ; def v[0:3] 3342; GFX940-NEXT: ;;#ASMEND 3343; GFX940-NEXT: v_mov_b32_e32 v4, 0 3344; GFX940-NEXT: v_alignbit_b32 v0, v1, v3, 16 3345; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 3346; GFX940-NEXT: s_waitcnt vmcnt(0) 3347; GFX940-NEXT: s_setpc_b64 s[30:31] 3348 %vec0 = call <8 x i16> asm "; def $0", "=v"() 3349 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 7, i32 2> 3350 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 3351 ret void 3352} 3353 3354define void @v_shuffle_v2i16_v8i16__8_2(ptr addrspace(1) inreg %ptr) { 3355; GFX900-LABEL: v_shuffle_v2i16_v8i16__8_2: 3356; GFX900: ; %bb.0: 3357; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3358; GFX900-NEXT: ;;#ASMSTART 3359; GFX900-NEXT: ; def v[0:3] 3360; GFX900-NEXT: ;;#ASMEND 3361; GFX900-NEXT: v_mov_b32_e32 v4, 0 3362; GFX900-NEXT: v_lshlrev_b32_e32 v0, 16, v1 3363; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 3364; GFX900-NEXT: s_waitcnt vmcnt(0) 3365; GFX900-NEXT: s_setpc_b64 s[30:31] 3366; 3367; GFX90A-LABEL: v_shuffle_v2i16_v8i16__8_2: 3368; GFX90A: ; %bb.0: 3369; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3370; GFX90A-NEXT: ;;#ASMSTART 3371; GFX90A-NEXT: ; def v[0:3] 3372; GFX90A-NEXT: ;;#ASMEND 
3373; GFX90A-NEXT: v_mov_b32_e32 v4, 0 3374; GFX90A-NEXT: v_lshlrev_b32_e32 v0, 16, v1 3375; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 3376; GFX90A-NEXT: s_waitcnt vmcnt(0) 3377; GFX90A-NEXT: s_setpc_b64 s[30:31] 3378; 3379; GFX940-LABEL: v_shuffle_v2i16_v8i16__8_2: 3380; GFX940: ; %bb.0: 3381; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3382; GFX940-NEXT: ;;#ASMSTART 3383; GFX940-NEXT: ; def v[0:3] 3384; GFX940-NEXT: ;;#ASMEND 3385; GFX940-NEXT: v_mov_b32_e32 v4, 0 3386; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v1 3387; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 3388; GFX940-NEXT: s_waitcnt vmcnt(0) 3389; GFX940-NEXT: s_setpc_b64 s[30:31] 3390 %vec0 = call <8 x i16> asm "; def $0", "=v"() 3391 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 8, i32 2> 3392 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 3393 ret void 3394} 3395 3396define void @v_shuffle_v2i16_v8i16__9_2(ptr addrspace(1) inreg %ptr) { 3397; GFX900-LABEL: v_shuffle_v2i16_v8i16__9_2: 3398; GFX900: ; %bb.0: 3399; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3400; GFX900-NEXT: ;;#ASMSTART 3401; GFX900-NEXT: ; def v[0:3] 3402; GFX900-NEXT: ;;#ASMEND 3403; GFX900-NEXT: v_mov_b32_e32 v6, 0 3404; GFX900-NEXT: ;;#ASMSTART 3405; GFX900-NEXT: ; def v[2:5] 3406; GFX900-NEXT: ;;#ASMEND 3407; GFX900-NEXT: v_alignbit_b32 v0, v1, v2, 16 3408; GFX900-NEXT: global_store_dword v6, v0, s[16:17] 3409; GFX900-NEXT: s_waitcnt vmcnt(0) 3410; GFX900-NEXT: s_setpc_b64 s[30:31] 3411; 3412; GFX90A-LABEL: v_shuffle_v2i16_v8i16__9_2: 3413; GFX90A: ; %bb.0: 3414; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3415; GFX90A-NEXT: ;;#ASMSTART 3416; GFX90A-NEXT: ; def v[0:3] 3417; GFX90A-NEXT: ;;#ASMEND 3418; GFX90A-NEXT: v_mov_b32_e32 v6, 0 3419; GFX90A-NEXT: ;;#ASMSTART 3420; GFX90A-NEXT: ; def v[2:5] 3421; GFX90A-NEXT: ;;#ASMEND 3422; GFX90A-NEXT: v_alignbit_b32 v0, v1, v2, 16 3423; GFX90A-NEXT: global_store_dword v6, v0, s[16:17] 3424; GFX90A-NEXT: s_waitcnt 
vmcnt(0) 3425; GFX90A-NEXT: s_setpc_b64 s[30:31] 3426; 3427; GFX940-LABEL: v_shuffle_v2i16_v8i16__9_2: 3428; GFX940: ; %bb.0: 3429; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3430; GFX940-NEXT: ;;#ASMSTART 3431; GFX940-NEXT: ; def v[0:3] 3432; GFX940-NEXT: ;;#ASMEND 3433; GFX940-NEXT: v_mov_b32_e32 v6, 0 3434; GFX940-NEXT: ;;#ASMSTART 3435; GFX940-NEXT: ; def v[2:5] 3436; GFX940-NEXT: ;;#ASMEND 3437; GFX940-NEXT: s_nop 0 3438; GFX940-NEXT: v_alignbit_b32 v0, v1, v2, 16 3439; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 3440; GFX940-NEXT: s_waitcnt vmcnt(0) 3441; GFX940-NEXT: s_setpc_b64 s[30:31] 3442 %vec0 = call <8 x i16> asm "; def $0", "=v"() 3443 %vec1 = call <8 x i16> asm "; def $0", "=v"() 3444 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 9, i32 2> 3445 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 3446 ret void 3447} 3448 3449define void @v_shuffle_v2i16_v8i16__10_2(ptr addrspace(1) inreg %ptr) { 3450; GFX900-LABEL: v_shuffle_v2i16_v8i16__10_2: 3451; GFX900: ; %bb.0: 3452; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3453; GFX900-NEXT: ;;#ASMSTART 3454; GFX900-NEXT: ; def v[0:3] 3455; GFX900-NEXT: ;;#ASMEND 3456; GFX900-NEXT: s_mov_b32 s4, 0x5040100 3457; GFX900-NEXT: v_mov_b32_e32 v6, 0 3458; GFX900-NEXT: ;;#ASMSTART 3459; GFX900-NEXT: ; def v[2:5] 3460; GFX900-NEXT: ;;#ASMEND 3461; GFX900-NEXT: v_perm_b32 v0, v1, v3, s4 3462; GFX900-NEXT: global_store_dword v6, v0, s[16:17] 3463; GFX900-NEXT: s_waitcnt vmcnt(0) 3464; GFX900-NEXT: s_setpc_b64 s[30:31] 3465; 3466; GFX90A-LABEL: v_shuffle_v2i16_v8i16__10_2: 3467; GFX90A: ; %bb.0: 3468; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3469; GFX90A-NEXT: ;;#ASMSTART 3470; GFX90A-NEXT: ; def v[0:3] 3471; GFX90A-NEXT: ;;#ASMEND 3472; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 3473; GFX90A-NEXT: v_mov_b32_e32 v6, 0 3474; GFX90A-NEXT: ;;#ASMSTART 3475; GFX90A-NEXT: ; def v[2:5] 3476; GFX90A-NEXT: ;;#ASMEND 3477; GFX90A-NEXT: v_perm_b32 v0, v1, v3, s4 3478; 
GFX90A-NEXT: global_store_dword v6, v0, s[16:17] 3479; GFX90A-NEXT: s_waitcnt vmcnt(0) 3480; GFX90A-NEXT: s_setpc_b64 s[30:31] 3481; 3482; GFX940-LABEL: v_shuffle_v2i16_v8i16__10_2: 3483; GFX940: ; %bb.0: 3484; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3485; GFX940-NEXT: ;;#ASMSTART 3486; GFX940-NEXT: ; def v[0:3] 3487; GFX940-NEXT: ;;#ASMEND 3488; GFX940-NEXT: s_mov_b32 s2, 0x5040100 3489; GFX940-NEXT: v_mov_b32_e32 v6, 0 3490; GFX940-NEXT: ;;#ASMSTART 3491; GFX940-NEXT: ; def v[2:5] 3492; GFX940-NEXT: ;;#ASMEND 3493; GFX940-NEXT: s_nop 0 3494; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 3495; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 3496; GFX940-NEXT: s_waitcnt vmcnt(0) 3497; GFX940-NEXT: s_setpc_b64 s[30:31] 3498 %vec0 = call <8 x i16> asm "; def $0", "=v"() 3499 %vec1 = call <8 x i16> asm "; def $0", "=v"() 3500 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 10, i32 2> 3501 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 3502 ret void 3503} 3504 3505define void @v_shuffle_v2i16_v8i16__11_2(ptr addrspace(1) inreg %ptr) { 3506; GFX900-LABEL: v_shuffle_v2i16_v8i16__11_2: 3507; GFX900: ; %bb.0: 3508; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3509; GFX900-NEXT: ;;#ASMSTART 3510; GFX900-NEXT: ; def v[0:3] 3511; GFX900-NEXT: ;;#ASMEND 3512; GFX900-NEXT: v_mov_b32_e32 v6, 0 3513; GFX900-NEXT: ;;#ASMSTART 3514; GFX900-NEXT: ; def v[2:5] 3515; GFX900-NEXT: ;;#ASMEND 3516; GFX900-NEXT: v_alignbit_b32 v0, v1, v3, 16 3517; GFX900-NEXT: global_store_dword v6, v0, s[16:17] 3518; GFX900-NEXT: s_waitcnt vmcnt(0) 3519; GFX900-NEXT: s_setpc_b64 s[30:31] 3520; 3521; GFX90A-LABEL: v_shuffle_v2i16_v8i16__11_2: 3522; GFX90A: ; %bb.0: 3523; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3524; GFX90A-NEXT: ;;#ASMSTART 3525; GFX90A-NEXT: ; def v[0:3] 3526; GFX90A-NEXT: ;;#ASMEND 3527; GFX90A-NEXT: v_mov_b32_e32 v6, 0 3528; GFX90A-NEXT: ;;#ASMSTART 3529; GFX90A-NEXT: ; def v[2:5] 3530; GFX90A-NEXT: ;;#ASMEND 3531; 
GFX90A-NEXT: v_alignbit_b32 v0, v1, v3, 16 3532; GFX90A-NEXT: global_store_dword v6, v0, s[16:17] 3533; GFX90A-NEXT: s_waitcnt vmcnt(0) 3534; GFX90A-NEXT: s_setpc_b64 s[30:31] 3535; 3536; GFX940-LABEL: v_shuffle_v2i16_v8i16__11_2: 3537; GFX940: ; %bb.0: 3538; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3539; GFX940-NEXT: ;;#ASMSTART 3540; GFX940-NEXT: ; def v[0:3] 3541; GFX940-NEXT: ;;#ASMEND 3542; GFX940-NEXT: v_mov_b32_e32 v6, 0 3543; GFX940-NEXT: ;;#ASMSTART 3544; GFX940-NEXT: ; def v[2:5] 3545; GFX940-NEXT: ;;#ASMEND 3546; GFX940-NEXT: s_nop 0 3547; GFX940-NEXT: v_alignbit_b32 v0, v1, v3, 16 3548; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 3549; GFX940-NEXT: s_waitcnt vmcnt(0) 3550; GFX940-NEXT: s_setpc_b64 s[30:31] 3551 %vec0 = call <8 x i16> asm "; def $0", "=v"() 3552 %vec1 = call <8 x i16> asm "; def $0", "=v"() 3553 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 11, i32 2> 3554 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 3555 ret void 3556} 3557 3558define void @v_shuffle_v2i16_v8i16__12_2(ptr addrspace(1) inreg %ptr) { 3559; GFX900-LABEL: v_shuffle_v2i16_v8i16__12_2: 3560; GFX900: ; %bb.0: 3561; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3562; GFX900-NEXT: ;;#ASMSTART 3563; GFX900-NEXT: ; def v[0:3] 3564; GFX900-NEXT: ;;#ASMEND 3565; GFX900-NEXT: s_mov_b32 s4, 0x5040100 3566; GFX900-NEXT: v_mov_b32_e32 v6, 0 3567; GFX900-NEXT: ;;#ASMSTART 3568; GFX900-NEXT: ; def v[2:5] 3569; GFX900-NEXT: ;;#ASMEND 3570; GFX900-NEXT: v_perm_b32 v0, v1, v4, s4 3571; GFX900-NEXT: global_store_dword v6, v0, s[16:17] 3572; GFX900-NEXT: s_waitcnt vmcnt(0) 3573; GFX900-NEXT: s_setpc_b64 s[30:31] 3574; 3575; GFX90A-LABEL: v_shuffle_v2i16_v8i16__12_2: 3576; GFX90A: ; %bb.0: 3577; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3578; GFX90A-NEXT: ;;#ASMSTART 3579; GFX90A-NEXT: ; def v[0:3] 3580; GFX90A-NEXT: ;;#ASMEND 3581; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 3582; GFX90A-NEXT: v_mov_b32_e32 v6, 0 3583; 
GFX90A-NEXT: ;;#ASMSTART 3584; GFX90A-NEXT: ; def v[2:5] 3585; GFX90A-NEXT: ;;#ASMEND 3586; GFX90A-NEXT: v_perm_b32 v0, v1, v4, s4 3587; GFX90A-NEXT: global_store_dword v6, v0, s[16:17] 3588; GFX90A-NEXT: s_waitcnt vmcnt(0) 3589; GFX90A-NEXT: s_setpc_b64 s[30:31] 3590; 3591; GFX940-LABEL: v_shuffle_v2i16_v8i16__12_2: 3592; GFX940: ; %bb.0: 3593; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3594; GFX940-NEXT: ;;#ASMSTART 3595; GFX940-NEXT: ; def v[0:3] 3596; GFX940-NEXT: ;;#ASMEND 3597; GFX940-NEXT: s_mov_b32 s2, 0x5040100 3598; GFX940-NEXT: v_mov_b32_e32 v6, 0 3599; GFX940-NEXT: ;;#ASMSTART 3600; GFX940-NEXT: ; def v[2:5] 3601; GFX940-NEXT: ;;#ASMEND 3602; GFX940-NEXT: s_nop 0 3603; GFX940-NEXT: v_perm_b32 v0, v1, v4, s2 3604; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 3605; GFX940-NEXT: s_waitcnt vmcnt(0) 3606; GFX940-NEXT: s_setpc_b64 s[30:31] 3607 %vec0 = call <8 x i16> asm "; def $0", "=v"() 3608 %vec1 = call <8 x i16> asm "; def $0", "=v"() 3609 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 12, i32 2> 3610 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 3611 ret void 3612} 3613 3614define void @v_shuffle_v2i16_v8i16__13_2(ptr addrspace(1) inreg %ptr) { 3615; GFX900-LABEL: v_shuffle_v2i16_v8i16__13_2: 3616; GFX900: ; %bb.0: 3617; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3618; GFX900-NEXT: ;;#ASMSTART 3619; GFX900-NEXT: ; def v[0:3] 3620; GFX900-NEXT: ;;#ASMEND 3621; GFX900-NEXT: v_mov_b32_e32 v6, 0 3622; GFX900-NEXT: ;;#ASMSTART 3623; GFX900-NEXT: ; def v[2:5] 3624; GFX900-NEXT: ;;#ASMEND 3625; GFX900-NEXT: v_alignbit_b32 v0, v1, v4, 16 3626; GFX900-NEXT: global_store_dword v6, v0, s[16:17] 3627; GFX900-NEXT: s_waitcnt vmcnt(0) 3628; GFX900-NEXT: s_setpc_b64 s[30:31] 3629; 3630; GFX90A-LABEL: v_shuffle_v2i16_v8i16__13_2: 3631; GFX90A: ; %bb.0: 3632; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3633; GFX90A-NEXT: ;;#ASMSTART 3634; GFX90A-NEXT: ; def v[0:3] 3635; GFX90A-NEXT: ;;#ASMEND 3636; 
GFX90A-NEXT: v_mov_b32_e32 v6, 0 3637; GFX90A-NEXT: ;;#ASMSTART 3638; GFX90A-NEXT: ; def v[2:5] 3639; GFX90A-NEXT: ;;#ASMEND 3640; GFX90A-NEXT: v_alignbit_b32 v0, v1, v4, 16 3641; GFX90A-NEXT: global_store_dword v6, v0, s[16:17] 3642; GFX90A-NEXT: s_waitcnt vmcnt(0) 3643; GFX90A-NEXT: s_setpc_b64 s[30:31] 3644; 3645; GFX940-LABEL: v_shuffle_v2i16_v8i16__13_2: 3646; GFX940: ; %bb.0: 3647; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3648; GFX940-NEXT: ;;#ASMSTART 3649; GFX940-NEXT: ; def v[0:3] 3650; GFX940-NEXT: ;;#ASMEND 3651; GFX940-NEXT: v_mov_b32_e32 v6, 0 3652; GFX940-NEXT: ;;#ASMSTART 3653; GFX940-NEXT: ; def v[2:5] 3654; GFX940-NEXT: ;;#ASMEND 3655; GFX940-NEXT: s_nop 0 3656; GFX940-NEXT: v_alignbit_b32 v0, v1, v4, 16 3657; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 3658; GFX940-NEXT: s_waitcnt vmcnt(0) 3659; GFX940-NEXT: s_setpc_b64 s[30:31] 3660 %vec0 = call <8 x i16> asm "; def $0", "=v"() 3661 %vec1 = call <8 x i16> asm "; def $0", "=v"() 3662 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 13, i32 2> 3663 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 3664 ret void 3665} 3666 3667define void @v_shuffle_v2i16_v8i16__14_2(ptr addrspace(1) inreg %ptr) { 3668; GFX900-LABEL: v_shuffle_v2i16_v8i16__14_2: 3669; GFX900: ; %bb.0: 3670; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3671; GFX900-NEXT: ;;#ASMSTART 3672; GFX900-NEXT: ; def v[0:3] 3673; GFX900-NEXT: ;;#ASMEND 3674; GFX900-NEXT: s_mov_b32 s4, 0x5040100 3675; GFX900-NEXT: v_mov_b32_e32 v6, 0 3676; GFX900-NEXT: ;;#ASMSTART 3677; GFX900-NEXT: ; def v[2:5] 3678; GFX900-NEXT: ;;#ASMEND 3679; GFX900-NEXT: v_perm_b32 v0, v1, v5, s4 3680; GFX900-NEXT: global_store_dword v6, v0, s[16:17] 3681; GFX900-NEXT: s_waitcnt vmcnt(0) 3682; GFX900-NEXT: s_setpc_b64 s[30:31] 3683; 3684; GFX90A-LABEL: v_shuffle_v2i16_v8i16__14_2: 3685; GFX90A: ; %bb.0: 3686; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3687; GFX90A-NEXT: ;;#ASMSTART 3688; GFX90A-NEXT: ; def 
v[0:3] 3689; GFX90A-NEXT: ;;#ASMEND 3690; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 3691; GFX90A-NEXT: v_mov_b32_e32 v6, 0 3692; GFX90A-NEXT: ;;#ASMSTART 3693; GFX90A-NEXT: ; def v[2:5] 3694; GFX90A-NEXT: ;;#ASMEND 3695; GFX90A-NEXT: v_perm_b32 v0, v1, v5, s4 3696; GFX90A-NEXT: global_store_dword v6, v0, s[16:17] 3697; GFX90A-NEXT: s_waitcnt vmcnt(0) 3698; GFX90A-NEXT: s_setpc_b64 s[30:31] 3699; 3700; GFX940-LABEL: v_shuffle_v2i16_v8i16__14_2: 3701; GFX940: ; %bb.0: 3702; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3703; GFX940-NEXT: ;;#ASMSTART 3704; GFX940-NEXT: ; def v[0:3] 3705; GFX940-NEXT: ;;#ASMEND 3706; GFX940-NEXT: s_mov_b32 s2, 0x5040100 3707; GFX940-NEXT: v_mov_b32_e32 v6, 0 3708; GFX940-NEXT: ;;#ASMSTART 3709; GFX940-NEXT: ; def v[2:5] 3710; GFX940-NEXT: ;;#ASMEND 3711; GFX940-NEXT: s_nop 0 3712; GFX940-NEXT: v_perm_b32 v0, v1, v5, s2 3713; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 3714; GFX940-NEXT: s_waitcnt vmcnt(0) 3715; GFX940-NEXT: s_setpc_b64 s[30:31] 3716 %vec0 = call <8 x i16> asm "; def $0", "=v"() 3717 %vec1 = call <8 x i16> asm "; def $0", "=v"() 3718 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 14, i32 2> 3719 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 3720 ret void 3721} 3722 3723define void @v_shuffle_v2i16_v8i16__u_3(ptr addrspace(1) inreg %ptr) { 3724; GFX900-LABEL: v_shuffle_v2i16_v8i16__u_3: 3725; GFX900: ; %bb.0: 3726; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3727; GFX900-NEXT: v_mov_b32_e32 v4, 0 3728; GFX900-NEXT: ;;#ASMSTART 3729; GFX900-NEXT: ; def v[0:3] 3730; GFX900-NEXT: ;;#ASMEND 3731; GFX900-NEXT: global_store_dword v4, v1, s[16:17] 3732; GFX900-NEXT: s_waitcnt vmcnt(0) 3733; GFX900-NEXT: s_setpc_b64 s[30:31] 3734; 3735; GFX90A-LABEL: v_shuffle_v2i16_v8i16__u_3: 3736; GFX90A: ; %bb.0: 3737; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3738; GFX90A-NEXT: v_mov_b32_e32 v4, 0 3739; GFX90A-NEXT: ;;#ASMSTART 3740; GFX90A-NEXT: ; def v[0:3] 3741; 
GFX90A-NEXT: ;;#ASMEND 3742; GFX90A-NEXT: global_store_dword v4, v1, s[16:17] 3743; GFX90A-NEXT: s_waitcnt vmcnt(0) 3744; GFX90A-NEXT: s_setpc_b64 s[30:31] 3745; 3746; GFX940-LABEL: v_shuffle_v2i16_v8i16__u_3: 3747; GFX940: ; %bb.0: 3748; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3749; GFX940-NEXT: v_mov_b32_e32 v4, 0 3750; GFX940-NEXT: ;;#ASMSTART 3751; GFX940-NEXT: ; def v[0:3] 3752; GFX940-NEXT: ;;#ASMEND 3753; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 3754; GFX940-NEXT: s_waitcnt vmcnt(0) 3755; GFX940-NEXT: s_setpc_b64 s[30:31] 3756 %vec0 = call <8 x i16> asm "; def $0", "=v"() 3757 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 poison, i32 3> 3758 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 3759 ret void 3760} 3761 3762define void @v_shuffle_v2i16_v8i16__0_3(ptr addrspace(1) inreg %ptr) { 3763; GFX900-LABEL: v_shuffle_v2i16_v8i16__0_3: 3764; GFX900: ; %bb.0: 3765; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3766; GFX900-NEXT: ;;#ASMSTART 3767; GFX900-NEXT: ; def v[0:3] 3768; GFX900-NEXT: ;;#ASMEND 3769; GFX900-NEXT: s_mov_b32 s4, 0xffff 3770; GFX900-NEXT: v_mov_b32_e32 v4, 0 3771; GFX900-NEXT: v_bfi_b32 v0, s4, v0, v1 3772; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 3773; GFX900-NEXT: s_waitcnt vmcnt(0) 3774; GFX900-NEXT: s_setpc_b64 s[30:31] 3775; 3776; GFX90A-LABEL: v_shuffle_v2i16_v8i16__0_3: 3777; GFX90A: ; %bb.0: 3778; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3779; GFX90A-NEXT: ;;#ASMSTART 3780; GFX90A-NEXT: ; def v[0:3] 3781; GFX90A-NEXT: ;;#ASMEND 3782; GFX90A-NEXT: s_mov_b32 s4, 0xffff 3783; GFX90A-NEXT: v_mov_b32_e32 v4, 0 3784; GFX90A-NEXT: v_bfi_b32 v0, s4, v0, v1 3785; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 3786; GFX90A-NEXT: s_waitcnt vmcnt(0) 3787; GFX90A-NEXT: s_setpc_b64 s[30:31] 3788; 3789; GFX940-LABEL: v_shuffle_v2i16_v8i16__0_3: 3790; GFX940: ; %bb.0: 3791; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3792; GFX940-NEXT: ;;#ASMSTART 3793; 
GFX940-NEXT: ; def v[0:3] 3794; GFX940-NEXT: ;;#ASMEND 3795; GFX940-NEXT: s_mov_b32 s2, 0xffff 3796; GFX940-NEXT: v_mov_b32_e32 v4, 0 3797; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v1 3798; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 3799; GFX940-NEXT: s_waitcnt vmcnt(0) 3800; GFX940-NEXT: s_setpc_b64 s[30:31] 3801 %vec0 = call <8 x i16> asm "; def $0", "=v"() 3802 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 0, i32 3> 3803 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 3804 ret void 3805} 3806 3807define void @v_shuffle_v2i16_v8i16__1_3(ptr addrspace(1) inreg %ptr) { 3808; GFX900-LABEL: v_shuffle_v2i16_v8i16__1_3: 3809; GFX900: ; %bb.0: 3810; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3811; GFX900-NEXT: ;;#ASMSTART 3812; GFX900-NEXT: ; def v[0:3] 3813; GFX900-NEXT: ;;#ASMEND 3814; GFX900-NEXT: s_mov_b32 s4, 0x7060302 3815; GFX900-NEXT: v_mov_b32_e32 v4, 0 3816; GFX900-NEXT: v_perm_b32 v0, v1, v0, s4 3817; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 3818; GFX900-NEXT: s_waitcnt vmcnt(0) 3819; GFX900-NEXT: s_setpc_b64 s[30:31] 3820; 3821; GFX90A-LABEL: v_shuffle_v2i16_v8i16__1_3: 3822; GFX90A: ; %bb.0: 3823; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3824; GFX90A-NEXT: ;;#ASMSTART 3825; GFX90A-NEXT: ; def v[0:3] 3826; GFX90A-NEXT: ;;#ASMEND 3827; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 3828; GFX90A-NEXT: v_mov_b32_e32 v4, 0 3829; GFX90A-NEXT: v_perm_b32 v0, v1, v0, s4 3830; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 3831; GFX90A-NEXT: s_waitcnt vmcnt(0) 3832; GFX90A-NEXT: s_setpc_b64 s[30:31] 3833; 3834; GFX940-LABEL: v_shuffle_v2i16_v8i16__1_3: 3835; GFX940: ; %bb.0: 3836; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3837; GFX940-NEXT: ;;#ASMSTART 3838; GFX940-NEXT: ; def v[0:3] 3839; GFX940-NEXT: ;;#ASMEND 3840; GFX940-NEXT: s_mov_b32 s2, 0x7060302 3841; GFX940-NEXT: v_mov_b32_e32 v4, 0 3842; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2 3843; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 
3844; GFX940-NEXT: s_waitcnt vmcnt(0) 3845; GFX940-NEXT: s_setpc_b64 s[30:31] 3846 %vec0 = call <8 x i16> asm "; def $0", "=v"() 3847 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 1, i32 3> 3848 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 3849 ret void 3850} 3851 3852define void @v_shuffle_v2i16_v8i16__2_3(ptr addrspace(1) inreg %ptr) { 3853; GFX900-LABEL: v_shuffle_v2i16_v8i16__2_3: 3854; GFX900: ; %bb.0: 3855; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3856; GFX900-NEXT: v_mov_b32_e32 v4, 0 3857; GFX900-NEXT: ;;#ASMSTART 3858; GFX900-NEXT: ; def v[0:3] 3859; GFX900-NEXT: ;;#ASMEND 3860; GFX900-NEXT: global_store_dword v4, v1, s[16:17] 3861; GFX900-NEXT: s_waitcnt vmcnt(0) 3862; GFX900-NEXT: s_setpc_b64 s[30:31] 3863; 3864; GFX90A-LABEL: v_shuffle_v2i16_v8i16__2_3: 3865; GFX90A: ; %bb.0: 3866; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3867; GFX90A-NEXT: v_mov_b32_e32 v4, 0 3868; GFX90A-NEXT: ;;#ASMSTART 3869; GFX90A-NEXT: ; def v[0:3] 3870; GFX90A-NEXT: ;;#ASMEND 3871; GFX90A-NEXT: global_store_dword v4, v1, s[16:17] 3872; GFX90A-NEXT: s_waitcnt vmcnt(0) 3873; GFX90A-NEXT: s_setpc_b64 s[30:31] 3874; 3875; GFX940-LABEL: v_shuffle_v2i16_v8i16__2_3: 3876; GFX940: ; %bb.0: 3877; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3878; GFX940-NEXT: v_mov_b32_e32 v4, 0 3879; GFX940-NEXT: ;;#ASMSTART 3880; GFX940-NEXT: ; def v[0:3] 3881; GFX940-NEXT: ;;#ASMEND 3882; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 3883; GFX940-NEXT: s_waitcnt vmcnt(0) 3884; GFX940-NEXT: s_setpc_b64 s[30:31] 3885 %vec0 = call <8 x i16> asm "; def $0", "=v"() 3886 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 2, i32 3> 3887 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 3888 ret void 3889} 3890 3891define void @v_shuffle_v2i16_v8i16__3_3(ptr addrspace(1) inreg %ptr) { 3892; GFX900-LABEL: v_shuffle_v2i16_v8i16__3_3: 3893; GFX900: ; %bb.0: 3894; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) 3895; GFX900-NEXT: ;;#ASMSTART 3896; GFX900-NEXT: ; def v[0:3] 3897; GFX900-NEXT: ;;#ASMEND 3898; GFX900-NEXT: s_mov_b32 s4, 0x7060302 3899; GFX900-NEXT: v_mov_b32_e32 v4, 0 3900; GFX900-NEXT: v_perm_b32 v0, v1, v1, s4 3901; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 3902; GFX900-NEXT: s_waitcnt vmcnt(0) 3903; GFX900-NEXT: s_setpc_b64 s[30:31] 3904; 3905; GFX90A-LABEL: v_shuffle_v2i16_v8i16__3_3: 3906; GFX90A: ; %bb.0: 3907; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3908; GFX90A-NEXT: ;;#ASMSTART 3909; GFX90A-NEXT: ; def v[0:3] 3910; GFX90A-NEXT: ;;#ASMEND 3911; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 3912; GFX90A-NEXT: v_mov_b32_e32 v4, 0 3913; GFX90A-NEXT: v_perm_b32 v0, v1, v1, s4 3914; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 3915; GFX90A-NEXT: s_waitcnt vmcnt(0) 3916; GFX90A-NEXT: s_setpc_b64 s[30:31] 3917; 3918; GFX940-LABEL: v_shuffle_v2i16_v8i16__3_3: 3919; GFX940: ; %bb.0: 3920; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3921; GFX940-NEXT: ;;#ASMSTART 3922; GFX940-NEXT: ; def v[0:3] 3923; GFX940-NEXT: ;;#ASMEND 3924; GFX940-NEXT: s_mov_b32 s2, 0x7060302 3925; GFX940-NEXT: v_mov_b32_e32 v4, 0 3926; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 3927; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 3928; GFX940-NEXT: s_waitcnt vmcnt(0) 3929; GFX940-NEXT: s_setpc_b64 s[30:31] 3930 %vec0 = call <8 x i16> asm "; def $0", "=v"() 3931 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 3, i32 3> 3932 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 3933 ret void 3934} 3935 3936define void @v_shuffle_v2i16_v8i16__4_3(ptr addrspace(1) inreg %ptr) { 3937; GFX900-LABEL: v_shuffle_v2i16_v8i16__4_3: 3938; GFX900: ; %bb.0: 3939; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3940; GFX900-NEXT: ;;#ASMSTART 3941; GFX900-NEXT: ; def v[0:3] 3942; GFX900-NEXT: ;;#ASMEND 3943; GFX900-NEXT: s_mov_b32 s4, 0xffff 3944; GFX900-NEXT: v_mov_b32_e32 v4, 0 3945; GFX900-NEXT: v_bfi_b32 v0, s4, v2, v1 3946; 
GFX900-NEXT: global_store_dword v4, v0, s[16:17] 3947; GFX900-NEXT: s_waitcnt vmcnt(0) 3948; GFX900-NEXT: s_setpc_b64 s[30:31] 3949; 3950; GFX90A-LABEL: v_shuffle_v2i16_v8i16__4_3: 3951; GFX90A: ; %bb.0: 3952; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3953; GFX90A-NEXT: ;;#ASMSTART 3954; GFX90A-NEXT: ; def v[0:3] 3955; GFX90A-NEXT: ;;#ASMEND 3956; GFX90A-NEXT: s_mov_b32 s4, 0xffff 3957; GFX90A-NEXT: v_mov_b32_e32 v4, 0 3958; GFX90A-NEXT: v_bfi_b32 v0, s4, v2, v1 3959; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 3960; GFX90A-NEXT: s_waitcnt vmcnt(0) 3961; GFX90A-NEXT: s_setpc_b64 s[30:31] 3962; 3963; GFX940-LABEL: v_shuffle_v2i16_v8i16__4_3: 3964; GFX940: ; %bb.0: 3965; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3966; GFX940-NEXT: ;;#ASMSTART 3967; GFX940-NEXT: ; def v[0:3] 3968; GFX940-NEXT: ;;#ASMEND 3969; GFX940-NEXT: s_mov_b32 s2, 0xffff 3970; GFX940-NEXT: v_mov_b32_e32 v4, 0 3971; GFX940-NEXT: v_bfi_b32 v0, s2, v2, v1 3972; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 3973; GFX940-NEXT: s_waitcnt vmcnt(0) 3974; GFX940-NEXT: s_setpc_b64 s[30:31] 3975 %vec0 = call <8 x i16> asm "; def $0", "=v"() 3976 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 4, i32 3> 3977 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 3978 ret void 3979} 3980 3981define void @v_shuffle_v2i16_v8i16__5_3(ptr addrspace(1) inreg %ptr) { 3982; GFX900-LABEL: v_shuffle_v2i16_v8i16__5_3: 3983; GFX900: ; %bb.0: 3984; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3985; GFX900-NEXT: ;;#ASMSTART 3986; GFX900-NEXT: ; def v[0:3] 3987; GFX900-NEXT: ;;#ASMEND 3988; GFX900-NEXT: s_mov_b32 s4, 0x7060302 3989; GFX900-NEXT: v_mov_b32_e32 v4, 0 3990; GFX900-NEXT: v_perm_b32 v0, v1, v2, s4 3991; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 3992; GFX900-NEXT: s_waitcnt vmcnt(0) 3993; GFX900-NEXT: s_setpc_b64 s[30:31] 3994; 3995; GFX90A-LABEL: v_shuffle_v2i16_v8i16__5_3: 3996; GFX90A: ; %bb.0: 3997; GFX90A-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) 3998; GFX90A-NEXT: ;;#ASMSTART 3999; GFX90A-NEXT: ; def v[0:3] 4000; GFX90A-NEXT: ;;#ASMEND 4001; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 4002; GFX90A-NEXT: v_mov_b32_e32 v4, 0 4003; GFX90A-NEXT: v_perm_b32 v0, v1, v2, s4 4004; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 4005; GFX90A-NEXT: s_waitcnt vmcnt(0) 4006; GFX90A-NEXT: s_setpc_b64 s[30:31] 4007; 4008; GFX940-LABEL: v_shuffle_v2i16_v8i16__5_3: 4009; GFX940: ; %bb.0: 4010; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4011; GFX940-NEXT: ;;#ASMSTART 4012; GFX940-NEXT: ; def v[0:3] 4013; GFX940-NEXT: ;;#ASMEND 4014; GFX940-NEXT: s_mov_b32 s2, 0x7060302 4015; GFX940-NEXT: v_mov_b32_e32 v4, 0 4016; GFX940-NEXT: v_perm_b32 v0, v1, v2, s2 4017; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 4018; GFX940-NEXT: s_waitcnt vmcnt(0) 4019; GFX940-NEXT: s_setpc_b64 s[30:31] 4020 %vec0 = call <8 x i16> asm "; def $0", "=v"() 4021 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 5, i32 3> 4022 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 4023 ret void 4024} 4025 4026define void @v_shuffle_v2i16_v8i16__6_3(ptr addrspace(1) inreg %ptr) { 4027; GFX900-LABEL: v_shuffle_v2i16_v8i16__6_3: 4028; GFX900: ; %bb.0: 4029; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4030; GFX900-NEXT: ;;#ASMSTART 4031; GFX900-NEXT: ; def v[0:3] 4032; GFX900-NEXT: ;;#ASMEND 4033; GFX900-NEXT: s_mov_b32 s4, 0xffff 4034; GFX900-NEXT: v_mov_b32_e32 v4, 0 4035; GFX900-NEXT: v_bfi_b32 v0, s4, v3, v1 4036; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 4037; GFX900-NEXT: s_waitcnt vmcnt(0) 4038; GFX900-NEXT: s_setpc_b64 s[30:31] 4039; 4040; GFX90A-LABEL: v_shuffle_v2i16_v8i16__6_3: 4041; GFX90A: ; %bb.0: 4042; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4043; GFX90A-NEXT: ;;#ASMSTART 4044; GFX90A-NEXT: ; def v[0:3] 4045; GFX90A-NEXT: ;;#ASMEND 4046; GFX90A-NEXT: s_mov_b32 s4, 0xffff 4047; GFX90A-NEXT: v_mov_b32_e32 v4, 0 4048; GFX90A-NEXT: v_bfi_b32 v0, s4, v3, v1 4049; 
GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 4050; GFX90A-NEXT: s_waitcnt vmcnt(0) 4051; GFX90A-NEXT: s_setpc_b64 s[30:31] 4052; 4053; GFX940-LABEL: v_shuffle_v2i16_v8i16__6_3: 4054; GFX940: ; %bb.0: 4055; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4056; GFX940-NEXT: ;;#ASMSTART 4057; GFX940-NEXT: ; def v[0:3] 4058; GFX940-NEXT: ;;#ASMEND 4059; GFX940-NEXT: s_mov_b32 s2, 0xffff 4060; GFX940-NEXT: v_mov_b32_e32 v4, 0 4061; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v1 4062; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 4063; GFX940-NEXT: s_waitcnt vmcnt(0) 4064; GFX940-NEXT: s_setpc_b64 s[30:31] 4065 %vec0 = call <8 x i16> asm "; def $0", "=v"() 4066 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 6, i32 3> 4067 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 4068 ret void 4069} 4070 4071define void @v_shuffle_v2i16_v8i16__7_3(ptr addrspace(1) inreg %ptr) { 4072; GFX900-LABEL: v_shuffle_v2i16_v8i16__7_3: 4073; GFX900: ; %bb.0: 4074; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4075; GFX900-NEXT: ;;#ASMSTART 4076; GFX900-NEXT: ; def v[0:3] 4077; GFX900-NEXT: ;;#ASMEND 4078; GFX900-NEXT: s_mov_b32 s4, 0x7060302 4079; GFX900-NEXT: v_mov_b32_e32 v4, 0 4080; GFX900-NEXT: v_perm_b32 v0, v1, v3, s4 4081; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 4082; GFX900-NEXT: s_waitcnt vmcnt(0) 4083; GFX900-NEXT: s_setpc_b64 s[30:31] 4084; 4085; GFX90A-LABEL: v_shuffle_v2i16_v8i16__7_3: 4086; GFX90A: ; %bb.0: 4087; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4088; GFX90A-NEXT: ;;#ASMSTART 4089; GFX90A-NEXT: ; def v[0:3] 4090; GFX90A-NEXT: ;;#ASMEND 4091; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 4092; GFX90A-NEXT: v_mov_b32_e32 v4, 0 4093; GFX90A-NEXT: v_perm_b32 v0, v1, v3, s4 4094; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 4095; GFX90A-NEXT: s_waitcnt vmcnt(0) 4096; GFX90A-NEXT: s_setpc_b64 s[30:31] 4097; 4098; GFX940-LABEL: v_shuffle_v2i16_v8i16__7_3: 4099; GFX940: ; %bb.0: 4100; GFX940-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) 4101; GFX940-NEXT: ;;#ASMSTART 4102; GFX940-NEXT: ; def v[0:3] 4103; GFX940-NEXT: ;;#ASMEND 4104; GFX940-NEXT: s_mov_b32 s2, 0x7060302 4105; GFX940-NEXT: v_mov_b32_e32 v4, 0 4106; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 4107; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 4108; GFX940-NEXT: s_waitcnt vmcnt(0) 4109; GFX940-NEXT: s_setpc_b64 s[30:31] 4110 %vec0 = call <8 x i16> asm "; def $0", "=v"() 4111 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 7, i32 3> 4112 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 4113 ret void 4114} 4115 4116define void @v_shuffle_v2i16_v8i16__8_3(ptr addrspace(1) inreg %ptr) { 4117; GFX900-LABEL: v_shuffle_v2i16_v8i16__8_3: 4118; GFX900: ; %bb.0: 4119; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4120; GFX900-NEXT: v_mov_b32_e32 v4, 0 4121; GFX900-NEXT: ;;#ASMSTART 4122; GFX900-NEXT: ; def v[0:3] 4123; GFX900-NEXT: ;;#ASMEND 4124; GFX900-NEXT: global_store_dword v4, v1, s[16:17] 4125; GFX900-NEXT: s_waitcnt vmcnt(0) 4126; GFX900-NEXT: s_setpc_b64 s[30:31] 4127; 4128; GFX90A-LABEL: v_shuffle_v2i16_v8i16__8_3: 4129; GFX90A: ; %bb.0: 4130; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4131; GFX90A-NEXT: v_mov_b32_e32 v4, 0 4132; GFX90A-NEXT: ;;#ASMSTART 4133; GFX90A-NEXT: ; def v[0:3] 4134; GFX90A-NEXT: ;;#ASMEND 4135; GFX90A-NEXT: global_store_dword v4, v1, s[16:17] 4136; GFX90A-NEXT: s_waitcnt vmcnt(0) 4137; GFX90A-NEXT: s_setpc_b64 s[30:31] 4138; 4139; GFX940-LABEL: v_shuffle_v2i16_v8i16__8_3: 4140; GFX940: ; %bb.0: 4141; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4142; GFX940-NEXT: v_mov_b32_e32 v4, 0 4143; GFX940-NEXT: ;;#ASMSTART 4144; GFX940-NEXT: ; def v[0:3] 4145; GFX940-NEXT: ;;#ASMEND 4146; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 4147; GFX940-NEXT: s_waitcnt vmcnt(0) 4148; GFX940-NEXT: s_setpc_b64 s[30:31] 4149 %vec0 = call <8 x i16> asm "; def $0", "=v"() 4150 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> 
<i32 8, i32 3> 4151 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 4152 ret void 4153} 4154 4155define void @v_shuffle_v2i16_v8i16__9_3(ptr addrspace(1) inreg %ptr) { 4156; GFX900-LABEL: v_shuffle_v2i16_v8i16__9_3: 4157; GFX900: ; %bb.0: 4158; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4159; GFX900-NEXT: ;;#ASMSTART 4160; GFX900-NEXT: ; def v[0:3] 4161; GFX900-NEXT: ;;#ASMEND 4162; GFX900-NEXT: s_mov_b32 s4, 0x7060302 4163; GFX900-NEXT: v_mov_b32_e32 v6, 0 4164; GFX900-NEXT: ;;#ASMSTART 4165; GFX900-NEXT: ; def v[2:5] 4166; GFX900-NEXT: ;;#ASMEND 4167; GFX900-NEXT: v_perm_b32 v0, v1, v2, s4 4168; GFX900-NEXT: global_store_dword v6, v0, s[16:17] 4169; GFX900-NEXT: s_waitcnt vmcnt(0) 4170; GFX900-NEXT: s_setpc_b64 s[30:31] 4171; 4172; GFX90A-LABEL: v_shuffle_v2i16_v8i16__9_3: 4173; GFX90A: ; %bb.0: 4174; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4175; GFX90A-NEXT: ;;#ASMSTART 4176; GFX90A-NEXT: ; def v[0:3] 4177; GFX90A-NEXT: ;;#ASMEND 4178; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 4179; GFX90A-NEXT: v_mov_b32_e32 v6, 0 4180; GFX90A-NEXT: ;;#ASMSTART 4181; GFX90A-NEXT: ; def v[2:5] 4182; GFX90A-NEXT: ;;#ASMEND 4183; GFX90A-NEXT: v_perm_b32 v0, v1, v2, s4 4184; GFX90A-NEXT: global_store_dword v6, v0, s[16:17] 4185; GFX90A-NEXT: s_waitcnt vmcnt(0) 4186; GFX90A-NEXT: s_setpc_b64 s[30:31] 4187; 4188; GFX940-LABEL: v_shuffle_v2i16_v8i16__9_3: 4189; GFX940: ; %bb.0: 4190; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4191; GFX940-NEXT: ;;#ASMSTART 4192; GFX940-NEXT: ; def v[0:3] 4193; GFX940-NEXT: ;;#ASMEND 4194; GFX940-NEXT: s_mov_b32 s2, 0x7060302 4195; GFX940-NEXT: v_mov_b32_e32 v6, 0 4196; GFX940-NEXT: ;;#ASMSTART 4197; GFX940-NEXT: ; def v[2:5] 4198; GFX940-NEXT: ;;#ASMEND 4199; GFX940-NEXT: s_nop 0 4200; GFX940-NEXT: v_perm_b32 v0, v1, v2, s2 4201; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 4202; GFX940-NEXT: s_waitcnt vmcnt(0) 4203; GFX940-NEXT: s_setpc_b64 s[30:31] 4204 %vec0 = call <8 x i16> asm "; def $0", "=v"() 4205 
%vec1 = call <8 x i16> asm "; def $0", "=v"() 4206 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 9, i32 3> 4207 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 4208 ret void 4209} 4210 4211define void @v_shuffle_v2i16_v8i16__10_3(ptr addrspace(1) inreg %ptr) { 4212; GFX900-LABEL: v_shuffle_v2i16_v8i16__10_3: 4213; GFX900: ; %bb.0: 4214; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4215; GFX900-NEXT: ;;#ASMSTART 4216; GFX900-NEXT: ; def v[0:3] 4217; GFX900-NEXT: ;;#ASMEND 4218; GFX900-NEXT: s_mov_b32 s4, 0xffff 4219; GFX900-NEXT: v_mov_b32_e32 v6, 0 4220; GFX900-NEXT: ;;#ASMSTART 4221; GFX900-NEXT: ; def v[2:5] 4222; GFX900-NEXT: ;;#ASMEND 4223; GFX900-NEXT: v_bfi_b32 v0, s4, v3, v1 4224; GFX900-NEXT: global_store_dword v6, v0, s[16:17] 4225; GFX900-NEXT: s_waitcnt vmcnt(0) 4226; GFX900-NEXT: s_setpc_b64 s[30:31] 4227; 4228; GFX90A-LABEL: v_shuffle_v2i16_v8i16__10_3: 4229; GFX90A: ; %bb.0: 4230; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4231; GFX90A-NEXT: ;;#ASMSTART 4232; GFX90A-NEXT: ; def v[0:3] 4233; GFX90A-NEXT: ;;#ASMEND 4234; GFX90A-NEXT: s_mov_b32 s4, 0xffff 4235; GFX90A-NEXT: v_mov_b32_e32 v6, 0 4236; GFX90A-NEXT: ;;#ASMSTART 4237; GFX90A-NEXT: ; def v[2:5] 4238; GFX90A-NEXT: ;;#ASMEND 4239; GFX90A-NEXT: v_bfi_b32 v0, s4, v3, v1 4240; GFX90A-NEXT: global_store_dword v6, v0, s[16:17] 4241; GFX90A-NEXT: s_waitcnt vmcnt(0) 4242; GFX90A-NEXT: s_setpc_b64 s[30:31] 4243; 4244; GFX940-LABEL: v_shuffle_v2i16_v8i16__10_3: 4245; GFX940: ; %bb.0: 4246; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4247; GFX940-NEXT: ;;#ASMSTART 4248; GFX940-NEXT: ; def v[0:3] 4249; GFX940-NEXT: ;;#ASMEND 4250; GFX940-NEXT: s_mov_b32 s2, 0xffff 4251; GFX940-NEXT: v_mov_b32_e32 v6, 0 4252; GFX940-NEXT: ;;#ASMSTART 4253; GFX940-NEXT: ; def v[2:5] 4254; GFX940-NEXT: ;;#ASMEND 4255; GFX940-NEXT: s_nop 0 4256; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v1 4257; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 4258; GFX940-NEXT: 
s_waitcnt vmcnt(0) 4259; GFX940-NEXT: s_setpc_b64 s[30:31] 4260 %vec0 = call <8 x i16> asm "; def $0", "=v"() 4261 %vec1 = call <8 x i16> asm "; def $0", "=v"() 4262 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 10, i32 3> 4263 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 4264 ret void 4265} 4266 4267define void @v_shuffle_v2i16_v8i16__11_3(ptr addrspace(1) inreg %ptr) { 4268; GFX900-LABEL: v_shuffle_v2i16_v8i16__11_3: 4269; GFX900: ; %bb.0: 4270; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4271; GFX900-NEXT: ;;#ASMSTART 4272; GFX900-NEXT: ; def v[0:3] 4273; GFX900-NEXT: ;;#ASMEND 4274; GFX900-NEXT: s_mov_b32 s4, 0x7060302 4275; GFX900-NEXT: v_mov_b32_e32 v6, 0 4276; GFX900-NEXT: ;;#ASMSTART 4277; GFX900-NEXT: ; def v[2:5] 4278; GFX900-NEXT: ;;#ASMEND 4279; GFX900-NEXT: v_perm_b32 v0, v1, v3, s4 4280; GFX900-NEXT: global_store_dword v6, v0, s[16:17] 4281; GFX900-NEXT: s_waitcnt vmcnt(0) 4282; GFX900-NEXT: s_setpc_b64 s[30:31] 4283; 4284; GFX90A-LABEL: v_shuffle_v2i16_v8i16__11_3: 4285; GFX90A: ; %bb.0: 4286; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4287; GFX90A-NEXT: ;;#ASMSTART 4288; GFX90A-NEXT: ; def v[0:3] 4289; GFX90A-NEXT: ;;#ASMEND 4290; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 4291; GFX90A-NEXT: v_mov_b32_e32 v6, 0 4292; GFX90A-NEXT: ;;#ASMSTART 4293; GFX90A-NEXT: ; def v[2:5] 4294; GFX90A-NEXT: ;;#ASMEND 4295; GFX90A-NEXT: v_perm_b32 v0, v1, v3, s4 4296; GFX90A-NEXT: global_store_dword v6, v0, s[16:17] 4297; GFX90A-NEXT: s_waitcnt vmcnt(0) 4298; GFX90A-NEXT: s_setpc_b64 s[30:31] 4299; 4300; GFX940-LABEL: v_shuffle_v2i16_v8i16__11_3: 4301; GFX940: ; %bb.0: 4302; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4303; GFX940-NEXT: ;;#ASMSTART 4304; GFX940-NEXT: ; def v[0:3] 4305; GFX940-NEXT: ;;#ASMEND 4306; GFX940-NEXT: s_mov_b32 s2, 0x7060302 4307; GFX940-NEXT: v_mov_b32_e32 v6, 0 4308; GFX940-NEXT: ;;#ASMSTART 4309; GFX940-NEXT: ; def v[2:5] 4310; GFX940-NEXT: ;;#ASMEND 4311; GFX940-NEXT: s_nop 0 4312; 
GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 4313; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 4314; GFX940-NEXT: s_waitcnt vmcnt(0) 4315; GFX940-NEXT: s_setpc_b64 s[30:31] 4316 %vec0 = call <8 x i16> asm "; def $0", "=v"() 4317 %vec1 = call <8 x i16> asm "; def $0", "=v"() 4318 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 11, i32 3> 4319 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 4320 ret void 4321} 4322 4323define void @v_shuffle_v2i16_v8i16__12_3(ptr addrspace(1) inreg %ptr) { 4324; GFX900-LABEL: v_shuffle_v2i16_v8i16__12_3: 4325; GFX900: ; %bb.0: 4326; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4327; GFX900-NEXT: ;;#ASMSTART 4328; GFX900-NEXT: ; def v[0:3] 4329; GFX900-NEXT: ;;#ASMEND 4330; GFX900-NEXT: s_mov_b32 s4, 0xffff 4331; GFX900-NEXT: v_mov_b32_e32 v6, 0 4332; GFX900-NEXT: ;;#ASMSTART 4333; GFX900-NEXT: ; def v[2:5] 4334; GFX900-NEXT: ;;#ASMEND 4335; GFX900-NEXT: v_bfi_b32 v0, s4, v4, v1 4336; GFX900-NEXT: global_store_dword v6, v0, s[16:17] 4337; GFX900-NEXT: s_waitcnt vmcnt(0) 4338; GFX900-NEXT: s_setpc_b64 s[30:31] 4339; 4340; GFX90A-LABEL: v_shuffle_v2i16_v8i16__12_3: 4341; GFX90A: ; %bb.0: 4342; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4343; GFX90A-NEXT: ;;#ASMSTART 4344; GFX90A-NEXT: ; def v[0:3] 4345; GFX90A-NEXT: ;;#ASMEND 4346; GFX90A-NEXT: s_mov_b32 s4, 0xffff 4347; GFX90A-NEXT: v_mov_b32_e32 v6, 0 4348; GFX90A-NEXT: ;;#ASMSTART 4349; GFX90A-NEXT: ; def v[2:5] 4350; GFX90A-NEXT: ;;#ASMEND 4351; GFX90A-NEXT: v_bfi_b32 v0, s4, v4, v1 4352; GFX90A-NEXT: global_store_dword v6, v0, s[16:17] 4353; GFX90A-NEXT: s_waitcnt vmcnt(0) 4354; GFX90A-NEXT: s_setpc_b64 s[30:31] 4355; 4356; GFX940-LABEL: v_shuffle_v2i16_v8i16__12_3: 4357; GFX940: ; %bb.0: 4358; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4359; GFX940-NEXT: ;;#ASMSTART 4360; GFX940-NEXT: ; def v[0:3] 4361; GFX940-NEXT: ;;#ASMEND 4362; GFX940-NEXT: s_mov_b32 s2, 0xffff 4363; GFX940-NEXT: v_mov_b32_e32 v6, 0 4364; 
GFX940-NEXT: ;;#ASMSTART 4365; GFX940-NEXT: ; def v[2:5] 4366; GFX940-NEXT: ;;#ASMEND 4367; GFX940-NEXT: s_nop 0 4368; GFX940-NEXT: v_bfi_b32 v0, s2, v4, v1 4369; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 4370; GFX940-NEXT: s_waitcnt vmcnt(0) 4371; GFX940-NEXT: s_setpc_b64 s[30:31] 4372 %vec0 = call <8 x i16> asm "; def $0", "=v"() 4373 %vec1 = call <8 x i16> asm "; def $0", "=v"() 4374 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 12, i32 3> 4375 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 4376 ret void 4377} 4378 4379define void @v_shuffle_v2i16_v8i16__13_3(ptr addrspace(1) inreg %ptr) { 4380; GFX900-LABEL: v_shuffle_v2i16_v8i16__13_3: 4381; GFX900: ; %bb.0: 4382; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4383; GFX900-NEXT: ;;#ASMSTART 4384; GFX900-NEXT: ; def v[0:3] 4385; GFX900-NEXT: ;;#ASMEND 4386; GFX900-NEXT: s_mov_b32 s4, 0x7060302 4387; GFX900-NEXT: v_mov_b32_e32 v6, 0 4388; GFX900-NEXT: ;;#ASMSTART 4389; GFX900-NEXT: ; def v[2:5] 4390; GFX900-NEXT: ;;#ASMEND 4391; GFX900-NEXT: v_perm_b32 v0, v1, v4, s4 4392; GFX900-NEXT: global_store_dword v6, v0, s[16:17] 4393; GFX900-NEXT: s_waitcnt vmcnt(0) 4394; GFX900-NEXT: s_setpc_b64 s[30:31] 4395; 4396; GFX90A-LABEL: v_shuffle_v2i16_v8i16__13_3: 4397; GFX90A: ; %bb.0: 4398; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4399; GFX90A-NEXT: ;;#ASMSTART 4400; GFX90A-NEXT: ; def v[0:3] 4401; GFX90A-NEXT: ;;#ASMEND 4402; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 4403; GFX90A-NEXT: v_mov_b32_e32 v6, 0 4404; GFX90A-NEXT: ;;#ASMSTART 4405; GFX90A-NEXT: ; def v[2:5] 4406; GFX90A-NEXT: ;;#ASMEND 4407; GFX90A-NEXT: v_perm_b32 v0, v1, v4, s4 4408; GFX90A-NEXT: global_store_dword v6, v0, s[16:17] 4409; GFX90A-NEXT: s_waitcnt vmcnt(0) 4410; GFX90A-NEXT: s_setpc_b64 s[30:31] 4411; 4412; GFX940-LABEL: v_shuffle_v2i16_v8i16__13_3: 4413; GFX940: ; %bb.0: 4414; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4415; GFX940-NEXT: ;;#ASMSTART 4416; GFX940-NEXT: ; def 
v[0:3] 4417; GFX940-NEXT: ;;#ASMEND 4418; GFX940-NEXT: s_mov_b32 s2, 0x7060302 4419; GFX940-NEXT: v_mov_b32_e32 v6, 0 4420; GFX940-NEXT: ;;#ASMSTART 4421; GFX940-NEXT: ; def v[2:5] 4422; GFX940-NEXT: ;;#ASMEND 4423; GFX940-NEXT: s_nop 0 4424; GFX940-NEXT: v_perm_b32 v0, v1, v4, s2 4425; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 4426; GFX940-NEXT: s_waitcnt vmcnt(0) 4427; GFX940-NEXT: s_setpc_b64 s[30:31] 4428 %vec0 = call <8 x i16> asm "; def $0", "=v"() 4429 %vec1 = call <8 x i16> asm "; def $0", "=v"() 4430 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 13, i32 3> 4431 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 4432 ret void 4433} 4434 4435define void @v_shuffle_v2i16_v8i16__14_3(ptr addrspace(1) inreg %ptr) { 4436; GFX900-LABEL: v_shuffle_v2i16_v8i16__14_3: 4437; GFX900: ; %bb.0: 4438; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4439; GFX900-NEXT: ;;#ASMSTART 4440; GFX900-NEXT: ; def v[0:3] 4441; GFX900-NEXT: ;;#ASMEND 4442; GFX900-NEXT: s_mov_b32 s4, 0xffff 4443; GFX900-NEXT: v_mov_b32_e32 v6, 0 4444; GFX900-NEXT: ;;#ASMSTART 4445; GFX900-NEXT: ; def v[2:5] 4446; GFX900-NEXT: ;;#ASMEND 4447; GFX900-NEXT: v_bfi_b32 v0, s4, v5, v1 4448; GFX900-NEXT: global_store_dword v6, v0, s[16:17] 4449; GFX900-NEXT: s_waitcnt vmcnt(0) 4450; GFX900-NEXT: s_setpc_b64 s[30:31] 4451; 4452; GFX90A-LABEL: v_shuffle_v2i16_v8i16__14_3: 4453; GFX90A: ; %bb.0: 4454; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4455; GFX90A-NEXT: ;;#ASMSTART 4456; GFX90A-NEXT: ; def v[0:3] 4457; GFX90A-NEXT: ;;#ASMEND 4458; GFX90A-NEXT: s_mov_b32 s4, 0xffff 4459; GFX90A-NEXT: v_mov_b32_e32 v6, 0 4460; GFX90A-NEXT: ;;#ASMSTART 4461; GFX90A-NEXT: ; def v[2:5] 4462; GFX90A-NEXT: ;;#ASMEND 4463; GFX90A-NEXT: v_bfi_b32 v0, s4, v5, v1 4464; GFX90A-NEXT: global_store_dword v6, v0, s[16:17] 4465; GFX90A-NEXT: s_waitcnt vmcnt(0) 4466; GFX90A-NEXT: s_setpc_b64 s[30:31] 4467; 4468; GFX940-LABEL: v_shuffle_v2i16_v8i16__14_3: 4469; GFX940: ; %bb.0: 
4470; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4471; GFX940-NEXT: ;;#ASMSTART 4472; GFX940-NEXT: ; def v[0:3] 4473; GFX940-NEXT: ;;#ASMEND 4474; GFX940-NEXT: s_mov_b32 s2, 0xffff 4475; GFX940-NEXT: v_mov_b32_e32 v6, 0 4476; GFX940-NEXT: ;;#ASMSTART 4477; GFX940-NEXT: ; def v[2:5] 4478; GFX940-NEXT: ;;#ASMEND 4479; GFX940-NEXT: s_nop 0 4480; GFX940-NEXT: v_bfi_b32 v0, s2, v5, v1 4481; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 4482; GFX940-NEXT: s_waitcnt vmcnt(0) 4483; GFX940-NEXT: s_setpc_b64 s[30:31] 4484 %vec0 = call <8 x i16> asm "; def $0", "=v"() 4485 %vec1 = call <8 x i16> asm "; def $0", "=v"() 4486 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 14, i32 3> 4487 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 4488 ret void 4489} 4490 4491define void @v_shuffle_v2i16_v8i16__u_4(ptr addrspace(1) inreg %ptr) { 4492; GFX900-LABEL: v_shuffle_v2i16_v8i16__u_4: 4493; GFX900: ; %bb.0: 4494; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4495; GFX900-NEXT: ;;#ASMSTART 4496; GFX900-NEXT: ; def v[0:3] 4497; GFX900-NEXT: ;;#ASMEND 4498; GFX900-NEXT: v_mov_b32_e32 v4, 0 4499; GFX900-NEXT: v_lshlrev_b32_e32 v0, 16, v2 4500; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 4501; GFX900-NEXT: s_waitcnt vmcnt(0) 4502; GFX900-NEXT: s_setpc_b64 s[30:31] 4503; 4504; GFX90A-LABEL: v_shuffle_v2i16_v8i16__u_4: 4505; GFX90A: ; %bb.0: 4506; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4507; GFX90A-NEXT: ;;#ASMSTART 4508; GFX90A-NEXT: ; def v[0:3] 4509; GFX90A-NEXT: ;;#ASMEND 4510; GFX90A-NEXT: v_mov_b32_e32 v4, 0 4511; GFX90A-NEXT: v_lshlrev_b32_e32 v0, 16, v2 4512; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 4513; GFX90A-NEXT: s_waitcnt vmcnt(0) 4514; GFX90A-NEXT: s_setpc_b64 s[30:31] 4515; 4516; GFX940-LABEL: v_shuffle_v2i16_v8i16__u_4: 4517; GFX940: ; %bb.0: 4518; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4519; GFX940-NEXT: ;;#ASMSTART 4520; GFX940-NEXT: ; def v[0:3] 4521; GFX940-NEXT: ;;#ASMEND 
4522; GFX940-NEXT: v_mov_b32_e32 v4, 0 4523; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v2 4524; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 4525; GFX940-NEXT: s_waitcnt vmcnt(0) 4526; GFX940-NEXT: s_setpc_b64 s[30:31] 4527 %vec0 = call <8 x i16> asm "; def $0", "=v"() 4528 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 poison, i32 4> 4529 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 4530 ret void 4531} 4532 4533define void @v_shuffle_v2i16_v8i16__0_4(ptr addrspace(1) inreg %ptr) { 4534; GFX900-LABEL: v_shuffle_v2i16_v8i16__0_4: 4535; GFX900: ; %bb.0: 4536; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4537; GFX900-NEXT: ;;#ASMSTART 4538; GFX900-NEXT: ; def v[0:3] 4539; GFX900-NEXT: ;;#ASMEND 4540; GFX900-NEXT: s_mov_b32 s4, 0x5040100 4541; GFX900-NEXT: v_mov_b32_e32 v4, 0 4542; GFX900-NEXT: v_perm_b32 v0, v2, v0, s4 4543; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 4544; GFX900-NEXT: s_waitcnt vmcnt(0) 4545; GFX900-NEXT: s_setpc_b64 s[30:31] 4546; 4547; GFX90A-LABEL: v_shuffle_v2i16_v8i16__0_4: 4548; GFX90A: ; %bb.0: 4549; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4550; GFX90A-NEXT: ;;#ASMSTART 4551; GFX90A-NEXT: ; def v[0:3] 4552; GFX90A-NEXT: ;;#ASMEND 4553; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 4554; GFX90A-NEXT: v_mov_b32_e32 v4, 0 4555; GFX90A-NEXT: v_perm_b32 v0, v2, v0, s4 4556; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 4557; GFX90A-NEXT: s_waitcnt vmcnt(0) 4558; GFX90A-NEXT: s_setpc_b64 s[30:31] 4559; 4560; GFX940-LABEL: v_shuffle_v2i16_v8i16__0_4: 4561; GFX940: ; %bb.0: 4562; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4563; GFX940-NEXT: ;;#ASMSTART 4564; GFX940-NEXT: ; def v[0:3] 4565; GFX940-NEXT: ;;#ASMEND 4566; GFX940-NEXT: s_mov_b32 s2, 0x5040100 4567; GFX940-NEXT: v_mov_b32_e32 v4, 0 4568; GFX940-NEXT: v_perm_b32 v0, v2, v0, s2 4569; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 4570; GFX940-NEXT: s_waitcnt vmcnt(0) 4571; GFX940-NEXT: s_setpc_b64 s[30:31] 4572 
%vec0 = call <8 x i16> asm "; def $0", "=v"() 4573 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 0, i32 4> 4574 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 4575 ret void 4576} 4577 4578define void @v_shuffle_v2i16_v8i16__1_4(ptr addrspace(1) inreg %ptr) { 4579; GFX900-LABEL: v_shuffle_v2i16_v8i16__1_4: 4580; GFX900: ; %bb.0: 4581; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4582; GFX900-NEXT: ;;#ASMSTART 4583; GFX900-NEXT: ; def v[0:3] 4584; GFX900-NEXT: ;;#ASMEND 4585; GFX900-NEXT: v_mov_b32_e32 v4, 0 4586; GFX900-NEXT: v_alignbit_b32 v0, v2, v0, 16 4587; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 4588; GFX900-NEXT: s_waitcnt vmcnt(0) 4589; GFX900-NEXT: s_setpc_b64 s[30:31] 4590; 4591; GFX90A-LABEL: v_shuffle_v2i16_v8i16__1_4: 4592; GFX90A: ; %bb.0: 4593; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4594; GFX90A-NEXT: ;;#ASMSTART 4595; GFX90A-NEXT: ; def v[0:3] 4596; GFX90A-NEXT: ;;#ASMEND 4597; GFX90A-NEXT: v_mov_b32_e32 v4, 0 4598; GFX90A-NEXT: v_alignbit_b32 v0, v2, v0, 16 4599; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 4600; GFX90A-NEXT: s_waitcnt vmcnt(0) 4601; GFX90A-NEXT: s_setpc_b64 s[30:31] 4602; 4603; GFX940-LABEL: v_shuffle_v2i16_v8i16__1_4: 4604; GFX940: ; %bb.0: 4605; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4606; GFX940-NEXT: ;;#ASMSTART 4607; GFX940-NEXT: ; def v[0:3] 4608; GFX940-NEXT: ;;#ASMEND 4609; GFX940-NEXT: v_mov_b32_e32 v4, 0 4610; GFX940-NEXT: v_alignbit_b32 v0, v2, v0, 16 4611; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 4612; GFX940-NEXT: s_waitcnt vmcnt(0) 4613; GFX940-NEXT: s_setpc_b64 s[30:31] 4614 %vec0 = call <8 x i16> asm "; def $0", "=v"() 4615 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 1, i32 4> 4616 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 4617 ret void 4618} 4619 4620define void @v_shuffle_v2i16_v8i16__2_4(ptr addrspace(1) inreg %ptr) { 4621; GFX900-LABEL: v_shuffle_v2i16_v8i16__2_4: 4622; GFX900: ; 
%bb.0: 4623; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4624; GFX900-NEXT: ;;#ASMSTART 4625; GFX900-NEXT: ; def v[0:3] 4626; GFX900-NEXT: ;;#ASMEND 4627; GFX900-NEXT: s_mov_b32 s4, 0x5040100 4628; GFX900-NEXT: v_mov_b32_e32 v4, 0 4629; GFX900-NEXT: v_perm_b32 v0, v2, v1, s4 4630; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 4631; GFX900-NEXT: s_waitcnt vmcnt(0) 4632; GFX900-NEXT: s_setpc_b64 s[30:31] 4633; 4634; GFX90A-LABEL: v_shuffle_v2i16_v8i16__2_4: 4635; GFX90A: ; %bb.0: 4636; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4637; GFX90A-NEXT: ;;#ASMSTART 4638; GFX90A-NEXT: ; def v[0:3] 4639; GFX90A-NEXT: ;;#ASMEND 4640; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 4641; GFX90A-NEXT: v_mov_b32_e32 v4, 0 4642; GFX90A-NEXT: v_perm_b32 v0, v2, v1, s4 4643; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 4644; GFX90A-NEXT: s_waitcnt vmcnt(0) 4645; GFX90A-NEXT: s_setpc_b64 s[30:31] 4646; 4647; GFX940-LABEL: v_shuffle_v2i16_v8i16__2_4: 4648; GFX940: ; %bb.0: 4649; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4650; GFX940-NEXT: ;;#ASMSTART 4651; GFX940-NEXT: ; def v[0:3] 4652; GFX940-NEXT: ;;#ASMEND 4653; GFX940-NEXT: s_mov_b32 s2, 0x5040100 4654; GFX940-NEXT: v_mov_b32_e32 v4, 0 4655; GFX940-NEXT: v_perm_b32 v0, v2, v1, s2 4656; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 4657; GFX940-NEXT: s_waitcnt vmcnt(0) 4658; GFX940-NEXT: s_setpc_b64 s[30:31] 4659 %vec0 = call <8 x i16> asm "; def $0", "=v"() 4660 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 2, i32 4> 4661 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 4662 ret void 4663} 4664 4665define void @v_shuffle_v2i16_v8i16__3_4(ptr addrspace(1) inreg %ptr) { 4666; GFX900-LABEL: v_shuffle_v2i16_v8i16__3_4: 4667; GFX900: ; %bb.0: 4668; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4669; GFX900-NEXT: ;;#ASMSTART 4670; GFX900-NEXT: ; def v[0:3] 4671; GFX900-NEXT: ;;#ASMEND 4672; GFX900-NEXT: v_mov_b32_e32 v4, 0 4673; GFX900-NEXT: v_alignbit_b32 v0, v2, 
v1, 16 4674; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 4675; GFX900-NEXT: s_waitcnt vmcnt(0) 4676; GFX900-NEXT: s_setpc_b64 s[30:31] 4677; 4678; GFX90A-LABEL: v_shuffle_v2i16_v8i16__3_4: 4679; GFX90A: ; %bb.0: 4680; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4681; GFX90A-NEXT: ;;#ASMSTART 4682; GFX90A-NEXT: ; def v[0:3] 4683; GFX90A-NEXT: ;;#ASMEND 4684; GFX90A-NEXT: v_mov_b32_e32 v4, 0 4685; GFX90A-NEXT: v_alignbit_b32 v0, v2, v1, 16 4686; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 4687; GFX90A-NEXT: s_waitcnt vmcnt(0) 4688; GFX90A-NEXT: s_setpc_b64 s[30:31] 4689; 4690; GFX940-LABEL: v_shuffle_v2i16_v8i16__3_4: 4691; GFX940: ; %bb.0: 4692; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4693; GFX940-NEXT: ;;#ASMSTART 4694; GFX940-NEXT: ; def v[0:3] 4695; GFX940-NEXT: ;;#ASMEND 4696; GFX940-NEXT: v_mov_b32_e32 v4, 0 4697; GFX940-NEXT: v_alignbit_b32 v0, v2, v1, 16 4698; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 4699; GFX940-NEXT: s_waitcnt vmcnt(0) 4700; GFX940-NEXT: s_setpc_b64 s[30:31] 4701 %vec0 = call <8 x i16> asm "; def $0", "=v"() 4702 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 3, i32 4> 4703 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 4704 ret void 4705} 4706 4707define void @v_shuffle_v2i16_v8i16__4_4(ptr addrspace(1) inreg %ptr) { 4708; GFX900-LABEL: v_shuffle_v2i16_v8i16__4_4: 4709; GFX900: ; %bb.0: 4710; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4711; GFX900-NEXT: ;;#ASMSTART 4712; GFX900-NEXT: ; def v[0:3] 4713; GFX900-NEXT: ;;#ASMEND 4714; GFX900-NEXT: s_mov_b32 s4, 0x5040100 4715; GFX900-NEXT: v_mov_b32_e32 v4, 0 4716; GFX900-NEXT: v_perm_b32 v0, v2, v2, s4 4717; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 4718; GFX900-NEXT: s_waitcnt vmcnt(0) 4719; GFX900-NEXT: s_setpc_b64 s[30:31] 4720; 4721; GFX90A-LABEL: v_shuffle_v2i16_v8i16__4_4: 4722; GFX90A: ; %bb.0: 4723; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4724; GFX90A-NEXT: ;;#ASMSTART 4725; 
GFX90A-NEXT: ; def v[0:3] 4726; GFX90A-NEXT: ;;#ASMEND 4727; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 4728; GFX90A-NEXT: v_mov_b32_e32 v4, 0 4729; GFX90A-NEXT: v_perm_b32 v0, v2, v2, s4 4730; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 4731; GFX90A-NEXT: s_waitcnt vmcnt(0) 4732; GFX90A-NEXT: s_setpc_b64 s[30:31] 4733; 4734; GFX940-LABEL: v_shuffle_v2i16_v8i16__4_4: 4735; GFX940: ; %bb.0: 4736; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4737; GFX940-NEXT: ;;#ASMSTART 4738; GFX940-NEXT: ; def v[0:3] 4739; GFX940-NEXT: ;;#ASMEND 4740; GFX940-NEXT: s_mov_b32 s2, 0x5040100 4741; GFX940-NEXT: v_mov_b32_e32 v4, 0 4742; GFX940-NEXT: v_perm_b32 v0, v2, v2, s2 4743; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 4744; GFX940-NEXT: s_waitcnt vmcnt(0) 4745; GFX940-NEXT: s_setpc_b64 s[30:31] 4746 %vec0 = call <8 x i16> asm "; def $0", "=v"() 4747 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 4, i32 4> 4748 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 4749 ret void 4750} 4751 4752define void @v_shuffle_v2i16_v8i16__5_4(ptr addrspace(1) inreg %ptr) { 4753; GFX900-LABEL: v_shuffle_v2i16_v8i16__5_4: 4754; GFX900: ; %bb.0: 4755; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4756; GFX900-NEXT: ;;#ASMSTART 4757; GFX900-NEXT: ; def v[0:3] 4758; GFX900-NEXT: ;;#ASMEND 4759; GFX900-NEXT: v_mov_b32_e32 v4, 0 4760; GFX900-NEXT: v_alignbit_b32 v0, v2, v2, 16 4761; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 4762; GFX900-NEXT: s_waitcnt vmcnt(0) 4763; GFX900-NEXT: s_setpc_b64 s[30:31] 4764; 4765; GFX90A-LABEL: v_shuffle_v2i16_v8i16__5_4: 4766; GFX90A: ; %bb.0: 4767; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4768; GFX90A-NEXT: ;;#ASMSTART 4769; GFX90A-NEXT: ; def v[0:3] 4770; GFX90A-NEXT: ;;#ASMEND 4771; GFX90A-NEXT: v_mov_b32_e32 v4, 0 4772; GFX90A-NEXT: v_alignbit_b32 v0, v2, v2, 16 4773; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 4774; GFX90A-NEXT: s_waitcnt vmcnt(0) 4775; GFX90A-NEXT: s_setpc_b64 s[30:31] 
4776; 4777; GFX940-LABEL: v_shuffle_v2i16_v8i16__5_4: 4778; GFX940: ; %bb.0: 4779; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4780; GFX940-NEXT: ;;#ASMSTART 4781; GFX940-NEXT: ; def v[0:3] 4782; GFX940-NEXT: ;;#ASMEND 4783; GFX940-NEXT: v_mov_b32_e32 v4, 0 4784; GFX940-NEXT: v_alignbit_b32 v0, v2, v2, 16 4785; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 4786; GFX940-NEXT: s_waitcnt vmcnt(0) 4787; GFX940-NEXT: s_setpc_b64 s[30:31] 4788 %vec0 = call <8 x i16> asm "; def $0", "=v"() 4789 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 5, i32 4> 4790 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 4791 ret void 4792} 4793 4794define void @v_shuffle_v2i16_v8i16__6_4(ptr addrspace(1) inreg %ptr) { 4795; GFX900-LABEL: v_shuffle_v2i16_v8i16__6_4: 4796; GFX900: ; %bb.0: 4797; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4798; GFX900-NEXT: ;;#ASMSTART 4799; GFX900-NEXT: ; def v[0:3] 4800; GFX900-NEXT: ;;#ASMEND 4801; GFX900-NEXT: s_mov_b32 s4, 0x5040100 4802; GFX900-NEXT: v_mov_b32_e32 v4, 0 4803; GFX900-NEXT: v_perm_b32 v0, v2, v3, s4 4804; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 4805; GFX900-NEXT: s_waitcnt vmcnt(0) 4806; GFX900-NEXT: s_setpc_b64 s[30:31] 4807; 4808; GFX90A-LABEL: v_shuffle_v2i16_v8i16__6_4: 4809; GFX90A: ; %bb.0: 4810; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4811; GFX90A-NEXT: ;;#ASMSTART 4812; GFX90A-NEXT: ; def v[0:3] 4813; GFX90A-NEXT: ;;#ASMEND 4814; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 4815; GFX90A-NEXT: v_mov_b32_e32 v4, 0 4816; GFX90A-NEXT: v_perm_b32 v0, v2, v3, s4 4817; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 4818; GFX90A-NEXT: s_waitcnt vmcnt(0) 4819; GFX90A-NEXT: s_setpc_b64 s[30:31] 4820; 4821; GFX940-LABEL: v_shuffle_v2i16_v8i16__6_4: 4822; GFX940: ; %bb.0: 4823; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4824; GFX940-NEXT: ;;#ASMSTART 4825; GFX940-NEXT: ; def v[0:3] 4826; GFX940-NEXT: ;;#ASMEND 4827; GFX940-NEXT: s_mov_b32 s2, 0x5040100 4828; 
GFX940-NEXT: v_mov_b32_e32 v4, 0 4829; GFX940-NEXT: v_perm_b32 v0, v2, v3, s2 4830; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 4831; GFX940-NEXT: s_waitcnt vmcnt(0) 4832; GFX940-NEXT: s_setpc_b64 s[30:31] 4833 %vec0 = call <8 x i16> asm "; def $0", "=v"() 4834 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 6, i32 4> 4835 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 4836 ret void 4837} 4838 4839define void @v_shuffle_v2i16_v8i16__7_4(ptr addrspace(1) inreg %ptr) { 4840; GFX900-LABEL: v_shuffle_v2i16_v8i16__7_4: 4841; GFX900: ; %bb.0: 4842; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4843; GFX900-NEXT: ;;#ASMSTART 4844; GFX900-NEXT: ; def v[0:3] 4845; GFX900-NEXT: ;;#ASMEND 4846; GFX900-NEXT: v_mov_b32_e32 v4, 0 4847; GFX900-NEXT: v_alignbit_b32 v0, v2, v3, 16 4848; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 4849; GFX900-NEXT: s_waitcnt vmcnt(0) 4850; GFX900-NEXT: s_setpc_b64 s[30:31] 4851; 4852; GFX90A-LABEL: v_shuffle_v2i16_v8i16__7_4: 4853; GFX90A: ; %bb.0: 4854; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4855; GFX90A-NEXT: ;;#ASMSTART 4856; GFX90A-NEXT: ; def v[0:3] 4857; GFX90A-NEXT: ;;#ASMEND 4858; GFX90A-NEXT: v_mov_b32_e32 v4, 0 4859; GFX90A-NEXT: v_alignbit_b32 v0, v2, v3, 16 4860; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 4861; GFX90A-NEXT: s_waitcnt vmcnt(0) 4862; GFX90A-NEXT: s_setpc_b64 s[30:31] 4863; 4864; GFX940-LABEL: v_shuffle_v2i16_v8i16__7_4: 4865; GFX940: ; %bb.0: 4866; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4867; GFX940-NEXT: ;;#ASMSTART 4868; GFX940-NEXT: ; def v[0:3] 4869; GFX940-NEXT: ;;#ASMEND 4870; GFX940-NEXT: v_mov_b32_e32 v4, 0 4871; GFX940-NEXT: v_alignbit_b32 v0, v2, v3, 16 4872; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 4873; GFX940-NEXT: s_waitcnt vmcnt(0) 4874; GFX940-NEXT: s_setpc_b64 s[30:31] 4875 %vec0 = call <8 x i16> asm "; def $0", "=v"() 4876 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 7, i32 4> 
4877 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 4878 ret void 4879} 4880 4881define void @v_shuffle_v2i16_v8i16__8_4(ptr addrspace(1) inreg %ptr) { 4882; GFX900-LABEL: v_shuffle_v2i16_v8i16__8_4: 4883; GFX900: ; %bb.0: 4884; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4885; GFX900-NEXT: ;;#ASMSTART 4886; GFX900-NEXT: ; def v[0:3] 4887; GFX900-NEXT: ;;#ASMEND 4888; GFX900-NEXT: v_mov_b32_e32 v4, 0 4889; GFX900-NEXT: v_lshlrev_b32_e32 v0, 16, v2 4890; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 4891; GFX900-NEXT: s_waitcnt vmcnt(0) 4892; GFX900-NEXT: s_setpc_b64 s[30:31] 4893; 4894; GFX90A-LABEL: v_shuffle_v2i16_v8i16__8_4: 4895; GFX90A: ; %bb.0: 4896; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4897; GFX90A-NEXT: ;;#ASMSTART 4898; GFX90A-NEXT: ; def v[0:3] 4899; GFX90A-NEXT: ;;#ASMEND 4900; GFX90A-NEXT: v_mov_b32_e32 v4, 0 4901; GFX90A-NEXT: v_lshlrev_b32_e32 v0, 16, v2 4902; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 4903; GFX90A-NEXT: s_waitcnt vmcnt(0) 4904; GFX90A-NEXT: s_setpc_b64 s[30:31] 4905; 4906; GFX940-LABEL: v_shuffle_v2i16_v8i16__8_4: 4907; GFX940: ; %bb.0: 4908; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4909; GFX940-NEXT: ;;#ASMSTART 4910; GFX940-NEXT: ; def v[0:3] 4911; GFX940-NEXT: ;;#ASMEND 4912; GFX940-NEXT: v_mov_b32_e32 v4, 0 4913; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v2 4914; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 4915; GFX940-NEXT: s_waitcnt vmcnt(0) 4916; GFX940-NEXT: s_setpc_b64 s[30:31] 4917 %vec0 = call <8 x i16> asm "; def $0", "=v"() 4918 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 8, i32 4> 4919 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 4920 ret void 4921} 4922 4923define void @v_shuffle_v2i16_v8i16__9_4(ptr addrspace(1) inreg %ptr) { 4924; GFX900-LABEL: v_shuffle_v2i16_v8i16__9_4: 4925; GFX900: ; %bb.0: 4926; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4927; GFX900-NEXT: ;;#ASMSTART 4928; GFX900-NEXT: ; def v[0:3] 4929; 
GFX900-NEXT: ;;#ASMEND 4930; GFX900-NEXT: v_mov_b32_e32 v7, 0 4931; GFX900-NEXT: ;;#ASMSTART 4932; GFX900-NEXT: ; def v[3:6] 4933; GFX900-NEXT: ;;#ASMEND 4934; GFX900-NEXT: v_alignbit_b32 v0, v2, v3, 16 4935; GFX900-NEXT: global_store_dword v7, v0, s[16:17] 4936; GFX900-NEXT: s_waitcnt vmcnt(0) 4937; GFX900-NEXT: s_setpc_b64 s[30:31] 4938; 4939; GFX90A-LABEL: v_shuffle_v2i16_v8i16__9_4: 4940; GFX90A: ; %bb.0: 4941; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4942; GFX90A-NEXT: ;;#ASMSTART 4943; GFX90A-NEXT: ; def v[0:3] 4944; GFX90A-NEXT: ;;#ASMEND 4945; GFX90A-NEXT: v_mov_b32_e32 v8, 0 4946; GFX90A-NEXT: ;;#ASMSTART 4947; GFX90A-NEXT: ; def v[4:7] 4948; GFX90A-NEXT: ;;#ASMEND 4949; GFX90A-NEXT: v_alignbit_b32 v0, v2, v4, 16 4950; GFX90A-NEXT: global_store_dword v8, v0, s[16:17] 4951; GFX90A-NEXT: s_waitcnt vmcnt(0) 4952; GFX90A-NEXT: s_setpc_b64 s[30:31] 4953; 4954; GFX940-LABEL: v_shuffle_v2i16_v8i16__9_4: 4955; GFX940: ; %bb.0: 4956; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4957; GFX940-NEXT: ;;#ASMSTART 4958; GFX940-NEXT: ; def v[0:3] 4959; GFX940-NEXT: ;;#ASMEND 4960; GFX940-NEXT: v_mov_b32_e32 v8, 0 4961; GFX940-NEXT: ;;#ASMSTART 4962; GFX940-NEXT: ; def v[4:7] 4963; GFX940-NEXT: ;;#ASMEND 4964; GFX940-NEXT: s_nop 0 4965; GFX940-NEXT: v_alignbit_b32 v0, v2, v4, 16 4966; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 4967; GFX940-NEXT: s_waitcnt vmcnt(0) 4968; GFX940-NEXT: s_setpc_b64 s[30:31] 4969 %vec0 = call <8 x i16> asm "; def $0", "=v"() 4970 %vec1 = call <8 x i16> asm "; def $0", "=v"() 4971 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 9, i32 4> 4972 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 4973 ret void 4974} 4975 4976define void @v_shuffle_v2i16_v8i16__10_4(ptr addrspace(1) inreg %ptr) { 4977; GFX900-LABEL: v_shuffle_v2i16_v8i16__10_4: 4978; GFX900: ; %bb.0: 4979; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4980; GFX900-NEXT: ;;#ASMSTART 4981; GFX900-NEXT: ; def v[0:3] 4982; 
GFX900-NEXT: ;;#ASMEND 4983; GFX900-NEXT: s_mov_b32 s4, 0x5040100 4984; GFX900-NEXT: v_mov_b32_e32 v7, 0 4985; GFX900-NEXT: ;;#ASMSTART 4986; GFX900-NEXT: ; def v[3:6] 4987; GFX900-NEXT: ;;#ASMEND 4988; GFX900-NEXT: v_perm_b32 v0, v2, v4, s4 4989; GFX900-NEXT: global_store_dword v7, v0, s[16:17] 4990; GFX900-NEXT: s_waitcnt vmcnt(0) 4991; GFX900-NEXT: s_setpc_b64 s[30:31] 4992; 4993; GFX90A-LABEL: v_shuffle_v2i16_v8i16__10_4: 4994; GFX90A: ; %bb.0: 4995; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4996; GFX90A-NEXT: ;;#ASMSTART 4997; GFX90A-NEXT: ; def v[0:3] 4998; GFX90A-NEXT: ;;#ASMEND 4999; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 5000; GFX90A-NEXT: v_mov_b32_e32 v8, 0 5001; GFX90A-NEXT: ;;#ASMSTART 5002; GFX90A-NEXT: ; def v[4:7] 5003; GFX90A-NEXT: ;;#ASMEND 5004; GFX90A-NEXT: v_perm_b32 v0, v2, v5, s4 5005; GFX90A-NEXT: global_store_dword v8, v0, s[16:17] 5006; GFX90A-NEXT: s_waitcnt vmcnt(0) 5007; GFX90A-NEXT: s_setpc_b64 s[30:31] 5008; 5009; GFX940-LABEL: v_shuffle_v2i16_v8i16__10_4: 5010; GFX940: ; %bb.0: 5011; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5012; GFX940-NEXT: ;;#ASMSTART 5013; GFX940-NEXT: ; def v[0:3] 5014; GFX940-NEXT: ;;#ASMEND 5015; GFX940-NEXT: s_mov_b32 s2, 0x5040100 5016; GFX940-NEXT: v_mov_b32_e32 v8, 0 5017; GFX940-NEXT: ;;#ASMSTART 5018; GFX940-NEXT: ; def v[4:7] 5019; GFX940-NEXT: ;;#ASMEND 5020; GFX940-NEXT: s_nop 0 5021; GFX940-NEXT: v_perm_b32 v0, v2, v5, s2 5022; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 5023; GFX940-NEXT: s_waitcnt vmcnt(0) 5024; GFX940-NEXT: s_setpc_b64 s[30:31] 5025 %vec0 = call <8 x i16> asm "; def $0", "=v"() 5026 %vec1 = call <8 x i16> asm "; def $0", "=v"() 5027 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 10, i32 4> 5028 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 5029 ret void 5030} 5031 5032define void @v_shuffle_v2i16_v8i16__11_4(ptr addrspace(1) inreg %ptr) { 5033; GFX900-LABEL: v_shuffle_v2i16_v8i16__11_4: 5034; GFX900: ; %bb.0: 5035; 
GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5036; GFX900-NEXT: ;;#ASMSTART 5037; GFX900-NEXT: ; def v[0:3] 5038; GFX900-NEXT: ;;#ASMEND 5039; GFX900-NEXT: v_mov_b32_e32 v7, 0 5040; GFX900-NEXT: ;;#ASMSTART 5041; GFX900-NEXT: ; def v[3:6] 5042; GFX900-NEXT: ;;#ASMEND 5043; GFX900-NEXT: v_alignbit_b32 v0, v2, v4, 16 5044; GFX900-NEXT: global_store_dword v7, v0, s[16:17] 5045; GFX900-NEXT: s_waitcnt vmcnt(0) 5046; GFX900-NEXT: s_setpc_b64 s[30:31] 5047; 5048; GFX90A-LABEL: v_shuffle_v2i16_v8i16__11_4: 5049; GFX90A: ; %bb.0: 5050; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5051; GFX90A-NEXT: ;;#ASMSTART 5052; GFX90A-NEXT: ; def v[0:3] 5053; GFX90A-NEXT: ;;#ASMEND 5054; GFX90A-NEXT: v_mov_b32_e32 v8, 0 5055; GFX90A-NEXT: ;;#ASMSTART 5056; GFX90A-NEXT: ; def v[4:7] 5057; GFX90A-NEXT: ;;#ASMEND 5058; GFX90A-NEXT: v_alignbit_b32 v0, v2, v5, 16 5059; GFX90A-NEXT: global_store_dword v8, v0, s[16:17] 5060; GFX90A-NEXT: s_waitcnt vmcnt(0) 5061; GFX90A-NEXT: s_setpc_b64 s[30:31] 5062; 5063; GFX940-LABEL: v_shuffle_v2i16_v8i16__11_4: 5064; GFX940: ; %bb.0: 5065; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5066; GFX940-NEXT: ;;#ASMSTART 5067; GFX940-NEXT: ; def v[0:3] 5068; GFX940-NEXT: ;;#ASMEND 5069; GFX940-NEXT: v_mov_b32_e32 v8, 0 5070; GFX940-NEXT: ;;#ASMSTART 5071; GFX940-NEXT: ; def v[4:7] 5072; GFX940-NEXT: ;;#ASMEND 5073; GFX940-NEXT: s_nop 0 5074; GFX940-NEXT: v_alignbit_b32 v0, v2, v5, 16 5075; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 5076; GFX940-NEXT: s_waitcnt vmcnt(0) 5077; GFX940-NEXT: s_setpc_b64 s[30:31] 5078 %vec0 = call <8 x i16> asm "; def $0", "=v"() 5079 %vec1 = call <8 x i16> asm "; def $0", "=v"() 5080 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 11, i32 4> 5081 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 5082 ret void 5083} 5084 5085define void @v_shuffle_v2i16_v8i16__12_4(ptr addrspace(1) inreg %ptr) { 5086; GFX900-LABEL: v_shuffle_v2i16_v8i16__12_4: 5087; GFX900: ; %bb.0: 
5088; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5089; GFX900-NEXT: ;;#ASMSTART 5090; GFX900-NEXT: ; def v[0:3] 5091; GFX900-NEXT: ;;#ASMEND 5092; GFX900-NEXT: s_mov_b32 s4, 0x5040100 5093; GFX900-NEXT: v_mov_b32_e32 v7, 0 5094; GFX900-NEXT: ;;#ASMSTART 5095; GFX900-NEXT: ; def v[3:6] 5096; GFX900-NEXT: ;;#ASMEND 5097; GFX900-NEXT: v_perm_b32 v0, v2, v5, s4 5098; GFX900-NEXT: global_store_dword v7, v0, s[16:17] 5099; GFX900-NEXT: s_waitcnt vmcnt(0) 5100; GFX900-NEXT: s_setpc_b64 s[30:31] 5101; 5102; GFX90A-LABEL: v_shuffle_v2i16_v8i16__12_4: 5103; GFX90A: ; %bb.0: 5104; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5105; GFX90A-NEXT: ;;#ASMSTART 5106; GFX90A-NEXT: ; def v[0:3] 5107; GFX90A-NEXT: ;;#ASMEND 5108; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 5109; GFX90A-NEXT: v_mov_b32_e32 v8, 0 5110; GFX90A-NEXT: ;;#ASMSTART 5111; GFX90A-NEXT: ; def v[4:7] 5112; GFX90A-NEXT: ;;#ASMEND 5113; GFX90A-NEXT: v_perm_b32 v0, v2, v6, s4 5114; GFX90A-NEXT: global_store_dword v8, v0, s[16:17] 5115; GFX90A-NEXT: s_waitcnt vmcnt(0) 5116; GFX90A-NEXT: s_setpc_b64 s[30:31] 5117; 5118; GFX940-LABEL: v_shuffle_v2i16_v8i16__12_4: 5119; GFX940: ; %bb.0: 5120; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5121; GFX940-NEXT: ;;#ASMSTART 5122; GFX940-NEXT: ; def v[0:3] 5123; GFX940-NEXT: ;;#ASMEND 5124; GFX940-NEXT: s_mov_b32 s2, 0x5040100 5125; GFX940-NEXT: v_mov_b32_e32 v8, 0 5126; GFX940-NEXT: ;;#ASMSTART 5127; GFX940-NEXT: ; def v[4:7] 5128; GFX940-NEXT: ;;#ASMEND 5129; GFX940-NEXT: s_nop 0 5130; GFX940-NEXT: v_perm_b32 v0, v2, v6, s2 5131; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 5132; GFX940-NEXT: s_waitcnt vmcnt(0) 5133; GFX940-NEXT: s_setpc_b64 s[30:31] 5134 %vec0 = call <8 x i16> asm "; def $0", "=v"() 5135 %vec1 = call <8 x i16> asm "; def $0", "=v"() 5136 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 12, i32 4> 5137 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 5138 ret void 5139} 5140 5141define void 
@v_shuffle_v2i16_v8i16__13_4(ptr addrspace(1) inreg %ptr) { 5142; GFX900-LABEL: v_shuffle_v2i16_v8i16__13_4: 5143; GFX900: ; %bb.0: 5144; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5145; GFX900-NEXT: ;;#ASMSTART 5146; GFX900-NEXT: ; def v[0:3] 5147; GFX900-NEXT: ;;#ASMEND 5148; GFX900-NEXT: v_mov_b32_e32 v7, 0 5149; GFX900-NEXT: ;;#ASMSTART 5150; GFX900-NEXT: ; def v[3:6] 5151; GFX900-NEXT: ;;#ASMEND 5152; GFX900-NEXT: v_alignbit_b32 v0, v2, v5, 16 5153; GFX900-NEXT: global_store_dword v7, v0, s[16:17] 5154; GFX900-NEXT: s_waitcnt vmcnt(0) 5155; GFX900-NEXT: s_setpc_b64 s[30:31] 5156; 5157; GFX90A-LABEL: v_shuffle_v2i16_v8i16__13_4: 5158; GFX90A: ; %bb.0: 5159; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5160; GFX90A-NEXT: ;;#ASMSTART 5161; GFX90A-NEXT: ; def v[0:3] 5162; GFX90A-NEXT: ;;#ASMEND 5163; GFX90A-NEXT: v_mov_b32_e32 v8, 0 5164; GFX90A-NEXT: ;;#ASMSTART 5165; GFX90A-NEXT: ; def v[4:7] 5166; GFX90A-NEXT: ;;#ASMEND 5167; GFX90A-NEXT: v_alignbit_b32 v0, v2, v6, 16 5168; GFX90A-NEXT: global_store_dword v8, v0, s[16:17] 5169; GFX90A-NEXT: s_waitcnt vmcnt(0) 5170; GFX90A-NEXT: s_setpc_b64 s[30:31] 5171; 5172; GFX940-LABEL: v_shuffle_v2i16_v8i16__13_4: 5173; GFX940: ; %bb.0: 5174; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5175; GFX940-NEXT: ;;#ASMSTART 5176; GFX940-NEXT: ; def v[0:3] 5177; GFX940-NEXT: ;;#ASMEND 5178; GFX940-NEXT: v_mov_b32_e32 v8, 0 5179; GFX940-NEXT: ;;#ASMSTART 5180; GFX940-NEXT: ; def v[4:7] 5181; GFX940-NEXT: ;;#ASMEND 5182; GFX940-NEXT: s_nop 0 5183; GFX940-NEXT: v_alignbit_b32 v0, v2, v6, 16 5184; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 5185; GFX940-NEXT: s_waitcnt vmcnt(0) 5186; GFX940-NEXT: s_setpc_b64 s[30:31] 5187 %vec0 = call <8 x i16> asm "; def $0", "=v"() 5188 %vec1 = call <8 x i16> asm "; def $0", "=v"() 5189 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 13, i32 4> 5190 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 5191 ret void 5192} 5193 5194define 
void @v_shuffle_v2i16_v8i16__14_4(ptr addrspace(1) inreg %ptr) { 5195; GFX900-LABEL: v_shuffle_v2i16_v8i16__14_4: 5196; GFX900: ; %bb.0: 5197; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5198; GFX900-NEXT: ;;#ASMSTART 5199; GFX900-NEXT: ; def v[0:3] 5200; GFX900-NEXT: ;;#ASMEND 5201; GFX900-NEXT: s_mov_b32 s4, 0x5040100 5202; GFX900-NEXT: v_mov_b32_e32 v7, 0 5203; GFX900-NEXT: ;;#ASMSTART 5204; GFX900-NEXT: ; def v[3:6] 5205; GFX900-NEXT: ;;#ASMEND 5206; GFX900-NEXT: v_perm_b32 v0, v2, v6, s4 5207; GFX900-NEXT: global_store_dword v7, v0, s[16:17] 5208; GFX900-NEXT: s_waitcnt vmcnt(0) 5209; GFX900-NEXT: s_setpc_b64 s[30:31] 5210; 5211; GFX90A-LABEL: v_shuffle_v2i16_v8i16__14_4: 5212; GFX90A: ; %bb.0: 5213; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5214; GFX90A-NEXT: ;;#ASMSTART 5215; GFX90A-NEXT: ; def v[0:3] 5216; GFX90A-NEXT: ;;#ASMEND 5217; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 5218; GFX90A-NEXT: v_mov_b32_e32 v8, 0 5219; GFX90A-NEXT: ;;#ASMSTART 5220; GFX90A-NEXT: ; def v[4:7] 5221; GFX90A-NEXT: ;;#ASMEND 5222; GFX90A-NEXT: v_perm_b32 v0, v2, v7, s4 5223; GFX90A-NEXT: global_store_dword v8, v0, s[16:17] 5224; GFX90A-NEXT: s_waitcnt vmcnt(0) 5225; GFX90A-NEXT: s_setpc_b64 s[30:31] 5226; 5227; GFX940-LABEL: v_shuffle_v2i16_v8i16__14_4: 5228; GFX940: ; %bb.0: 5229; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5230; GFX940-NEXT: ;;#ASMSTART 5231; GFX940-NEXT: ; def v[0:3] 5232; GFX940-NEXT: ;;#ASMEND 5233; GFX940-NEXT: s_mov_b32 s2, 0x5040100 5234; GFX940-NEXT: v_mov_b32_e32 v8, 0 5235; GFX940-NEXT: ;;#ASMSTART 5236; GFX940-NEXT: ; def v[4:7] 5237; GFX940-NEXT: ;;#ASMEND 5238; GFX940-NEXT: s_nop 0 5239; GFX940-NEXT: v_perm_b32 v0, v2, v7, s2 5240; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 5241; GFX940-NEXT: s_waitcnt vmcnt(0) 5242; GFX940-NEXT: s_setpc_b64 s[30:31] 5243 %vec0 = call <8 x i16> asm "; def $0", "=v"() 5244 %vec1 = call <8 x i16> asm "; def $0", "=v"() 5245 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, 
<2 x i32> <i32 14, i32 4> 5246 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 5247 ret void 5248} 5249 5250define void @v_shuffle_v2i16_v8i16__u_5(ptr addrspace(1) inreg %ptr) { 5251; GFX900-LABEL: v_shuffle_v2i16_v8i16__u_5: 5252; GFX900: ; %bb.0: 5253; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5254; GFX900-NEXT: v_mov_b32_e32 v4, 0 5255; GFX900-NEXT: ;;#ASMSTART 5256; GFX900-NEXT: ; def v[0:3] 5257; GFX900-NEXT: ;;#ASMEND 5258; GFX900-NEXT: global_store_dword v4, v2, s[16:17] 5259; GFX900-NEXT: s_waitcnt vmcnt(0) 5260; GFX900-NEXT: s_setpc_b64 s[30:31] 5261; 5262; GFX90A-LABEL: v_shuffle_v2i16_v8i16__u_5: 5263; GFX90A: ; %bb.0: 5264; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5265; GFX90A-NEXT: v_mov_b32_e32 v4, 0 5266; GFX90A-NEXT: ;;#ASMSTART 5267; GFX90A-NEXT: ; def v[0:3] 5268; GFX90A-NEXT: ;;#ASMEND 5269; GFX90A-NEXT: global_store_dword v4, v2, s[16:17] 5270; GFX90A-NEXT: s_waitcnt vmcnt(0) 5271; GFX90A-NEXT: s_setpc_b64 s[30:31] 5272; 5273; GFX940-LABEL: v_shuffle_v2i16_v8i16__u_5: 5274; GFX940: ; %bb.0: 5275; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5276; GFX940-NEXT: v_mov_b32_e32 v4, 0 5277; GFX940-NEXT: ;;#ASMSTART 5278; GFX940-NEXT: ; def v[0:3] 5279; GFX940-NEXT: ;;#ASMEND 5280; GFX940-NEXT: global_store_dword v4, v2, s[0:1] sc0 sc1 5281; GFX940-NEXT: s_waitcnt vmcnt(0) 5282; GFX940-NEXT: s_setpc_b64 s[30:31] 5283 %vec0 = call <8 x i16> asm "; def $0", "=v"() 5284 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 poison, i32 5> 5285 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 5286 ret void 5287} 5288 5289define void @v_shuffle_v2i16_v8i16__0_5(ptr addrspace(1) inreg %ptr) { 5290; GFX900-LABEL: v_shuffle_v2i16_v8i16__0_5: 5291; GFX900: ; %bb.0: 5292; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5293; GFX900-NEXT: ;;#ASMSTART 5294; GFX900-NEXT: ; def v[0:3] 5295; GFX900-NEXT: ;;#ASMEND 5296; GFX900-NEXT: s_mov_b32 s4, 0xffff 5297; GFX900-NEXT: v_mov_b32_e32 v4, 0 5298; 
GFX900-NEXT: v_bfi_b32 v0, s4, v0, v2 5299; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 5300; GFX900-NEXT: s_waitcnt vmcnt(0) 5301; GFX900-NEXT: s_setpc_b64 s[30:31] 5302; 5303; GFX90A-LABEL: v_shuffle_v2i16_v8i16__0_5: 5304; GFX90A: ; %bb.0: 5305; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5306; GFX90A-NEXT: ;;#ASMSTART 5307; GFX90A-NEXT: ; def v[0:3] 5308; GFX90A-NEXT: ;;#ASMEND 5309; GFX90A-NEXT: s_mov_b32 s4, 0xffff 5310; GFX90A-NEXT: v_mov_b32_e32 v4, 0 5311; GFX90A-NEXT: v_bfi_b32 v0, s4, v0, v2 5312; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 5313; GFX90A-NEXT: s_waitcnt vmcnt(0) 5314; GFX90A-NEXT: s_setpc_b64 s[30:31] 5315; 5316; GFX940-LABEL: v_shuffle_v2i16_v8i16__0_5: 5317; GFX940: ; %bb.0: 5318; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5319; GFX940-NEXT: ;;#ASMSTART 5320; GFX940-NEXT: ; def v[0:3] 5321; GFX940-NEXT: ;;#ASMEND 5322; GFX940-NEXT: s_mov_b32 s2, 0xffff 5323; GFX940-NEXT: v_mov_b32_e32 v4, 0 5324; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v2 5325; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 5326; GFX940-NEXT: s_waitcnt vmcnt(0) 5327; GFX940-NEXT: s_setpc_b64 s[30:31] 5328 %vec0 = call <8 x i16> asm "; def $0", "=v"() 5329 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 0, i32 5> 5330 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 5331 ret void 5332} 5333 5334define void @v_shuffle_v2i16_v8i16__1_5(ptr addrspace(1) inreg %ptr) { 5335; GFX900-LABEL: v_shuffle_v2i16_v8i16__1_5: 5336; GFX900: ; %bb.0: 5337; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5338; GFX900-NEXT: ;;#ASMSTART 5339; GFX900-NEXT: ; def v[0:3] 5340; GFX900-NEXT: ;;#ASMEND 5341; GFX900-NEXT: s_mov_b32 s4, 0x7060302 5342; GFX900-NEXT: v_mov_b32_e32 v4, 0 5343; GFX900-NEXT: v_perm_b32 v0, v2, v0, s4 5344; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 5345; GFX900-NEXT: s_waitcnt vmcnt(0) 5346; GFX900-NEXT: s_setpc_b64 s[30:31] 5347; 5348; GFX90A-LABEL: v_shuffle_v2i16_v8i16__1_5: 5349; GFX90A: ; 
%bb.0: 5350; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5351; GFX90A-NEXT: ;;#ASMSTART 5352; GFX90A-NEXT: ; def v[0:3] 5353; GFX90A-NEXT: ;;#ASMEND 5354; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 5355; GFX90A-NEXT: v_mov_b32_e32 v4, 0 5356; GFX90A-NEXT: v_perm_b32 v0, v2, v0, s4 5357; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 5358; GFX90A-NEXT: s_waitcnt vmcnt(0) 5359; GFX90A-NEXT: s_setpc_b64 s[30:31] 5360; 5361; GFX940-LABEL: v_shuffle_v2i16_v8i16__1_5: 5362; GFX940: ; %bb.0: 5363; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5364; GFX940-NEXT: ;;#ASMSTART 5365; GFX940-NEXT: ; def v[0:3] 5366; GFX940-NEXT: ;;#ASMEND 5367; GFX940-NEXT: s_mov_b32 s2, 0x7060302 5368; GFX940-NEXT: v_mov_b32_e32 v4, 0 5369; GFX940-NEXT: v_perm_b32 v0, v2, v0, s2 5370; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 5371; GFX940-NEXT: s_waitcnt vmcnt(0) 5372; GFX940-NEXT: s_setpc_b64 s[30:31] 5373 %vec0 = call <8 x i16> asm "; def $0", "=v"() 5374 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 1, i32 5> 5375 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 5376 ret void 5377} 5378 5379define void @v_shuffle_v2i16_v8i16__2_5(ptr addrspace(1) inreg %ptr) { 5380; GFX900-LABEL: v_shuffle_v2i16_v8i16__2_5: 5381; GFX900: ; %bb.0: 5382; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5383; GFX900-NEXT: ;;#ASMSTART 5384; GFX900-NEXT: ; def v[0:3] 5385; GFX900-NEXT: ;;#ASMEND 5386; GFX900-NEXT: s_mov_b32 s4, 0xffff 5387; GFX900-NEXT: v_mov_b32_e32 v4, 0 5388; GFX900-NEXT: v_bfi_b32 v0, s4, v1, v2 5389; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 5390; GFX900-NEXT: s_waitcnt vmcnt(0) 5391; GFX900-NEXT: s_setpc_b64 s[30:31] 5392; 5393; GFX90A-LABEL: v_shuffle_v2i16_v8i16__2_5: 5394; GFX90A: ; %bb.0: 5395; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5396; GFX90A-NEXT: ;;#ASMSTART 5397; GFX90A-NEXT: ; def v[0:3] 5398; GFX90A-NEXT: ;;#ASMEND 5399; GFX90A-NEXT: s_mov_b32 s4, 0xffff 5400; GFX90A-NEXT: v_mov_b32_e32 v4, 0 5401; 
GFX90A-NEXT: v_bfi_b32 v0, s4, v1, v2 5402; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 5403; GFX90A-NEXT: s_waitcnt vmcnt(0) 5404; GFX90A-NEXT: s_setpc_b64 s[30:31] 5405; 5406; GFX940-LABEL: v_shuffle_v2i16_v8i16__2_5: 5407; GFX940: ; %bb.0: 5408; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5409; GFX940-NEXT: ;;#ASMSTART 5410; GFX940-NEXT: ; def v[0:3] 5411; GFX940-NEXT: ;;#ASMEND 5412; GFX940-NEXT: s_mov_b32 s2, 0xffff 5413; GFX940-NEXT: v_mov_b32_e32 v4, 0 5414; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v2 5415; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 5416; GFX940-NEXT: s_waitcnt vmcnt(0) 5417; GFX940-NEXT: s_setpc_b64 s[30:31] 5418 %vec0 = call <8 x i16> asm "; def $0", "=v"() 5419 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 2, i32 5> 5420 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 5421 ret void 5422} 5423 5424define void @v_shuffle_v2i16_v8i16__3_5(ptr addrspace(1) inreg %ptr) { 5425; GFX900-LABEL: v_shuffle_v2i16_v8i16__3_5: 5426; GFX900: ; %bb.0: 5427; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5428; GFX900-NEXT: ;;#ASMSTART 5429; GFX900-NEXT: ; def v[0:3] 5430; GFX900-NEXT: ;;#ASMEND 5431; GFX900-NEXT: s_mov_b32 s4, 0x7060302 5432; GFX900-NEXT: v_mov_b32_e32 v4, 0 5433; GFX900-NEXT: v_perm_b32 v0, v2, v1, s4 5434; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 5435; GFX900-NEXT: s_waitcnt vmcnt(0) 5436; GFX900-NEXT: s_setpc_b64 s[30:31] 5437; 5438; GFX90A-LABEL: v_shuffle_v2i16_v8i16__3_5: 5439; GFX90A: ; %bb.0: 5440; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5441; GFX90A-NEXT: ;;#ASMSTART 5442; GFX90A-NEXT: ; def v[0:3] 5443; GFX90A-NEXT: ;;#ASMEND 5444; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 5445; GFX90A-NEXT: v_mov_b32_e32 v4, 0 5446; GFX90A-NEXT: v_perm_b32 v0, v2, v1, s4 5447; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 5448; GFX90A-NEXT: s_waitcnt vmcnt(0) 5449; GFX90A-NEXT: s_setpc_b64 s[30:31] 5450; 5451; GFX940-LABEL: v_shuffle_v2i16_v8i16__3_5: 5452; GFX940: ; 
%bb.0: 5453; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5454; GFX940-NEXT: ;;#ASMSTART 5455; GFX940-NEXT: ; def v[0:3] 5456; GFX940-NEXT: ;;#ASMEND 5457; GFX940-NEXT: s_mov_b32 s2, 0x7060302 5458; GFX940-NEXT: v_mov_b32_e32 v4, 0 5459; GFX940-NEXT: v_perm_b32 v0, v2, v1, s2 5460; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 5461; GFX940-NEXT: s_waitcnt vmcnt(0) 5462; GFX940-NEXT: s_setpc_b64 s[30:31] 5463 %vec0 = call <8 x i16> asm "; def $0", "=v"() 5464 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 3, i32 5> 5465 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 5466 ret void 5467} 5468 5469define void @v_shuffle_v2i16_v8i16__4_5(ptr addrspace(1) inreg %ptr) { 5470; GFX900-LABEL: v_shuffle_v2i16_v8i16__4_5: 5471; GFX900: ; %bb.0: 5472; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5473; GFX900-NEXT: v_mov_b32_e32 v4, 0 5474; GFX900-NEXT: ;;#ASMSTART 5475; GFX900-NEXT: ; def v[0:3] 5476; GFX900-NEXT: ;;#ASMEND 5477; GFX900-NEXT: global_store_dword v4, v2, s[16:17] 5478; GFX900-NEXT: s_waitcnt vmcnt(0) 5479; GFX900-NEXT: s_setpc_b64 s[30:31] 5480; 5481; GFX90A-LABEL: v_shuffle_v2i16_v8i16__4_5: 5482; GFX90A: ; %bb.0: 5483; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5484; GFX90A-NEXT: v_mov_b32_e32 v4, 0 5485; GFX90A-NEXT: ;;#ASMSTART 5486; GFX90A-NEXT: ; def v[0:3] 5487; GFX90A-NEXT: ;;#ASMEND 5488; GFX90A-NEXT: global_store_dword v4, v2, s[16:17] 5489; GFX90A-NEXT: s_waitcnt vmcnt(0) 5490; GFX90A-NEXT: s_setpc_b64 s[30:31] 5491; 5492; GFX940-LABEL: v_shuffle_v2i16_v8i16__4_5: 5493; GFX940: ; %bb.0: 5494; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5495; GFX940-NEXT: v_mov_b32_e32 v4, 0 5496; GFX940-NEXT: ;;#ASMSTART 5497; GFX940-NEXT: ; def v[0:3] 5498; GFX940-NEXT: ;;#ASMEND 5499; GFX940-NEXT: global_store_dword v4, v2, s[0:1] sc0 sc1 5500; GFX940-NEXT: s_waitcnt vmcnt(0) 5501; GFX940-NEXT: s_setpc_b64 s[30:31] 5502 %vec0 = call <8 x i16> asm "; def $0", "=v"() 5503 %shuf = shufflevector 
<8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 4, i32 5> 5504 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 5505 ret void 5506} 5507 5508define void @v_shuffle_v2i16_v8i16__5_5(ptr addrspace(1) inreg %ptr) { 5509; GFX900-LABEL: v_shuffle_v2i16_v8i16__5_5: 5510; GFX900: ; %bb.0: 5511; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5512; GFX900-NEXT: ;;#ASMSTART 5513; GFX900-NEXT: ; def v[0:3] 5514; GFX900-NEXT: ;;#ASMEND 5515; GFX900-NEXT: s_mov_b32 s4, 0x7060302 5516; GFX900-NEXT: v_mov_b32_e32 v4, 0 5517; GFX900-NEXT: v_perm_b32 v0, v2, v2, s4 5518; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 5519; GFX900-NEXT: s_waitcnt vmcnt(0) 5520; GFX900-NEXT: s_setpc_b64 s[30:31] 5521; 5522; GFX90A-LABEL: v_shuffle_v2i16_v8i16__5_5: 5523; GFX90A: ; %bb.0: 5524; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5525; GFX90A-NEXT: ;;#ASMSTART 5526; GFX90A-NEXT: ; def v[0:3] 5527; GFX90A-NEXT: ;;#ASMEND 5528; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 5529; GFX90A-NEXT: v_mov_b32_e32 v4, 0 5530; GFX90A-NEXT: v_perm_b32 v0, v2, v2, s4 5531; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 5532; GFX90A-NEXT: s_waitcnt vmcnt(0) 5533; GFX90A-NEXT: s_setpc_b64 s[30:31] 5534; 5535; GFX940-LABEL: v_shuffle_v2i16_v8i16__5_5: 5536; GFX940: ; %bb.0: 5537; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5538; GFX940-NEXT: ;;#ASMSTART 5539; GFX940-NEXT: ; def v[0:3] 5540; GFX940-NEXT: ;;#ASMEND 5541; GFX940-NEXT: s_mov_b32 s2, 0x7060302 5542; GFX940-NEXT: v_mov_b32_e32 v4, 0 5543; GFX940-NEXT: v_perm_b32 v0, v2, v2, s2 5544; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 5545; GFX940-NEXT: s_waitcnt vmcnt(0) 5546; GFX940-NEXT: s_setpc_b64 s[30:31] 5547 %vec0 = call <8 x i16> asm "; def $0", "=v"() 5548 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 5, i32 5> 5549 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 5550 ret void 5551} 5552 5553define void @v_shuffle_v2i16_v8i16__6_5(ptr addrspace(1) inreg %ptr) { 5554; GFX900-LABEL: 
v_shuffle_v2i16_v8i16__6_5: 5555; GFX900: ; %bb.0: 5556; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5557; GFX900-NEXT: ;;#ASMSTART 5558; GFX900-NEXT: ; def v[0:3] 5559; GFX900-NEXT: ;;#ASMEND 5560; GFX900-NEXT: s_mov_b32 s4, 0xffff 5561; GFX900-NEXT: v_mov_b32_e32 v4, 0 5562; GFX900-NEXT: v_bfi_b32 v0, s4, v3, v2 5563; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 5564; GFX900-NEXT: s_waitcnt vmcnt(0) 5565; GFX900-NEXT: s_setpc_b64 s[30:31] 5566; 5567; GFX90A-LABEL: v_shuffle_v2i16_v8i16__6_5: 5568; GFX90A: ; %bb.0: 5569; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5570; GFX90A-NEXT: ;;#ASMSTART 5571; GFX90A-NEXT: ; def v[0:3] 5572; GFX90A-NEXT: ;;#ASMEND 5573; GFX90A-NEXT: s_mov_b32 s4, 0xffff 5574; GFX90A-NEXT: v_mov_b32_e32 v4, 0 5575; GFX90A-NEXT: v_bfi_b32 v0, s4, v3, v2 5576; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 5577; GFX90A-NEXT: s_waitcnt vmcnt(0) 5578; GFX90A-NEXT: s_setpc_b64 s[30:31] 5579; 5580; GFX940-LABEL: v_shuffle_v2i16_v8i16__6_5: 5581; GFX940: ; %bb.0: 5582; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5583; GFX940-NEXT: ;;#ASMSTART 5584; GFX940-NEXT: ; def v[0:3] 5585; GFX940-NEXT: ;;#ASMEND 5586; GFX940-NEXT: s_mov_b32 s2, 0xffff 5587; GFX940-NEXT: v_mov_b32_e32 v4, 0 5588; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v2 5589; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 5590; GFX940-NEXT: s_waitcnt vmcnt(0) 5591; GFX940-NEXT: s_setpc_b64 s[30:31] 5592 %vec0 = call <8 x i16> asm "; def $0", "=v"() 5593 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 6, i32 5> 5594 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 5595 ret void 5596} 5597 5598define void @v_shuffle_v2i16_v8i16__7_5(ptr addrspace(1) inreg %ptr) { 5599; GFX900-LABEL: v_shuffle_v2i16_v8i16__7_5: 5600; GFX900: ; %bb.0: 5601; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5602; GFX900-NEXT: ;;#ASMSTART 5603; GFX900-NEXT: ; def v[0:3] 5604; GFX900-NEXT: ;;#ASMEND 5605; GFX900-NEXT: s_mov_b32 s4, 0x7060302 5606; 
GFX900-NEXT: v_mov_b32_e32 v4, 0 5607; GFX900-NEXT: v_perm_b32 v0, v2, v3, s4 5608; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 5609; GFX900-NEXT: s_waitcnt vmcnt(0) 5610; GFX900-NEXT: s_setpc_b64 s[30:31] 5611; 5612; GFX90A-LABEL: v_shuffle_v2i16_v8i16__7_5: 5613; GFX90A: ; %bb.0: 5614; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5615; GFX90A-NEXT: ;;#ASMSTART 5616; GFX90A-NEXT: ; def v[0:3] 5617; GFX90A-NEXT: ;;#ASMEND 5618; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 5619; GFX90A-NEXT: v_mov_b32_e32 v4, 0 5620; GFX90A-NEXT: v_perm_b32 v0, v2, v3, s4 5621; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 5622; GFX90A-NEXT: s_waitcnt vmcnt(0) 5623; GFX90A-NEXT: s_setpc_b64 s[30:31] 5624; 5625; GFX940-LABEL: v_shuffle_v2i16_v8i16__7_5: 5626; GFX940: ; %bb.0: 5627; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5628; GFX940-NEXT: ;;#ASMSTART 5629; GFX940-NEXT: ; def v[0:3] 5630; GFX940-NEXT: ;;#ASMEND 5631; GFX940-NEXT: s_mov_b32 s2, 0x7060302 5632; GFX940-NEXT: v_mov_b32_e32 v4, 0 5633; GFX940-NEXT: v_perm_b32 v0, v2, v3, s2 5634; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 5635; GFX940-NEXT: s_waitcnt vmcnt(0) 5636; GFX940-NEXT: s_setpc_b64 s[30:31] 5637 %vec0 = call <8 x i16> asm "; def $0", "=v"() 5638 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 7, i32 5> 5639 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 5640 ret void 5641} 5642 5643define void @v_shuffle_v2i16_v8i16__8_5(ptr addrspace(1) inreg %ptr) { 5644; GFX900-LABEL: v_shuffle_v2i16_v8i16__8_5: 5645; GFX900: ; %bb.0: 5646; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5647; GFX900-NEXT: v_mov_b32_e32 v4, 0 5648; GFX900-NEXT: ;;#ASMSTART 5649; GFX900-NEXT: ; def v[0:3] 5650; GFX900-NEXT: ;;#ASMEND 5651; GFX900-NEXT: global_store_dword v4, v2, s[16:17] 5652; GFX900-NEXT: s_waitcnt vmcnt(0) 5653; GFX900-NEXT: s_setpc_b64 s[30:31] 5654; 5655; GFX90A-LABEL: v_shuffle_v2i16_v8i16__8_5: 5656; GFX90A: ; %bb.0: 5657; GFX90A-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) 5658; GFX90A-NEXT: v_mov_b32_e32 v4, 0 5659; GFX90A-NEXT: ;;#ASMSTART 5660; GFX90A-NEXT: ; def v[0:3] 5661; GFX90A-NEXT: ;;#ASMEND 5662; GFX90A-NEXT: global_store_dword v4, v2, s[16:17] 5663; GFX90A-NEXT: s_waitcnt vmcnt(0) 5664; GFX90A-NEXT: s_setpc_b64 s[30:31] 5665; 5666; GFX940-LABEL: v_shuffle_v2i16_v8i16__8_5: 5667; GFX940: ; %bb.0: 5668; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5669; GFX940-NEXT: v_mov_b32_e32 v4, 0 5670; GFX940-NEXT: ;;#ASMSTART 5671; GFX940-NEXT: ; def v[0:3] 5672; GFX940-NEXT: ;;#ASMEND 5673; GFX940-NEXT: global_store_dword v4, v2, s[0:1] sc0 sc1 5674; GFX940-NEXT: s_waitcnt vmcnt(0) 5675; GFX940-NEXT: s_setpc_b64 s[30:31] 5676 %vec0 = call <8 x i16> asm "; def $0", "=v"() 5677 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 8, i32 5> 5678 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 5679 ret void 5680} 5681 5682define void @v_shuffle_v2i16_v8i16__9_5(ptr addrspace(1) inreg %ptr) { 5683; GFX900-LABEL: v_shuffle_v2i16_v8i16__9_5: 5684; GFX900: ; %bb.0: 5685; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5686; GFX900-NEXT: ;;#ASMSTART 5687; GFX900-NEXT: ; def v[0:3] 5688; GFX900-NEXT: ;;#ASMEND 5689; GFX900-NEXT: s_mov_b32 s4, 0x7060302 5690; GFX900-NEXT: v_mov_b32_e32 v7, 0 5691; GFX900-NEXT: ;;#ASMSTART 5692; GFX900-NEXT: ; def v[3:6] 5693; GFX900-NEXT: ;;#ASMEND 5694; GFX900-NEXT: v_perm_b32 v0, v2, v3, s4 5695; GFX900-NEXT: global_store_dword v7, v0, s[16:17] 5696; GFX900-NEXT: s_waitcnt vmcnt(0) 5697; GFX900-NEXT: s_setpc_b64 s[30:31] 5698; 5699; GFX90A-LABEL: v_shuffle_v2i16_v8i16__9_5: 5700; GFX90A: ; %bb.0: 5701; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5702; GFX90A-NEXT: ;;#ASMSTART 5703; GFX90A-NEXT: ; def v[0:3] 5704; GFX90A-NEXT: ;;#ASMEND 5705; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 5706; GFX90A-NEXT: v_mov_b32_e32 v8, 0 5707; GFX90A-NEXT: ;;#ASMSTART 5708; GFX90A-NEXT: ; def v[4:7] 5709; GFX90A-NEXT: ;;#ASMEND 5710; GFX90A-NEXT: v_perm_b32 v0, 
v2, v4, s4 5711; GFX90A-NEXT: global_store_dword v8, v0, s[16:17] 5712; GFX90A-NEXT: s_waitcnt vmcnt(0) 5713; GFX90A-NEXT: s_setpc_b64 s[30:31] 5714; 5715; GFX940-LABEL: v_shuffle_v2i16_v8i16__9_5: 5716; GFX940: ; %bb.0: 5717; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5718; GFX940-NEXT: ;;#ASMSTART 5719; GFX940-NEXT: ; def v[0:3] 5720; GFX940-NEXT: ;;#ASMEND 5721; GFX940-NEXT: s_mov_b32 s2, 0x7060302 5722; GFX940-NEXT: v_mov_b32_e32 v8, 0 5723; GFX940-NEXT: ;;#ASMSTART 5724; GFX940-NEXT: ; def v[4:7] 5725; GFX940-NEXT: ;;#ASMEND 5726; GFX940-NEXT: s_nop 0 5727; GFX940-NEXT: v_perm_b32 v0, v2, v4, s2 5728; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 5729; GFX940-NEXT: s_waitcnt vmcnt(0) 5730; GFX940-NEXT: s_setpc_b64 s[30:31] 5731 %vec0 = call <8 x i16> asm "; def $0", "=v"() 5732 %vec1 = call <8 x i16> asm "; def $0", "=v"() 5733 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 9, i32 5> 5734 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 5735 ret void 5736} 5737 5738define void @v_shuffle_v2i16_v8i16__10_5(ptr addrspace(1) inreg %ptr) { 5739; GFX900-LABEL: v_shuffle_v2i16_v8i16__10_5: 5740; GFX900: ; %bb.0: 5741; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5742; GFX900-NEXT: ;;#ASMSTART 5743; GFX900-NEXT: ; def v[0:3] 5744; GFX900-NEXT: ;;#ASMEND 5745; GFX900-NEXT: s_mov_b32 s4, 0xffff 5746; GFX900-NEXT: v_mov_b32_e32 v7, 0 5747; GFX900-NEXT: ;;#ASMSTART 5748; GFX900-NEXT: ; def v[3:6] 5749; GFX900-NEXT: ;;#ASMEND 5750; GFX900-NEXT: v_bfi_b32 v0, s4, v4, v2 5751; GFX900-NEXT: global_store_dword v7, v0, s[16:17] 5752; GFX900-NEXT: s_waitcnt vmcnt(0) 5753; GFX900-NEXT: s_setpc_b64 s[30:31] 5754; 5755; GFX90A-LABEL: v_shuffle_v2i16_v8i16__10_5: 5756; GFX90A: ; %bb.0: 5757; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5758; GFX90A-NEXT: ;;#ASMSTART 5759; GFX90A-NEXT: ; def v[0:3] 5760; GFX90A-NEXT: ;;#ASMEND 5761; GFX90A-NEXT: s_mov_b32 s4, 0xffff 5762; GFX90A-NEXT: v_mov_b32_e32 v8, 0 5763; 
GFX90A-NEXT: ;;#ASMSTART 5764; GFX90A-NEXT: ; def v[4:7] 5765; GFX90A-NEXT: ;;#ASMEND 5766; GFX90A-NEXT: v_bfi_b32 v0, s4, v5, v2 5767; GFX90A-NEXT: global_store_dword v8, v0, s[16:17] 5768; GFX90A-NEXT: s_waitcnt vmcnt(0) 5769; GFX90A-NEXT: s_setpc_b64 s[30:31] 5770; 5771; GFX940-LABEL: v_shuffle_v2i16_v8i16__10_5: 5772; GFX940: ; %bb.0: 5773; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5774; GFX940-NEXT: ;;#ASMSTART 5775; GFX940-NEXT: ; def v[0:3] 5776; GFX940-NEXT: ;;#ASMEND 5777; GFX940-NEXT: s_mov_b32 s2, 0xffff 5778; GFX940-NEXT: v_mov_b32_e32 v8, 0 5779; GFX940-NEXT: ;;#ASMSTART 5780; GFX940-NEXT: ; def v[4:7] 5781; GFX940-NEXT: ;;#ASMEND 5782; GFX940-NEXT: s_nop 0 5783; GFX940-NEXT: v_bfi_b32 v0, s2, v5, v2 5784; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 5785; GFX940-NEXT: s_waitcnt vmcnt(0) 5786; GFX940-NEXT: s_setpc_b64 s[30:31] 5787 %vec0 = call <8 x i16> asm "; def $0", "=v"() 5788 %vec1 = call <8 x i16> asm "; def $0", "=v"() 5789 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 10, i32 5> 5790 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 5791 ret void 5792} 5793 5794define void @v_shuffle_v2i16_v8i16__11_5(ptr addrspace(1) inreg %ptr) { 5795; GFX900-LABEL: v_shuffle_v2i16_v8i16__11_5: 5796; GFX900: ; %bb.0: 5797; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5798; GFX900-NEXT: ;;#ASMSTART 5799; GFX900-NEXT: ; def v[0:3] 5800; GFX900-NEXT: ;;#ASMEND 5801; GFX900-NEXT: s_mov_b32 s4, 0x7060302 5802; GFX900-NEXT: v_mov_b32_e32 v7, 0 5803; GFX900-NEXT: ;;#ASMSTART 5804; GFX900-NEXT: ; def v[3:6] 5805; GFX900-NEXT: ;;#ASMEND 5806; GFX900-NEXT: v_perm_b32 v0, v2, v4, s4 5807; GFX900-NEXT: global_store_dword v7, v0, s[16:17] 5808; GFX900-NEXT: s_waitcnt vmcnt(0) 5809; GFX900-NEXT: s_setpc_b64 s[30:31] 5810; 5811; GFX90A-LABEL: v_shuffle_v2i16_v8i16__11_5: 5812; GFX90A: ; %bb.0: 5813; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5814; GFX90A-NEXT: ;;#ASMSTART 5815; GFX90A-NEXT: ; def v[0:3] 
5816; GFX90A-NEXT: ;;#ASMEND 5817; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 5818; GFX90A-NEXT: v_mov_b32_e32 v8, 0 5819; GFX90A-NEXT: ;;#ASMSTART 5820; GFX90A-NEXT: ; def v[4:7] 5821; GFX90A-NEXT: ;;#ASMEND 5822; GFX90A-NEXT: v_perm_b32 v0, v2, v5, s4 5823; GFX90A-NEXT: global_store_dword v8, v0, s[16:17] 5824; GFX90A-NEXT: s_waitcnt vmcnt(0) 5825; GFX90A-NEXT: s_setpc_b64 s[30:31] 5826; 5827; GFX940-LABEL: v_shuffle_v2i16_v8i16__11_5: 5828; GFX940: ; %bb.0: 5829; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5830; GFX940-NEXT: ;;#ASMSTART 5831; GFX940-NEXT: ; def v[0:3] 5832; GFX940-NEXT: ;;#ASMEND 5833; GFX940-NEXT: s_mov_b32 s2, 0x7060302 5834; GFX940-NEXT: v_mov_b32_e32 v8, 0 5835; GFX940-NEXT: ;;#ASMSTART 5836; GFX940-NEXT: ; def v[4:7] 5837; GFX940-NEXT: ;;#ASMEND 5838; GFX940-NEXT: s_nop 0 5839; GFX940-NEXT: v_perm_b32 v0, v2, v5, s2 5840; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 5841; GFX940-NEXT: s_waitcnt vmcnt(0) 5842; GFX940-NEXT: s_setpc_b64 s[30:31] 5843 %vec0 = call <8 x i16> asm "; def $0", "=v"() 5844 %vec1 = call <8 x i16> asm "; def $0", "=v"() 5845 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 11, i32 5> 5846 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 5847 ret void 5848} 5849 5850define void @v_shuffle_v2i16_v8i16__12_5(ptr addrspace(1) inreg %ptr) { 5851; GFX900-LABEL: v_shuffle_v2i16_v8i16__12_5: 5852; GFX900: ; %bb.0: 5853; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5854; GFX900-NEXT: ;;#ASMSTART 5855; GFX900-NEXT: ; def v[0:3] 5856; GFX900-NEXT: ;;#ASMEND 5857; GFX900-NEXT: s_mov_b32 s4, 0xffff 5858; GFX900-NEXT: v_mov_b32_e32 v7, 0 5859; GFX900-NEXT: ;;#ASMSTART 5860; GFX900-NEXT: ; def v[3:6] 5861; GFX900-NEXT: ;;#ASMEND 5862; GFX900-NEXT: v_bfi_b32 v0, s4, v5, v2 5863; GFX900-NEXT: global_store_dword v7, v0, s[16:17] 5864; GFX900-NEXT: s_waitcnt vmcnt(0) 5865; GFX900-NEXT: s_setpc_b64 s[30:31] 5866; 5867; GFX90A-LABEL: v_shuffle_v2i16_v8i16__12_5: 5868; GFX90A: ; %bb.0: 5869; 
GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5870; GFX90A-NEXT: ;;#ASMSTART 5871; GFX90A-NEXT: ; def v[0:3] 5872; GFX90A-NEXT: ;;#ASMEND 5873; GFX90A-NEXT: s_mov_b32 s4, 0xffff 5874; GFX90A-NEXT: v_mov_b32_e32 v8, 0 5875; GFX90A-NEXT: ;;#ASMSTART 5876; GFX90A-NEXT: ; def v[4:7] 5877; GFX90A-NEXT: ;;#ASMEND 5878; GFX90A-NEXT: v_bfi_b32 v0, s4, v6, v2 5879; GFX90A-NEXT: global_store_dword v8, v0, s[16:17] 5880; GFX90A-NEXT: s_waitcnt vmcnt(0) 5881; GFX90A-NEXT: s_setpc_b64 s[30:31] 5882; 5883; GFX940-LABEL: v_shuffle_v2i16_v8i16__12_5: 5884; GFX940: ; %bb.0: 5885; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5886; GFX940-NEXT: ;;#ASMSTART 5887; GFX940-NEXT: ; def v[0:3] 5888; GFX940-NEXT: ;;#ASMEND 5889; GFX940-NEXT: s_mov_b32 s2, 0xffff 5890; GFX940-NEXT: v_mov_b32_e32 v8, 0 5891; GFX940-NEXT: ;;#ASMSTART 5892; GFX940-NEXT: ; def v[4:7] 5893; GFX940-NEXT: ;;#ASMEND 5894; GFX940-NEXT: s_nop 0 5895; GFX940-NEXT: v_bfi_b32 v0, s2, v6, v2 5896; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 5897; GFX940-NEXT: s_waitcnt vmcnt(0) 5898; GFX940-NEXT: s_setpc_b64 s[30:31] 5899 %vec0 = call <8 x i16> asm "; def $0", "=v"() 5900 %vec1 = call <8 x i16> asm "; def $0", "=v"() 5901 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 12, i32 5> 5902 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 5903 ret void 5904} 5905 5906define void @v_shuffle_v2i16_v8i16__13_5(ptr addrspace(1) inreg %ptr) { 5907; GFX900-LABEL: v_shuffle_v2i16_v8i16__13_5: 5908; GFX900: ; %bb.0: 5909; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5910; GFX900-NEXT: ;;#ASMSTART 5911; GFX900-NEXT: ; def v[0:3] 5912; GFX900-NEXT: ;;#ASMEND 5913; GFX900-NEXT: s_mov_b32 s4, 0x7060302 5914; GFX900-NEXT: v_mov_b32_e32 v7, 0 5915; GFX900-NEXT: ;;#ASMSTART 5916; GFX900-NEXT: ; def v[3:6] 5917; GFX900-NEXT: ;;#ASMEND 5918; GFX900-NEXT: v_perm_b32 v0, v2, v5, s4 5919; GFX900-NEXT: global_store_dword v7, v0, s[16:17] 5920; GFX900-NEXT: s_waitcnt vmcnt(0) 5921; 
GFX900-NEXT: s_setpc_b64 s[30:31] 5922; 5923; GFX90A-LABEL: v_shuffle_v2i16_v8i16__13_5: 5924; GFX90A: ; %bb.0: 5925; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5926; GFX90A-NEXT: ;;#ASMSTART 5927; GFX90A-NEXT: ; def v[0:3] 5928; GFX90A-NEXT: ;;#ASMEND 5929; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 5930; GFX90A-NEXT: v_mov_b32_e32 v8, 0 5931; GFX90A-NEXT: ;;#ASMSTART 5932; GFX90A-NEXT: ; def v[4:7] 5933; GFX90A-NEXT: ;;#ASMEND 5934; GFX90A-NEXT: v_perm_b32 v0, v2, v6, s4 5935; GFX90A-NEXT: global_store_dword v8, v0, s[16:17] 5936; GFX90A-NEXT: s_waitcnt vmcnt(0) 5937; GFX90A-NEXT: s_setpc_b64 s[30:31] 5938; 5939; GFX940-LABEL: v_shuffle_v2i16_v8i16__13_5: 5940; GFX940: ; %bb.0: 5941; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5942; GFX940-NEXT: ;;#ASMSTART 5943; GFX940-NEXT: ; def v[0:3] 5944; GFX940-NEXT: ;;#ASMEND 5945; GFX940-NEXT: s_mov_b32 s2, 0x7060302 5946; GFX940-NEXT: v_mov_b32_e32 v8, 0 5947; GFX940-NEXT: ;;#ASMSTART 5948; GFX940-NEXT: ; def v[4:7] 5949; GFX940-NEXT: ;;#ASMEND 5950; GFX940-NEXT: s_nop 0 5951; GFX940-NEXT: v_perm_b32 v0, v2, v6, s2 5952; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 5953; GFX940-NEXT: s_waitcnt vmcnt(0) 5954; GFX940-NEXT: s_setpc_b64 s[30:31] 5955 %vec0 = call <8 x i16> asm "; def $0", "=v"() 5956 %vec1 = call <8 x i16> asm "; def $0", "=v"() 5957 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 13, i32 5> 5958 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 5959 ret void 5960} 5961 5962define void @v_shuffle_v2i16_v8i16__14_5(ptr addrspace(1) inreg %ptr) { 5963; GFX900-LABEL: v_shuffle_v2i16_v8i16__14_5: 5964; GFX900: ; %bb.0: 5965; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5966; GFX900-NEXT: ;;#ASMSTART 5967; GFX900-NEXT: ; def v[0:3] 5968; GFX900-NEXT: ;;#ASMEND 5969; GFX900-NEXT: s_mov_b32 s4, 0xffff 5970; GFX900-NEXT: v_mov_b32_e32 v7, 0 5971; GFX900-NEXT: ;;#ASMSTART 5972; GFX900-NEXT: ; def v[3:6] 5973; GFX900-NEXT: ;;#ASMEND 5974; GFX900-NEXT: 
v_bfi_b32 v0, s4, v6, v2 5975; GFX900-NEXT: global_store_dword v7, v0, s[16:17] 5976; GFX900-NEXT: s_waitcnt vmcnt(0) 5977; GFX900-NEXT: s_setpc_b64 s[30:31] 5978; 5979; GFX90A-LABEL: v_shuffle_v2i16_v8i16__14_5: 5980; GFX90A: ; %bb.0: 5981; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5982; GFX90A-NEXT: ;;#ASMSTART 5983; GFX90A-NEXT: ; def v[0:3] 5984; GFX90A-NEXT: ;;#ASMEND 5985; GFX90A-NEXT: s_mov_b32 s4, 0xffff 5986; GFX90A-NEXT: v_mov_b32_e32 v8, 0 5987; GFX90A-NEXT: ;;#ASMSTART 5988; GFX90A-NEXT: ; def v[4:7] 5989; GFX90A-NEXT: ;;#ASMEND 5990; GFX90A-NEXT: v_bfi_b32 v0, s4, v7, v2 5991; GFX90A-NEXT: global_store_dword v8, v0, s[16:17] 5992; GFX90A-NEXT: s_waitcnt vmcnt(0) 5993; GFX90A-NEXT: s_setpc_b64 s[30:31] 5994; 5995; GFX940-LABEL: v_shuffle_v2i16_v8i16__14_5: 5996; GFX940: ; %bb.0: 5997; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5998; GFX940-NEXT: ;;#ASMSTART 5999; GFX940-NEXT: ; def v[0:3] 6000; GFX940-NEXT: ;;#ASMEND 6001; GFX940-NEXT: s_mov_b32 s2, 0xffff 6002; GFX940-NEXT: v_mov_b32_e32 v8, 0 6003; GFX940-NEXT: ;;#ASMSTART 6004; GFX940-NEXT: ; def v[4:7] 6005; GFX940-NEXT: ;;#ASMEND 6006; GFX940-NEXT: s_nop 0 6007; GFX940-NEXT: v_bfi_b32 v0, s2, v7, v2 6008; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 6009; GFX940-NEXT: s_waitcnt vmcnt(0) 6010; GFX940-NEXT: s_setpc_b64 s[30:31] 6011 %vec0 = call <8 x i16> asm "; def $0", "=v"() 6012 %vec1 = call <8 x i16> asm "; def $0", "=v"() 6013 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 14, i32 5> 6014 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 6015 ret void 6016} 6017 6018define void @v_shuffle_v2i16_v8i16__u_6(ptr addrspace(1) inreg %ptr) { 6019; GFX900-LABEL: v_shuffle_v2i16_v8i16__u_6: 6020; GFX900: ; %bb.0: 6021; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6022; GFX900-NEXT: ;;#ASMSTART 6023; GFX900-NEXT: ; def v[0:3] 6024; GFX900-NEXT: ;;#ASMEND 6025; GFX900-NEXT: v_mov_b32_e32 v4, 0 6026; GFX900-NEXT: v_lshlrev_b32_e32 v0, 16, 
v3 6027; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 6028; GFX900-NEXT: s_waitcnt vmcnt(0) 6029; GFX900-NEXT: s_setpc_b64 s[30:31] 6030; 6031; GFX90A-LABEL: v_shuffle_v2i16_v8i16__u_6: 6032; GFX90A: ; %bb.0: 6033; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6034; GFX90A-NEXT: ;;#ASMSTART 6035; GFX90A-NEXT: ; def v[0:3] 6036; GFX90A-NEXT: ;;#ASMEND 6037; GFX90A-NEXT: v_mov_b32_e32 v4, 0 6038; GFX90A-NEXT: v_lshlrev_b32_e32 v0, 16, v3 6039; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 6040; GFX90A-NEXT: s_waitcnt vmcnt(0) 6041; GFX90A-NEXT: s_setpc_b64 s[30:31] 6042; 6043; GFX940-LABEL: v_shuffle_v2i16_v8i16__u_6: 6044; GFX940: ; %bb.0: 6045; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6046; GFX940-NEXT: ;;#ASMSTART 6047; GFX940-NEXT: ; def v[0:3] 6048; GFX940-NEXT: ;;#ASMEND 6049; GFX940-NEXT: v_mov_b32_e32 v4, 0 6050; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v3 6051; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 6052; GFX940-NEXT: s_waitcnt vmcnt(0) 6053; GFX940-NEXT: s_setpc_b64 s[30:31] 6054 %vec0 = call <8 x i16> asm "; def $0", "=v"() 6055 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 poison, i32 6> 6056 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 6057 ret void 6058} 6059 6060define void @v_shuffle_v2i16_v8i16__0_6(ptr addrspace(1) inreg %ptr) { 6061; GFX900-LABEL: v_shuffle_v2i16_v8i16__0_6: 6062; GFX900: ; %bb.0: 6063; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6064; GFX900-NEXT: ;;#ASMSTART 6065; GFX900-NEXT: ; def v[0:3] 6066; GFX900-NEXT: ;;#ASMEND 6067; GFX900-NEXT: s_mov_b32 s4, 0x5040100 6068; GFX900-NEXT: v_mov_b32_e32 v4, 0 6069; GFX900-NEXT: v_perm_b32 v0, v3, v0, s4 6070; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 6071; GFX900-NEXT: s_waitcnt vmcnt(0) 6072; GFX900-NEXT: s_setpc_b64 s[30:31] 6073; 6074; GFX90A-LABEL: v_shuffle_v2i16_v8i16__0_6: 6075; GFX90A: ; %bb.0: 6076; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6077; GFX90A-NEXT: ;;#ASMSTART 6078; 
GFX90A-NEXT: ; def v[0:3] 6079; GFX90A-NEXT: ;;#ASMEND 6080; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 6081; GFX90A-NEXT: v_mov_b32_e32 v4, 0 6082; GFX90A-NEXT: v_perm_b32 v0, v3, v0, s4 6083; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 6084; GFX90A-NEXT: s_waitcnt vmcnt(0) 6085; GFX90A-NEXT: s_setpc_b64 s[30:31] 6086; 6087; GFX940-LABEL: v_shuffle_v2i16_v8i16__0_6: 6088; GFX940: ; %bb.0: 6089; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6090; GFX940-NEXT: ;;#ASMSTART 6091; GFX940-NEXT: ; def v[0:3] 6092; GFX940-NEXT: ;;#ASMEND 6093; GFX940-NEXT: s_mov_b32 s2, 0x5040100 6094; GFX940-NEXT: v_mov_b32_e32 v4, 0 6095; GFX940-NEXT: v_perm_b32 v0, v3, v0, s2 6096; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 6097; GFX940-NEXT: s_waitcnt vmcnt(0) 6098; GFX940-NEXT: s_setpc_b64 s[30:31] 6099 %vec0 = call <8 x i16> asm "; def $0", "=v"() 6100 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 0, i32 6> 6101 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 6102 ret void 6103} 6104 6105define void @v_shuffle_v2i16_v8i16__1_6(ptr addrspace(1) inreg %ptr) { 6106; GFX900-LABEL: v_shuffle_v2i16_v8i16__1_6: 6107; GFX900: ; %bb.0: 6108; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6109; GFX900-NEXT: ;;#ASMSTART 6110; GFX900-NEXT: ; def v[0:3] 6111; GFX900-NEXT: ;;#ASMEND 6112; GFX900-NEXT: v_mov_b32_e32 v4, 0 6113; GFX900-NEXT: v_alignbit_b32 v0, v3, v0, 16 6114; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 6115; GFX900-NEXT: s_waitcnt vmcnt(0) 6116; GFX900-NEXT: s_setpc_b64 s[30:31] 6117; 6118; GFX90A-LABEL: v_shuffle_v2i16_v8i16__1_6: 6119; GFX90A: ; %bb.0: 6120; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6121; GFX90A-NEXT: ;;#ASMSTART 6122; GFX90A-NEXT: ; def v[0:3] 6123; GFX90A-NEXT: ;;#ASMEND 6124; GFX90A-NEXT: v_mov_b32_e32 v4, 0 6125; GFX90A-NEXT: v_alignbit_b32 v0, v3, v0, 16 6126; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 6127; GFX90A-NEXT: s_waitcnt vmcnt(0) 6128; GFX90A-NEXT: s_setpc_b64 s[30:31] 
6129; 6130; GFX940-LABEL: v_shuffle_v2i16_v8i16__1_6: 6131; GFX940: ; %bb.0: 6132; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6133; GFX940-NEXT: ;;#ASMSTART 6134; GFX940-NEXT: ; def v[0:3] 6135; GFX940-NEXT: ;;#ASMEND 6136; GFX940-NEXT: v_mov_b32_e32 v4, 0 6137; GFX940-NEXT: v_alignbit_b32 v0, v3, v0, 16 6138; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 6139; GFX940-NEXT: s_waitcnt vmcnt(0) 6140; GFX940-NEXT: s_setpc_b64 s[30:31] 6141 %vec0 = call <8 x i16> asm "; def $0", "=v"() 6142 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 1, i32 6> 6143 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 6144 ret void 6145} 6146 6147define void @v_shuffle_v2i16_v8i16__2_6(ptr addrspace(1) inreg %ptr) { 6148; GFX900-LABEL: v_shuffle_v2i16_v8i16__2_6: 6149; GFX900: ; %bb.0: 6150; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6151; GFX900-NEXT: ;;#ASMSTART 6152; GFX900-NEXT: ; def v[0:3] 6153; GFX900-NEXT: ;;#ASMEND 6154; GFX900-NEXT: s_mov_b32 s4, 0x5040100 6155; GFX900-NEXT: v_mov_b32_e32 v4, 0 6156; GFX900-NEXT: v_perm_b32 v0, v3, v1, s4 6157; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 6158; GFX900-NEXT: s_waitcnt vmcnt(0) 6159; GFX900-NEXT: s_setpc_b64 s[30:31] 6160; 6161; GFX90A-LABEL: v_shuffle_v2i16_v8i16__2_6: 6162; GFX90A: ; %bb.0: 6163; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6164; GFX90A-NEXT: ;;#ASMSTART 6165; GFX90A-NEXT: ; def v[0:3] 6166; GFX90A-NEXT: ;;#ASMEND 6167; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 6168; GFX90A-NEXT: v_mov_b32_e32 v4, 0 6169; GFX90A-NEXT: v_perm_b32 v0, v3, v1, s4 6170; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 6171; GFX90A-NEXT: s_waitcnt vmcnt(0) 6172; GFX90A-NEXT: s_setpc_b64 s[30:31] 6173; 6174; GFX940-LABEL: v_shuffle_v2i16_v8i16__2_6: 6175; GFX940: ; %bb.0: 6176; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6177; GFX940-NEXT: ;;#ASMSTART 6178; GFX940-NEXT: ; def v[0:3] 6179; GFX940-NEXT: ;;#ASMEND 6180; GFX940-NEXT: s_mov_b32 s2, 0x5040100 6181; 
GFX940-NEXT: v_mov_b32_e32 v4, 0 6182; GFX940-NEXT: v_perm_b32 v0, v3, v1, s2 6183; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 6184; GFX940-NEXT: s_waitcnt vmcnt(0) 6185; GFX940-NEXT: s_setpc_b64 s[30:31] 6186 %vec0 = call <8 x i16> asm "; def $0", "=v"() 6187 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 2, i32 6> 6188 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 6189 ret void 6190} 6191 6192define void @v_shuffle_v2i16_v8i16__3_6(ptr addrspace(1) inreg %ptr) { 6193; GFX900-LABEL: v_shuffle_v2i16_v8i16__3_6: 6194; GFX900: ; %bb.0: 6195; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6196; GFX900-NEXT: ;;#ASMSTART 6197; GFX900-NEXT: ; def v[0:3] 6198; GFX900-NEXT: ;;#ASMEND 6199; GFX900-NEXT: v_mov_b32_e32 v4, 0 6200; GFX900-NEXT: v_alignbit_b32 v0, v3, v1, 16 6201; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 6202; GFX900-NEXT: s_waitcnt vmcnt(0) 6203; GFX900-NEXT: s_setpc_b64 s[30:31] 6204; 6205; GFX90A-LABEL: v_shuffle_v2i16_v8i16__3_6: 6206; GFX90A: ; %bb.0: 6207; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6208; GFX90A-NEXT: ;;#ASMSTART 6209; GFX90A-NEXT: ; def v[0:3] 6210; GFX90A-NEXT: ;;#ASMEND 6211; GFX90A-NEXT: v_mov_b32_e32 v4, 0 6212; GFX90A-NEXT: v_alignbit_b32 v0, v3, v1, 16 6213; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 6214; GFX90A-NEXT: s_waitcnt vmcnt(0) 6215; GFX90A-NEXT: s_setpc_b64 s[30:31] 6216; 6217; GFX940-LABEL: v_shuffle_v2i16_v8i16__3_6: 6218; GFX940: ; %bb.0: 6219; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6220; GFX940-NEXT: ;;#ASMSTART 6221; GFX940-NEXT: ; def v[0:3] 6222; GFX940-NEXT: ;;#ASMEND 6223; GFX940-NEXT: v_mov_b32_e32 v4, 0 6224; GFX940-NEXT: v_alignbit_b32 v0, v3, v1, 16 6225; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 6226; GFX940-NEXT: s_waitcnt vmcnt(0) 6227; GFX940-NEXT: s_setpc_b64 s[30:31] 6228 %vec0 = call <8 x i16> asm "; def $0", "=v"() 6229 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 3, i32 6> 
6230 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 6231 ret void 6232} 6233 6234define void @v_shuffle_v2i16_v8i16__4_6(ptr addrspace(1) inreg %ptr) { 6235; GFX900-LABEL: v_shuffle_v2i16_v8i16__4_6: 6236; GFX900: ; %bb.0: 6237; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6238; GFX900-NEXT: ;;#ASMSTART 6239; GFX900-NEXT: ; def v[0:3] 6240; GFX900-NEXT: ;;#ASMEND 6241; GFX900-NEXT: s_mov_b32 s4, 0x5040100 6242; GFX900-NEXT: v_mov_b32_e32 v4, 0 6243; GFX900-NEXT: v_perm_b32 v0, v3, v2, s4 6244; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 6245; GFX900-NEXT: s_waitcnt vmcnt(0) 6246; GFX900-NEXT: s_setpc_b64 s[30:31] 6247; 6248; GFX90A-LABEL: v_shuffle_v2i16_v8i16__4_6: 6249; GFX90A: ; %bb.0: 6250; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6251; GFX90A-NEXT: ;;#ASMSTART 6252; GFX90A-NEXT: ; def v[0:3] 6253; GFX90A-NEXT: ;;#ASMEND 6254; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 6255; GFX90A-NEXT: v_mov_b32_e32 v4, 0 6256; GFX90A-NEXT: v_perm_b32 v0, v3, v2, s4 6257; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 6258; GFX90A-NEXT: s_waitcnt vmcnt(0) 6259; GFX90A-NEXT: s_setpc_b64 s[30:31] 6260; 6261; GFX940-LABEL: v_shuffle_v2i16_v8i16__4_6: 6262; GFX940: ; %bb.0: 6263; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6264; GFX940-NEXT: ;;#ASMSTART 6265; GFX940-NEXT: ; def v[0:3] 6266; GFX940-NEXT: ;;#ASMEND 6267; GFX940-NEXT: s_mov_b32 s2, 0x5040100 6268; GFX940-NEXT: v_mov_b32_e32 v4, 0 6269; GFX940-NEXT: v_perm_b32 v0, v3, v2, s2 6270; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 6271; GFX940-NEXT: s_waitcnt vmcnt(0) 6272; GFX940-NEXT: s_setpc_b64 s[30:31] 6273 %vec0 = call <8 x i16> asm "; def $0", "=v"() 6274 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 4, i32 6> 6275 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 6276 ret void 6277} 6278 6279define void @v_shuffle_v2i16_v8i16__5_6(ptr addrspace(1) inreg %ptr) { 6280; GFX900-LABEL: v_shuffle_v2i16_v8i16__5_6: 6281; GFX900: ; %bb.0: 6282; 
GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6283; GFX900-NEXT: ;;#ASMSTART 6284; GFX900-NEXT: ; def v[0:3] 6285; GFX900-NEXT: ;;#ASMEND 6286; GFX900-NEXT: v_mov_b32_e32 v4, 0 6287; GFX900-NEXT: v_alignbit_b32 v0, v3, v2, 16 6288; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 6289; GFX900-NEXT: s_waitcnt vmcnt(0) 6290; GFX900-NEXT: s_setpc_b64 s[30:31] 6291; 6292; GFX90A-LABEL: v_shuffle_v2i16_v8i16__5_6: 6293; GFX90A: ; %bb.0: 6294; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6295; GFX90A-NEXT: ;;#ASMSTART 6296; GFX90A-NEXT: ; def v[0:3] 6297; GFX90A-NEXT: ;;#ASMEND 6298; GFX90A-NEXT: v_mov_b32_e32 v4, 0 6299; GFX90A-NEXT: v_alignbit_b32 v0, v3, v2, 16 6300; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 6301; GFX90A-NEXT: s_waitcnt vmcnt(0) 6302; GFX90A-NEXT: s_setpc_b64 s[30:31] 6303; 6304; GFX940-LABEL: v_shuffle_v2i16_v8i16__5_6: 6305; GFX940: ; %bb.0: 6306; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6307; GFX940-NEXT: ;;#ASMSTART 6308; GFX940-NEXT: ; def v[0:3] 6309; GFX940-NEXT: ;;#ASMEND 6310; GFX940-NEXT: v_mov_b32_e32 v4, 0 6311; GFX940-NEXT: v_alignbit_b32 v0, v3, v2, 16 6312; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 6313; GFX940-NEXT: s_waitcnt vmcnt(0) 6314; GFX940-NEXT: s_setpc_b64 s[30:31] 6315 %vec0 = call <8 x i16> asm "; def $0", "=v"() 6316 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 5, i32 6> 6317 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 6318 ret void 6319} 6320 6321define void @v_shuffle_v2i16_v8i16__6_6(ptr addrspace(1) inreg %ptr) { 6322; GFX900-LABEL: v_shuffle_v2i16_v8i16__6_6: 6323; GFX900: ; %bb.0: 6324; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6325; GFX900-NEXT: ;;#ASMSTART 6326; GFX900-NEXT: ; def v[0:3] 6327; GFX900-NEXT: ;;#ASMEND 6328; GFX900-NEXT: s_mov_b32 s4, 0x5040100 6329; GFX900-NEXT: v_mov_b32_e32 v4, 0 6330; GFX900-NEXT: v_perm_b32 v0, v3, v3, s4 6331; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 6332; GFX900-NEXT: s_waitcnt 
vmcnt(0) 6333; GFX900-NEXT: s_setpc_b64 s[30:31] 6334; 6335; GFX90A-LABEL: v_shuffle_v2i16_v8i16__6_6: 6336; GFX90A: ; %bb.0: 6337; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6338; GFX90A-NEXT: ;;#ASMSTART 6339; GFX90A-NEXT: ; def v[0:3] 6340; GFX90A-NEXT: ;;#ASMEND 6341; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 6342; GFX90A-NEXT: v_mov_b32_e32 v4, 0 6343; GFX90A-NEXT: v_perm_b32 v0, v3, v3, s4 6344; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 6345; GFX90A-NEXT: s_waitcnt vmcnt(0) 6346; GFX90A-NEXT: s_setpc_b64 s[30:31] 6347; 6348; GFX940-LABEL: v_shuffle_v2i16_v8i16__6_6: 6349; GFX940: ; %bb.0: 6350; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6351; GFX940-NEXT: ;;#ASMSTART 6352; GFX940-NEXT: ; def v[0:3] 6353; GFX940-NEXT: ;;#ASMEND 6354; GFX940-NEXT: s_mov_b32 s2, 0x5040100 6355; GFX940-NEXT: v_mov_b32_e32 v4, 0 6356; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 6357; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 6358; GFX940-NEXT: s_waitcnt vmcnt(0) 6359; GFX940-NEXT: s_setpc_b64 s[30:31] 6360 %vec0 = call <8 x i16> asm "; def $0", "=v"() 6361 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 6, i32 6> 6362 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 6363 ret void 6364} 6365 6366define void @v_shuffle_v2i16_v8i16__7_6(ptr addrspace(1) inreg %ptr) { 6367; GFX900-LABEL: v_shuffle_v2i16_v8i16__7_6: 6368; GFX900: ; %bb.0: 6369; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6370; GFX900-NEXT: ;;#ASMSTART 6371; GFX900-NEXT: ; def v[0:3] 6372; GFX900-NEXT: ;;#ASMEND 6373; GFX900-NEXT: v_mov_b32_e32 v4, 0 6374; GFX900-NEXT: v_alignbit_b32 v0, v3, v3, 16 6375; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 6376; GFX900-NEXT: s_waitcnt vmcnt(0) 6377; GFX900-NEXT: s_setpc_b64 s[30:31] 6378; 6379; GFX90A-LABEL: v_shuffle_v2i16_v8i16__7_6: 6380; GFX90A: ; %bb.0: 6381; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6382; GFX90A-NEXT: ;;#ASMSTART 6383; GFX90A-NEXT: ; def v[0:3] 6384; GFX90A-NEXT: ;;#ASMEND 
6385; GFX90A-NEXT: v_mov_b32_e32 v4, 0 6386; GFX90A-NEXT: v_alignbit_b32 v0, v3, v3, 16 6387; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 6388; GFX90A-NEXT: s_waitcnt vmcnt(0) 6389; GFX90A-NEXT: s_setpc_b64 s[30:31] 6390; 6391; GFX940-LABEL: v_shuffle_v2i16_v8i16__7_6: 6392; GFX940: ; %bb.0: 6393; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6394; GFX940-NEXT: ;;#ASMSTART 6395; GFX940-NEXT: ; def v[0:3] 6396; GFX940-NEXT: ;;#ASMEND 6397; GFX940-NEXT: v_mov_b32_e32 v4, 0 6398; GFX940-NEXT: v_alignbit_b32 v0, v3, v3, 16 6399; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 6400; GFX940-NEXT: s_waitcnt vmcnt(0) 6401; GFX940-NEXT: s_setpc_b64 s[30:31] 6402 %vec0 = call <8 x i16> asm "; def $0", "=v"() 6403 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 7, i32 6> 6404 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 6405 ret void 6406} 6407 6408define void @v_shuffle_v2i16_v8i16__8_6(ptr addrspace(1) inreg %ptr) { 6409; GFX900-LABEL: v_shuffle_v2i16_v8i16__8_6: 6410; GFX900: ; %bb.0: 6411; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6412; GFX900-NEXT: ;;#ASMSTART 6413; GFX900-NEXT: ; def v[0:3] 6414; GFX900-NEXT: ;;#ASMEND 6415; GFX900-NEXT: v_mov_b32_e32 v4, 0 6416; GFX900-NEXT: v_lshlrev_b32_e32 v0, 16, v3 6417; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 6418; GFX900-NEXT: s_waitcnt vmcnt(0) 6419; GFX900-NEXT: s_setpc_b64 s[30:31] 6420; 6421; GFX90A-LABEL: v_shuffle_v2i16_v8i16__8_6: 6422; GFX90A: ; %bb.0: 6423; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6424; GFX90A-NEXT: ;;#ASMSTART 6425; GFX90A-NEXT: ; def v[0:3] 6426; GFX90A-NEXT: ;;#ASMEND 6427; GFX90A-NEXT: v_mov_b32_e32 v4, 0 6428; GFX90A-NEXT: v_lshlrev_b32_e32 v0, 16, v3 6429; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 6430; GFX90A-NEXT: s_waitcnt vmcnt(0) 6431; GFX90A-NEXT: s_setpc_b64 s[30:31] 6432; 6433; GFX940-LABEL: v_shuffle_v2i16_v8i16__8_6: 6434; GFX940: ; %bb.0: 6435; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 
6436; GFX940-NEXT: ;;#ASMSTART 6437; GFX940-NEXT: ; def v[0:3] 6438; GFX940-NEXT: ;;#ASMEND 6439; GFX940-NEXT: v_mov_b32_e32 v4, 0 6440; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v3 6441; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 6442; GFX940-NEXT: s_waitcnt vmcnt(0) 6443; GFX940-NEXT: s_setpc_b64 s[30:31] 6444 %vec0 = call <8 x i16> asm "; def $0", "=v"() 6445 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 8, i32 6> 6446 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 6447 ret void 6448} 6449 6450define void @v_shuffle_v2i16_v8i16__9_6(ptr addrspace(1) inreg %ptr) { 6451; GFX900-LABEL: v_shuffle_v2i16_v8i16__9_6: 6452; GFX900: ; %bb.0: 6453; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6454; GFX900-NEXT: ;;#ASMSTART 6455; GFX900-NEXT: ; def v[0:3] 6456; GFX900-NEXT: ;;#ASMEND 6457; GFX900-NEXT: v_mov_b32_e32 v8, 0 6458; GFX900-NEXT: ;;#ASMSTART 6459; GFX900-NEXT: ; def v[4:7] 6460; GFX900-NEXT: ;;#ASMEND 6461; GFX900-NEXT: v_alignbit_b32 v0, v3, v4, 16 6462; GFX900-NEXT: global_store_dword v8, v0, s[16:17] 6463; GFX900-NEXT: s_waitcnt vmcnt(0) 6464; GFX900-NEXT: s_setpc_b64 s[30:31] 6465; 6466; GFX90A-LABEL: v_shuffle_v2i16_v8i16__9_6: 6467; GFX90A: ; %bb.0: 6468; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6469; GFX90A-NEXT: ;;#ASMSTART 6470; GFX90A-NEXT: ; def v[0:3] 6471; GFX90A-NEXT: ;;#ASMEND 6472; GFX90A-NEXT: v_mov_b32_e32 v8, 0 6473; GFX90A-NEXT: ;;#ASMSTART 6474; GFX90A-NEXT: ; def v[4:7] 6475; GFX90A-NEXT: ;;#ASMEND 6476; GFX90A-NEXT: v_alignbit_b32 v0, v3, v4, 16 6477; GFX90A-NEXT: global_store_dword v8, v0, s[16:17] 6478; GFX90A-NEXT: s_waitcnt vmcnt(0) 6479; GFX90A-NEXT: s_setpc_b64 s[30:31] 6480; 6481; GFX940-LABEL: v_shuffle_v2i16_v8i16__9_6: 6482; GFX940: ; %bb.0: 6483; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6484; GFX940-NEXT: ;;#ASMSTART 6485; GFX940-NEXT: ; def v[0:3] 6486; GFX940-NEXT: ;;#ASMEND 6487; GFX940-NEXT: v_mov_b32_e32 v8, 0 6488; GFX940-NEXT: ;;#ASMSTART 6489; 
GFX940-NEXT: ; def v[4:7] 6490; GFX940-NEXT: ;;#ASMEND 6491; GFX940-NEXT: s_nop 0 6492; GFX940-NEXT: v_alignbit_b32 v0, v3, v4, 16 6493; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 6494; GFX940-NEXT: s_waitcnt vmcnt(0) 6495; GFX940-NEXT: s_setpc_b64 s[30:31] 6496 %vec0 = call <8 x i16> asm "; def $0", "=v"() 6497 %vec1 = call <8 x i16> asm "; def $0", "=v"() 6498 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 9, i32 6> 6499 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 6500 ret void 6501} 6502 6503define void @v_shuffle_v2i16_v8i16__10_6(ptr addrspace(1) inreg %ptr) { 6504; GFX900-LABEL: v_shuffle_v2i16_v8i16__10_6: 6505; GFX900: ; %bb.0: 6506; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6507; GFX900-NEXT: ;;#ASMSTART 6508; GFX900-NEXT: ; def v[0:3] 6509; GFX900-NEXT: ;;#ASMEND 6510; GFX900-NEXT: s_mov_b32 s4, 0x5040100 6511; GFX900-NEXT: v_mov_b32_e32 v8, 0 6512; GFX900-NEXT: ;;#ASMSTART 6513; GFX900-NEXT: ; def v[4:7] 6514; GFX900-NEXT: ;;#ASMEND 6515; GFX900-NEXT: v_perm_b32 v0, v3, v5, s4 6516; GFX900-NEXT: global_store_dword v8, v0, s[16:17] 6517; GFX900-NEXT: s_waitcnt vmcnt(0) 6518; GFX900-NEXT: s_setpc_b64 s[30:31] 6519; 6520; GFX90A-LABEL: v_shuffle_v2i16_v8i16__10_6: 6521; GFX90A: ; %bb.0: 6522; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6523; GFX90A-NEXT: ;;#ASMSTART 6524; GFX90A-NEXT: ; def v[0:3] 6525; GFX90A-NEXT: ;;#ASMEND 6526; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 6527; GFX90A-NEXT: v_mov_b32_e32 v8, 0 6528; GFX90A-NEXT: ;;#ASMSTART 6529; GFX90A-NEXT: ; def v[4:7] 6530; GFX90A-NEXT: ;;#ASMEND 6531; GFX90A-NEXT: v_perm_b32 v0, v3, v5, s4 6532; GFX90A-NEXT: global_store_dword v8, v0, s[16:17] 6533; GFX90A-NEXT: s_waitcnt vmcnt(0) 6534; GFX90A-NEXT: s_setpc_b64 s[30:31] 6535; 6536; GFX940-LABEL: v_shuffle_v2i16_v8i16__10_6: 6537; GFX940: ; %bb.0: 6538; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6539; GFX940-NEXT: ;;#ASMSTART 6540; GFX940-NEXT: ; def v[0:3] 6541; GFX940-NEXT: 
;;#ASMEND 6542; GFX940-NEXT: s_mov_b32 s2, 0x5040100 6543; GFX940-NEXT: v_mov_b32_e32 v8, 0 6544; GFX940-NEXT: ;;#ASMSTART 6545; GFX940-NEXT: ; def v[4:7] 6546; GFX940-NEXT: ;;#ASMEND 6547; GFX940-NEXT: s_nop 0 6548; GFX940-NEXT: v_perm_b32 v0, v3, v5, s2 6549; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 6550; GFX940-NEXT: s_waitcnt vmcnt(0) 6551; GFX940-NEXT: s_setpc_b64 s[30:31] 6552 %vec0 = call <8 x i16> asm "; def $0", "=v"() 6553 %vec1 = call <8 x i16> asm "; def $0", "=v"() 6554 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 10, i32 6> 6555 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 6556 ret void 6557} 6558 6559define void @v_shuffle_v2i16_v8i16__11_6(ptr addrspace(1) inreg %ptr) { 6560; GFX900-LABEL: v_shuffle_v2i16_v8i16__11_6: 6561; GFX900: ; %bb.0: 6562; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6563; GFX900-NEXT: ;;#ASMSTART 6564; GFX900-NEXT: ; def v[0:3] 6565; GFX900-NEXT: ;;#ASMEND 6566; GFX900-NEXT: v_mov_b32_e32 v8, 0 6567; GFX900-NEXT: ;;#ASMSTART 6568; GFX900-NEXT: ; def v[4:7] 6569; GFX900-NEXT: ;;#ASMEND 6570; GFX900-NEXT: v_alignbit_b32 v0, v3, v5, 16 6571; GFX900-NEXT: global_store_dword v8, v0, s[16:17] 6572; GFX900-NEXT: s_waitcnt vmcnt(0) 6573; GFX900-NEXT: s_setpc_b64 s[30:31] 6574; 6575; GFX90A-LABEL: v_shuffle_v2i16_v8i16__11_6: 6576; GFX90A: ; %bb.0: 6577; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6578; GFX90A-NEXT: ;;#ASMSTART 6579; GFX90A-NEXT: ; def v[0:3] 6580; GFX90A-NEXT: ;;#ASMEND 6581; GFX90A-NEXT: v_mov_b32_e32 v8, 0 6582; GFX90A-NEXT: ;;#ASMSTART 6583; GFX90A-NEXT: ; def v[4:7] 6584; GFX90A-NEXT: ;;#ASMEND 6585; GFX90A-NEXT: v_alignbit_b32 v0, v3, v5, 16 6586; GFX90A-NEXT: global_store_dword v8, v0, s[16:17] 6587; GFX90A-NEXT: s_waitcnt vmcnt(0) 6588; GFX90A-NEXT: s_setpc_b64 s[30:31] 6589; 6590; GFX940-LABEL: v_shuffle_v2i16_v8i16__11_6: 6591; GFX940: ; %bb.0: 6592; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6593; GFX940-NEXT: ;;#ASMSTART 6594; 
GFX940-NEXT: ; def v[0:3] 6595; GFX940-NEXT: ;;#ASMEND 6596; GFX940-NEXT: v_mov_b32_e32 v8, 0 6597; GFX940-NEXT: ;;#ASMSTART 6598; GFX940-NEXT: ; def v[4:7] 6599; GFX940-NEXT: ;;#ASMEND 6600; GFX940-NEXT: s_nop 0 6601; GFX940-NEXT: v_alignbit_b32 v0, v3, v5, 16 6602; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 6603; GFX940-NEXT: s_waitcnt vmcnt(0) 6604; GFX940-NEXT: s_setpc_b64 s[30:31] 6605 %vec0 = call <8 x i16> asm "; def $0", "=v"() 6606 %vec1 = call <8 x i16> asm "; def $0", "=v"() 6607 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 11, i32 6> 6608 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 6609 ret void 6610} 6611 6612define void @v_shuffle_v2i16_v8i16__12_6(ptr addrspace(1) inreg %ptr) { 6613; GFX900-LABEL: v_shuffle_v2i16_v8i16__12_6: 6614; GFX900: ; %bb.0: 6615; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6616; GFX900-NEXT: ;;#ASMSTART 6617; GFX900-NEXT: ; def v[0:3] 6618; GFX900-NEXT: ;;#ASMEND 6619; GFX900-NEXT: s_mov_b32 s4, 0x5040100 6620; GFX900-NEXT: v_mov_b32_e32 v8, 0 6621; GFX900-NEXT: ;;#ASMSTART 6622; GFX900-NEXT: ; def v[4:7] 6623; GFX900-NEXT: ;;#ASMEND 6624; GFX900-NEXT: v_perm_b32 v0, v3, v6, s4 6625; GFX900-NEXT: global_store_dword v8, v0, s[16:17] 6626; GFX900-NEXT: s_waitcnt vmcnt(0) 6627; GFX900-NEXT: s_setpc_b64 s[30:31] 6628; 6629; GFX90A-LABEL: v_shuffle_v2i16_v8i16__12_6: 6630; GFX90A: ; %bb.0: 6631; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6632; GFX90A-NEXT: ;;#ASMSTART 6633; GFX90A-NEXT: ; def v[0:3] 6634; GFX90A-NEXT: ;;#ASMEND 6635; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 6636; GFX90A-NEXT: v_mov_b32_e32 v8, 0 6637; GFX90A-NEXT: ;;#ASMSTART 6638; GFX90A-NEXT: ; def v[4:7] 6639; GFX90A-NEXT: ;;#ASMEND 6640; GFX90A-NEXT: v_perm_b32 v0, v3, v6, s4 6641; GFX90A-NEXT: global_store_dword v8, v0, s[16:17] 6642; GFX90A-NEXT: s_waitcnt vmcnt(0) 6643; GFX90A-NEXT: s_setpc_b64 s[30:31] 6644; 6645; GFX940-LABEL: v_shuffle_v2i16_v8i16__12_6: 6646; GFX940: ; %bb.0: 6647; 
GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6648; GFX940-NEXT: ;;#ASMSTART 6649; GFX940-NEXT: ; def v[0:3] 6650; GFX940-NEXT: ;;#ASMEND 6651; GFX940-NEXT: s_mov_b32 s2, 0x5040100 6652; GFX940-NEXT: v_mov_b32_e32 v8, 0 6653; GFX940-NEXT: ;;#ASMSTART 6654; GFX940-NEXT: ; def v[4:7] 6655; GFX940-NEXT: ;;#ASMEND 6656; GFX940-NEXT: s_nop 0 6657; GFX940-NEXT: v_perm_b32 v0, v3, v6, s2 6658; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 6659; GFX940-NEXT: s_waitcnt vmcnt(0) 6660; GFX940-NEXT: s_setpc_b64 s[30:31] 6661 %vec0 = call <8 x i16> asm "; def $0", "=v"() 6662 %vec1 = call <8 x i16> asm "; def $0", "=v"() 6663 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 12, i32 6> 6664 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 6665 ret void 6666} 6667 6668define void @v_shuffle_v2i16_v8i16__13_6(ptr addrspace(1) inreg %ptr) { 6669; GFX900-LABEL: v_shuffle_v2i16_v8i16__13_6: 6670; GFX900: ; %bb.0: 6671; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6672; GFX900-NEXT: ;;#ASMSTART 6673; GFX900-NEXT: ; def v[0:3] 6674; GFX900-NEXT: ;;#ASMEND 6675; GFX900-NEXT: v_mov_b32_e32 v8, 0 6676; GFX900-NEXT: ;;#ASMSTART 6677; GFX900-NEXT: ; def v[4:7] 6678; GFX900-NEXT: ;;#ASMEND 6679; GFX900-NEXT: v_alignbit_b32 v0, v3, v6, 16 6680; GFX900-NEXT: global_store_dword v8, v0, s[16:17] 6681; GFX900-NEXT: s_waitcnt vmcnt(0) 6682; GFX900-NEXT: s_setpc_b64 s[30:31] 6683; 6684; GFX90A-LABEL: v_shuffle_v2i16_v8i16__13_6: 6685; GFX90A: ; %bb.0: 6686; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6687; GFX90A-NEXT: ;;#ASMSTART 6688; GFX90A-NEXT: ; def v[0:3] 6689; GFX90A-NEXT: ;;#ASMEND 6690; GFX90A-NEXT: v_mov_b32_e32 v8, 0 6691; GFX90A-NEXT: ;;#ASMSTART 6692; GFX90A-NEXT: ; def v[4:7] 6693; GFX90A-NEXT: ;;#ASMEND 6694; GFX90A-NEXT: v_alignbit_b32 v0, v3, v6, 16 6695; GFX90A-NEXT: global_store_dword v8, v0, s[16:17] 6696; GFX90A-NEXT: s_waitcnt vmcnt(0) 6697; GFX90A-NEXT: s_setpc_b64 s[30:31] 6698; 6699; GFX940-LABEL: 
v_shuffle_v2i16_v8i16__13_6: 6700; GFX940: ; %bb.0: 6701; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6702; GFX940-NEXT: ;;#ASMSTART 6703; GFX940-NEXT: ; def v[0:3] 6704; GFX940-NEXT: ;;#ASMEND 6705; GFX940-NEXT: v_mov_b32_e32 v8, 0 6706; GFX940-NEXT: ;;#ASMSTART 6707; GFX940-NEXT: ; def v[4:7] 6708; GFX940-NEXT: ;;#ASMEND 6709; GFX940-NEXT: s_nop 0 6710; GFX940-NEXT: v_alignbit_b32 v0, v3, v6, 16 6711; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 6712; GFX940-NEXT: s_waitcnt vmcnt(0) 6713; GFX940-NEXT: s_setpc_b64 s[30:31] 6714 %vec0 = call <8 x i16> asm "; def $0", "=v"() 6715 %vec1 = call <8 x i16> asm "; def $0", "=v"() 6716 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 13, i32 6> 6717 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 6718 ret void 6719} 6720 6721define void @v_shuffle_v2i16_v8i16__14_6(ptr addrspace(1) inreg %ptr) { 6722; GFX900-LABEL: v_shuffle_v2i16_v8i16__14_6: 6723; GFX900: ; %bb.0: 6724; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6725; GFX900-NEXT: ;;#ASMSTART 6726; GFX900-NEXT: ; def v[0:3] 6727; GFX900-NEXT: ;;#ASMEND 6728; GFX900-NEXT: s_mov_b32 s4, 0x5040100 6729; GFX900-NEXT: v_mov_b32_e32 v8, 0 6730; GFX900-NEXT: ;;#ASMSTART 6731; GFX900-NEXT: ; def v[4:7] 6732; GFX900-NEXT: ;;#ASMEND 6733; GFX900-NEXT: v_perm_b32 v0, v3, v7, s4 6734; GFX900-NEXT: global_store_dword v8, v0, s[16:17] 6735; GFX900-NEXT: s_waitcnt vmcnt(0) 6736; GFX900-NEXT: s_setpc_b64 s[30:31] 6737; 6738; GFX90A-LABEL: v_shuffle_v2i16_v8i16__14_6: 6739; GFX90A: ; %bb.0: 6740; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6741; GFX90A-NEXT: ;;#ASMSTART 6742; GFX90A-NEXT: ; def v[0:3] 6743; GFX90A-NEXT: ;;#ASMEND 6744; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 6745; GFX90A-NEXT: v_mov_b32_e32 v8, 0 6746; GFX90A-NEXT: ;;#ASMSTART 6747; GFX90A-NEXT: ; def v[4:7] 6748; GFX90A-NEXT: ;;#ASMEND 6749; GFX90A-NEXT: v_perm_b32 v0, v3, v7, s4 6750; GFX90A-NEXT: global_store_dword v8, v0, s[16:17] 6751; GFX90A-NEXT: 
s_waitcnt vmcnt(0) 6752; GFX90A-NEXT: s_setpc_b64 s[30:31] 6753; 6754; GFX940-LABEL: v_shuffle_v2i16_v8i16__14_6: 6755; GFX940: ; %bb.0: 6756; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6757; GFX940-NEXT: ;;#ASMSTART 6758; GFX940-NEXT: ; def v[0:3] 6759; GFX940-NEXT: ;;#ASMEND 6760; GFX940-NEXT: s_mov_b32 s2, 0x5040100 6761; GFX940-NEXT: v_mov_b32_e32 v8, 0 6762; GFX940-NEXT: ;;#ASMSTART 6763; GFX940-NEXT: ; def v[4:7] 6764; GFX940-NEXT: ;;#ASMEND 6765; GFX940-NEXT: s_nop 0 6766; GFX940-NEXT: v_perm_b32 v0, v3, v7, s2 6767; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 6768; GFX940-NEXT: s_waitcnt vmcnt(0) 6769; GFX940-NEXT: s_setpc_b64 s[30:31] 6770 %vec0 = call <8 x i16> asm "; def $0", "=v"() 6771 %vec1 = call <8 x i16> asm "; def $0", "=v"() 6772 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 14, i32 6> 6773 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 6774 ret void 6775} 6776 6777define void @v_shuffle_v2i16_v8i16__u_7(ptr addrspace(1) inreg %ptr) { 6778; GFX900-LABEL: v_shuffle_v2i16_v8i16__u_7: 6779; GFX900: ; %bb.0: 6780; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6781; GFX900-NEXT: v_mov_b32_e32 v4, 0 6782; GFX900-NEXT: ;;#ASMSTART 6783; GFX900-NEXT: ; def v[0:3] 6784; GFX900-NEXT: ;;#ASMEND 6785; GFX900-NEXT: global_store_dword v4, v3, s[16:17] 6786; GFX900-NEXT: s_waitcnt vmcnt(0) 6787; GFX900-NEXT: s_setpc_b64 s[30:31] 6788; 6789; GFX90A-LABEL: v_shuffle_v2i16_v8i16__u_7: 6790; GFX90A: ; %bb.0: 6791; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6792; GFX90A-NEXT: v_mov_b32_e32 v4, 0 6793; GFX90A-NEXT: ;;#ASMSTART 6794; GFX90A-NEXT: ; def v[0:3] 6795; GFX90A-NEXT: ;;#ASMEND 6796; GFX90A-NEXT: global_store_dword v4, v3, s[16:17] 6797; GFX90A-NEXT: s_waitcnt vmcnt(0) 6798; GFX90A-NEXT: s_setpc_b64 s[30:31] 6799; 6800; GFX940-LABEL: v_shuffle_v2i16_v8i16__u_7: 6801; GFX940: ; %bb.0: 6802; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6803; GFX940-NEXT: v_mov_b32_e32 v4, 0 6804; 
GFX940-NEXT: ;;#ASMSTART 6805; GFX940-NEXT: ; def v[0:3] 6806; GFX940-NEXT: ;;#ASMEND 6807; GFX940-NEXT: global_store_dword v4, v3, s[0:1] sc0 sc1 6808; GFX940-NEXT: s_waitcnt vmcnt(0) 6809; GFX940-NEXT: s_setpc_b64 s[30:31] 6810 %vec0 = call <8 x i16> asm "; def $0", "=v"() 6811 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 poison, i32 7> 6812 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 6813 ret void 6814} 6815 6816define void @v_shuffle_v2i16_v8i16__0_7(ptr addrspace(1) inreg %ptr) { 6817; GFX900-LABEL: v_shuffle_v2i16_v8i16__0_7: 6818; GFX900: ; %bb.0: 6819; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6820; GFX900-NEXT: ;;#ASMSTART 6821; GFX900-NEXT: ; def v[0:3] 6822; GFX900-NEXT: ;;#ASMEND 6823; GFX900-NEXT: s_mov_b32 s4, 0xffff 6824; GFX900-NEXT: v_mov_b32_e32 v4, 0 6825; GFX900-NEXT: v_bfi_b32 v0, s4, v0, v3 6826; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 6827; GFX900-NEXT: s_waitcnt vmcnt(0) 6828; GFX900-NEXT: s_setpc_b64 s[30:31] 6829; 6830; GFX90A-LABEL: v_shuffle_v2i16_v8i16__0_7: 6831; GFX90A: ; %bb.0: 6832; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6833; GFX90A-NEXT: ;;#ASMSTART 6834; GFX90A-NEXT: ; def v[0:3] 6835; GFX90A-NEXT: ;;#ASMEND 6836; GFX90A-NEXT: s_mov_b32 s4, 0xffff 6837; GFX90A-NEXT: v_mov_b32_e32 v4, 0 6838; GFX90A-NEXT: v_bfi_b32 v0, s4, v0, v3 6839; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 6840; GFX90A-NEXT: s_waitcnt vmcnt(0) 6841; GFX90A-NEXT: s_setpc_b64 s[30:31] 6842; 6843; GFX940-LABEL: v_shuffle_v2i16_v8i16__0_7: 6844; GFX940: ; %bb.0: 6845; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6846; GFX940-NEXT: ;;#ASMSTART 6847; GFX940-NEXT: ; def v[0:3] 6848; GFX940-NEXT: ;;#ASMEND 6849; GFX940-NEXT: s_mov_b32 s2, 0xffff 6850; GFX940-NEXT: v_mov_b32_e32 v4, 0 6851; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v3 6852; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 6853; GFX940-NEXT: s_waitcnt vmcnt(0) 6854; GFX940-NEXT: s_setpc_b64 s[30:31] 6855 %vec0 = call <8 
x i16> asm "; def $0", "=v"() 6856 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 0, i32 7> 6857 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 6858 ret void 6859} 6860 6861define void @v_shuffle_v2i16_v8i16__1_7(ptr addrspace(1) inreg %ptr) { 6862; GFX900-LABEL: v_shuffle_v2i16_v8i16__1_7: 6863; GFX900: ; %bb.0: 6864; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6865; GFX900-NEXT: ;;#ASMSTART 6866; GFX900-NEXT: ; def v[0:3] 6867; GFX900-NEXT: ;;#ASMEND 6868; GFX900-NEXT: s_mov_b32 s4, 0x7060302 6869; GFX900-NEXT: v_mov_b32_e32 v4, 0 6870; GFX900-NEXT: v_perm_b32 v0, v3, v0, s4 6871; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 6872; GFX900-NEXT: s_waitcnt vmcnt(0) 6873; GFX900-NEXT: s_setpc_b64 s[30:31] 6874; 6875; GFX90A-LABEL: v_shuffle_v2i16_v8i16__1_7: 6876; GFX90A: ; %bb.0: 6877; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6878; GFX90A-NEXT: ;;#ASMSTART 6879; GFX90A-NEXT: ; def v[0:3] 6880; GFX90A-NEXT: ;;#ASMEND 6881; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 6882; GFX90A-NEXT: v_mov_b32_e32 v4, 0 6883; GFX90A-NEXT: v_perm_b32 v0, v3, v0, s4 6884; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 6885; GFX90A-NEXT: s_waitcnt vmcnt(0) 6886; GFX90A-NEXT: s_setpc_b64 s[30:31] 6887; 6888; GFX940-LABEL: v_shuffle_v2i16_v8i16__1_7: 6889; GFX940: ; %bb.0: 6890; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6891; GFX940-NEXT: ;;#ASMSTART 6892; GFX940-NEXT: ; def v[0:3] 6893; GFX940-NEXT: ;;#ASMEND 6894; GFX940-NEXT: s_mov_b32 s2, 0x7060302 6895; GFX940-NEXT: v_mov_b32_e32 v4, 0 6896; GFX940-NEXT: v_perm_b32 v0, v3, v0, s2 6897; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 6898; GFX940-NEXT: s_waitcnt vmcnt(0) 6899; GFX940-NEXT: s_setpc_b64 s[30:31] 6900 %vec0 = call <8 x i16> asm "; def $0", "=v"() 6901 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 1, i32 7> 6902 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 6903 ret void 6904} 6905 6906define void 
@v_shuffle_v2i16_v8i16__2_7(ptr addrspace(1) inreg %ptr) { 6907; GFX900-LABEL: v_shuffle_v2i16_v8i16__2_7: 6908; GFX900: ; %bb.0: 6909; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6910; GFX900-NEXT: ;;#ASMSTART 6911; GFX900-NEXT: ; def v[0:3] 6912; GFX900-NEXT: ;;#ASMEND 6913; GFX900-NEXT: s_mov_b32 s4, 0xffff 6914; GFX900-NEXT: v_mov_b32_e32 v4, 0 6915; GFX900-NEXT: v_bfi_b32 v0, s4, v1, v3 6916; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 6917; GFX900-NEXT: s_waitcnt vmcnt(0) 6918; GFX900-NEXT: s_setpc_b64 s[30:31] 6919; 6920; GFX90A-LABEL: v_shuffle_v2i16_v8i16__2_7: 6921; GFX90A: ; %bb.0: 6922; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6923; GFX90A-NEXT: ;;#ASMSTART 6924; GFX90A-NEXT: ; def v[0:3] 6925; GFX90A-NEXT: ;;#ASMEND 6926; GFX90A-NEXT: s_mov_b32 s4, 0xffff 6927; GFX90A-NEXT: v_mov_b32_e32 v4, 0 6928; GFX90A-NEXT: v_bfi_b32 v0, s4, v1, v3 6929; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 6930; GFX90A-NEXT: s_waitcnt vmcnt(0) 6931; GFX90A-NEXT: s_setpc_b64 s[30:31] 6932; 6933; GFX940-LABEL: v_shuffle_v2i16_v8i16__2_7: 6934; GFX940: ; %bb.0: 6935; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6936; GFX940-NEXT: ;;#ASMSTART 6937; GFX940-NEXT: ; def v[0:3] 6938; GFX940-NEXT: ;;#ASMEND 6939; GFX940-NEXT: s_mov_b32 s2, 0xffff 6940; GFX940-NEXT: v_mov_b32_e32 v4, 0 6941; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v3 6942; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 6943; GFX940-NEXT: s_waitcnt vmcnt(0) 6944; GFX940-NEXT: s_setpc_b64 s[30:31] 6945 %vec0 = call <8 x i16> asm "; def $0", "=v"() 6946 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 2, i32 7> 6947 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 6948 ret void 6949} 6950 6951define void @v_shuffle_v2i16_v8i16__3_7(ptr addrspace(1) inreg %ptr) { 6952; GFX900-LABEL: v_shuffle_v2i16_v8i16__3_7: 6953; GFX900: ; %bb.0: 6954; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6955; GFX900-NEXT: ;;#ASMSTART 6956; GFX900-NEXT: ; def 
v[0:3] 6957; GFX900-NEXT: ;;#ASMEND 6958; GFX900-NEXT: s_mov_b32 s4, 0x7060302 6959; GFX900-NEXT: v_mov_b32_e32 v4, 0 6960; GFX900-NEXT: v_perm_b32 v0, v3, v1, s4 6961; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 6962; GFX900-NEXT: s_waitcnt vmcnt(0) 6963; GFX900-NEXT: s_setpc_b64 s[30:31] 6964; 6965; GFX90A-LABEL: v_shuffle_v2i16_v8i16__3_7: 6966; GFX90A: ; %bb.0: 6967; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6968; GFX90A-NEXT: ;;#ASMSTART 6969; GFX90A-NEXT: ; def v[0:3] 6970; GFX90A-NEXT: ;;#ASMEND 6971; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 6972; GFX90A-NEXT: v_mov_b32_e32 v4, 0 6973; GFX90A-NEXT: v_perm_b32 v0, v3, v1, s4 6974; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 6975; GFX90A-NEXT: s_waitcnt vmcnt(0) 6976; GFX90A-NEXT: s_setpc_b64 s[30:31] 6977; 6978; GFX940-LABEL: v_shuffle_v2i16_v8i16__3_7: 6979; GFX940: ; %bb.0: 6980; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6981; GFX940-NEXT: ;;#ASMSTART 6982; GFX940-NEXT: ; def v[0:3] 6983; GFX940-NEXT: ;;#ASMEND 6984; GFX940-NEXT: s_mov_b32 s2, 0x7060302 6985; GFX940-NEXT: v_mov_b32_e32 v4, 0 6986; GFX940-NEXT: v_perm_b32 v0, v3, v1, s2 6987; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 6988; GFX940-NEXT: s_waitcnt vmcnt(0) 6989; GFX940-NEXT: s_setpc_b64 s[30:31] 6990 %vec0 = call <8 x i16> asm "; def $0", "=v"() 6991 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 3, i32 7> 6992 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 6993 ret void 6994} 6995 6996define void @v_shuffle_v2i16_v8i16__4_7(ptr addrspace(1) inreg %ptr) { 6997; GFX900-LABEL: v_shuffle_v2i16_v8i16__4_7: 6998; GFX900: ; %bb.0: 6999; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7000; GFX900-NEXT: ;;#ASMSTART 7001; GFX900-NEXT: ; def v[0:3] 7002; GFX900-NEXT: ;;#ASMEND 7003; GFX900-NEXT: s_mov_b32 s4, 0xffff 7004; GFX900-NEXT: v_mov_b32_e32 v4, 0 7005; GFX900-NEXT: v_bfi_b32 v0, s4, v2, v3 7006; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 7007; GFX900-NEXT: 
s_waitcnt vmcnt(0) 7008; GFX900-NEXT: s_setpc_b64 s[30:31] 7009; 7010; GFX90A-LABEL: v_shuffle_v2i16_v8i16__4_7: 7011; GFX90A: ; %bb.0: 7012; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7013; GFX90A-NEXT: ;;#ASMSTART 7014; GFX90A-NEXT: ; def v[0:3] 7015; GFX90A-NEXT: ;;#ASMEND 7016; GFX90A-NEXT: s_mov_b32 s4, 0xffff 7017; GFX90A-NEXT: v_mov_b32_e32 v4, 0 7018; GFX90A-NEXT: v_bfi_b32 v0, s4, v2, v3 7019; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 7020; GFX90A-NEXT: s_waitcnt vmcnt(0) 7021; GFX90A-NEXT: s_setpc_b64 s[30:31] 7022; 7023; GFX940-LABEL: v_shuffle_v2i16_v8i16__4_7: 7024; GFX940: ; %bb.0: 7025; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7026; GFX940-NEXT: ;;#ASMSTART 7027; GFX940-NEXT: ; def v[0:3] 7028; GFX940-NEXT: ;;#ASMEND 7029; GFX940-NEXT: s_mov_b32 s2, 0xffff 7030; GFX940-NEXT: v_mov_b32_e32 v4, 0 7031; GFX940-NEXT: v_bfi_b32 v0, s2, v2, v3 7032; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 7033; GFX940-NEXT: s_waitcnt vmcnt(0) 7034; GFX940-NEXT: s_setpc_b64 s[30:31] 7035 %vec0 = call <8 x i16> asm "; def $0", "=v"() 7036 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 4, i32 7> 7037 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 7038 ret void 7039} 7040 7041define void @v_shuffle_v2i16_v8i16__5_7(ptr addrspace(1) inreg %ptr) { 7042; GFX900-LABEL: v_shuffle_v2i16_v8i16__5_7: 7043; GFX900: ; %bb.0: 7044; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7045; GFX900-NEXT: ;;#ASMSTART 7046; GFX900-NEXT: ; def v[0:3] 7047; GFX900-NEXT: ;;#ASMEND 7048; GFX900-NEXT: s_mov_b32 s4, 0x7060302 7049; GFX900-NEXT: v_mov_b32_e32 v4, 0 7050; GFX900-NEXT: v_perm_b32 v0, v3, v2, s4 7051; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 7052; GFX900-NEXT: s_waitcnt vmcnt(0) 7053; GFX900-NEXT: s_setpc_b64 s[30:31] 7054; 7055; GFX90A-LABEL: v_shuffle_v2i16_v8i16__5_7: 7056; GFX90A: ; %bb.0: 7057; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7058; GFX90A-NEXT: ;;#ASMSTART 7059; GFX90A-NEXT: ; 
def v[0:3] 7060; GFX90A-NEXT: ;;#ASMEND 7061; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 7062; GFX90A-NEXT: v_mov_b32_e32 v4, 0 7063; GFX90A-NEXT: v_perm_b32 v0, v3, v2, s4 7064; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 7065; GFX90A-NEXT: s_waitcnt vmcnt(0) 7066; GFX90A-NEXT: s_setpc_b64 s[30:31] 7067; 7068; GFX940-LABEL: v_shuffle_v2i16_v8i16__5_7: 7069; GFX940: ; %bb.0: 7070; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7071; GFX940-NEXT: ;;#ASMSTART 7072; GFX940-NEXT: ; def v[0:3] 7073; GFX940-NEXT: ;;#ASMEND 7074; GFX940-NEXT: s_mov_b32 s2, 0x7060302 7075; GFX940-NEXT: v_mov_b32_e32 v4, 0 7076; GFX940-NEXT: v_perm_b32 v0, v3, v2, s2 7077; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 7078; GFX940-NEXT: s_waitcnt vmcnt(0) 7079; GFX940-NEXT: s_setpc_b64 s[30:31] 7080 %vec0 = call <8 x i16> asm "; def $0", "=v"() 7081 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 5, i32 7> 7082 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 7083 ret void 7084} 7085 7086define void @v_shuffle_v2i16_v8i16__6_7(ptr addrspace(1) inreg %ptr) { 7087; GFX900-LABEL: v_shuffle_v2i16_v8i16__6_7: 7088; GFX900: ; %bb.0: 7089; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7090; GFX900-NEXT: v_mov_b32_e32 v4, 0 7091; GFX900-NEXT: ;;#ASMSTART 7092; GFX900-NEXT: ; def v[0:3] 7093; GFX900-NEXT: ;;#ASMEND 7094; GFX900-NEXT: global_store_dword v4, v3, s[16:17] 7095; GFX900-NEXT: s_waitcnt vmcnt(0) 7096; GFX900-NEXT: s_setpc_b64 s[30:31] 7097; 7098; GFX90A-LABEL: v_shuffle_v2i16_v8i16__6_7: 7099; GFX90A: ; %bb.0: 7100; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7101; GFX90A-NEXT: v_mov_b32_e32 v4, 0 7102; GFX90A-NEXT: ;;#ASMSTART 7103; GFX90A-NEXT: ; def v[0:3] 7104; GFX90A-NEXT: ;;#ASMEND 7105; GFX90A-NEXT: global_store_dword v4, v3, s[16:17] 7106; GFX90A-NEXT: s_waitcnt vmcnt(0) 7107; GFX90A-NEXT: s_setpc_b64 s[30:31] 7108; 7109; GFX940-LABEL: v_shuffle_v2i16_v8i16__6_7: 7110; GFX940: ; %bb.0: 7111; GFX940-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) 7112; GFX940-NEXT: v_mov_b32_e32 v4, 0 7113; GFX940-NEXT: ;;#ASMSTART 7114; GFX940-NEXT: ; def v[0:3] 7115; GFX940-NEXT: ;;#ASMEND 7116; GFX940-NEXT: global_store_dword v4, v3, s[0:1] sc0 sc1 7117; GFX940-NEXT: s_waitcnt vmcnt(0) 7118; GFX940-NEXT: s_setpc_b64 s[30:31] 7119 %vec0 = call <8 x i16> asm "; def $0", "=v"() 7120 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 6, i32 7> 7121 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 7122 ret void 7123} 7124 7125define void @v_shuffle_v2i16_v8i16__7_7(ptr addrspace(1) inreg %ptr) { 7126; GFX900-LABEL: v_shuffle_v2i16_v8i16__7_7: 7127; GFX900: ; %bb.0: 7128; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7129; GFX900-NEXT: ;;#ASMSTART 7130; GFX900-NEXT: ; def v[0:3] 7131; GFX900-NEXT: ;;#ASMEND 7132; GFX900-NEXT: s_mov_b32 s4, 0x7060302 7133; GFX900-NEXT: v_mov_b32_e32 v4, 0 7134; GFX900-NEXT: v_perm_b32 v0, v3, v3, s4 7135; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 7136; GFX900-NEXT: s_waitcnt vmcnt(0) 7137; GFX900-NEXT: s_setpc_b64 s[30:31] 7138; 7139; GFX90A-LABEL: v_shuffle_v2i16_v8i16__7_7: 7140; GFX90A: ; %bb.0: 7141; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7142; GFX90A-NEXT: ;;#ASMSTART 7143; GFX90A-NEXT: ; def v[0:3] 7144; GFX90A-NEXT: ;;#ASMEND 7145; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 7146; GFX90A-NEXT: v_mov_b32_e32 v4, 0 7147; GFX90A-NEXT: v_perm_b32 v0, v3, v3, s4 7148; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 7149; GFX90A-NEXT: s_waitcnt vmcnt(0) 7150; GFX90A-NEXT: s_setpc_b64 s[30:31] 7151; 7152; GFX940-LABEL: v_shuffle_v2i16_v8i16__7_7: 7153; GFX940: ; %bb.0: 7154; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7155; GFX940-NEXT: ;;#ASMSTART 7156; GFX940-NEXT: ; def v[0:3] 7157; GFX940-NEXT: ;;#ASMEND 7158; GFX940-NEXT: s_mov_b32 s2, 0x7060302 7159; GFX940-NEXT: v_mov_b32_e32 v4, 0 7160; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 7161; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 7162; GFX940-NEXT: 
s_waitcnt vmcnt(0) 7163; GFX940-NEXT: s_setpc_b64 s[30:31] 7164 %vec0 = call <8 x i16> asm "; def $0", "=v"() 7165 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 7, i32 7> 7166 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 7167 ret void 7168} 7169 7170define void @v_shuffle_v2i16_v8i16__8_7(ptr addrspace(1) inreg %ptr) { 7171; GFX900-LABEL: v_shuffle_v2i16_v8i16__8_7: 7172; GFX900: ; %bb.0: 7173; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7174; GFX900-NEXT: v_mov_b32_e32 v4, 0 7175; GFX900-NEXT: ;;#ASMSTART 7176; GFX900-NEXT: ; def v[0:3] 7177; GFX900-NEXT: ;;#ASMEND 7178; GFX900-NEXT: global_store_dword v4, v3, s[16:17] 7179; GFX900-NEXT: s_waitcnt vmcnt(0) 7180; GFX900-NEXT: s_setpc_b64 s[30:31] 7181; 7182; GFX90A-LABEL: v_shuffle_v2i16_v8i16__8_7: 7183; GFX90A: ; %bb.0: 7184; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7185; GFX90A-NEXT: v_mov_b32_e32 v4, 0 7186; GFX90A-NEXT: ;;#ASMSTART 7187; GFX90A-NEXT: ; def v[0:3] 7188; GFX90A-NEXT: ;;#ASMEND 7189; GFX90A-NEXT: global_store_dword v4, v3, s[16:17] 7190; GFX90A-NEXT: s_waitcnt vmcnt(0) 7191; GFX90A-NEXT: s_setpc_b64 s[30:31] 7192; 7193; GFX940-LABEL: v_shuffle_v2i16_v8i16__8_7: 7194; GFX940: ; %bb.0: 7195; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7196; GFX940-NEXT: v_mov_b32_e32 v4, 0 7197; GFX940-NEXT: ;;#ASMSTART 7198; GFX940-NEXT: ; def v[0:3] 7199; GFX940-NEXT: ;;#ASMEND 7200; GFX940-NEXT: global_store_dword v4, v3, s[0:1] sc0 sc1 7201; GFX940-NEXT: s_waitcnt vmcnt(0) 7202; GFX940-NEXT: s_setpc_b64 s[30:31] 7203 %vec0 = call <8 x i16> asm "; def $0", "=v"() 7204 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 8, i32 7> 7205 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 7206 ret void 7207} 7208 7209define void @v_shuffle_v2i16_v8i16__9_7(ptr addrspace(1) inreg %ptr) { 7210; GFX900-LABEL: v_shuffle_v2i16_v8i16__9_7: 7211; GFX900: ; %bb.0: 7212; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7213; 
GFX900-NEXT: ;;#ASMSTART 7214; GFX900-NEXT: ; def v[0:3] 7215; GFX900-NEXT: ;;#ASMEND 7216; GFX900-NEXT: s_mov_b32 s4, 0x7060302 7217; GFX900-NEXT: v_mov_b32_e32 v8, 0 7218; GFX900-NEXT: ;;#ASMSTART 7219; GFX900-NEXT: ; def v[4:7] 7220; GFX900-NEXT: ;;#ASMEND 7221; GFX900-NEXT: v_perm_b32 v0, v3, v4, s4 7222; GFX900-NEXT: global_store_dword v8, v0, s[16:17] 7223; GFX900-NEXT: s_waitcnt vmcnt(0) 7224; GFX900-NEXT: s_setpc_b64 s[30:31] 7225; 7226; GFX90A-LABEL: v_shuffle_v2i16_v8i16__9_7: 7227; GFX90A: ; %bb.0: 7228; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7229; GFX90A-NEXT: ;;#ASMSTART 7230; GFX90A-NEXT: ; def v[0:3] 7231; GFX90A-NEXT: ;;#ASMEND 7232; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 7233; GFX90A-NEXT: v_mov_b32_e32 v8, 0 7234; GFX90A-NEXT: ;;#ASMSTART 7235; GFX90A-NEXT: ; def v[4:7] 7236; GFX90A-NEXT: ;;#ASMEND 7237; GFX90A-NEXT: v_perm_b32 v0, v3, v4, s4 7238; GFX90A-NEXT: global_store_dword v8, v0, s[16:17] 7239; GFX90A-NEXT: s_waitcnt vmcnt(0) 7240; GFX90A-NEXT: s_setpc_b64 s[30:31] 7241; 7242; GFX940-LABEL: v_shuffle_v2i16_v8i16__9_7: 7243; GFX940: ; %bb.0: 7244; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7245; GFX940-NEXT: ;;#ASMSTART 7246; GFX940-NEXT: ; def v[0:3] 7247; GFX940-NEXT: ;;#ASMEND 7248; GFX940-NEXT: s_mov_b32 s2, 0x7060302 7249; GFX940-NEXT: v_mov_b32_e32 v8, 0 7250; GFX940-NEXT: ;;#ASMSTART 7251; GFX940-NEXT: ; def v[4:7] 7252; GFX940-NEXT: ;;#ASMEND 7253; GFX940-NEXT: s_nop 0 7254; GFX940-NEXT: v_perm_b32 v0, v3, v4, s2 7255; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 7256; GFX940-NEXT: s_waitcnt vmcnt(0) 7257; GFX940-NEXT: s_setpc_b64 s[30:31] 7258 %vec0 = call <8 x i16> asm "; def $0", "=v"() 7259 %vec1 = call <8 x i16> asm "; def $0", "=v"() 7260 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 9, i32 7> 7261 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 7262 ret void 7263} 7264 7265define void @v_shuffle_v2i16_v8i16__10_7(ptr addrspace(1) inreg %ptr) { 7266; GFX900-LABEL: 
v_shuffle_v2i16_v8i16__10_7: 7267; GFX900: ; %bb.0: 7268; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7269; GFX900-NEXT: ;;#ASMSTART 7270; GFX900-NEXT: ; def v[0:3] 7271; GFX900-NEXT: ;;#ASMEND 7272; GFX900-NEXT: s_mov_b32 s4, 0xffff 7273; GFX900-NEXT: v_mov_b32_e32 v8, 0 7274; GFX900-NEXT: ;;#ASMSTART 7275; GFX900-NEXT: ; def v[4:7] 7276; GFX900-NEXT: ;;#ASMEND 7277; GFX900-NEXT: v_bfi_b32 v0, s4, v5, v3 7278; GFX900-NEXT: global_store_dword v8, v0, s[16:17] 7279; GFX900-NEXT: s_waitcnt vmcnt(0) 7280; GFX900-NEXT: s_setpc_b64 s[30:31] 7281; 7282; GFX90A-LABEL: v_shuffle_v2i16_v8i16__10_7: 7283; GFX90A: ; %bb.0: 7284; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7285; GFX90A-NEXT: ;;#ASMSTART 7286; GFX90A-NEXT: ; def v[0:3] 7287; GFX90A-NEXT: ;;#ASMEND 7288; GFX90A-NEXT: s_mov_b32 s4, 0xffff 7289; GFX90A-NEXT: v_mov_b32_e32 v8, 0 7290; GFX90A-NEXT: ;;#ASMSTART 7291; GFX90A-NEXT: ; def v[4:7] 7292; GFX90A-NEXT: ;;#ASMEND 7293; GFX90A-NEXT: v_bfi_b32 v0, s4, v5, v3 7294; GFX90A-NEXT: global_store_dword v8, v0, s[16:17] 7295; GFX90A-NEXT: s_waitcnt vmcnt(0) 7296; GFX90A-NEXT: s_setpc_b64 s[30:31] 7297; 7298; GFX940-LABEL: v_shuffle_v2i16_v8i16__10_7: 7299; GFX940: ; %bb.0: 7300; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7301; GFX940-NEXT: ;;#ASMSTART 7302; GFX940-NEXT: ; def v[0:3] 7303; GFX940-NEXT: ;;#ASMEND 7304; GFX940-NEXT: s_mov_b32 s2, 0xffff 7305; GFX940-NEXT: v_mov_b32_e32 v8, 0 7306; GFX940-NEXT: ;;#ASMSTART 7307; GFX940-NEXT: ; def v[4:7] 7308; GFX940-NEXT: ;;#ASMEND 7309; GFX940-NEXT: s_nop 0 7310; GFX940-NEXT: v_bfi_b32 v0, s2, v5, v3 7311; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 7312; GFX940-NEXT: s_waitcnt vmcnt(0) 7313; GFX940-NEXT: s_setpc_b64 s[30:31] 7314 %vec0 = call <8 x i16> asm "; def $0", "=v"() 7315 %vec1 = call <8 x i16> asm "; def $0", "=v"() 7316 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 10, i32 7> 7317 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 7318 ret void 
7319} 7320 7321define void @v_shuffle_v2i16_v8i16__11_7(ptr addrspace(1) inreg %ptr) { 7322; GFX900-LABEL: v_shuffle_v2i16_v8i16__11_7: 7323; GFX900: ; %bb.0: 7324; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7325; GFX900-NEXT: ;;#ASMSTART 7326; GFX900-NEXT: ; def v[0:3] 7327; GFX900-NEXT: ;;#ASMEND 7328; GFX900-NEXT: s_mov_b32 s4, 0x7060302 7329; GFX900-NEXT: v_mov_b32_e32 v8, 0 7330; GFX900-NEXT: ;;#ASMSTART 7331; GFX900-NEXT: ; def v[4:7] 7332; GFX900-NEXT: ;;#ASMEND 7333; GFX900-NEXT: v_perm_b32 v0, v3, v5, s4 7334; GFX900-NEXT: global_store_dword v8, v0, s[16:17] 7335; GFX900-NEXT: s_waitcnt vmcnt(0) 7336; GFX900-NEXT: s_setpc_b64 s[30:31] 7337; 7338; GFX90A-LABEL: v_shuffle_v2i16_v8i16__11_7: 7339; GFX90A: ; %bb.0: 7340; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7341; GFX90A-NEXT: ;;#ASMSTART 7342; GFX90A-NEXT: ; def v[0:3] 7343; GFX90A-NEXT: ;;#ASMEND 7344; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 7345; GFX90A-NEXT: v_mov_b32_e32 v8, 0 7346; GFX90A-NEXT: ;;#ASMSTART 7347; GFX90A-NEXT: ; def v[4:7] 7348; GFX90A-NEXT: ;;#ASMEND 7349; GFX90A-NEXT: v_perm_b32 v0, v3, v5, s4 7350; GFX90A-NEXT: global_store_dword v8, v0, s[16:17] 7351; GFX90A-NEXT: s_waitcnt vmcnt(0) 7352; GFX90A-NEXT: s_setpc_b64 s[30:31] 7353; 7354; GFX940-LABEL: v_shuffle_v2i16_v8i16__11_7: 7355; GFX940: ; %bb.0: 7356; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7357; GFX940-NEXT: ;;#ASMSTART 7358; GFX940-NEXT: ; def v[0:3] 7359; GFX940-NEXT: ;;#ASMEND 7360; GFX940-NEXT: s_mov_b32 s2, 0x7060302 7361; GFX940-NEXT: v_mov_b32_e32 v8, 0 7362; GFX940-NEXT: ;;#ASMSTART 7363; GFX940-NEXT: ; def v[4:7] 7364; GFX940-NEXT: ;;#ASMEND 7365; GFX940-NEXT: s_nop 0 7366; GFX940-NEXT: v_perm_b32 v0, v3, v5, s2 7367; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 7368; GFX940-NEXT: s_waitcnt vmcnt(0) 7369; GFX940-NEXT: s_setpc_b64 s[30:31] 7370 %vec0 = call <8 x i16> asm "; def $0", "=v"() 7371 %vec1 = call <8 x i16> asm "; def $0", "=v"() 7372 %shuf = shufflevector <8 x i16> 
%vec0, <8 x i16> %vec1, <2 x i32> <i32 11, i32 7> 7373 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 7374 ret void 7375} 7376 7377define void @v_shuffle_v2i16_v8i16__12_7(ptr addrspace(1) inreg %ptr) { 7378; GFX900-LABEL: v_shuffle_v2i16_v8i16__12_7: 7379; GFX900: ; %bb.0: 7380; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7381; GFX900-NEXT: ;;#ASMSTART 7382; GFX900-NEXT: ; def v[0:3] 7383; GFX900-NEXT: ;;#ASMEND 7384; GFX900-NEXT: s_mov_b32 s4, 0xffff 7385; GFX900-NEXT: v_mov_b32_e32 v8, 0 7386; GFX900-NEXT: ;;#ASMSTART 7387; GFX900-NEXT: ; def v[4:7] 7388; GFX900-NEXT: ;;#ASMEND 7389; GFX900-NEXT: v_bfi_b32 v0, s4, v6, v3 7390; GFX900-NEXT: global_store_dword v8, v0, s[16:17] 7391; GFX900-NEXT: s_waitcnt vmcnt(0) 7392; GFX900-NEXT: s_setpc_b64 s[30:31] 7393; 7394; GFX90A-LABEL: v_shuffle_v2i16_v8i16__12_7: 7395; GFX90A: ; %bb.0: 7396; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7397; GFX90A-NEXT: ;;#ASMSTART 7398; GFX90A-NEXT: ; def v[0:3] 7399; GFX90A-NEXT: ;;#ASMEND 7400; GFX90A-NEXT: s_mov_b32 s4, 0xffff 7401; GFX90A-NEXT: v_mov_b32_e32 v8, 0 7402; GFX90A-NEXT: ;;#ASMSTART 7403; GFX90A-NEXT: ; def v[4:7] 7404; GFX90A-NEXT: ;;#ASMEND 7405; GFX90A-NEXT: v_bfi_b32 v0, s4, v6, v3 7406; GFX90A-NEXT: global_store_dword v8, v0, s[16:17] 7407; GFX90A-NEXT: s_waitcnt vmcnt(0) 7408; GFX90A-NEXT: s_setpc_b64 s[30:31] 7409; 7410; GFX940-LABEL: v_shuffle_v2i16_v8i16__12_7: 7411; GFX940: ; %bb.0: 7412; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7413; GFX940-NEXT: ;;#ASMSTART 7414; GFX940-NEXT: ; def v[0:3] 7415; GFX940-NEXT: ;;#ASMEND 7416; GFX940-NEXT: s_mov_b32 s2, 0xffff 7417; GFX940-NEXT: v_mov_b32_e32 v8, 0 7418; GFX940-NEXT: ;;#ASMSTART 7419; GFX940-NEXT: ; def v[4:7] 7420; GFX940-NEXT: ;;#ASMEND 7421; GFX940-NEXT: s_nop 0 7422; GFX940-NEXT: v_bfi_b32 v0, s2, v6, v3 7423; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 7424; GFX940-NEXT: s_waitcnt vmcnt(0) 7425; GFX940-NEXT: s_setpc_b64 s[30:31] 7426 %vec0 = call <8 x i16> asm 
"; def $0", "=v"() 7427 %vec1 = call <8 x i16> asm "; def $0", "=v"() 7428 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 12, i32 7> 7429 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 7430 ret void 7431} 7432 7433define void @v_shuffle_v2i16_v8i16__13_7(ptr addrspace(1) inreg %ptr) { 7434; GFX900-LABEL: v_shuffle_v2i16_v8i16__13_7: 7435; GFX900: ; %bb.0: 7436; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7437; GFX900-NEXT: ;;#ASMSTART 7438; GFX900-NEXT: ; def v[0:3] 7439; GFX900-NEXT: ;;#ASMEND 7440; GFX900-NEXT: s_mov_b32 s4, 0x7060302 7441; GFX900-NEXT: v_mov_b32_e32 v8, 0 7442; GFX900-NEXT: ;;#ASMSTART 7443; GFX900-NEXT: ; def v[4:7] 7444; GFX900-NEXT: ;;#ASMEND 7445; GFX900-NEXT: v_perm_b32 v0, v3, v6, s4 7446; GFX900-NEXT: global_store_dword v8, v0, s[16:17] 7447; GFX900-NEXT: s_waitcnt vmcnt(0) 7448; GFX900-NEXT: s_setpc_b64 s[30:31] 7449; 7450; GFX90A-LABEL: v_shuffle_v2i16_v8i16__13_7: 7451; GFX90A: ; %bb.0: 7452; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7453; GFX90A-NEXT: ;;#ASMSTART 7454; GFX90A-NEXT: ; def v[0:3] 7455; GFX90A-NEXT: ;;#ASMEND 7456; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 7457; GFX90A-NEXT: v_mov_b32_e32 v8, 0 7458; GFX90A-NEXT: ;;#ASMSTART 7459; GFX90A-NEXT: ; def v[4:7] 7460; GFX90A-NEXT: ;;#ASMEND 7461; GFX90A-NEXT: v_perm_b32 v0, v3, v6, s4 7462; GFX90A-NEXT: global_store_dword v8, v0, s[16:17] 7463; GFX90A-NEXT: s_waitcnt vmcnt(0) 7464; GFX90A-NEXT: s_setpc_b64 s[30:31] 7465; 7466; GFX940-LABEL: v_shuffle_v2i16_v8i16__13_7: 7467; GFX940: ; %bb.0: 7468; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7469; GFX940-NEXT: ;;#ASMSTART 7470; GFX940-NEXT: ; def v[0:3] 7471; GFX940-NEXT: ;;#ASMEND 7472; GFX940-NEXT: s_mov_b32 s2, 0x7060302 7473; GFX940-NEXT: v_mov_b32_e32 v8, 0 7474; GFX940-NEXT: ;;#ASMSTART 7475; GFX940-NEXT: ; def v[4:7] 7476; GFX940-NEXT: ;;#ASMEND 7477; GFX940-NEXT: s_nop 0 7478; GFX940-NEXT: v_perm_b32 v0, v3, v6, s2 7479; GFX940-NEXT: global_store_dword v8, v0, 
s[0:1] sc0 sc1 7480; GFX940-NEXT: s_waitcnt vmcnt(0) 7481; GFX940-NEXT: s_setpc_b64 s[30:31] 7482 %vec0 = call <8 x i16> asm "; def $0", "=v"() 7483 %vec1 = call <8 x i16> asm "; def $0", "=v"() 7484 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 13, i32 7> 7485 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 7486 ret void 7487} 7488 7489define void @v_shuffle_v2i16_v8i16__14_7(ptr addrspace(1) inreg %ptr) { 7490; GFX900-LABEL: v_shuffle_v2i16_v8i16__14_7: 7491; GFX900: ; %bb.0: 7492; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7493; GFX900-NEXT: ;;#ASMSTART 7494; GFX900-NEXT: ; def v[0:3] 7495; GFX900-NEXT: ;;#ASMEND 7496; GFX900-NEXT: s_mov_b32 s4, 0xffff 7497; GFX900-NEXT: v_mov_b32_e32 v8, 0 7498; GFX900-NEXT: ;;#ASMSTART 7499; GFX900-NEXT: ; def v[4:7] 7500; GFX900-NEXT: ;;#ASMEND 7501; GFX900-NEXT: v_bfi_b32 v0, s4, v7, v3 7502; GFX900-NEXT: global_store_dword v8, v0, s[16:17] 7503; GFX900-NEXT: s_waitcnt vmcnt(0) 7504; GFX900-NEXT: s_setpc_b64 s[30:31] 7505; 7506; GFX90A-LABEL: v_shuffle_v2i16_v8i16__14_7: 7507; GFX90A: ; %bb.0: 7508; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7509; GFX90A-NEXT: ;;#ASMSTART 7510; GFX90A-NEXT: ; def v[0:3] 7511; GFX90A-NEXT: ;;#ASMEND 7512; GFX90A-NEXT: s_mov_b32 s4, 0xffff 7513; GFX90A-NEXT: v_mov_b32_e32 v8, 0 7514; GFX90A-NEXT: ;;#ASMSTART 7515; GFX90A-NEXT: ; def v[4:7] 7516; GFX90A-NEXT: ;;#ASMEND 7517; GFX90A-NEXT: v_bfi_b32 v0, s4, v7, v3 7518; GFX90A-NEXT: global_store_dword v8, v0, s[16:17] 7519; GFX90A-NEXT: s_waitcnt vmcnt(0) 7520; GFX90A-NEXT: s_setpc_b64 s[30:31] 7521; 7522; GFX940-LABEL: v_shuffle_v2i16_v8i16__14_7: 7523; GFX940: ; %bb.0: 7524; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7525; GFX940-NEXT: ;;#ASMSTART 7526; GFX940-NEXT: ; def v[0:3] 7527; GFX940-NEXT: ;;#ASMEND 7528; GFX940-NEXT: s_mov_b32 s2, 0xffff 7529; GFX940-NEXT: v_mov_b32_e32 v8, 0 7530; GFX940-NEXT: ;;#ASMSTART 7531; GFX940-NEXT: ; def v[4:7] 7532; GFX940-NEXT: ;;#ASMEND 7533; 
GFX940-NEXT: s_nop 0 7534; GFX940-NEXT: v_bfi_b32 v0, s2, v7, v3 7535; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 7536; GFX940-NEXT: s_waitcnt vmcnt(0) 7537; GFX940-NEXT: s_setpc_b64 s[30:31] 7538 %vec0 = call <8 x i16> asm "; def $0", "=v"() 7539 %vec1 = call <8 x i16> asm "; def $0", "=v"() 7540 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 14, i32 7> 7541 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 7542 ret void 7543} 7544 7545define void @v_shuffle_v2i16_v8i16__u_8(ptr addrspace(1) inreg %ptr) { 7546; GFX9-LABEL: v_shuffle_v2i16_v8i16__u_8: 7547; GFX9: ; %bb.0: 7548; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7549; GFX9-NEXT: s_setpc_b64 s[30:31] 7550 %vec0 = call <8 x i16> asm "; def $0", "=v"() 7551 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 poison, i32 8> 7552 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 7553 ret void 7554} 7555 7556define void @v_shuffle_v2i16_v8i16__0_8(ptr addrspace(1) inreg %ptr) { 7557; GFX900-LABEL: v_shuffle_v2i16_v8i16__0_8: 7558; GFX900: ; %bb.0: 7559; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7560; GFX900-NEXT: v_mov_b32_e32 v4, 0 7561; GFX900-NEXT: ;;#ASMSTART 7562; GFX900-NEXT: ; def v[0:3] 7563; GFX900-NEXT: ;;#ASMEND 7564; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 7565; GFX900-NEXT: s_waitcnt vmcnt(0) 7566; GFX900-NEXT: s_setpc_b64 s[30:31] 7567; 7568; GFX90A-LABEL: v_shuffle_v2i16_v8i16__0_8: 7569; GFX90A: ; %bb.0: 7570; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7571; GFX90A-NEXT: v_mov_b32_e32 v4, 0 7572; GFX90A-NEXT: ;;#ASMSTART 7573; GFX90A-NEXT: ; def v[0:3] 7574; GFX90A-NEXT: ;;#ASMEND 7575; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 7576; GFX90A-NEXT: s_waitcnt vmcnt(0) 7577; GFX90A-NEXT: s_setpc_b64 s[30:31] 7578; 7579; GFX940-LABEL: v_shuffle_v2i16_v8i16__0_8: 7580; GFX940: ; %bb.0: 7581; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7582; GFX940-NEXT: v_mov_b32_e32 v4, 0 7583; 
GFX940-NEXT: ;;#ASMSTART 7584; GFX940-NEXT: ; def v[0:3] 7585; GFX940-NEXT: ;;#ASMEND 7586; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 7587; GFX940-NEXT: s_waitcnt vmcnt(0) 7588; GFX940-NEXT: s_setpc_b64 s[30:31] 7589 %vec0 = call <8 x i16> asm "; def $0", "=v"() 7590 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 0, i32 8> 7591 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 7592 ret void 7593} 7594 7595define void @v_shuffle_v2i16_v8i16__1_8(ptr addrspace(1) inreg %ptr) { 7596; GFX900-LABEL: v_shuffle_v2i16_v8i16__1_8: 7597; GFX900: ; %bb.0: 7598; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7599; GFX900-NEXT: ;;#ASMSTART 7600; GFX900-NEXT: ; def v[0:3] 7601; GFX900-NEXT: ;;#ASMEND 7602; GFX900-NEXT: v_mov_b32_e32 v4, 0 7603; GFX900-NEXT: v_alignbit_b32 v0, s4, v0, 16 7604; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 7605; GFX900-NEXT: s_waitcnt vmcnt(0) 7606; GFX900-NEXT: s_setpc_b64 s[30:31] 7607; 7608; GFX90A-LABEL: v_shuffle_v2i16_v8i16__1_8: 7609; GFX90A: ; %bb.0: 7610; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7611; GFX90A-NEXT: ;;#ASMSTART 7612; GFX90A-NEXT: ; def v[0:3] 7613; GFX90A-NEXT: ;;#ASMEND 7614; GFX90A-NEXT: v_mov_b32_e32 v4, 0 7615; GFX90A-NEXT: v_alignbit_b32 v0, s4, v0, 16 7616; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 7617; GFX90A-NEXT: s_waitcnt vmcnt(0) 7618; GFX90A-NEXT: s_setpc_b64 s[30:31] 7619; 7620; GFX940-LABEL: v_shuffle_v2i16_v8i16__1_8: 7621; GFX940: ; %bb.0: 7622; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7623; GFX940-NEXT: ;;#ASMSTART 7624; GFX940-NEXT: ; def v[0:3] 7625; GFX940-NEXT: ;;#ASMEND 7626; GFX940-NEXT: v_mov_b32_e32 v4, 0 7627; GFX940-NEXT: v_alignbit_b32 v0, s0, v0, 16 7628; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 7629; GFX940-NEXT: s_waitcnt vmcnt(0) 7630; GFX940-NEXT: s_setpc_b64 s[30:31] 7631 %vec0 = call <8 x i16> asm "; def $0", "=v"() 7632 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 1, 
i32 8> 7633 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 7634 ret void 7635} 7636 7637define void @v_shuffle_v2i16_v8i16__2_8(ptr addrspace(1) inreg %ptr) { 7638; GFX900-LABEL: v_shuffle_v2i16_v8i16__2_8: 7639; GFX900: ; %bb.0: 7640; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7641; GFX900-NEXT: v_mov_b32_e32 v4, 0 7642; GFX900-NEXT: ;;#ASMSTART 7643; GFX900-NEXT: ; def v[0:3] 7644; GFX900-NEXT: ;;#ASMEND 7645; GFX900-NEXT: global_store_dword v4, v1, s[16:17] 7646; GFX900-NEXT: s_waitcnt vmcnt(0) 7647; GFX900-NEXT: s_setpc_b64 s[30:31] 7648; 7649; GFX90A-LABEL: v_shuffle_v2i16_v8i16__2_8: 7650; GFX90A: ; %bb.0: 7651; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7652; GFX90A-NEXT: v_mov_b32_e32 v4, 0 7653; GFX90A-NEXT: ;;#ASMSTART 7654; GFX90A-NEXT: ; def v[0:3] 7655; GFX90A-NEXT: ;;#ASMEND 7656; GFX90A-NEXT: global_store_dword v4, v1, s[16:17] 7657; GFX90A-NEXT: s_waitcnt vmcnt(0) 7658; GFX90A-NEXT: s_setpc_b64 s[30:31] 7659; 7660; GFX940-LABEL: v_shuffle_v2i16_v8i16__2_8: 7661; GFX940: ; %bb.0: 7662; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7663; GFX940-NEXT: v_mov_b32_e32 v4, 0 7664; GFX940-NEXT: ;;#ASMSTART 7665; GFX940-NEXT: ; def v[0:3] 7666; GFX940-NEXT: ;;#ASMEND 7667; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 7668; GFX940-NEXT: s_waitcnt vmcnt(0) 7669; GFX940-NEXT: s_setpc_b64 s[30:31] 7670 %vec0 = call <8 x i16> asm "; def $0", "=v"() 7671 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 2, i32 8> 7672 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 7673 ret void 7674} 7675 7676define void @v_shuffle_v2i16_v8i16__3_8(ptr addrspace(1) inreg %ptr) { 7677; GFX900-LABEL: v_shuffle_v2i16_v8i16__3_8: 7678; GFX900: ; %bb.0: 7679; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7680; GFX900-NEXT: ;;#ASMSTART 7681; GFX900-NEXT: ; def v[0:3] 7682; GFX900-NEXT: ;;#ASMEND 7683; GFX900-NEXT: v_mov_b32_e32 v4, 0 7684; GFX900-NEXT: v_alignbit_b32 v0, s4, v1, 16 7685; GFX900-NEXT: 
global_store_dword v4, v0, s[16:17] 7686; GFX900-NEXT: s_waitcnt vmcnt(0) 7687; GFX900-NEXT: s_setpc_b64 s[30:31] 7688; 7689; GFX90A-LABEL: v_shuffle_v2i16_v8i16__3_8: 7690; GFX90A: ; %bb.0: 7691; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7692; GFX90A-NEXT: ;;#ASMSTART 7693; GFX90A-NEXT: ; def v[0:3] 7694; GFX90A-NEXT: ;;#ASMEND 7695; GFX90A-NEXT: v_mov_b32_e32 v4, 0 7696; GFX90A-NEXT: v_alignbit_b32 v0, s4, v1, 16 7697; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 7698; GFX90A-NEXT: s_waitcnt vmcnt(0) 7699; GFX90A-NEXT: s_setpc_b64 s[30:31] 7700; 7701; GFX940-LABEL: v_shuffle_v2i16_v8i16__3_8: 7702; GFX940: ; %bb.0: 7703; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7704; GFX940-NEXT: ;;#ASMSTART 7705; GFX940-NEXT: ; def v[0:3] 7706; GFX940-NEXT: ;;#ASMEND 7707; GFX940-NEXT: v_mov_b32_e32 v4, 0 7708; GFX940-NEXT: v_alignbit_b32 v0, s0, v1, 16 7709; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 7710; GFX940-NEXT: s_waitcnt vmcnt(0) 7711; GFX940-NEXT: s_setpc_b64 s[30:31] 7712 %vec0 = call <8 x i16> asm "; def $0", "=v"() 7713 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 3, i32 8> 7714 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 7715 ret void 7716} 7717 7718define void @v_shuffle_v2i16_v8i16__4_8(ptr addrspace(1) inreg %ptr) { 7719; GFX900-LABEL: v_shuffle_v2i16_v8i16__4_8: 7720; GFX900: ; %bb.0: 7721; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7722; GFX900-NEXT: v_mov_b32_e32 v4, 0 7723; GFX900-NEXT: ;;#ASMSTART 7724; GFX900-NEXT: ; def v[0:3] 7725; GFX900-NEXT: ;;#ASMEND 7726; GFX900-NEXT: global_store_dword v4, v2, s[16:17] 7727; GFX900-NEXT: s_waitcnt vmcnt(0) 7728; GFX900-NEXT: s_setpc_b64 s[30:31] 7729; 7730; GFX90A-LABEL: v_shuffle_v2i16_v8i16__4_8: 7731; GFX90A: ; %bb.0: 7732; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7733; GFX90A-NEXT: v_mov_b32_e32 v4, 0 7734; GFX90A-NEXT: ;;#ASMSTART 7735; GFX90A-NEXT: ; def v[0:3] 7736; GFX90A-NEXT: ;;#ASMEND 7737; GFX90A-NEXT: 
global_store_dword v4, v2, s[16:17] 7738; GFX90A-NEXT: s_waitcnt vmcnt(0) 7739; GFX90A-NEXT: s_setpc_b64 s[30:31] 7740; 7741; GFX940-LABEL: v_shuffle_v2i16_v8i16__4_8: 7742; GFX940: ; %bb.0: 7743; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7744; GFX940-NEXT: v_mov_b32_e32 v4, 0 7745; GFX940-NEXT: ;;#ASMSTART 7746; GFX940-NEXT: ; def v[0:3] 7747; GFX940-NEXT: ;;#ASMEND 7748; GFX940-NEXT: global_store_dword v4, v2, s[0:1] sc0 sc1 7749; GFX940-NEXT: s_waitcnt vmcnt(0) 7750; GFX940-NEXT: s_setpc_b64 s[30:31] 7751 %vec0 = call <8 x i16> asm "; def $0", "=v"() 7752 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 4, i32 8> 7753 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 7754 ret void 7755} 7756 7757define void @v_shuffle_v2i16_v8i16__5_8(ptr addrspace(1) inreg %ptr) { 7758; GFX900-LABEL: v_shuffle_v2i16_v8i16__5_8: 7759; GFX900: ; %bb.0: 7760; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7761; GFX900-NEXT: ;;#ASMSTART 7762; GFX900-NEXT: ; def v[0:3] 7763; GFX900-NEXT: ;;#ASMEND 7764; GFX900-NEXT: v_mov_b32_e32 v4, 0 7765; GFX900-NEXT: v_alignbit_b32 v0, s4, v2, 16 7766; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 7767; GFX900-NEXT: s_waitcnt vmcnt(0) 7768; GFX900-NEXT: s_setpc_b64 s[30:31] 7769; 7770; GFX90A-LABEL: v_shuffle_v2i16_v8i16__5_8: 7771; GFX90A: ; %bb.0: 7772; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7773; GFX90A-NEXT: ;;#ASMSTART 7774; GFX90A-NEXT: ; def v[0:3] 7775; GFX90A-NEXT: ;;#ASMEND 7776; GFX90A-NEXT: v_mov_b32_e32 v4, 0 7777; GFX90A-NEXT: v_alignbit_b32 v0, s4, v2, 16 7778; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 7779; GFX90A-NEXT: s_waitcnt vmcnt(0) 7780; GFX90A-NEXT: s_setpc_b64 s[30:31] 7781; 7782; GFX940-LABEL: v_shuffle_v2i16_v8i16__5_8: 7783; GFX940: ; %bb.0: 7784; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7785; GFX940-NEXT: ;;#ASMSTART 7786; GFX940-NEXT: ; def v[0:3] 7787; GFX940-NEXT: ;;#ASMEND 7788; GFX940-NEXT: v_mov_b32_e32 v4, 0 7789; GFX940-NEXT: 
v_alignbit_b32 v0, s0, v2, 16 7790; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 7791; GFX940-NEXT: s_waitcnt vmcnt(0) 7792; GFX940-NEXT: s_setpc_b64 s[30:31] 7793 %vec0 = call <8 x i16> asm "; def $0", "=v"() 7794 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 5, i32 8> 7795 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 7796 ret void 7797} 7798 7799define void @v_shuffle_v2i16_v8i16__6_8(ptr addrspace(1) inreg %ptr) { 7800; GFX900-LABEL: v_shuffle_v2i16_v8i16__6_8: 7801; GFX900: ; %bb.0: 7802; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7803; GFX900-NEXT: v_mov_b32_e32 v4, 0 7804; GFX900-NEXT: ;;#ASMSTART 7805; GFX900-NEXT: ; def v[0:3] 7806; GFX900-NEXT: ;;#ASMEND 7807; GFX900-NEXT: global_store_dword v4, v3, s[16:17] 7808; GFX900-NEXT: s_waitcnt vmcnt(0) 7809; GFX900-NEXT: s_setpc_b64 s[30:31] 7810; 7811; GFX90A-LABEL: v_shuffle_v2i16_v8i16__6_8: 7812; GFX90A: ; %bb.0: 7813; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7814; GFX90A-NEXT: v_mov_b32_e32 v4, 0 7815; GFX90A-NEXT: ;;#ASMSTART 7816; GFX90A-NEXT: ; def v[0:3] 7817; GFX90A-NEXT: ;;#ASMEND 7818; GFX90A-NEXT: global_store_dword v4, v3, s[16:17] 7819; GFX90A-NEXT: s_waitcnt vmcnt(0) 7820; GFX90A-NEXT: s_setpc_b64 s[30:31] 7821; 7822; GFX940-LABEL: v_shuffle_v2i16_v8i16__6_8: 7823; GFX940: ; %bb.0: 7824; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7825; GFX940-NEXT: v_mov_b32_e32 v4, 0 7826; GFX940-NEXT: ;;#ASMSTART 7827; GFX940-NEXT: ; def v[0:3] 7828; GFX940-NEXT: ;;#ASMEND 7829; GFX940-NEXT: global_store_dword v4, v3, s[0:1] sc0 sc1 7830; GFX940-NEXT: s_waitcnt vmcnt(0) 7831; GFX940-NEXT: s_setpc_b64 s[30:31] 7832 %vec0 = call <8 x i16> asm "; def $0", "=v"() 7833 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 6, i32 8> 7834 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 7835 ret void 7836} 7837 7838define void @v_shuffle_v2i16_v8i16__7_8(ptr addrspace(1) inreg %ptr) { 7839; GFX900-LABEL: 
v_shuffle_v2i16_v8i16__7_8: 7840; GFX900: ; %bb.0: 7841; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7842; GFX900-NEXT: ;;#ASMSTART 7843; GFX900-NEXT: ; def v[0:3] 7844; GFX900-NEXT: ;;#ASMEND 7845; GFX900-NEXT: v_mov_b32_e32 v4, 0 7846; GFX900-NEXT: v_alignbit_b32 v0, s4, v3, 16 7847; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 7848; GFX900-NEXT: s_waitcnt vmcnt(0) 7849; GFX900-NEXT: s_setpc_b64 s[30:31] 7850; 7851; GFX90A-LABEL: v_shuffle_v2i16_v8i16__7_8: 7852; GFX90A: ; %bb.0: 7853; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7854; GFX90A-NEXT: ;;#ASMSTART 7855; GFX90A-NEXT: ; def v[0:3] 7856; GFX90A-NEXT: ;;#ASMEND 7857; GFX90A-NEXT: v_mov_b32_e32 v4, 0 7858; GFX90A-NEXT: v_alignbit_b32 v0, s4, v3, 16 7859; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 7860; GFX90A-NEXT: s_waitcnt vmcnt(0) 7861; GFX90A-NEXT: s_setpc_b64 s[30:31] 7862; 7863; GFX940-LABEL: v_shuffle_v2i16_v8i16__7_8: 7864; GFX940: ; %bb.0: 7865; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7866; GFX940-NEXT: ;;#ASMSTART 7867; GFX940-NEXT: ; def v[0:3] 7868; GFX940-NEXT: ;;#ASMEND 7869; GFX940-NEXT: v_mov_b32_e32 v4, 0 7870; GFX940-NEXT: v_alignbit_b32 v0, s0, v3, 16 7871; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 7872; GFX940-NEXT: s_waitcnt vmcnt(0) 7873; GFX940-NEXT: s_setpc_b64 s[30:31] 7874 %vec0 = call <8 x i16> asm "; def $0", "=v"() 7875 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 7, i32 8> 7876 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 7877 ret void 7878} 7879 7880define void @v_shuffle_v2i16_v8i16__8_8(ptr addrspace(1) inreg %ptr) { 7881; GFX9-LABEL: v_shuffle_v2i16_v8i16__8_8: 7882; GFX9: ; %bb.0: 7883; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7884; GFX9-NEXT: s_setpc_b64 s[30:31] 7885 %vec0 = call <8 x i16> asm "; def $0", "=v"() 7886 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 8, i32 8> 7887 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 7888 ret void 
7889} 7890 7891define void @v_shuffle_v2i16_v8i16__9_8(ptr addrspace(1) inreg %ptr) { 7892; GFX900-LABEL: v_shuffle_v2i16_v8i16__9_8: 7893; GFX900: ; %bb.0: 7894; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7895; GFX900-NEXT: ;;#ASMSTART 7896; GFX900-NEXT: ; def v[0:3] 7897; GFX900-NEXT: ;;#ASMEND 7898; GFX900-NEXT: v_mov_b32_e32 v4, 0 7899; GFX900-NEXT: v_alignbit_b32 v0, v0, v0, 16 7900; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 7901; GFX900-NEXT: s_waitcnt vmcnt(0) 7902; GFX900-NEXT: s_setpc_b64 s[30:31] 7903; 7904; GFX90A-LABEL: v_shuffle_v2i16_v8i16__9_8: 7905; GFX90A: ; %bb.0: 7906; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7907; GFX90A-NEXT: ;;#ASMSTART 7908; GFX90A-NEXT: ; def v[0:3] 7909; GFX90A-NEXT: ;;#ASMEND 7910; GFX90A-NEXT: v_mov_b32_e32 v4, 0 7911; GFX90A-NEXT: v_alignbit_b32 v0, v0, v0, 16 7912; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 7913; GFX90A-NEXT: s_waitcnt vmcnt(0) 7914; GFX90A-NEXT: s_setpc_b64 s[30:31] 7915; 7916; GFX940-LABEL: v_shuffle_v2i16_v8i16__9_8: 7917; GFX940: ; %bb.0: 7918; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7919; GFX940-NEXT: ;;#ASMSTART 7920; GFX940-NEXT: ; def v[0:3] 7921; GFX940-NEXT: ;;#ASMEND 7922; GFX940-NEXT: v_mov_b32_e32 v4, 0 7923; GFX940-NEXT: v_alignbit_b32 v0, v0, v0, 16 7924; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 7925; GFX940-NEXT: s_waitcnt vmcnt(0) 7926; GFX940-NEXT: s_setpc_b64 s[30:31] 7927 %vec0 = call <8 x i16> asm "; def $0", "=v"() 7928 %vec1 = call <8 x i16> asm "; def $0", "=v"() 7929 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 9, i32 8> 7930 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 7931 ret void 7932} 7933 7934define void @v_shuffle_v2i16_v8i16__10_8(ptr addrspace(1) inreg %ptr) { 7935; GFX900-LABEL: v_shuffle_v2i16_v8i16__10_8: 7936; GFX900: ; %bb.0: 7937; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7938; GFX900-NEXT: ;;#ASMSTART 7939; GFX900-NEXT: ; def v[0:3] 7940; GFX900-NEXT: 
;;#ASMEND 7941; GFX900-NEXT: s_mov_b32 s4, 0x5040100 7942; GFX900-NEXT: v_mov_b32_e32 v4, 0 7943; GFX900-NEXT: v_perm_b32 v0, v0, v1, s4 7944; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 7945; GFX900-NEXT: s_waitcnt vmcnt(0) 7946; GFX900-NEXT: s_setpc_b64 s[30:31] 7947; 7948; GFX90A-LABEL: v_shuffle_v2i16_v8i16__10_8: 7949; GFX90A: ; %bb.0: 7950; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7951; GFX90A-NEXT: ;;#ASMSTART 7952; GFX90A-NEXT: ; def v[0:3] 7953; GFX90A-NEXT: ;;#ASMEND 7954; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 7955; GFX90A-NEXT: v_mov_b32_e32 v4, 0 7956; GFX90A-NEXT: v_perm_b32 v0, v0, v1, s4 7957; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 7958; GFX90A-NEXT: s_waitcnt vmcnt(0) 7959; GFX90A-NEXT: s_setpc_b64 s[30:31] 7960; 7961; GFX940-LABEL: v_shuffle_v2i16_v8i16__10_8: 7962; GFX940: ; %bb.0: 7963; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7964; GFX940-NEXT: ;;#ASMSTART 7965; GFX940-NEXT: ; def v[0:3] 7966; GFX940-NEXT: ;;#ASMEND 7967; GFX940-NEXT: s_mov_b32 s2, 0x5040100 7968; GFX940-NEXT: v_mov_b32_e32 v4, 0 7969; GFX940-NEXT: v_perm_b32 v0, v0, v1, s2 7970; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 7971; GFX940-NEXT: s_waitcnt vmcnt(0) 7972; GFX940-NEXT: s_setpc_b64 s[30:31] 7973 %vec0 = call <8 x i16> asm "; def $0", "=v"() 7974 %vec1 = call <8 x i16> asm "; def $0", "=v"() 7975 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 10, i32 8> 7976 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 7977 ret void 7978} 7979 7980define void @v_shuffle_v2i16_v8i16__11_8(ptr addrspace(1) inreg %ptr) { 7981; GFX900-LABEL: v_shuffle_v2i16_v8i16__11_8: 7982; GFX900: ; %bb.0: 7983; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7984; GFX900-NEXT: ;;#ASMSTART 7985; GFX900-NEXT: ; def v[0:3] 7986; GFX900-NEXT: ;;#ASMEND 7987; GFX900-NEXT: v_mov_b32_e32 v4, 0 7988; GFX900-NEXT: v_alignbit_b32 v0, v0, v1, 16 7989; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 7990; GFX900-NEXT: s_waitcnt 
vmcnt(0) 7991; GFX900-NEXT: s_setpc_b64 s[30:31] 7992; 7993; GFX90A-LABEL: v_shuffle_v2i16_v8i16__11_8: 7994; GFX90A: ; %bb.0: 7995; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7996; GFX90A-NEXT: ;;#ASMSTART 7997; GFX90A-NEXT: ; def v[0:3] 7998; GFX90A-NEXT: ;;#ASMEND 7999; GFX90A-NEXT: v_mov_b32_e32 v4, 0 8000; GFX90A-NEXT: v_alignbit_b32 v0, v0, v1, 16 8001; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 8002; GFX90A-NEXT: s_waitcnt vmcnt(0) 8003; GFX90A-NEXT: s_setpc_b64 s[30:31] 8004; 8005; GFX940-LABEL: v_shuffle_v2i16_v8i16__11_8: 8006; GFX940: ; %bb.0: 8007; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8008; GFX940-NEXT: ;;#ASMSTART 8009; GFX940-NEXT: ; def v[0:3] 8010; GFX940-NEXT: ;;#ASMEND 8011; GFX940-NEXT: v_mov_b32_e32 v4, 0 8012; GFX940-NEXT: v_alignbit_b32 v0, v0, v1, 16 8013; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 8014; GFX940-NEXT: s_waitcnt vmcnt(0) 8015; GFX940-NEXT: s_setpc_b64 s[30:31] 8016 %vec0 = call <8 x i16> asm "; def $0", "=v"() 8017 %vec1 = call <8 x i16> asm "; def $0", "=v"() 8018 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 11, i32 8> 8019 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 8020 ret void 8021} 8022 8023define void @v_shuffle_v2i16_v8i16__12_8(ptr addrspace(1) inreg %ptr) { 8024; GFX900-LABEL: v_shuffle_v2i16_v8i16__12_8: 8025; GFX900: ; %bb.0: 8026; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8027; GFX900-NEXT: ;;#ASMSTART 8028; GFX900-NEXT: ; def v[0:3] 8029; GFX900-NEXT: ;;#ASMEND 8030; GFX900-NEXT: s_mov_b32 s4, 0x5040100 8031; GFX900-NEXT: v_mov_b32_e32 v4, 0 8032; GFX900-NEXT: v_perm_b32 v0, v0, v2, s4 8033; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 8034; GFX900-NEXT: s_waitcnt vmcnt(0) 8035; GFX900-NEXT: s_setpc_b64 s[30:31] 8036; 8037; GFX90A-LABEL: v_shuffle_v2i16_v8i16__12_8: 8038; GFX90A: ; %bb.0: 8039; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8040; GFX90A-NEXT: ;;#ASMSTART 8041; GFX90A-NEXT: ; def v[0:3] 8042; 
GFX90A-NEXT: ;;#ASMEND 8043; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 8044; GFX90A-NEXT: v_mov_b32_e32 v4, 0 8045; GFX90A-NEXT: v_perm_b32 v0, v0, v2, s4 8046; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 8047; GFX90A-NEXT: s_waitcnt vmcnt(0) 8048; GFX90A-NEXT: s_setpc_b64 s[30:31] 8049; 8050; GFX940-LABEL: v_shuffle_v2i16_v8i16__12_8: 8051; GFX940: ; %bb.0: 8052; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8053; GFX940-NEXT: ;;#ASMSTART 8054; GFX940-NEXT: ; def v[0:3] 8055; GFX940-NEXT: ;;#ASMEND 8056; GFX940-NEXT: s_mov_b32 s2, 0x5040100 8057; GFX940-NEXT: v_mov_b32_e32 v4, 0 8058; GFX940-NEXT: v_perm_b32 v0, v0, v2, s2 8059; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 8060; GFX940-NEXT: s_waitcnt vmcnt(0) 8061; GFX940-NEXT: s_setpc_b64 s[30:31] 8062 %vec0 = call <8 x i16> asm "; def $0", "=v"() 8063 %vec1 = call <8 x i16> asm "; def $0", "=v"() 8064 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 12, i32 8> 8065 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 8066 ret void 8067} 8068 8069define void @v_shuffle_v2i16_v8i16__13_8(ptr addrspace(1) inreg %ptr) { 8070; GFX900-LABEL: v_shuffle_v2i16_v8i16__13_8: 8071; GFX900: ; %bb.0: 8072; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8073; GFX900-NEXT: ;;#ASMSTART 8074; GFX900-NEXT: ; def v[0:3] 8075; GFX900-NEXT: ;;#ASMEND 8076; GFX900-NEXT: v_mov_b32_e32 v4, 0 8077; GFX900-NEXT: v_alignbit_b32 v0, v0, v2, 16 8078; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 8079; GFX900-NEXT: s_waitcnt vmcnt(0) 8080; GFX900-NEXT: s_setpc_b64 s[30:31] 8081; 8082; GFX90A-LABEL: v_shuffle_v2i16_v8i16__13_8: 8083; GFX90A: ; %bb.0: 8084; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8085; GFX90A-NEXT: ;;#ASMSTART 8086; GFX90A-NEXT: ; def v[0:3] 8087; GFX90A-NEXT: ;;#ASMEND 8088; GFX90A-NEXT: v_mov_b32_e32 v4, 0 8089; GFX90A-NEXT: v_alignbit_b32 v0, v0, v2, 16 8090; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 8091; GFX90A-NEXT: s_waitcnt vmcnt(0) 8092; GFX90A-NEXT: 
s_setpc_b64 s[30:31] 8093; 8094; GFX940-LABEL: v_shuffle_v2i16_v8i16__13_8: 8095; GFX940: ; %bb.0: 8096; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8097; GFX940-NEXT: ;;#ASMSTART 8098; GFX940-NEXT: ; def v[0:3] 8099; GFX940-NEXT: ;;#ASMEND 8100; GFX940-NEXT: v_mov_b32_e32 v4, 0 8101; GFX940-NEXT: v_alignbit_b32 v0, v0, v2, 16 8102; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 8103; GFX940-NEXT: s_waitcnt vmcnt(0) 8104; GFX940-NEXT: s_setpc_b64 s[30:31] 8105 %vec0 = call <8 x i16> asm "; def $0", "=v"() 8106 %vec1 = call <8 x i16> asm "; def $0", "=v"() 8107 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 13, i32 8> 8108 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 8109 ret void 8110} 8111 8112define void @v_shuffle_v2i16_v8i16__14_8(ptr addrspace(1) inreg %ptr) { 8113; GFX900-LABEL: v_shuffle_v2i16_v8i16__14_8: 8114; GFX900: ; %bb.0: 8115; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8116; GFX900-NEXT: ;;#ASMSTART 8117; GFX900-NEXT: ; def v[0:3] 8118; GFX900-NEXT: ;;#ASMEND 8119; GFX900-NEXT: s_mov_b32 s4, 0x5040100 8120; GFX900-NEXT: v_mov_b32_e32 v4, 0 8121; GFX900-NEXT: v_perm_b32 v0, v0, v3, s4 8122; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 8123; GFX900-NEXT: s_waitcnt vmcnt(0) 8124; GFX900-NEXT: s_setpc_b64 s[30:31] 8125; 8126; GFX90A-LABEL: v_shuffle_v2i16_v8i16__14_8: 8127; GFX90A: ; %bb.0: 8128; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8129; GFX90A-NEXT: ;;#ASMSTART 8130; GFX90A-NEXT: ; def v[0:3] 8131; GFX90A-NEXT: ;;#ASMEND 8132; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 8133; GFX90A-NEXT: v_mov_b32_e32 v4, 0 8134; GFX90A-NEXT: v_perm_b32 v0, v0, v3, s4 8135; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 8136; GFX90A-NEXT: s_waitcnt vmcnt(0) 8137; GFX90A-NEXT: s_setpc_b64 s[30:31] 8138; 8139; GFX940-LABEL: v_shuffle_v2i16_v8i16__14_8: 8140; GFX940: ; %bb.0: 8141; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8142; GFX940-NEXT: ;;#ASMSTART 8143; GFX940-NEXT: ; def v[0:3] 
8144; GFX940-NEXT: ;;#ASMEND 8145; GFX940-NEXT: s_mov_b32 s2, 0x5040100 8146; GFX940-NEXT: v_mov_b32_e32 v4, 0 8147; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 8148; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 8149; GFX940-NEXT: s_waitcnt vmcnt(0) 8150; GFX940-NEXT: s_setpc_b64 s[30:31] 8151 %vec0 = call <8 x i16> asm "; def $0", "=v"() 8152 %vec1 = call <8 x i16> asm "; def $0", "=v"() 8153 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 14, i32 8> 8154 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 8155 ret void 8156} 8157 8158define void @v_shuffle_v2i16_v8i16__u_9(ptr addrspace(1) inreg %ptr) { 8159; GFX900-LABEL: v_shuffle_v2i16_v8i16__u_9: 8160; GFX900: ; %bb.0: 8161; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8162; GFX900-NEXT: v_mov_b32_e32 v4, 0 8163; GFX900-NEXT: ;;#ASMSTART 8164; GFX900-NEXT: ; def v[0:3] 8165; GFX900-NEXT: ;;#ASMEND 8166; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 8167; GFX900-NEXT: s_waitcnt vmcnt(0) 8168; GFX900-NEXT: s_setpc_b64 s[30:31] 8169; 8170; GFX90A-LABEL: v_shuffle_v2i16_v8i16__u_9: 8171; GFX90A: ; %bb.0: 8172; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8173; GFX90A-NEXT: v_mov_b32_e32 v4, 0 8174; GFX90A-NEXT: ;;#ASMSTART 8175; GFX90A-NEXT: ; def v[0:3] 8176; GFX90A-NEXT: ;;#ASMEND 8177; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 8178; GFX90A-NEXT: s_waitcnt vmcnt(0) 8179; GFX90A-NEXT: s_setpc_b64 s[30:31] 8180; 8181; GFX940-LABEL: v_shuffle_v2i16_v8i16__u_9: 8182; GFX940: ; %bb.0: 8183; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8184; GFX940-NEXT: v_mov_b32_e32 v4, 0 8185; GFX940-NEXT: ;;#ASMSTART 8186; GFX940-NEXT: ; def v[0:3] 8187; GFX940-NEXT: ;;#ASMEND 8188; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 8189; GFX940-NEXT: s_waitcnt vmcnt(0) 8190; GFX940-NEXT: s_setpc_b64 s[30:31] 8191 %vec0 = call <8 x i16> asm "; def $0", "=v"() 8192 %vec1 = call <8 x i16> asm "; def $0", "=v"() 8193 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> 
%vec1, <2 x i32> <i32 poison, i32 9> 8194 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 8195 ret void 8196} 8197 8198define void @v_shuffle_v2i16_v8i16__0_9(ptr addrspace(1) inreg %ptr) { 8199; GFX900-LABEL: v_shuffle_v2i16_v8i16__0_9: 8200; GFX900: ; %bb.0: 8201; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8202; GFX900-NEXT: ;;#ASMSTART 8203; GFX900-NEXT: ; def v[0:3] 8204; GFX900-NEXT: ;;#ASMEND 8205; GFX900-NEXT: s_mov_b32 s4, 0xffff 8206; GFX900-NEXT: v_mov_b32_e32 v5, 0 8207; GFX900-NEXT: ;;#ASMSTART 8208; GFX900-NEXT: ; def v[1:4] 8209; GFX900-NEXT: ;;#ASMEND 8210; GFX900-NEXT: v_bfi_b32 v0, s4, v0, v1 8211; GFX900-NEXT: global_store_dword v5, v0, s[16:17] 8212; GFX900-NEXT: s_waitcnt vmcnt(0) 8213; GFX900-NEXT: s_setpc_b64 s[30:31] 8214; 8215; GFX90A-LABEL: v_shuffle_v2i16_v8i16__0_9: 8216; GFX90A: ; %bb.0: 8217; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8218; GFX90A-NEXT: ;;#ASMSTART 8219; GFX90A-NEXT: ; def v[0:3] 8220; GFX90A-NEXT: ;;#ASMEND 8221; GFX90A-NEXT: s_mov_b32 s4, 0xffff 8222; GFX90A-NEXT: v_mov_b32_e32 v6, 0 8223; GFX90A-NEXT: ;;#ASMSTART 8224; GFX90A-NEXT: ; def v[2:5] 8225; GFX90A-NEXT: ;;#ASMEND 8226; GFX90A-NEXT: v_bfi_b32 v0, s4, v0, v2 8227; GFX90A-NEXT: global_store_dword v6, v0, s[16:17] 8228; GFX90A-NEXT: s_waitcnt vmcnt(0) 8229; GFX90A-NEXT: s_setpc_b64 s[30:31] 8230; 8231; GFX940-LABEL: v_shuffle_v2i16_v8i16__0_9: 8232; GFX940: ; %bb.0: 8233; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8234; GFX940-NEXT: ;;#ASMSTART 8235; GFX940-NEXT: ; def v[0:3] 8236; GFX940-NEXT: ;;#ASMEND 8237; GFX940-NEXT: s_mov_b32 s2, 0xffff 8238; GFX940-NEXT: v_mov_b32_e32 v6, 0 8239; GFX940-NEXT: ;;#ASMSTART 8240; GFX940-NEXT: ; def v[2:5] 8241; GFX940-NEXT: ;;#ASMEND 8242; GFX940-NEXT: s_nop 0 8243; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v2 8244; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 8245; GFX940-NEXT: s_waitcnt vmcnt(0) 8246; GFX940-NEXT: s_setpc_b64 s[30:31] 8247 %vec0 = call <8 x i16> asm "; def $0", 
"=v"() 8248 %vec1 = call <8 x i16> asm "; def $0", "=v"() 8249 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 0, i32 9> 8250 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 8251 ret void 8252} 8253 8254define void @v_shuffle_v2i16_v8i16__1_9(ptr addrspace(1) inreg %ptr) { 8255; GFX900-LABEL: v_shuffle_v2i16_v8i16__1_9: 8256; GFX900: ; %bb.0: 8257; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8258; GFX900-NEXT: ;;#ASMSTART 8259; GFX900-NEXT: ; def v[0:3] 8260; GFX900-NEXT: ;;#ASMEND 8261; GFX900-NEXT: s_mov_b32 s4, 0x7060302 8262; GFX900-NEXT: v_mov_b32_e32 v5, 0 8263; GFX900-NEXT: ;;#ASMSTART 8264; GFX900-NEXT: ; def v[1:4] 8265; GFX900-NEXT: ;;#ASMEND 8266; GFX900-NEXT: v_perm_b32 v0, v1, v0, s4 8267; GFX900-NEXT: global_store_dword v5, v0, s[16:17] 8268; GFX900-NEXT: s_waitcnt vmcnt(0) 8269; GFX900-NEXT: s_setpc_b64 s[30:31] 8270; 8271; GFX90A-LABEL: v_shuffle_v2i16_v8i16__1_9: 8272; GFX90A: ; %bb.0: 8273; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8274; GFX90A-NEXT: ;;#ASMSTART 8275; GFX90A-NEXT: ; def v[0:3] 8276; GFX90A-NEXT: ;;#ASMEND 8277; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 8278; GFX90A-NEXT: v_mov_b32_e32 v6, 0 8279; GFX90A-NEXT: ;;#ASMSTART 8280; GFX90A-NEXT: ; def v[2:5] 8281; GFX90A-NEXT: ;;#ASMEND 8282; GFX90A-NEXT: v_perm_b32 v0, v2, v0, s4 8283; GFX90A-NEXT: global_store_dword v6, v0, s[16:17] 8284; GFX90A-NEXT: s_waitcnt vmcnt(0) 8285; GFX90A-NEXT: s_setpc_b64 s[30:31] 8286; 8287; GFX940-LABEL: v_shuffle_v2i16_v8i16__1_9: 8288; GFX940: ; %bb.0: 8289; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8290; GFX940-NEXT: ;;#ASMSTART 8291; GFX940-NEXT: ; def v[0:3] 8292; GFX940-NEXT: ;;#ASMEND 8293; GFX940-NEXT: s_mov_b32 s2, 0x7060302 8294; GFX940-NEXT: v_mov_b32_e32 v6, 0 8295; GFX940-NEXT: ;;#ASMSTART 8296; GFX940-NEXT: ; def v[2:5] 8297; GFX940-NEXT: ;;#ASMEND 8298; GFX940-NEXT: s_nop 0 8299; GFX940-NEXT: v_perm_b32 v0, v2, v0, s2 8300; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 8301; 
GFX940-NEXT: s_waitcnt vmcnt(0) 8302; GFX940-NEXT: s_setpc_b64 s[30:31] 8303 %vec0 = call <8 x i16> asm "; def $0", "=v"() 8304 %vec1 = call <8 x i16> asm "; def $0", "=v"() 8305 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 1, i32 9> 8306 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 8307 ret void 8308} 8309 8310define void @v_shuffle_v2i16_v8i16__2_9(ptr addrspace(1) inreg %ptr) { 8311; GFX900-LABEL: v_shuffle_v2i16_v8i16__2_9: 8312; GFX900: ; %bb.0: 8313; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8314; GFX900-NEXT: ;;#ASMSTART 8315; GFX900-NEXT: ; def v[0:3] 8316; GFX900-NEXT: ;;#ASMEND 8317; GFX900-NEXT: s_mov_b32 s4, 0xffff 8318; GFX900-NEXT: v_mov_b32_e32 v6, 0 8319; GFX900-NEXT: ;;#ASMSTART 8320; GFX900-NEXT: ; def v[2:5] 8321; GFX900-NEXT: ;;#ASMEND 8322; GFX900-NEXT: v_bfi_b32 v0, s4, v1, v2 8323; GFX900-NEXT: global_store_dword v6, v0, s[16:17] 8324; GFX900-NEXT: s_waitcnt vmcnt(0) 8325; GFX900-NEXT: s_setpc_b64 s[30:31] 8326; 8327; GFX90A-LABEL: v_shuffle_v2i16_v8i16__2_9: 8328; GFX90A: ; %bb.0: 8329; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8330; GFX90A-NEXT: ;;#ASMSTART 8331; GFX90A-NEXT: ; def v[0:3] 8332; GFX90A-NEXT: ;;#ASMEND 8333; GFX90A-NEXT: s_mov_b32 s4, 0xffff 8334; GFX90A-NEXT: v_mov_b32_e32 v6, 0 8335; GFX90A-NEXT: ;;#ASMSTART 8336; GFX90A-NEXT: ; def v[2:5] 8337; GFX90A-NEXT: ;;#ASMEND 8338; GFX90A-NEXT: v_bfi_b32 v0, s4, v1, v2 8339; GFX90A-NEXT: global_store_dword v6, v0, s[16:17] 8340; GFX90A-NEXT: s_waitcnt vmcnt(0) 8341; GFX90A-NEXT: s_setpc_b64 s[30:31] 8342; 8343; GFX940-LABEL: v_shuffle_v2i16_v8i16__2_9: 8344; GFX940: ; %bb.0: 8345; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8346; GFX940-NEXT: ;;#ASMSTART 8347; GFX940-NEXT: ; def v[0:3] 8348; GFX940-NEXT: ;;#ASMEND 8349; GFX940-NEXT: s_mov_b32 s2, 0xffff 8350; GFX940-NEXT: v_mov_b32_e32 v6, 0 8351; GFX940-NEXT: ;;#ASMSTART 8352; GFX940-NEXT: ; def v[2:5] 8353; GFX940-NEXT: ;;#ASMEND 8354; GFX940-NEXT: s_nop 0 8355; 
GFX940-NEXT: v_bfi_b32 v0, s2, v1, v2 8356; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 8357; GFX940-NEXT: s_waitcnt vmcnt(0) 8358; GFX940-NEXT: s_setpc_b64 s[30:31] 8359 %vec0 = call <8 x i16> asm "; def $0", "=v"() 8360 %vec1 = call <8 x i16> asm "; def $0", "=v"() 8361 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 2, i32 9> 8362 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 8363 ret void 8364} 8365 8366define void @v_shuffle_v2i16_v8i16__3_9(ptr addrspace(1) inreg %ptr) { 8367; GFX900-LABEL: v_shuffle_v2i16_v8i16__3_9: 8368; GFX900: ; %bb.0: 8369; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8370; GFX900-NEXT: ;;#ASMSTART 8371; GFX900-NEXT: ; def v[0:3] 8372; GFX900-NEXT: ;;#ASMEND 8373; GFX900-NEXT: s_mov_b32 s4, 0x7060302 8374; GFX900-NEXT: v_mov_b32_e32 v6, 0 8375; GFX900-NEXT: ;;#ASMSTART 8376; GFX900-NEXT: ; def v[2:5] 8377; GFX900-NEXT: ;;#ASMEND 8378; GFX900-NEXT: v_perm_b32 v0, v2, v1, s4 8379; GFX900-NEXT: global_store_dword v6, v0, s[16:17] 8380; GFX900-NEXT: s_waitcnt vmcnt(0) 8381; GFX900-NEXT: s_setpc_b64 s[30:31] 8382; 8383; GFX90A-LABEL: v_shuffle_v2i16_v8i16__3_9: 8384; GFX90A: ; %bb.0: 8385; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8386; GFX90A-NEXT: ;;#ASMSTART 8387; GFX90A-NEXT: ; def v[0:3] 8388; GFX90A-NEXT: ;;#ASMEND 8389; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 8390; GFX90A-NEXT: v_mov_b32_e32 v6, 0 8391; GFX90A-NEXT: ;;#ASMSTART 8392; GFX90A-NEXT: ; def v[2:5] 8393; GFX90A-NEXT: ;;#ASMEND 8394; GFX90A-NEXT: v_perm_b32 v0, v2, v1, s4 8395; GFX90A-NEXT: global_store_dword v6, v0, s[16:17] 8396; GFX90A-NEXT: s_waitcnt vmcnt(0) 8397; GFX90A-NEXT: s_setpc_b64 s[30:31] 8398; 8399; GFX940-LABEL: v_shuffle_v2i16_v8i16__3_9: 8400; GFX940: ; %bb.0: 8401; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8402; GFX940-NEXT: ;;#ASMSTART 8403; GFX940-NEXT: ; def v[0:3] 8404; GFX940-NEXT: ;;#ASMEND 8405; GFX940-NEXT: s_mov_b32 s2, 0x7060302 8406; GFX940-NEXT: v_mov_b32_e32 v6, 0 8407; 
GFX940-NEXT: ;;#ASMSTART 8408; GFX940-NEXT: ; def v[2:5] 8409; GFX940-NEXT: ;;#ASMEND 8410; GFX940-NEXT: s_nop 0 8411; GFX940-NEXT: v_perm_b32 v0, v2, v1, s2 8412; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 8413; GFX940-NEXT: s_waitcnt vmcnt(0) 8414; GFX940-NEXT: s_setpc_b64 s[30:31] 8415 %vec0 = call <8 x i16> asm "; def $0", "=v"() 8416 %vec1 = call <8 x i16> asm "; def $0", "=v"() 8417 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 3, i32 9> 8418 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 8419 ret void 8420} 8421 8422define void @v_shuffle_v2i16_v8i16__4_9(ptr addrspace(1) inreg %ptr) { 8423; GFX900-LABEL: v_shuffle_v2i16_v8i16__4_9: 8424; GFX900: ; %bb.0: 8425; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8426; GFX900-NEXT: ;;#ASMSTART 8427; GFX900-NEXT: ; def v[0:3] 8428; GFX900-NEXT: ;;#ASMEND 8429; GFX900-NEXT: s_mov_b32 s4, 0xffff 8430; GFX900-NEXT: v_mov_b32_e32 v7, 0 8431; GFX900-NEXT: ;;#ASMSTART 8432; GFX900-NEXT: ; def v[3:6] 8433; GFX900-NEXT: ;;#ASMEND 8434; GFX900-NEXT: v_bfi_b32 v0, s4, v2, v3 8435; GFX900-NEXT: global_store_dword v7, v0, s[16:17] 8436; GFX900-NEXT: s_waitcnt vmcnt(0) 8437; GFX900-NEXT: s_setpc_b64 s[30:31] 8438; 8439; GFX90A-LABEL: v_shuffle_v2i16_v8i16__4_9: 8440; GFX90A: ; %bb.0: 8441; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8442; GFX90A-NEXT: ;;#ASMSTART 8443; GFX90A-NEXT: ; def v[0:3] 8444; GFX90A-NEXT: ;;#ASMEND 8445; GFX90A-NEXT: s_mov_b32 s4, 0xffff 8446; GFX90A-NEXT: v_mov_b32_e32 v8, 0 8447; GFX90A-NEXT: ;;#ASMSTART 8448; GFX90A-NEXT: ; def v[4:7] 8449; GFX90A-NEXT: ;;#ASMEND 8450; GFX90A-NEXT: v_bfi_b32 v0, s4, v2, v4 8451; GFX90A-NEXT: global_store_dword v8, v0, s[16:17] 8452; GFX90A-NEXT: s_waitcnt vmcnt(0) 8453; GFX90A-NEXT: s_setpc_b64 s[30:31] 8454; 8455; GFX940-LABEL: v_shuffle_v2i16_v8i16__4_9: 8456; GFX940: ; %bb.0: 8457; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8458; GFX940-NEXT: ;;#ASMSTART 8459; GFX940-NEXT: ; def v[0:3] 8460; 
GFX940-NEXT: ;;#ASMEND 8461; GFX940-NEXT: s_mov_b32 s2, 0xffff 8462; GFX940-NEXT: v_mov_b32_e32 v8, 0 8463; GFX940-NEXT: ;;#ASMSTART 8464; GFX940-NEXT: ; def v[4:7] 8465; GFX940-NEXT: ;;#ASMEND 8466; GFX940-NEXT: s_nop 0 8467; GFX940-NEXT: v_bfi_b32 v0, s2, v2, v4 8468; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 8469; GFX940-NEXT: s_waitcnt vmcnt(0) 8470; GFX940-NEXT: s_setpc_b64 s[30:31] 8471 %vec0 = call <8 x i16> asm "; def $0", "=v"() 8472 %vec1 = call <8 x i16> asm "; def $0", "=v"() 8473 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 4, i32 9> 8474 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 8475 ret void 8476} 8477 8478define void @v_shuffle_v2i16_v8i16__5_9(ptr addrspace(1) inreg %ptr) { 8479; GFX900-LABEL: v_shuffle_v2i16_v8i16__5_9: 8480; GFX900: ; %bb.0: 8481; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8482; GFX900-NEXT: ;;#ASMSTART 8483; GFX900-NEXT: ; def v[0:3] 8484; GFX900-NEXT: ;;#ASMEND 8485; GFX900-NEXT: s_mov_b32 s4, 0x7060302 8486; GFX900-NEXT: v_mov_b32_e32 v7, 0 8487; GFX900-NEXT: ;;#ASMSTART 8488; GFX900-NEXT: ; def v[3:6] 8489; GFX900-NEXT: ;;#ASMEND 8490; GFX900-NEXT: v_perm_b32 v0, v3, v2, s4 8491; GFX900-NEXT: global_store_dword v7, v0, s[16:17] 8492; GFX900-NEXT: s_waitcnt vmcnt(0) 8493; GFX900-NEXT: s_setpc_b64 s[30:31] 8494; 8495; GFX90A-LABEL: v_shuffle_v2i16_v8i16__5_9: 8496; GFX90A: ; %bb.0: 8497; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8498; GFX90A-NEXT: ;;#ASMSTART 8499; GFX90A-NEXT: ; def v[0:3] 8500; GFX90A-NEXT: ;;#ASMEND 8501; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 8502; GFX90A-NEXT: v_mov_b32_e32 v8, 0 8503; GFX90A-NEXT: ;;#ASMSTART 8504; GFX90A-NEXT: ; def v[4:7] 8505; GFX90A-NEXT: ;;#ASMEND 8506; GFX90A-NEXT: v_perm_b32 v0, v4, v2, s4 8507; GFX90A-NEXT: global_store_dword v8, v0, s[16:17] 8508; GFX90A-NEXT: s_waitcnt vmcnt(0) 8509; GFX90A-NEXT: s_setpc_b64 s[30:31] 8510; 8511; GFX940-LABEL: v_shuffle_v2i16_v8i16__5_9: 8512; GFX940: ; %bb.0: 8513; 
GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8514; GFX940-NEXT: ;;#ASMSTART 8515; GFX940-NEXT: ; def v[0:3] 8516; GFX940-NEXT: ;;#ASMEND 8517; GFX940-NEXT: s_mov_b32 s2, 0x7060302 8518; GFX940-NEXT: v_mov_b32_e32 v8, 0 8519; GFX940-NEXT: ;;#ASMSTART 8520; GFX940-NEXT: ; def v[4:7] 8521; GFX940-NEXT: ;;#ASMEND 8522; GFX940-NEXT: s_nop 0 8523; GFX940-NEXT: v_perm_b32 v0, v4, v2, s2 8524; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 8525; GFX940-NEXT: s_waitcnt vmcnt(0) 8526; GFX940-NEXT: s_setpc_b64 s[30:31] 8527 %vec0 = call <8 x i16> asm "; def $0", "=v"() 8528 %vec1 = call <8 x i16> asm "; def $0", "=v"() 8529 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 5, i32 9> 8530 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 8531 ret void 8532} 8533 8534define void @v_shuffle_v2i16_v8i16__6_9(ptr addrspace(1) inreg %ptr) { 8535; GFX900-LABEL: v_shuffle_v2i16_v8i16__6_9: 8536; GFX900: ; %bb.0: 8537; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8538; GFX900-NEXT: ;;#ASMSTART 8539; GFX900-NEXT: ; def v[0:3] 8540; GFX900-NEXT: ;;#ASMEND 8541; GFX900-NEXT: s_mov_b32 s4, 0xffff 8542; GFX900-NEXT: v_mov_b32_e32 v8, 0 8543; GFX900-NEXT: ;;#ASMSTART 8544; GFX900-NEXT: ; def v[4:7] 8545; GFX900-NEXT: ;;#ASMEND 8546; GFX900-NEXT: v_bfi_b32 v0, s4, v3, v4 8547; GFX900-NEXT: global_store_dword v8, v0, s[16:17] 8548; GFX900-NEXT: s_waitcnt vmcnt(0) 8549; GFX900-NEXT: s_setpc_b64 s[30:31] 8550; 8551; GFX90A-LABEL: v_shuffle_v2i16_v8i16__6_9: 8552; GFX90A: ; %bb.0: 8553; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8554; GFX90A-NEXT: ;;#ASMSTART 8555; GFX90A-NEXT: ; def v[0:3] 8556; GFX90A-NEXT: ;;#ASMEND 8557; GFX90A-NEXT: s_mov_b32 s4, 0xffff 8558; GFX90A-NEXT: v_mov_b32_e32 v8, 0 8559; GFX90A-NEXT: ;;#ASMSTART 8560; GFX90A-NEXT: ; def v[4:7] 8561; GFX90A-NEXT: ;;#ASMEND 8562; GFX90A-NEXT: v_bfi_b32 v0, s4, v3, v4 8563; GFX90A-NEXT: global_store_dword v8, v0, s[16:17] 8564; GFX90A-NEXT: s_waitcnt vmcnt(0) 8565; 
GFX90A-NEXT: s_setpc_b64 s[30:31] 8566; 8567; GFX940-LABEL: v_shuffle_v2i16_v8i16__6_9: 8568; GFX940: ; %bb.0: 8569; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8570; GFX940-NEXT: ;;#ASMSTART 8571; GFX940-NEXT: ; def v[0:3] 8572; GFX940-NEXT: ;;#ASMEND 8573; GFX940-NEXT: s_mov_b32 s2, 0xffff 8574; GFX940-NEXT: v_mov_b32_e32 v8, 0 8575; GFX940-NEXT: ;;#ASMSTART 8576; GFX940-NEXT: ; def v[4:7] 8577; GFX940-NEXT: ;;#ASMEND 8578; GFX940-NEXT: s_nop 0 8579; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v4 8580; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 8581; GFX940-NEXT: s_waitcnt vmcnt(0) 8582; GFX940-NEXT: s_setpc_b64 s[30:31] 8583 %vec0 = call <8 x i16> asm "; def $0", "=v"() 8584 %vec1 = call <8 x i16> asm "; def $0", "=v"() 8585 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 6, i32 9> 8586 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 8587 ret void 8588} 8589 8590define void @v_shuffle_v2i16_v8i16__7_9(ptr addrspace(1) inreg %ptr) { 8591; GFX900-LABEL: v_shuffle_v2i16_v8i16__7_9: 8592; GFX900: ; %bb.0: 8593; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8594; GFX900-NEXT: ;;#ASMSTART 8595; GFX900-NEXT: ; def v[0:3] 8596; GFX900-NEXT: ;;#ASMEND 8597; GFX900-NEXT: s_mov_b32 s4, 0x7060302 8598; GFX900-NEXT: v_mov_b32_e32 v8, 0 8599; GFX900-NEXT: ;;#ASMSTART 8600; GFX900-NEXT: ; def v[4:7] 8601; GFX900-NEXT: ;;#ASMEND 8602; GFX900-NEXT: v_perm_b32 v0, v4, v3, s4 8603; GFX900-NEXT: global_store_dword v8, v0, s[16:17] 8604; GFX900-NEXT: s_waitcnt vmcnt(0) 8605; GFX900-NEXT: s_setpc_b64 s[30:31] 8606; 8607; GFX90A-LABEL: v_shuffle_v2i16_v8i16__7_9: 8608; GFX90A: ; %bb.0: 8609; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8610; GFX90A-NEXT: ;;#ASMSTART 8611; GFX90A-NEXT: ; def v[0:3] 8612; GFX90A-NEXT: ;;#ASMEND 8613; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 8614; GFX90A-NEXT: v_mov_b32_e32 v8, 0 8615; GFX90A-NEXT: ;;#ASMSTART 8616; GFX90A-NEXT: ; def v[4:7] 8617; GFX90A-NEXT: ;;#ASMEND 8618; GFX90A-NEXT: v_perm_b32 v0, 
v4, v3, s4 8619; GFX90A-NEXT: global_store_dword v8, v0, s[16:17] 8620; GFX90A-NEXT: s_waitcnt vmcnt(0) 8621; GFX90A-NEXT: s_setpc_b64 s[30:31] 8622; 8623; GFX940-LABEL: v_shuffle_v2i16_v8i16__7_9: 8624; GFX940: ; %bb.0: 8625; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8626; GFX940-NEXT: ;;#ASMSTART 8627; GFX940-NEXT: ; def v[0:3] 8628; GFX940-NEXT: ;;#ASMEND 8629; GFX940-NEXT: s_mov_b32 s2, 0x7060302 8630; GFX940-NEXT: v_mov_b32_e32 v8, 0 8631; GFX940-NEXT: ;;#ASMSTART 8632; GFX940-NEXT: ; def v[4:7] 8633; GFX940-NEXT: ;;#ASMEND 8634; GFX940-NEXT: s_nop 0 8635; GFX940-NEXT: v_perm_b32 v0, v4, v3, s2 8636; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 8637; GFX940-NEXT: s_waitcnt vmcnt(0) 8638; GFX940-NEXT: s_setpc_b64 s[30:31] 8639 %vec0 = call <8 x i16> asm "; def $0", "=v"() 8640 %vec1 = call <8 x i16> asm "; def $0", "=v"() 8641 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 7, i32 9> 8642 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 8643 ret void 8644} 8645 8646define void @v_shuffle_v2i16_v8i16__8_9(ptr addrspace(1) inreg %ptr) { 8647; GFX900-LABEL: v_shuffle_v2i16_v8i16__8_9: 8648; GFX900: ; %bb.0: 8649; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8650; GFX900-NEXT: v_mov_b32_e32 v4, 0 8651; GFX900-NEXT: ;;#ASMSTART 8652; GFX900-NEXT: ; def v[0:3] 8653; GFX900-NEXT: ;;#ASMEND 8654; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 8655; GFX900-NEXT: s_waitcnt vmcnt(0) 8656; GFX900-NEXT: s_setpc_b64 s[30:31] 8657; 8658; GFX90A-LABEL: v_shuffle_v2i16_v8i16__8_9: 8659; GFX90A: ; %bb.0: 8660; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8661; GFX90A-NEXT: v_mov_b32_e32 v4, 0 8662; GFX90A-NEXT: ;;#ASMSTART 8663; GFX90A-NEXT: ; def v[0:3] 8664; GFX90A-NEXT: ;;#ASMEND 8665; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 8666; GFX90A-NEXT: s_waitcnt vmcnt(0) 8667; GFX90A-NEXT: s_setpc_b64 s[30:31] 8668; 8669; GFX940-LABEL: v_shuffle_v2i16_v8i16__8_9: 8670; GFX940: ; %bb.0: 8671; GFX940-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8672; GFX940-NEXT: v_mov_b32_e32 v4, 0 8673; GFX940-NEXT: ;;#ASMSTART 8674; GFX940-NEXT: ; def v[0:3] 8675; GFX940-NEXT: ;;#ASMEND 8676; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 8677; GFX940-NEXT: s_waitcnt vmcnt(0) 8678; GFX940-NEXT: s_setpc_b64 s[30:31] 8679 %vec0 = call <8 x i16> asm "; def $0", "=v"() 8680 %vec1 = call <8 x i16> asm "; def $0", "=v"() 8681 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 8, i32 9> 8682 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 8683 ret void 8684} 8685 8686define void @v_shuffle_v2i16_v8i16__9_9(ptr addrspace(1) inreg %ptr) { 8687; GFX900-LABEL: v_shuffle_v2i16_v8i16__9_9: 8688; GFX900: ; %bb.0: 8689; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8690; GFX900-NEXT: ;;#ASMSTART 8691; GFX900-NEXT: ; def v[0:3] 8692; GFX900-NEXT: ;;#ASMEND 8693; GFX900-NEXT: s_mov_b32 s4, 0x7060302 8694; GFX900-NEXT: v_mov_b32_e32 v4, 0 8695; GFX900-NEXT: v_perm_b32 v0, v0, v0, s4 8696; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 8697; GFX900-NEXT: s_waitcnt vmcnt(0) 8698; GFX900-NEXT: s_setpc_b64 s[30:31] 8699; 8700; GFX90A-LABEL: v_shuffle_v2i16_v8i16__9_9: 8701; GFX90A: ; %bb.0: 8702; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8703; GFX90A-NEXT: ;;#ASMSTART 8704; GFX90A-NEXT: ; def v[0:3] 8705; GFX90A-NEXT: ;;#ASMEND 8706; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 8707; GFX90A-NEXT: v_mov_b32_e32 v4, 0 8708; GFX90A-NEXT: v_perm_b32 v0, v0, v0, s4 8709; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 8710; GFX90A-NEXT: s_waitcnt vmcnt(0) 8711; GFX90A-NEXT: s_setpc_b64 s[30:31] 8712; 8713; GFX940-LABEL: v_shuffle_v2i16_v8i16__9_9: 8714; GFX940: ; %bb.0: 8715; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8716; GFX940-NEXT: ;;#ASMSTART 8717; GFX940-NEXT: ; def v[0:3] 8718; GFX940-NEXT: ;;#ASMEND 8719; GFX940-NEXT: s_mov_b32 s2, 0x7060302 8720; GFX940-NEXT: v_mov_b32_e32 v4, 0 8721; GFX940-NEXT: v_perm_b32 v0, v0, v0, s2 8722; 
GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 8723; GFX940-NEXT: s_waitcnt vmcnt(0) 8724; GFX940-NEXT: s_setpc_b64 s[30:31] 8725 %vec0 = call <8 x i16> asm "; def $0", "=v"() 8726 %vec1 = call <8 x i16> asm "; def $0", "=v"() 8727 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 9, i32 9> 8728 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 8729 ret void 8730} 8731 8732define void @v_shuffle_v2i16_v8i16__10_9(ptr addrspace(1) inreg %ptr) { 8733; GFX900-LABEL: v_shuffle_v2i16_v8i16__10_9: 8734; GFX900: ; %bb.0: 8735; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8736; GFX900-NEXT: ;;#ASMSTART 8737; GFX900-NEXT: ; def v[0:3] 8738; GFX900-NEXT: ;;#ASMEND 8739; GFX900-NEXT: s_mov_b32 s4, 0xffff 8740; GFX900-NEXT: v_mov_b32_e32 v4, 0 8741; GFX900-NEXT: v_bfi_b32 v0, s4, v1, v0 8742; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 8743; GFX900-NEXT: s_waitcnt vmcnt(0) 8744; GFX900-NEXT: s_setpc_b64 s[30:31] 8745; 8746; GFX90A-LABEL: v_shuffle_v2i16_v8i16__10_9: 8747; GFX90A: ; %bb.0: 8748; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8749; GFX90A-NEXT: ;;#ASMSTART 8750; GFX90A-NEXT: ; def v[0:3] 8751; GFX90A-NEXT: ;;#ASMEND 8752; GFX90A-NEXT: s_mov_b32 s4, 0xffff 8753; GFX90A-NEXT: v_mov_b32_e32 v4, 0 8754; GFX90A-NEXT: v_bfi_b32 v0, s4, v1, v0 8755; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 8756; GFX90A-NEXT: s_waitcnt vmcnt(0) 8757; GFX90A-NEXT: s_setpc_b64 s[30:31] 8758; 8759; GFX940-LABEL: v_shuffle_v2i16_v8i16__10_9: 8760; GFX940: ; %bb.0: 8761; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8762; GFX940-NEXT: ;;#ASMSTART 8763; GFX940-NEXT: ; def v[0:3] 8764; GFX940-NEXT: ;;#ASMEND 8765; GFX940-NEXT: s_mov_b32 s2, 0xffff 8766; GFX940-NEXT: v_mov_b32_e32 v4, 0 8767; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v0 8768; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 8769; GFX940-NEXT: s_waitcnt vmcnt(0) 8770; GFX940-NEXT: s_setpc_b64 s[30:31] 8771 %vec0 = call <8 x i16> asm "; def $0", "=v"() 8772 %vec1 = 
call <8 x i16> asm "; def $0", "=v"() 8773 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 10, i32 9> 8774 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 8775 ret void 8776} 8777 8778define void @v_shuffle_v2i16_v8i16__11_9(ptr addrspace(1) inreg %ptr) { 8779; GFX900-LABEL: v_shuffle_v2i16_v8i16__11_9: 8780; GFX900: ; %bb.0: 8781; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8782; GFX900-NEXT: ;;#ASMSTART 8783; GFX900-NEXT: ; def v[0:3] 8784; GFX900-NEXT: ;;#ASMEND 8785; GFX900-NEXT: s_mov_b32 s4, 0x7060302 8786; GFX900-NEXT: v_mov_b32_e32 v4, 0 8787; GFX900-NEXT: v_perm_b32 v0, v0, v1, s4 8788; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 8789; GFX900-NEXT: s_waitcnt vmcnt(0) 8790; GFX900-NEXT: s_setpc_b64 s[30:31] 8791; 8792; GFX90A-LABEL: v_shuffle_v2i16_v8i16__11_9: 8793; GFX90A: ; %bb.0: 8794; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8795; GFX90A-NEXT: ;;#ASMSTART 8796; GFX90A-NEXT: ; def v[0:3] 8797; GFX90A-NEXT: ;;#ASMEND 8798; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 8799; GFX90A-NEXT: v_mov_b32_e32 v4, 0 8800; GFX90A-NEXT: v_perm_b32 v0, v0, v1, s4 8801; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 8802; GFX90A-NEXT: s_waitcnt vmcnt(0) 8803; GFX90A-NEXT: s_setpc_b64 s[30:31] 8804; 8805; GFX940-LABEL: v_shuffle_v2i16_v8i16__11_9: 8806; GFX940: ; %bb.0: 8807; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8808; GFX940-NEXT: ;;#ASMSTART 8809; GFX940-NEXT: ; def v[0:3] 8810; GFX940-NEXT: ;;#ASMEND 8811; GFX940-NEXT: s_mov_b32 s2, 0x7060302 8812; GFX940-NEXT: v_mov_b32_e32 v4, 0 8813; GFX940-NEXT: v_perm_b32 v0, v0, v1, s2 8814; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 8815; GFX940-NEXT: s_waitcnt vmcnt(0) 8816; GFX940-NEXT: s_setpc_b64 s[30:31] 8817 %vec0 = call <8 x i16> asm "; def $0", "=v"() 8818 %vec1 = call <8 x i16> asm "; def $0", "=v"() 8819 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 11, i32 9> 8820 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 
8821 ret void 8822} 8823 8824define void @v_shuffle_v2i16_v8i16__12_9(ptr addrspace(1) inreg %ptr) { 8825; GFX900-LABEL: v_shuffle_v2i16_v8i16__12_9: 8826; GFX900: ; %bb.0: 8827; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8828; GFX900-NEXT: ;;#ASMSTART 8829; GFX900-NEXT: ; def v[0:3] 8830; GFX900-NEXT: ;;#ASMEND 8831; GFX900-NEXT: s_mov_b32 s4, 0xffff 8832; GFX900-NEXT: v_mov_b32_e32 v4, 0 8833; GFX900-NEXT: v_bfi_b32 v0, s4, v2, v0 8834; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 8835; GFX900-NEXT: s_waitcnt vmcnt(0) 8836; GFX900-NEXT: s_setpc_b64 s[30:31] 8837; 8838; GFX90A-LABEL: v_shuffle_v2i16_v8i16__12_9: 8839; GFX90A: ; %bb.0: 8840; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8841; GFX90A-NEXT: ;;#ASMSTART 8842; GFX90A-NEXT: ; def v[0:3] 8843; GFX90A-NEXT: ;;#ASMEND 8844; GFX90A-NEXT: s_mov_b32 s4, 0xffff 8845; GFX90A-NEXT: v_mov_b32_e32 v4, 0 8846; GFX90A-NEXT: v_bfi_b32 v0, s4, v2, v0 8847; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 8848; GFX90A-NEXT: s_waitcnt vmcnt(0) 8849; GFX90A-NEXT: s_setpc_b64 s[30:31] 8850; 8851; GFX940-LABEL: v_shuffle_v2i16_v8i16__12_9: 8852; GFX940: ; %bb.0: 8853; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8854; GFX940-NEXT: ;;#ASMSTART 8855; GFX940-NEXT: ; def v[0:3] 8856; GFX940-NEXT: ;;#ASMEND 8857; GFX940-NEXT: s_mov_b32 s2, 0xffff 8858; GFX940-NEXT: v_mov_b32_e32 v4, 0 8859; GFX940-NEXT: v_bfi_b32 v0, s2, v2, v0 8860; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 8861; GFX940-NEXT: s_waitcnt vmcnt(0) 8862; GFX940-NEXT: s_setpc_b64 s[30:31] 8863 %vec0 = call <8 x i16> asm "; def $0", "=v"() 8864 %vec1 = call <8 x i16> asm "; def $0", "=v"() 8865 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 12, i32 9> 8866 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 8867 ret void 8868} 8869 8870define void @v_shuffle_v2i16_v8i16__13_9(ptr addrspace(1) inreg %ptr) { 8871; GFX900-LABEL: v_shuffle_v2i16_v8i16__13_9: 8872; GFX900: ; %bb.0: 8873; GFX900-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8874; GFX900-NEXT: ;;#ASMSTART 8875; GFX900-NEXT: ; def v[0:3] 8876; GFX900-NEXT: ;;#ASMEND 8877; GFX900-NEXT: s_mov_b32 s4, 0x7060302 8878; GFX900-NEXT: v_mov_b32_e32 v4, 0 8879; GFX900-NEXT: v_perm_b32 v0, v0, v2, s4 8880; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 8881; GFX900-NEXT: s_waitcnt vmcnt(0) 8882; GFX900-NEXT: s_setpc_b64 s[30:31] 8883; 8884; GFX90A-LABEL: v_shuffle_v2i16_v8i16__13_9: 8885; GFX90A: ; %bb.0: 8886; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8887; GFX90A-NEXT: ;;#ASMSTART 8888; GFX90A-NEXT: ; def v[0:3] 8889; GFX90A-NEXT: ;;#ASMEND 8890; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 8891; GFX90A-NEXT: v_mov_b32_e32 v4, 0 8892; GFX90A-NEXT: v_perm_b32 v0, v0, v2, s4 8893; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 8894; GFX90A-NEXT: s_waitcnt vmcnt(0) 8895; GFX90A-NEXT: s_setpc_b64 s[30:31] 8896; 8897; GFX940-LABEL: v_shuffle_v2i16_v8i16__13_9: 8898; GFX940: ; %bb.0: 8899; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8900; GFX940-NEXT: ;;#ASMSTART 8901; GFX940-NEXT: ; def v[0:3] 8902; GFX940-NEXT: ;;#ASMEND 8903; GFX940-NEXT: s_mov_b32 s2, 0x7060302 8904; GFX940-NEXT: v_mov_b32_e32 v4, 0 8905; GFX940-NEXT: v_perm_b32 v0, v0, v2, s2 8906; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 8907; GFX940-NEXT: s_waitcnt vmcnt(0) 8908; GFX940-NEXT: s_setpc_b64 s[30:31] 8909 %vec0 = call <8 x i16> asm "; def $0", "=v"() 8910 %vec1 = call <8 x i16> asm "; def $0", "=v"() 8911 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 13, i32 9> 8912 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 8913 ret void 8914} 8915 8916define void @v_shuffle_v2i16_v8i16__14_9(ptr addrspace(1) inreg %ptr) { 8917; GFX900-LABEL: v_shuffle_v2i16_v8i16__14_9: 8918; GFX900: ; %bb.0: 8919; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8920; GFX900-NEXT: ;;#ASMSTART 8921; GFX900-NEXT: ; def v[0:3] 8922; GFX900-NEXT: ;;#ASMEND 8923; GFX900-NEXT: s_mov_b32 s4, 0xffff 8924; 
GFX900-NEXT: v_mov_b32_e32 v4, 0 8925; GFX900-NEXT: v_bfi_b32 v0, s4, v3, v0 8926; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 8927; GFX900-NEXT: s_waitcnt vmcnt(0) 8928; GFX900-NEXT: s_setpc_b64 s[30:31] 8929; 8930; GFX90A-LABEL: v_shuffle_v2i16_v8i16__14_9: 8931; GFX90A: ; %bb.0: 8932; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8933; GFX90A-NEXT: ;;#ASMSTART 8934; GFX90A-NEXT: ; def v[0:3] 8935; GFX90A-NEXT: ;;#ASMEND 8936; GFX90A-NEXT: s_mov_b32 s4, 0xffff 8937; GFX90A-NEXT: v_mov_b32_e32 v4, 0 8938; GFX90A-NEXT: v_bfi_b32 v0, s4, v3, v0 8939; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 8940; GFX90A-NEXT: s_waitcnt vmcnt(0) 8941; GFX90A-NEXT: s_setpc_b64 s[30:31] 8942; 8943; GFX940-LABEL: v_shuffle_v2i16_v8i16__14_9: 8944; GFX940: ; %bb.0: 8945; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8946; GFX940-NEXT: ;;#ASMSTART 8947; GFX940-NEXT: ; def v[0:3] 8948; GFX940-NEXT: ;;#ASMEND 8949; GFX940-NEXT: s_mov_b32 s2, 0xffff 8950; GFX940-NEXT: v_mov_b32_e32 v4, 0 8951; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v0 8952; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 8953; GFX940-NEXT: s_waitcnt vmcnt(0) 8954; GFX940-NEXT: s_setpc_b64 s[30:31] 8955 %vec0 = call <8 x i16> asm "; def $0", "=v"() 8956 %vec1 = call <8 x i16> asm "; def $0", "=v"() 8957 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 14, i32 9> 8958 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 8959 ret void 8960} 8961 8962define void @v_shuffle_v2i16_v8i16__u_10(ptr addrspace(1) inreg %ptr) { 8963; GFX900-LABEL: v_shuffle_v2i16_v8i16__u_10: 8964; GFX900: ; %bb.0: 8965; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8966; GFX900-NEXT: ;;#ASMSTART 8967; GFX900-NEXT: ; def v[0:3] 8968; GFX900-NEXT: ;;#ASMEND 8969; GFX900-NEXT: v_mov_b32_e32 v4, 0 8970; GFX900-NEXT: v_lshlrev_b32_e32 v0, 16, v1 8971; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 8972; GFX900-NEXT: s_waitcnt vmcnt(0) 8973; GFX900-NEXT: s_setpc_b64 s[30:31] 8974; 8975; 
GFX90A-LABEL: v_shuffle_v2i16_v8i16__u_10: 8976; GFX90A: ; %bb.0: 8977; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8978; GFX90A-NEXT: ;;#ASMSTART 8979; GFX90A-NEXT: ; def v[0:3] 8980; GFX90A-NEXT: ;;#ASMEND 8981; GFX90A-NEXT: v_mov_b32_e32 v4, 0 8982; GFX90A-NEXT: v_lshlrev_b32_e32 v0, 16, v1 8983; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 8984; GFX90A-NEXT: s_waitcnt vmcnt(0) 8985; GFX90A-NEXT: s_setpc_b64 s[30:31] 8986; 8987; GFX940-LABEL: v_shuffle_v2i16_v8i16__u_10: 8988; GFX940: ; %bb.0: 8989; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8990; GFX940-NEXT: ;;#ASMSTART 8991; GFX940-NEXT: ; def v[0:3] 8992; GFX940-NEXT: ;;#ASMEND 8993; GFX940-NEXT: v_mov_b32_e32 v4, 0 8994; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v1 8995; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 8996; GFX940-NEXT: s_waitcnt vmcnt(0) 8997; GFX940-NEXT: s_setpc_b64 s[30:31] 8998 %vec0 = call <8 x i16> asm "; def $0", "=v"() 8999 %vec1 = call <8 x i16> asm "; def $0", "=v"() 9000 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 poison, i32 10> 9001 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 9002 ret void 9003} 9004 9005define void @v_shuffle_v2i16_v8i16__0_10(ptr addrspace(1) inreg %ptr) { 9006; GFX900-LABEL: v_shuffle_v2i16_v8i16__0_10: 9007; GFX900: ; %bb.0: 9008; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9009; GFX900-NEXT: ;;#ASMSTART 9010; GFX900-NEXT: ; def v[0:3] 9011; GFX900-NEXT: ;;#ASMEND 9012; GFX900-NEXT: s_mov_b32 s4, 0x5040100 9013; GFX900-NEXT: v_mov_b32_e32 v5, 0 9014; GFX900-NEXT: ;;#ASMSTART 9015; GFX900-NEXT: ; def v[1:4] 9016; GFX900-NEXT: ;;#ASMEND 9017; GFX900-NEXT: v_perm_b32 v0, v2, v0, s4 9018; GFX900-NEXT: global_store_dword v5, v0, s[16:17] 9019; GFX900-NEXT: s_waitcnt vmcnt(0) 9020; GFX900-NEXT: s_setpc_b64 s[30:31] 9021; 9022; GFX90A-LABEL: v_shuffle_v2i16_v8i16__0_10: 9023; GFX90A: ; %bb.0: 9024; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9025; GFX90A-NEXT: ;;#ASMSTART 9026; 
GFX90A-NEXT: ; def v[0:3] 9027; GFX90A-NEXT: ;;#ASMEND 9028; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 9029; GFX90A-NEXT: v_mov_b32_e32 v6, 0 9030; GFX90A-NEXT: ;;#ASMSTART 9031; GFX90A-NEXT: ; def v[2:5] 9032; GFX90A-NEXT: ;;#ASMEND 9033; GFX90A-NEXT: v_perm_b32 v0, v3, v0, s4 9034; GFX90A-NEXT: global_store_dword v6, v0, s[16:17] 9035; GFX90A-NEXT: s_waitcnt vmcnt(0) 9036; GFX90A-NEXT: s_setpc_b64 s[30:31] 9037; 9038; GFX940-LABEL: v_shuffle_v2i16_v8i16__0_10: 9039; GFX940: ; %bb.0: 9040; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9041; GFX940-NEXT: ;;#ASMSTART 9042; GFX940-NEXT: ; def v[0:3] 9043; GFX940-NEXT: ;;#ASMEND 9044; GFX940-NEXT: s_mov_b32 s2, 0x5040100 9045; GFX940-NEXT: v_mov_b32_e32 v6, 0 9046; GFX940-NEXT: ;;#ASMSTART 9047; GFX940-NEXT: ; def v[2:5] 9048; GFX940-NEXT: ;;#ASMEND 9049; GFX940-NEXT: s_nop 0 9050; GFX940-NEXT: v_perm_b32 v0, v3, v0, s2 9051; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 9052; GFX940-NEXT: s_waitcnt vmcnt(0) 9053; GFX940-NEXT: s_setpc_b64 s[30:31] 9054 %vec0 = call <8 x i16> asm "; def $0", "=v"() 9055 %vec1 = call <8 x i16> asm "; def $0", "=v"() 9056 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 0, i32 10> 9057 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 9058 ret void 9059} 9060 9061define void @v_shuffle_v2i16_v8i16__1_10(ptr addrspace(1) inreg %ptr) { 9062; GFX900-LABEL: v_shuffle_v2i16_v8i16__1_10: 9063; GFX900: ; %bb.0: 9064; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9065; GFX900-NEXT: ;;#ASMSTART 9066; GFX900-NEXT: ; def v[0:3] 9067; GFX900-NEXT: ;;#ASMEND 9068; GFX900-NEXT: v_mov_b32_e32 v5, 0 9069; GFX900-NEXT: ;;#ASMSTART 9070; GFX900-NEXT: ; def v[1:4] 9071; GFX900-NEXT: ;;#ASMEND 9072; GFX900-NEXT: v_alignbit_b32 v0, v2, v0, 16 9073; GFX900-NEXT: global_store_dword v5, v0, s[16:17] 9074; GFX900-NEXT: s_waitcnt vmcnt(0) 9075; GFX900-NEXT: s_setpc_b64 s[30:31] 9076; 9077; GFX90A-LABEL: v_shuffle_v2i16_v8i16__1_10: 9078; GFX90A: ; %bb.0: 9079; 
GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9080; GFX90A-NEXT: ;;#ASMSTART 9081; GFX90A-NEXT: ; def v[0:3] 9082; GFX90A-NEXT: ;;#ASMEND 9083; GFX90A-NEXT: v_mov_b32_e32 v6, 0 9084; GFX90A-NEXT: ;;#ASMSTART 9085; GFX90A-NEXT: ; def v[2:5] 9086; GFX90A-NEXT: ;;#ASMEND 9087; GFX90A-NEXT: v_alignbit_b32 v0, v3, v0, 16 9088; GFX90A-NEXT: global_store_dword v6, v0, s[16:17] 9089; GFX90A-NEXT: s_waitcnt vmcnt(0) 9090; GFX90A-NEXT: s_setpc_b64 s[30:31] 9091; 9092; GFX940-LABEL: v_shuffle_v2i16_v8i16__1_10: 9093; GFX940: ; %bb.0: 9094; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9095; GFX940-NEXT: ;;#ASMSTART 9096; GFX940-NEXT: ; def v[0:3] 9097; GFX940-NEXT: ;;#ASMEND 9098; GFX940-NEXT: v_mov_b32_e32 v6, 0 9099; GFX940-NEXT: ;;#ASMSTART 9100; GFX940-NEXT: ; def v[2:5] 9101; GFX940-NEXT: ;;#ASMEND 9102; GFX940-NEXT: s_nop 0 9103; GFX940-NEXT: v_alignbit_b32 v0, v3, v0, 16 9104; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 9105; GFX940-NEXT: s_waitcnt vmcnt(0) 9106; GFX940-NEXT: s_setpc_b64 s[30:31] 9107 %vec0 = call <8 x i16> asm "; def $0", "=v"() 9108 %vec1 = call <8 x i16> asm "; def $0", "=v"() 9109 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 1, i32 10> 9110 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 9111 ret void 9112} 9113 9114define void @v_shuffle_v2i16_v8i16__2_10(ptr addrspace(1) inreg %ptr) { 9115; GFX900-LABEL: v_shuffle_v2i16_v8i16__2_10: 9116; GFX900: ; %bb.0: 9117; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9118; GFX900-NEXT: ;;#ASMSTART 9119; GFX900-NEXT: ; def v[0:3] 9120; GFX900-NEXT: ;;#ASMEND 9121; GFX900-NEXT: s_mov_b32 s4, 0x5040100 9122; GFX900-NEXT: v_mov_b32_e32 v6, 0 9123; GFX900-NEXT: ;;#ASMSTART 9124; GFX900-NEXT: ; def v[2:5] 9125; GFX900-NEXT: ;;#ASMEND 9126; GFX900-NEXT: v_perm_b32 v0, v3, v1, s4 9127; GFX900-NEXT: global_store_dword v6, v0, s[16:17] 9128; GFX900-NEXT: s_waitcnt vmcnt(0) 9129; GFX900-NEXT: s_setpc_b64 s[30:31] 9130; 9131; GFX90A-LABEL: 
v_shuffle_v2i16_v8i16__2_10: 9132; GFX90A: ; %bb.0: 9133; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9134; GFX90A-NEXT: ;;#ASMSTART 9135; GFX90A-NEXT: ; def v[0:3] 9136; GFX90A-NEXT: ;;#ASMEND 9137; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 9138; GFX90A-NEXT: v_mov_b32_e32 v6, 0 9139; GFX90A-NEXT: ;;#ASMSTART 9140; GFX90A-NEXT: ; def v[2:5] 9141; GFX90A-NEXT: ;;#ASMEND 9142; GFX90A-NEXT: v_perm_b32 v0, v3, v1, s4 9143; GFX90A-NEXT: global_store_dword v6, v0, s[16:17] 9144; GFX90A-NEXT: s_waitcnt vmcnt(0) 9145; GFX90A-NEXT: s_setpc_b64 s[30:31] 9146; 9147; GFX940-LABEL: v_shuffle_v2i16_v8i16__2_10: 9148; GFX940: ; %bb.0: 9149; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9150; GFX940-NEXT: ;;#ASMSTART 9151; GFX940-NEXT: ; def v[0:3] 9152; GFX940-NEXT: ;;#ASMEND 9153; GFX940-NEXT: s_mov_b32 s2, 0x5040100 9154; GFX940-NEXT: v_mov_b32_e32 v6, 0 9155; GFX940-NEXT: ;;#ASMSTART 9156; GFX940-NEXT: ; def v[2:5] 9157; GFX940-NEXT: ;;#ASMEND 9158; GFX940-NEXT: s_nop 0 9159; GFX940-NEXT: v_perm_b32 v0, v3, v1, s2 9160; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 9161; GFX940-NEXT: s_waitcnt vmcnt(0) 9162; GFX940-NEXT: s_setpc_b64 s[30:31] 9163 %vec0 = call <8 x i16> asm "; def $0", "=v"() 9164 %vec1 = call <8 x i16> asm "; def $0", "=v"() 9165 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 2, i32 10> 9166 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 9167 ret void 9168} 9169 9170define void @v_shuffle_v2i16_v8i16__3_10(ptr addrspace(1) inreg %ptr) { 9171; GFX900-LABEL: v_shuffle_v2i16_v8i16__3_10: 9172; GFX900: ; %bb.0: 9173; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9174; GFX900-NEXT: ;;#ASMSTART 9175; GFX900-NEXT: ; def v[0:3] 9176; GFX900-NEXT: ;;#ASMEND 9177; GFX900-NEXT: v_mov_b32_e32 v6, 0 9178; GFX900-NEXT: ;;#ASMSTART 9179; GFX900-NEXT: ; def v[2:5] 9180; GFX900-NEXT: ;;#ASMEND 9181; GFX900-NEXT: v_alignbit_b32 v0, v3, v1, 16 9182; GFX900-NEXT: global_store_dword v6, v0, s[16:17] 9183; GFX900-NEXT: 
s_waitcnt vmcnt(0) 9184; GFX900-NEXT: s_setpc_b64 s[30:31] 9185; 9186; GFX90A-LABEL: v_shuffle_v2i16_v8i16__3_10: 9187; GFX90A: ; %bb.0: 9188; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9189; GFX90A-NEXT: ;;#ASMSTART 9190; GFX90A-NEXT: ; def v[0:3] 9191; GFX90A-NEXT: ;;#ASMEND 9192; GFX90A-NEXT: v_mov_b32_e32 v6, 0 9193; GFX90A-NEXT: ;;#ASMSTART 9194; GFX90A-NEXT: ; def v[2:5] 9195; GFX90A-NEXT: ;;#ASMEND 9196; GFX90A-NEXT: v_alignbit_b32 v0, v3, v1, 16 9197; GFX90A-NEXT: global_store_dword v6, v0, s[16:17] 9198; GFX90A-NEXT: s_waitcnt vmcnt(0) 9199; GFX90A-NEXT: s_setpc_b64 s[30:31] 9200; 9201; GFX940-LABEL: v_shuffle_v2i16_v8i16__3_10: 9202; GFX940: ; %bb.0: 9203; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9204; GFX940-NEXT: ;;#ASMSTART 9205; GFX940-NEXT: ; def v[0:3] 9206; GFX940-NEXT: ;;#ASMEND 9207; GFX940-NEXT: v_mov_b32_e32 v6, 0 9208; GFX940-NEXT: ;;#ASMSTART 9209; GFX940-NEXT: ; def v[2:5] 9210; GFX940-NEXT: ;;#ASMEND 9211; GFX940-NEXT: s_nop 0 9212; GFX940-NEXT: v_alignbit_b32 v0, v3, v1, 16 9213; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 9214; GFX940-NEXT: s_waitcnt vmcnt(0) 9215; GFX940-NEXT: s_setpc_b64 s[30:31] 9216 %vec0 = call <8 x i16> asm "; def $0", "=v"() 9217 %vec1 = call <8 x i16> asm "; def $0", "=v"() 9218 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 3, i32 10> 9219 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 9220 ret void 9221} 9222 9223define void @v_shuffle_v2i16_v8i16__4_10(ptr addrspace(1) inreg %ptr) { 9224; GFX900-LABEL: v_shuffle_v2i16_v8i16__4_10: 9225; GFX900: ; %bb.0: 9226; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9227; GFX900-NEXT: ;;#ASMSTART 9228; GFX900-NEXT: ; def v[0:3] 9229; GFX900-NEXT: ;;#ASMEND 9230; GFX900-NEXT: s_mov_b32 s4, 0x5040100 9231; GFX900-NEXT: v_mov_b32_e32 v7, 0 9232; GFX900-NEXT: ;;#ASMSTART 9233; GFX900-NEXT: ; def v[3:6] 9234; GFX900-NEXT: ;;#ASMEND 9235; GFX900-NEXT: v_perm_b32 v0, v4, v2, s4 9236; GFX900-NEXT: 
global_store_dword v7, v0, s[16:17] 9237; GFX900-NEXT: s_waitcnt vmcnt(0) 9238; GFX900-NEXT: s_setpc_b64 s[30:31] 9239; 9240; GFX90A-LABEL: v_shuffle_v2i16_v8i16__4_10: 9241; GFX90A: ; %bb.0: 9242; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9243; GFX90A-NEXT: ;;#ASMSTART 9244; GFX90A-NEXT: ; def v[0:3] 9245; GFX90A-NEXT: ;;#ASMEND 9246; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 9247; GFX90A-NEXT: v_mov_b32_e32 v8, 0 9248; GFX90A-NEXT: ;;#ASMSTART 9249; GFX90A-NEXT: ; def v[4:7] 9250; GFX90A-NEXT: ;;#ASMEND 9251; GFX90A-NEXT: v_perm_b32 v0, v5, v2, s4 9252; GFX90A-NEXT: global_store_dword v8, v0, s[16:17] 9253; GFX90A-NEXT: s_waitcnt vmcnt(0) 9254; GFX90A-NEXT: s_setpc_b64 s[30:31] 9255; 9256; GFX940-LABEL: v_shuffle_v2i16_v8i16__4_10: 9257; GFX940: ; %bb.0: 9258; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9259; GFX940-NEXT: ;;#ASMSTART 9260; GFX940-NEXT: ; def v[0:3] 9261; GFX940-NEXT: ;;#ASMEND 9262; GFX940-NEXT: s_mov_b32 s2, 0x5040100 9263; GFX940-NEXT: v_mov_b32_e32 v8, 0 9264; GFX940-NEXT: ;;#ASMSTART 9265; GFX940-NEXT: ; def v[4:7] 9266; GFX940-NEXT: ;;#ASMEND 9267; GFX940-NEXT: s_nop 0 9268; GFX940-NEXT: v_perm_b32 v0, v5, v2, s2 9269; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 9270; GFX940-NEXT: s_waitcnt vmcnt(0) 9271; GFX940-NEXT: s_setpc_b64 s[30:31] 9272 %vec0 = call <8 x i16> asm "; def $0", "=v"() 9273 %vec1 = call <8 x i16> asm "; def $0", "=v"() 9274 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 4, i32 10> 9275 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 9276 ret void 9277} 9278 9279define void @v_shuffle_v2i16_v8i16__5_10(ptr addrspace(1) inreg %ptr) { 9280; GFX900-LABEL: v_shuffle_v2i16_v8i16__5_10: 9281; GFX900: ; %bb.0: 9282; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9283; GFX900-NEXT: ;;#ASMSTART 9284; GFX900-NEXT: ; def v[0:3] 9285; GFX900-NEXT: ;;#ASMEND 9286; GFX900-NEXT: v_mov_b32_e32 v7, 0 9287; GFX900-NEXT: ;;#ASMSTART 9288; GFX900-NEXT: ; def v[3:6] 9289; 
GFX900-NEXT: ;;#ASMEND 9290; GFX900-NEXT: v_alignbit_b32 v0, v4, v2, 16 9291; GFX900-NEXT: global_store_dword v7, v0, s[16:17] 9292; GFX900-NEXT: s_waitcnt vmcnt(0) 9293; GFX900-NEXT: s_setpc_b64 s[30:31] 9294; 9295; GFX90A-LABEL: v_shuffle_v2i16_v8i16__5_10: 9296; GFX90A: ; %bb.0: 9297; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9298; GFX90A-NEXT: ;;#ASMSTART 9299; GFX90A-NEXT: ; def v[0:3] 9300; GFX90A-NEXT: ;;#ASMEND 9301; GFX90A-NEXT: v_mov_b32_e32 v8, 0 9302; GFX90A-NEXT: ;;#ASMSTART 9303; GFX90A-NEXT: ; def v[4:7] 9304; GFX90A-NEXT: ;;#ASMEND 9305; GFX90A-NEXT: v_alignbit_b32 v0, v5, v2, 16 9306; GFX90A-NEXT: global_store_dword v8, v0, s[16:17] 9307; GFX90A-NEXT: s_waitcnt vmcnt(0) 9308; GFX90A-NEXT: s_setpc_b64 s[30:31] 9309; 9310; GFX940-LABEL: v_shuffle_v2i16_v8i16__5_10: 9311; GFX940: ; %bb.0: 9312; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9313; GFX940-NEXT: ;;#ASMSTART 9314; GFX940-NEXT: ; def v[0:3] 9315; GFX940-NEXT: ;;#ASMEND 9316; GFX940-NEXT: v_mov_b32_e32 v8, 0 9317; GFX940-NEXT: ;;#ASMSTART 9318; GFX940-NEXT: ; def v[4:7] 9319; GFX940-NEXT: ;;#ASMEND 9320; GFX940-NEXT: s_nop 0 9321; GFX940-NEXT: v_alignbit_b32 v0, v5, v2, 16 9322; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 9323; GFX940-NEXT: s_waitcnt vmcnt(0) 9324; GFX940-NEXT: s_setpc_b64 s[30:31] 9325 %vec0 = call <8 x i16> asm "; def $0", "=v"() 9326 %vec1 = call <8 x i16> asm "; def $0", "=v"() 9327 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 5, i32 10> 9328 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 9329 ret void 9330} 9331 9332define void @v_shuffle_v2i16_v8i16__6_10(ptr addrspace(1) inreg %ptr) { 9333; GFX900-LABEL: v_shuffle_v2i16_v8i16__6_10: 9334; GFX900: ; %bb.0: 9335; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9336; GFX900-NEXT: ;;#ASMSTART 9337; GFX900-NEXT: ; def v[0:3] 9338; GFX900-NEXT: ;;#ASMEND 9339; GFX900-NEXT: s_mov_b32 s4, 0x5040100 9340; GFX900-NEXT: v_mov_b32_e32 v8, 0 9341; GFX900-NEXT: 
;;#ASMSTART 9342; GFX900-NEXT: ; def v[4:7] 9343; GFX900-NEXT: ;;#ASMEND 9344; GFX900-NEXT: v_perm_b32 v0, v5, v3, s4 9345; GFX900-NEXT: global_store_dword v8, v0, s[16:17] 9346; GFX900-NEXT: s_waitcnt vmcnt(0) 9347; GFX900-NEXT: s_setpc_b64 s[30:31] 9348; 9349; GFX90A-LABEL: v_shuffle_v2i16_v8i16__6_10: 9350; GFX90A: ; %bb.0: 9351; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9352; GFX90A-NEXT: ;;#ASMSTART 9353; GFX90A-NEXT: ; def v[0:3] 9354; GFX90A-NEXT: ;;#ASMEND 9355; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 9356; GFX90A-NEXT: v_mov_b32_e32 v8, 0 9357; GFX90A-NEXT: ;;#ASMSTART 9358; GFX90A-NEXT: ; def v[4:7] 9359; GFX90A-NEXT: ;;#ASMEND 9360; GFX90A-NEXT: v_perm_b32 v0, v5, v3, s4 9361; GFX90A-NEXT: global_store_dword v8, v0, s[16:17] 9362; GFX90A-NEXT: s_waitcnt vmcnt(0) 9363; GFX90A-NEXT: s_setpc_b64 s[30:31] 9364; 9365; GFX940-LABEL: v_shuffle_v2i16_v8i16__6_10: 9366; GFX940: ; %bb.0: 9367; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9368; GFX940-NEXT: ;;#ASMSTART 9369; GFX940-NEXT: ; def v[0:3] 9370; GFX940-NEXT: ;;#ASMEND 9371; GFX940-NEXT: s_mov_b32 s2, 0x5040100 9372; GFX940-NEXT: v_mov_b32_e32 v8, 0 9373; GFX940-NEXT: ;;#ASMSTART 9374; GFX940-NEXT: ; def v[4:7] 9375; GFX940-NEXT: ;;#ASMEND 9376; GFX940-NEXT: s_nop 0 9377; GFX940-NEXT: v_perm_b32 v0, v5, v3, s2 9378; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 9379; GFX940-NEXT: s_waitcnt vmcnt(0) 9380; GFX940-NEXT: s_setpc_b64 s[30:31] 9381 %vec0 = call <8 x i16> asm "; def $0", "=v"() 9382 %vec1 = call <8 x i16> asm "; def $0", "=v"() 9383 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 6, i32 10> 9384 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 9385 ret void 9386} 9387 9388define void @v_shuffle_v2i16_v8i16__7_10(ptr addrspace(1) inreg %ptr) { 9389; GFX900-LABEL: v_shuffle_v2i16_v8i16__7_10: 9390; GFX900: ; %bb.0: 9391; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9392; GFX900-NEXT: ;;#ASMSTART 9393; GFX900-NEXT: ; def v[0:3] 9394; 
GFX900-NEXT: ;;#ASMEND 9395; GFX900-NEXT: v_mov_b32_e32 v8, 0 9396; GFX900-NEXT: ;;#ASMSTART 9397; GFX900-NEXT: ; def v[4:7] 9398; GFX900-NEXT: ;;#ASMEND 9399; GFX900-NEXT: v_alignbit_b32 v0, v5, v3, 16 9400; GFX900-NEXT: global_store_dword v8, v0, s[16:17] 9401; GFX900-NEXT: s_waitcnt vmcnt(0) 9402; GFX900-NEXT: s_setpc_b64 s[30:31] 9403; 9404; GFX90A-LABEL: v_shuffle_v2i16_v8i16__7_10: 9405; GFX90A: ; %bb.0: 9406; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9407; GFX90A-NEXT: ;;#ASMSTART 9408; GFX90A-NEXT: ; def v[0:3] 9409; GFX90A-NEXT: ;;#ASMEND 9410; GFX90A-NEXT: v_mov_b32_e32 v8, 0 9411; GFX90A-NEXT: ;;#ASMSTART 9412; GFX90A-NEXT: ; def v[4:7] 9413; GFX90A-NEXT: ;;#ASMEND 9414; GFX90A-NEXT: v_alignbit_b32 v0, v5, v3, 16 9415; GFX90A-NEXT: global_store_dword v8, v0, s[16:17] 9416; GFX90A-NEXT: s_waitcnt vmcnt(0) 9417; GFX90A-NEXT: s_setpc_b64 s[30:31] 9418; 9419; GFX940-LABEL: v_shuffle_v2i16_v8i16__7_10: 9420; GFX940: ; %bb.0: 9421; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9422; GFX940-NEXT: ;;#ASMSTART 9423; GFX940-NEXT: ; def v[0:3] 9424; GFX940-NEXT: ;;#ASMEND 9425; GFX940-NEXT: v_mov_b32_e32 v8, 0 9426; GFX940-NEXT: ;;#ASMSTART 9427; GFX940-NEXT: ; def v[4:7] 9428; GFX940-NEXT: ;;#ASMEND 9429; GFX940-NEXT: s_nop 0 9430; GFX940-NEXT: v_alignbit_b32 v0, v5, v3, 16 9431; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 9432; GFX940-NEXT: s_waitcnt vmcnt(0) 9433; GFX940-NEXT: s_setpc_b64 s[30:31] 9434 %vec0 = call <8 x i16> asm "; def $0", "=v"() 9435 %vec1 = call <8 x i16> asm "; def $0", "=v"() 9436 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 7, i32 10> 9437 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 9438 ret void 9439} 9440 9441define void @v_shuffle_v2i16_v8i16__8_10(ptr addrspace(1) inreg %ptr) { 9442; GFX900-LABEL: v_shuffle_v2i16_v8i16__8_10: 9443; GFX900: ; %bb.0: 9444; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9445; GFX900-NEXT: ;;#ASMSTART 9446; GFX900-NEXT: ; def v[0:3] 
9447; GFX900-NEXT: ;;#ASMEND 9448; GFX900-NEXT: s_mov_b32 s4, 0x5040100 9449; GFX900-NEXT: v_mov_b32_e32 v4, 0 9450; GFX900-NEXT: v_perm_b32 v0, v1, v0, s4 9451; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 9452; GFX900-NEXT: s_waitcnt vmcnt(0) 9453; GFX900-NEXT: s_setpc_b64 s[30:31] 9454; 9455; GFX90A-LABEL: v_shuffle_v2i16_v8i16__8_10: 9456; GFX90A: ; %bb.0: 9457; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9458; GFX90A-NEXT: ;;#ASMSTART 9459; GFX90A-NEXT: ; def v[0:3] 9460; GFX90A-NEXT: ;;#ASMEND 9461; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 9462; GFX90A-NEXT: v_mov_b32_e32 v4, 0 9463; GFX90A-NEXT: v_perm_b32 v0, v1, v0, s4 9464; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 9465; GFX90A-NEXT: s_waitcnt vmcnt(0) 9466; GFX90A-NEXT: s_setpc_b64 s[30:31] 9467; 9468; GFX940-LABEL: v_shuffle_v2i16_v8i16__8_10: 9469; GFX940: ; %bb.0: 9470; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9471; GFX940-NEXT: ;;#ASMSTART 9472; GFX940-NEXT: ; def v[0:3] 9473; GFX940-NEXT: ;;#ASMEND 9474; GFX940-NEXT: s_mov_b32 s2, 0x5040100 9475; GFX940-NEXT: v_mov_b32_e32 v4, 0 9476; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2 9477; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 9478; GFX940-NEXT: s_waitcnt vmcnt(0) 9479; GFX940-NEXT: s_setpc_b64 s[30:31] 9480 %vec0 = call <8 x i16> asm "; def $0", "=v"() 9481 %vec1 = call <8 x i16> asm "; def $0", "=v"() 9482 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 8, i32 10> 9483 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 9484 ret void 9485} 9486 9487define void @v_shuffle_v2i16_v8i16__9_10(ptr addrspace(1) inreg %ptr) { 9488; GFX900-LABEL: v_shuffle_v2i16_v8i16__9_10: 9489; GFX900: ; %bb.0: 9490; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9491; GFX900-NEXT: ;;#ASMSTART 9492; GFX900-NEXT: ; def v[0:3] 9493; GFX900-NEXT: ;;#ASMEND 9494; GFX900-NEXT: v_mov_b32_e32 v4, 0 9495; GFX900-NEXT: v_alignbit_b32 v0, v1, v0, 16 9496; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 9497; 
GFX900-NEXT: s_waitcnt vmcnt(0) 9498; GFX900-NEXT: s_setpc_b64 s[30:31] 9499; 9500; GFX90A-LABEL: v_shuffle_v2i16_v8i16__9_10: 9501; GFX90A: ; %bb.0: 9502; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9503; GFX90A-NEXT: ;;#ASMSTART 9504; GFX90A-NEXT: ; def v[0:3] 9505; GFX90A-NEXT: ;;#ASMEND 9506; GFX90A-NEXT: v_mov_b32_e32 v4, 0 9507; GFX90A-NEXT: v_alignbit_b32 v0, v1, v0, 16 9508; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 9509; GFX90A-NEXT: s_waitcnt vmcnt(0) 9510; GFX90A-NEXT: s_setpc_b64 s[30:31] 9511; 9512; GFX940-LABEL: v_shuffle_v2i16_v8i16__9_10: 9513; GFX940: ; %bb.0: 9514; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9515; GFX940-NEXT: ;;#ASMSTART 9516; GFX940-NEXT: ; def v[0:3] 9517; GFX940-NEXT: ;;#ASMEND 9518; GFX940-NEXT: v_mov_b32_e32 v4, 0 9519; GFX940-NEXT: v_alignbit_b32 v0, v1, v0, 16 9520; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 9521; GFX940-NEXT: s_waitcnt vmcnt(0) 9522; GFX940-NEXT: s_setpc_b64 s[30:31] 9523 %vec0 = call <8 x i16> asm "; def $0", "=v"() 9524 %vec1 = call <8 x i16> asm "; def $0", "=v"() 9525 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 9, i32 10> 9526 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 9527 ret void 9528} 9529 9530define void @v_shuffle_v2i16_v8i16__10_10(ptr addrspace(1) inreg %ptr) { 9531; GFX900-LABEL: v_shuffle_v2i16_v8i16__10_10: 9532; GFX900: ; %bb.0: 9533; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9534; GFX900-NEXT: ;;#ASMSTART 9535; GFX900-NEXT: ; def v[0:3] 9536; GFX900-NEXT: ;;#ASMEND 9537; GFX900-NEXT: s_mov_b32 s4, 0x5040100 9538; GFX900-NEXT: v_mov_b32_e32 v4, 0 9539; GFX900-NEXT: v_perm_b32 v0, v1, v1, s4 9540; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 9541; GFX900-NEXT: s_waitcnt vmcnt(0) 9542; GFX900-NEXT: s_setpc_b64 s[30:31] 9543; 9544; GFX90A-LABEL: v_shuffle_v2i16_v8i16__10_10: 9545; GFX90A: ; %bb.0: 9546; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9547; GFX90A-NEXT: ;;#ASMSTART 9548; GFX90A-NEXT: 
; def v[0:3] 9549; GFX90A-NEXT: ;;#ASMEND 9550; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 9551; GFX90A-NEXT: v_mov_b32_e32 v4, 0 9552; GFX90A-NEXT: v_perm_b32 v0, v1, v1, s4 9553; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 9554; GFX90A-NEXT: s_waitcnt vmcnt(0) 9555; GFX90A-NEXT: s_setpc_b64 s[30:31] 9556; 9557; GFX940-LABEL: v_shuffle_v2i16_v8i16__10_10: 9558; GFX940: ; %bb.0: 9559; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9560; GFX940-NEXT: ;;#ASMSTART 9561; GFX940-NEXT: ; def v[0:3] 9562; GFX940-NEXT: ;;#ASMEND 9563; GFX940-NEXT: s_mov_b32 s2, 0x5040100 9564; GFX940-NEXT: v_mov_b32_e32 v4, 0 9565; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 9566; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 9567; GFX940-NEXT: s_waitcnt vmcnt(0) 9568; GFX940-NEXT: s_setpc_b64 s[30:31] 9569 %vec0 = call <8 x i16> asm "; def $0", "=v"() 9570 %vec1 = call <8 x i16> asm "; def $0", "=v"() 9571 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 10, i32 10> 9572 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 9573 ret void 9574} 9575 9576define void @v_shuffle_v2i16_v8i16__11_10(ptr addrspace(1) inreg %ptr) { 9577; GFX900-LABEL: v_shuffle_v2i16_v8i16__11_10: 9578; GFX900: ; %bb.0: 9579; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9580; GFX900-NEXT: ;;#ASMSTART 9581; GFX900-NEXT: ; def v[0:3] 9582; GFX900-NEXT: ;;#ASMEND 9583; GFX900-NEXT: v_mov_b32_e32 v4, 0 9584; GFX900-NEXT: v_alignbit_b32 v0, v1, v1, 16 9585; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 9586; GFX900-NEXT: s_waitcnt vmcnt(0) 9587; GFX900-NEXT: s_setpc_b64 s[30:31] 9588; 9589; GFX90A-LABEL: v_shuffle_v2i16_v8i16__11_10: 9590; GFX90A: ; %bb.0: 9591; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9592; GFX90A-NEXT: ;;#ASMSTART 9593; GFX90A-NEXT: ; def v[0:3] 9594; GFX90A-NEXT: ;;#ASMEND 9595; GFX90A-NEXT: v_mov_b32_e32 v4, 0 9596; GFX90A-NEXT: v_alignbit_b32 v0, v1, v1, 16 9597; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 9598; GFX90A-NEXT: s_waitcnt 
vmcnt(0) 9599; GFX90A-NEXT: s_setpc_b64 s[30:31] 9600; 9601; GFX940-LABEL: v_shuffle_v2i16_v8i16__11_10: 9602; GFX940: ; %bb.0: 9603; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9604; GFX940-NEXT: ;;#ASMSTART 9605; GFX940-NEXT: ; def v[0:3] 9606; GFX940-NEXT: ;;#ASMEND 9607; GFX940-NEXT: v_mov_b32_e32 v4, 0 9608; GFX940-NEXT: v_alignbit_b32 v0, v1, v1, 16 9609; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 9610; GFX940-NEXT: s_waitcnt vmcnt(0) 9611; GFX940-NEXT: s_setpc_b64 s[30:31] 9612 %vec0 = call <8 x i16> asm "; def $0", "=v"() 9613 %vec1 = call <8 x i16> asm "; def $0", "=v"() 9614 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 11, i32 10> 9615 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 9616 ret void 9617} 9618 9619define void @v_shuffle_v2i16_v8i16__12_10(ptr addrspace(1) inreg %ptr) { 9620; GFX900-LABEL: v_shuffle_v2i16_v8i16__12_10: 9621; GFX900: ; %bb.0: 9622; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9623; GFX900-NEXT: ;;#ASMSTART 9624; GFX900-NEXT: ; def v[0:3] 9625; GFX900-NEXT: ;;#ASMEND 9626; GFX900-NEXT: s_mov_b32 s4, 0x5040100 9627; GFX900-NEXT: v_mov_b32_e32 v4, 0 9628; GFX900-NEXT: v_perm_b32 v0, v1, v2, s4 9629; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 9630; GFX900-NEXT: s_waitcnt vmcnt(0) 9631; GFX900-NEXT: s_setpc_b64 s[30:31] 9632; 9633; GFX90A-LABEL: v_shuffle_v2i16_v8i16__12_10: 9634; GFX90A: ; %bb.0: 9635; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9636; GFX90A-NEXT: ;;#ASMSTART 9637; GFX90A-NEXT: ; def v[0:3] 9638; GFX90A-NEXT: ;;#ASMEND 9639; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 9640; GFX90A-NEXT: v_mov_b32_e32 v4, 0 9641; GFX90A-NEXT: v_perm_b32 v0, v1, v2, s4 9642; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 9643; GFX90A-NEXT: s_waitcnt vmcnt(0) 9644; GFX90A-NEXT: s_setpc_b64 s[30:31] 9645; 9646; GFX940-LABEL: v_shuffle_v2i16_v8i16__12_10: 9647; GFX940: ; %bb.0: 9648; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9649; GFX940-NEXT: ;;#ASMSTART 
9650; GFX940-NEXT: ; def v[0:3] 9651; GFX940-NEXT: ;;#ASMEND 9652; GFX940-NEXT: s_mov_b32 s2, 0x5040100 9653; GFX940-NEXT: v_mov_b32_e32 v4, 0 9654; GFX940-NEXT: v_perm_b32 v0, v1, v2, s2 9655; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 9656; GFX940-NEXT: s_waitcnt vmcnt(0) 9657; GFX940-NEXT: s_setpc_b64 s[30:31] 9658 %vec0 = call <8 x i16> asm "; def $0", "=v"() 9659 %vec1 = call <8 x i16> asm "; def $0", "=v"() 9660 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 12, i32 10> 9661 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 9662 ret void 9663} 9664 9665define void @v_shuffle_v2i16_v8i16__13_10(ptr addrspace(1) inreg %ptr) { 9666; GFX900-LABEL: v_shuffle_v2i16_v8i16__13_10: 9667; GFX900: ; %bb.0: 9668; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9669; GFX900-NEXT: ;;#ASMSTART 9670; GFX900-NEXT: ; def v[0:3] 9671; GFX900-NEXT: ;;#ASMEND 9672; GFX900-NEXT: v_mov_b32_e32 v4, 0 9673; GFX900-NEXT: v_alignbit_b32 v0, v1, v2, 16 9674; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 9675; GFX900-NEXT: s_waitcnt vmcnt(0) 9676; GFX900-NEXT: s_setpc_b64 s[30:31] 9677; 9678; GFX90A-LABEL: v_shuffle_v2i16_v8i16__13_10: 9679; GFX90A: ; %bb.0: 9680; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9681; GFX90A-NEXT: ;;#ASMSTART 9682; GFX90A-NEXT: ; def v[0:3] 9683; GFX90A-NEXT: ;;#ASMEND 9684; GFX90A-NEXT: v_mov_b32_e32 v4, 0 9685; GFX90A-NEXT: v_alignbit_b32 v0, v1, v2, 16 9686; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 9687; GFX90A-NEXT: s_waitcnt vmcnt(0) 9688; GFX90A-NEXT: s_setpc_b64 s[30:31] 9689; 9690; GFX940-LABEL: v_shuffle_v2i16_v8i16__13_10: 9691; GFX940: ; %bb.0: 9692; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9693; GFX940-NEXT: ;;#ASMSTART 9694; GFX940-NEXT: ; def v[0:3] 9695; GFX940-NEXT: ;;#ASMEND 9696; GFX940-NEXT: v_mov_b32_e32 v4, 0 9697; GFX940-NEXT: v_alignbit_b32 v0, v1, v2, 16 9698; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 9699; GFX940-NEXT: s_waitcnt vmcnt(0) 9700; 
GFX940-NEXT: s_setpc_b64 s[30:31] 9701 %vec0 = call <8 x i16> asm "; def $0", "=v"() 9702 %vec1 = call <8 x i16> asm "; def $0", "=v"() 9703 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 13, i32 10> 9704 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 9705 ret void 9706} 9707 9708define void @v_shuffle_v2i16_v8i16__14_10(ptr addrspace(1) inreg %ptr) { 9709; GFX900-LABEL: v_shuffle_v2i16_v8i16__14_10: 9710; GFX900: ; %bb.0: 9711; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9712; GFX900-NEXT: ;;#ASMSTART 9713; GFX900-NEXT: ; def v[0:3] 9714; GFX900-NEXT: ;;#ASMEND 9715; GFX900-NEXT: s_mov_b32 s4, 0x5040100 9716; GFX900-NEXT: v_mov_b32_e32 v4, 0 9717; GFX900-NEXT: v_perm_b32 v0, v1, v3, s4 9718; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 9719; GFX900-NEXT: s_waitcnt vmcnt(0) 9720; GFX900-NEXT: s_setpc_b64 s[30:31] 9721; 9722; GFX90A-LABEL: v_shuffle_v2i16_v8i16__14_10: 9723; GFX90A: ; %bb.0: 9724; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9725; GFX90A-NEXT: ;;#ASMSTART 9726; GFX90A-NEXT: ; def v[0:3] 9727; GFX90A-NEXT: ;;#ASMEND 9728; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 9729; GFX90A-NEXT: v_mov_b32_e32 v4, 0 9730; GFX90A-NEXT: v_perm_b32 v0, v1, v3, s4 9731; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 9732; GFX90A-NEXT: s_waitcnt vmcnt(0) 9733; GFX90A-NEXT: s_setpc_b64 s[30:31] 9734; 9735; GFX940-LABEL: v_shuffle_v2i16_v8i16__14_10: 9736; GFX940: ; %bb.0: 9737; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9738; GFX940-NEXT: ;;#ASMSTART 9739; GFX940-NEXT: ; def v[0:3] 9740; GFX940-NEXT: ;;#ASMEND 9741; GFX940-NEXT: s_mov_b32 s2, 0x5040100 9742; GFX940-NEXT: v_mov_b32_e32 v4, 0 9743; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 9744; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 9745; GFX940-NEXT: s_waitcnt vmcnt(0) 9746; GFX940-NEXT: s_setpc_b64 s[30:31] 9747 %vec0 = call <8 x i16> asm "; def $0", "=v"() 9748 %vec1 = call <8 x i16> asm "; def $0", "=v"() 9749 %shuf = shufflevector <8 x i16> 
%vec0, <8 x i16> %vec1, <2 x i32> <i32 14, i32 10> 9750 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 9751 ret void 9752} 9753 9754define void @v_shuffle_v2i16_v8i16__u_11(ptr addrspace(1) inreg %ptr) { 9755; GFX900-LABEL: v_shuffle_v2i16_v8i16__u_11: 9756; GFX900: ; %bb.0: 9757; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9758; GFX900-NEXT: v_mov_b32_e32 v4, 0 9759; GFX900-NEXT: ;;#ASMSTART 9760; GFX900-NEXT: ; def v[0:3] 9761; GFX900-NEXT: ;;#ASMEND 9762; GFX900-NEXT: global_store_dword v4, v1, s[16:17] 9763; GFX900-NEXT: s_waitcnt vmcnt(0) 9764; GFX900-NEXT: s_setpc_b64 s[30:31] 9765; 9766; GFX90A-LABEL: v_shuffle_v2i16_v8i16__u_11: 9767; GFX90A: ; %bb.0: 9768; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9769; GFX90A-NEXT: v_mov_b32_e32 v4, 0 9770; GFX90A-NEXT: ;;#ASMSTART 9771; GFX90A-NEXT: ; def v[0:3] 9772; GFX90A-NEXT: ;;#ASMEND 9773; GFX90A-NEXT: global_store_dword v4, v1, s[16:17] 9774; GFX90A-NEXT: s_waitcnt vmcnt(0) 9775; GFX90A-NEXT: s_setpc_b64 s[30:31] 9776; 9777; GFX940-LABEL: v_shuffle_v2i16_v8i16__u_11: 9778; GFX940: ; %bb.0: 9779; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9780; GFX940-NEXT: v_mov_b32_e32 v4, 0 9781; GFX940-NEXT: ;;#ASMSTART 9782; GFX940-NEXT: ; def v[0:3] 9783; GFX940-NEXT: ;;#ASMEND 9784; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 9785; GFX940-NEXT: s_waitcnt vmcnt(0) 9786; GFX940-NEXT: s_setpc_b64 s[30:31] 9787 %vec0 = call <8 x i16> asm "; def $0", "=v"() 9788 %vec1 = call <8 x i16> asm "; def $0", "=v"() 9789 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 poison, i32 11> 9790 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 9791 ret void 9792} 9793 9794define void @v_shuffle_v2i16_v8i16__0_11(ptr addrspace(1) inreg %ptr) { 9795; GFX900-LABEL: v_shuffle_v2i16_v8i16__0_11: 9796; GFX900: ; %bb.0: 9797; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9798; GFX900-NEXT: ;;#ASMSTART 9799; GFX900-NEXT: ; def v[0:3] 9800; GFX900-NEXT: ;;#ASMEND 9801; 
GFX900-NEXT: s_mov_b32 s4, 0xffff 9802; GFX900-NEXT: v_mov_b32_e32 v5, 0 9803; GFX900-NEXT: ;;#ASMSTART 9804; GFX900-NEXT: ; def v[1:4] 9805; GFX900-NEXT: ;;#ASMEND 9806; GFX900-NEXT: v_bfi_b32 v0, s4, v0, v2 9807; GFX900-NEXT: global_store_dword v5, v0, s[16:17] 9808; GFX900-NEXT: s_waitcnt vmcnt(0) 9809; GFX900-NEXT: s_setpc_b64 s[30:31] 9810; 9811; GFX90A-LABEL: v_shuffle_v2i16_v8i16__0_11: 9812; GFX90A: ; %bb.0: 9813; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9814; GFX90A-NEXT: ;;#ASMSTART 9815; GFX90A-NEXT: ; def v[0:3] 9816; GFX90A-NEXT: ;;#ASMEND 9817; GFX90A-NEXT: s_mov_b32 s4, 0xffff 9818; GFX90A-NEXT: v_mov_b32_e32 v6, 0 9819; GFX90A-NEXT: ;;#ASMSTART 9820; GFX90A-NEXT: ; def v[2:5] 9821; GFX90A-NEXT: ;;#ASMEND 9822; GFX90A-NEXT: v_bfi_b32 v0, s4, v0, v3 9823; GFX90A-NEXT: global_store_dword v6, v0, s[16:17] 9824; GFX90A-NEXT: s_waitcnt vmcnt(0) 9825; GFX90A-NEXT: s_setpc_b64 s[30:31] 9826; 9827; GFX940-LABEL: v_shuffle_v2i16_v8i16__0_11: 9828; GFX940: ; %bb.0: 9829; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9830; GFX940-NEXT: ;;#ASMSTART 9831; GFX940-NEXT: ; def v[0:3] 9832; GFX940-NEXT: ;;#ASMEND 9833; GFX940-NEXT: s_mov_b32 s2, 0xffff 9834; GFX940-NEXT: v_mov_b32_e32 v6, 0 9835; GFX940-NEXT: ;;#ASMSTART 9836; GFX940-NEXT: ; def v[2:5] 9837; GFX940-NEXT: ;;#ASMEND 9838; GFX940-NEXT: s_nop 0 9839; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v3 9840; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 9841; GFX940-NEXT: s_waitcnt vmcnt(0) 9842; GFX940-NEXT: s_setpc_b64 s[30:31] 9843 %vec0 = call <8 x i16> asm "; def $0", "=v"() 9844 %vec1 = call <8 x i16> asm "; def $0", "=v"() 9845 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 0, i32 11> 9846 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 9847 ret void 9848} 9849 9850define void @v_shuffle_v2i16_v8i16__1_11(ptr addrspace(1) inreg %ptr) { 9851; GFX900-LABEL: v_shuffle_v2i16_v8i16__1_11: 9852; GFX900: ; %bb.0: 9853; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) 9854; GFX900-NEXT: ;;#ASMSTART 9855; GFX900-NEXT: ; def v[0:3] 9856; GFX900-NEXT: ;;#ASMEND 9857; GFX900-NEXT: s_mov_b32 s4, 0x7060302 9858; GFX900-NEXT: v_mov_b32_e32 v5, 0 9859; GFX900-NEXT: ;;#ASMSTART 9860; GFX900-NEXT: ; def v[1:4] 9861; GFX900-NEXT: ;;#ASMEND 9862; GFX900-NEXT: v_perm_b32 v0, v2, v0, s4 9863; GFX900-NEXT: global_store_dword v5, v0, s[16:17] 9864; GFX900-NEXT: s_waitcnt vmcnt(0) 9865; GFX900-NEXT: s_setpc_b64 s[30:31] 9866; 9867; GFX90A-LABEL: v_shuffle_v2i16_v8i16__1_11: 9868; GFX90A: ; %bb.0: 9869; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9870; GFX90A-NEXT: ;;#ASMSTART 9871; GFX90A-NEXT: ; def v[0:3] 9872; GFX90A-NEXT: ;;#ASMEND 9873; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 9874; GFX90A-NEXT: v_mov_b32_e32 v6, 0 9875; GFX90A-NEXT: ;;#ASMSTART 9876; GFX90A-NEXT: ; def v[2:5] 9877; GFX90A-NEXT: ;;#ASMEND 9878; GFX90A-NEXT: v_perm_b32 v0, v3, v0, s4 9879; GFX90A-NEXT: global_store_dword v6, v0, s[16:17] 9880; GFX90A-NEXT: s_waitcnt vmcnt(0) 9881; GFX90A-NEXT: s_setpc_b64 s[30:31] 9882; 9883; GFX940-LABEL: v_shuffle_v2i16_v8i16__1_11: 9884; GFX940: ; %bb.0: 9885; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9886; GFX940-NEXT: ;;#ASMSTART 9887; GFX940-NEXT: ; def v[0:3] 9888; GFX940-NEXT: ;;#ASMEND 9889; GFX940-NEXT: s_mov_b32 s2, 0x7060302 9890; GFX940-NEXT: v_mov_b32_e32 v6, 0 9891; GFX940-NEXT: ;;#ASMSTART 9892; GFX940-NEXT: ; def v[2:5] 9893; GFX940-NEXT: ;;#ASMEND 9894; GFX940-NEXT: s_nop 0 9895; GFX940-NEXT: v_perm_b32 v0, v3, v0, s2 9896; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 9897; GFX940-NEXT: s_waitcnt vmcnt(0) 9898; GFX940-NEXT: s_setpc_b64 s[30:31] 9899 %vec0 = call <8 x i16> asm "; def $0", "=v"() 9900 %vec1 = call <8 x i16> asm "; def $0", "=v"() 9901 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 1, i32 11> 9902 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 9903 ret void 9904} 9905 9906define void @v_shuffle_v2i16_v8i16__2_11(ptr addrspace(1) inreg %ptr) { 
9907; GFX900-LABEL: v_shuffle_v2i16_v8i16__2_11: 9908; GFX900: ; %bb.0: 9909; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9910; GFX900-NEXT: ;;#ASMSTART 9911; GFX900-NEXT: ; def v[0:3] 9912; GFX900-NEXT: ;;#ASMEND 9913; GFX900-NEXT: s_mov_b32 s4, 0xffff 9914; GFX900-NEXT: v_mov_b32_e32 v6, 0 9915; GFX900-NEXT: ;;#ASMSTART 9916; GFX900-NEXT: ; def v[2:5] 9917; GFX900-NEXT: ;;#ASMEND 9918; GFX900-NEXT: v_bfi_b32 v0, s4, v1, v3 9919; GFX900-NEXT: global_store_dword v6, v0, s[16:17] 9920; GFX900-NEXT: s_waitcnt vmcnt(0) 9921; GFX900-NEXT: s_setpc_b64 s[30:31] 9922; 9923; GFX90A-LABEL: v_shuffle_v2i16_v8i16__2_11: 9924; GFX90A: ; %bb.0: 9925; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9926; GFX90A-NEXT: ;;#ASMSTART 9927; GFX90A-NEXT: ; def v[0:3] 9928; GFX90A-NEXT: ;;#ASMEND 9929; GFX90A-NEXT: s_mov_b32 s4, 0xffff 9930; GFX90A-NEXT: v_mov_b32_e32 v6, 0 9931; GFX90A-NEXT: ;;#ASMSTART 9932; GFX90A-NEXT: ; def v[2:5] 9933; GFX90A-NEXT: ;;#ASMEND 9934; GFX90A-NEXT: v_bfi_b32 v0, s4, v1, v3 9935; GFX90A-NEXT: global_store_dword v6, v0, s[16:17] 9936; GFX90A-NEXT: s_waitcnt vmcnt(0) 9937; GFX90A-NEXT: s_setpc_b64 s[30:31] 9938; 9939; GFX940-LABEL: v_shuffle_v2i16_v8i16__2_11: 9940; GFX940: ; %bb.0: 9941; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9942; GFX940-NEXT: ;;#ASMSTART 9943; GFX940-NEXT: ; def v[0:3] 9944; GFX940-NEXT: ;;#ASMEND 9945; GFX940-NEXT: s_mov_b32 s2, 0xffff 9946; GFX940-NEXT: v_mov_b32_e32 v6, 0 9947; GFX940-NEXT: ;;#ASMSTART 9948; GFX940-NEXT: ; def v[2:5] 9949; GFX940-NEXT: ;;#ASMEND 9950; GFX940-NEXT: s_nop 0 9951; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v3 9952; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 9953; GFX940-NEXT: s_waitcnt vmcnt(0) 9954; GFX940-NEXT: s_setpc_b64 s[30:31] 9955 %vec0 = call <8 x i16> asm "; def $0", "=v"() 9956 %vec1 = call <8 x i16> asm "; def $0", "=v"() 9957 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 2, i32 11> 9958 store <2 x i16> %shuf, ptr addrspace(1) %ptr, 
align 4 9959 ret void 9960} 9961 9962define void @v_shuffle_v2i16_v8i16__3_11(ptr addrspace(1) inreg %ptr) { 9963; GFX900-LABEL: v_shuffle_v2i16_v8i16__3_11: 9964; GFX900: ; %bb.0: 9965; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9966; GFX900-NEXT: ;;#ASMSTART 9967; GFX900-NEXT: ; def v[0:3] 9968; GFX900-NEXT: ;;#ASMEND 9969; GFX900-NEXT: s_mov_b32 s4, 0x7060302 9970; GFX900-NEXT: v_mov_b32_e32 v6, 0 9971; GFX900-NEXT: ;;#ASMSTART 9972; GFX900-NEXT: ; def v[2:5] 9973; GFX900-NEXT: ;;#ASMEND 9974; GFX900-NEXT: v_perm_b32 v0, v3, v1, s4 9975; GFX900-NEXT: global_store_dword v6, v0, s[16:17] 9976; GFX900-NEXT: s_waitcnt vmcnt(0) 9977; GFX900-NEXT: s_setpc_b64 s[30:31] 9978; 9979; GFX90A-LABEL: v_shuffle_v2i16_v8i16__3_11: 9980; GFX90A: ; %bb.0: 9981; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9982; GFX90A-NEXT: ;;#ASMSTART 9983; GFX90A-NEXT: ; def v[0:3] 9984; GFX90A-NEXT: ;;#ASMEND 9985; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 9986; GFX90A-NEXT: v_mov_b32_e32 v6, 0 9987; GFX90A-NEXT: ;;#ASMSTART 9988; GFX90A-NEXT: ; def v[2:5] 9989; GFX90A-NEXT: ;;#ASMEND 9990; GFX90A-NEXT: v_perm_b32 v0, v3, v1, s4 9991; GFX90A-NEXT: global_store_dword v6, v0, s[16:17] 9992; GFX90A-NEXT: s_waitcnt vmcnt(0) 9993; GFX90A-NEXT: s_setpc_b64 s[30:31] 9994; 9995; GFX940-LABEL: v_shuffle_v2i16_v8i16__3_11: 9996; GFX940: ; %bb.0: 9997; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9998; GFX940-NEXT: ;;#ASMSTART 9999; GFX940-NEXT: ; def v[0:3] 10000; GFX940-NEXT: ;;#ASMEND 10001; GFX940-NEXT: s_mov_b32 s2, 0x7060302 10002; GFX940-NEXT: v_mov_b32_e32 v6, 0 10003; GFX940-NEXT: ;;#ASMSTART 10004; GFX940-NEXT: ; def v[2:5] 10005; GFX940-NEXT: ;;#ASMEND 10006; GFX940-NEXT: s_nop 0 10007; GFX940-NEXT: v_perm_b32 v0, v3, v1, s2 10008; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 10009; GFX940-NEXT: s_waitcnt vmcnt(0) 10010; GFX940-NEXT: s_setpc_b64 s[30:31] 10011 %vec0 = call <8 x i16> asm "; def $0", "=v"() 10012 %vec1 = call <8 x i16> asm "; def $0", "=v"() 10013 
%shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 3, i32 11> 10014 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 10015 ret void 10016} 10017 10018define void @v_shuffle_v2i16_v8i16__4_11(ptr addrspace(1) inreg %ptr) { 10019; GFX900-LABEL: v_shuffle_v2i16_v8i16__4_11: 10020; GFX900: ; %bb.0: 10021; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10022; GFX900-NEXT: ;;#ASMSTART 10023; GFX900-NEXT: ; def v[0:3] 10024; GFX900-NEXT: ;;#ASMEND 10025; GFX900-NEXT: s_mov_b32 s4, 0xffff 10026; GFX900-NEXT: v_mov_b32_e32 v7, 0 10027; GFX900-NEXT: ;;#ASMSTART 10028; GFX900-NEXT: ; def v[3:6] 10029; GFX900-NEXT: ;;#ASMEND 10030; GFX900-NEXT: v_bfi_b32 v0, s4, v2, v4 10031; GFX900-NEXT: global_store_dword v7, v0, s[16:17] 10032; GFX900-NEXT: s_waitcnt vmcnt(0) 10033; GFX900-NEXT: s_setpc_b64 s[30:31] 10034; 10035; GFX90A-LABEL: v_shuffle_v2i16_v8i16__4_11: 10036; GFX90A: ; %bb.0: 10037; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10038; GFX90A-NEXT: ;;#ASMSTART 10039; GFX90A-NEXT: ; def v[0:3] 10040; GFX90A-NEXT: ;;#ASMEND 10041; GFX90A-NEXT: s_mov_b32 s4, 0xffff 10042; GFX90A-NEXT: v_mov_b32_e32 v8, 0 10043; GFX90A-NEXT: ;;#ASMSTART 10044; GFX90A-NEXT: ; def v[4:7] 10045; GFX90A-NEXT: ;;#ASMEND 10046; GFX90A-NEXT: v_bfi_b32 v0, s4, v2, v5 10047; GFX90A-NEXT: global_store_dword v8, v0, s[16:17] 10048; GFX90A-NEXT: s_waitcnt vmcnt(0) 10049; GFX90A-NEXT: s_setpc_b64 s[30:31] 10050; 10051; GFX940-LABEL: v_shuffle_v2i16_v8i16__4_11: 10052; GFX940: ; %bb.0: 10053; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10054; GFX940-NEXT: ;;#ASMSTART 10055; GFX940-NEXT: ; def v[0:3] 10056; GFX940-NEXT: ;;#ASMEND 10057; GFX940-NEXT: s_mov_b32 s2, 0xffff 10058; GFX940-NEXT: v_mov_b32_e32 v8, 0 10059; GFX940-NEXT: ;;#ASMSTART 10060; GFX940-NEXT: ; def v[4:7] 10061; GFX940-NEXT: ;;#ASMEND 10062; GFX940-NEXT: s_nop 0 10063; GFX940-NEXT: v_bfi_b32 v0, s2, v2, v5 10064; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 10065; GFX940-NEXT: 
s_waitcnt vmcnt(0) 10066; GFX940-NEXT: s_setpc_b64 s[30:31] 10067 %vec0 = call <8 x i16> asm "; def $0", "=v"() 10068 %vec1 = call <8 x i16> asm "; def $0", "=v"() 10069 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 4, i32 11> 10070 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 10071 ret void 10072} 10073 10074define void @v_shuffle_v2i16_v8i16__5_11(ptr addrspace(1) inreg %ptr) { 10075; GFX900-LABEL: v_shuffle_v2i16_v8i16__5_11: 10076; GFX900: ; %bb.0: 10077; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10078; GFX900-NEXT: ;;#ASMSTART 10079; GFX900-NEXT: ; def v[0:3] 10080; GFX900-NEXT: ;;#ASMEND 10081; GFX900-NEXT: s_mov_b32 s4, 0x7060302 10082; GFX900-NEXT: v_mov_b32_e32 v7, 0 10083; GFX900-NEXT: ;;#ASMSTART 10084; GFX900-NEXT: ; def v[3:6] 10085; GFX900-NEXT: ;;#ASMEND 10086; GFX900-NEXT: v_perm_b32 v0, v4, v2, s4 10087; GFX900-NEXT: global_store_dword v7, v0, s[16:17] 10088; GFX900-NEXT: s_waitcnt vmcnt(0) 10089; GFX900-NEXT: s_setpc_b64 s[30:31] 10090; 10091; GFX90A-LABEL: v_shuffle_v2i16_v8i16__5_11: 10092; GFX90A: ; %bb.0: 10093; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10094; GFX90A-NEXT: ;;#ASMSTART 10095; GFX90A-NEXT: ; def v[0:3] 10096; GFX90A-NEXT: ;;#ASMEND 10097; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 10098; GFX90A-NEXT: v_mov_b32_e32 v8, 0 10099; GFX90A-NEXT: ;;#ASMSTART 10100; GFX90A-NEXT: ; def v[4:7] 10101; GFX90A-NEXT: ;;#ASMEND 10102; GFX90A-NEXT: v_perm_b32 v0, v5, v2, s4 10103; GFX90A-NEXT: global_store_dword v8, v0, s[16:17] 10104; GFX90A-NEXT: s_waitcnt vmcnt(0) 10105; GFX90A-NEXT: s_setpc_b64 s[30:31] 10106; 10107; GFX940-LABEL: v_shuffle_v2i16_v8i16__5_11: 10108; GFX940: ; %bb.0: 10109; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10110; GFX940-NEXT: ;;#ASMSTART 10111; GFX940-NEXT: ; def v[0:3] 10112; GFX940-NEXT: ;;#ASMEND 10113; GFX940-NEXT: s_mov_b32 s2, 0x7060302 10114; GFX940-NEXT: v_mov_b32_e32 v8, 0 10115; GFX940-NEXT: ;;#ASMSTART 10116; GFX940-NEXT: ; def v[4:7] 10117; 
GFX940-NEXT: ;;#ASMEND 10118; GFX940-NEXT: s_nop 0 10119; GFX940-NEXT: v_perm_b32 v0, v5, v2, s2 10120; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 10121; GFX940-NEXT: s_waitcnt vmcnt(0) 10122; GFX940-NEXT: s_setpc_b64 s[30:31] 10123 %vec0 = call <8 x i16> asm "; def $0", "=v"() 10124 %vec1 = call <8 x i16> asm "; def $0", "=v"() 10125 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 5, i32 11> 10126 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 10127 ret void 10128} 10129 10130define void @v_shuffle_v2i16_v8i16__6_11(ptr addrspace(1) inreg %ptr) { 10131; GFX900-LABEL: v_shuffle_v2i16_v8i16__6_11: 10132; GFX900: ; %bb.0: 10133; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10134; GFX900-NEXT: ;;#ASMSTART 10135; GFX900-NEXT: ; def v[0:3] 10136; GFX900-NEXT: ;;#ASMEND 10137; GFX900-NEXT: s_mov_b32 s4, 0xffff 10138; GFX900-NEXT: v_mov_b32_e32 v8, 0 10139; GFX900-NEXT: ;;#ASMSTART 10140; GFX900-NEXT: ; def v[4:7] 10141; GFX900-NEXT: ;;#ASMEND 10142; GFX900-NEXT: v_bfi_b32 v0, s4, v3, v5 10143; GFX900-NEXT: global_store_dword v8, v0, s[16:17] 10144; GFX900-NEXT: s_waitcnt vmcnt(0) 10145; GFX900-NEXT: s_setpc_b64 s[30:31] 10146; 10147; GFX90A-LABEL: v_shuffle_v2i16_v8i16__6_11: 10148; GFX90A: ; %bb.0: 10149; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10150; GFX90A-NEXT: ;;#ASMSTART 10151; GFX90A-NEXT: ; def v[0:3] 10152; GFX90A-NEXT: ;;#ASMEND 10153; GFX90A-NEXT: s_mov_b32 s4, 0xffff 10154; GFX90A-NEXT: v_mov_b32_e32 v8, 0 10155; GFX90A-NEXT: ;;#ASMSTART 10156; GFX90A-NEXT: ; def v[4:7] 10157; GFX90A-NEXT: ;;#ASMEND 10158; GFX90A-NEXT: v_bfi_b32 v0, s4, v3, v5 10159; GFX90A-NEXT: global_store_dword v8, v0, s[16:17] 10160; GFX90A-NEXT: s_waitcnt vmcnt(0) 10161; GFX90A-NEXT: s_setpc_b64 s[30:31] 10162; 10163; GFX940-LABEL: v_shuffle_v2i16_v8i16__6_11: 10164; GFX940: ; %bb.0: 10165; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10166; GFX940-NEXT: ;;#ASMSTART 10167; GFX940-NEXT: ; def v[0:3] 10168; 
GFX940-NEXT: ;;#ASMEND 10169; GFX940-NEXT: s_mov_b32 s2, 0xffff 10170; GFX940-NEXT: v_mov_b32_e32 v8, 0 10171; GFX940-NEXT: ;;#ASMSTART 10172; GFX940-NEXT: ; def v[4:7] 10173; GFX940-NEXT: ;;#ASMEND 10174; GFX940-NEXT: s_nop 0 10175; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v5 10176; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 10177; GFX940-NEXT: s_waitcnt vmcnt(0) 10178; GFX940-NEXT: s_setpc_b64 s[30:31] 10179 %vec0 = call <8 x i16> asm "; def $0", "=v"() 10180 %vec1 = call <8 x i16> asm "; def $0", "=v"() 10181 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 6, i32 11> 10182 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 10183 ret void 10184} 10185 10186define void @v_shuffle_v2i16_v8i16__7_11(ptr addrspace(1) inreg %ptr) { 10187; GFX900-LABEL: v_shuffle_v2i16_v8i16__7_11: 10188; GFX900: ; %bb.0: 10189; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10190; GFX900-NEXT: ;;#ASMSTART 10191; GFX900-NEXT: ; def v[0:3] 10192; GFX900-NEXT: ;;#ASMEND 10193; GFX900-NEXT: s_mov_b32 s4, 0x7060302 10194; GFX900-NEXT: v_mov_b32_e32 v8, 0 10195; GFX900-NEXT: ;;#ASMSTART 10196; GFX900-NEXT: ; def v[4:7] 10197; GFX900-NEXT: ;;#ASMEND 10198; GFX900-NEXT: v_perm_b32 v0, v5, v3, s4 10199; GFX900-NEXT: global_store_dword v8, v0, s[16:17] 10200; GFX900-NEXT: s_waitcnt vmcnt(0) 10201; GFX900-NEXT: s_setpc_b64 s[30:31] 10202; 10203; GFX90A-LABEL: v_shuffle_v2i16_v8i16__7_11: 10204; GFX90A: ; %bb.0: 10205; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10206; GFX90A-NEXT: ;;#ASMSTART 10207; GFX90A-NEXT: ; def v[0:3] 10208; GFX90A-NEXT: ;;#ASMEND 10209; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 10210; GFX90A-NEXT: v_mov_b32_e32 v8, 0 10211; GFX90A-NEXT: ;;#ASMSTART 10212; GFX90A-NEXT: ; def v[4:7] 10213; GFX90A-NEXT: ;;#ASMEND 10214; GFX90A-NEXT: v_perm_b32 v0, v5, v3, s4 10215; GFX90A-NEXT: global_store_dword v8, v0, s[16:17] 10216; GFX90A-NEXT: s_waitcnt vmcnt(0) 10217; GFX90A-NEXT: s_setpc_b64 s[30:31] 10218; 10219; GFX940-LABEL: 
v_shuffle_v2i16_v8i16__7_11: 10220; GFX940: ; %bb.0: 10221; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10222; GFX940-NEXT: ;;#ASMSTART 10223; GFX940-NEXT: ; def v[0:3] 10224; GFX940-NEXT: ;;#ASMEND 10225; GFX940-NEXT: s_mov_b32 s2, 0x7060302 10226; GFX940-NEXT: v_mov_b32_e32 v8, 0 10227; GFX940-NEXT: ;;#ASMSTART 10228; GFX940-NEXT: ; def v[4:7] 10229; GFX940-NEXT: ;;#ASMEND 10230; GFX940-NEXT: s_nop 0 10231; GFX940-NEXT: v_perm_b32 v0, v5, v3, s2 10232; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 10233; GFX940-NEXT: s_waitcnt vmcnt(0) 10234; GFX940-NEXT: s_setpc_b64 s[30:31] 10235 %vec0 = call <8 x i16> asm "; def $0", "=v"() 10236 %vec1 = call <8 x i16> asm "; def $0", "=v"() 10237 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 7, i32 11> 10238 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 10239 ret void 10240} 10241 10242define void @v_shuffle_v2i16_v8i16__8_11(ptr addrspace(1) inreg %ptr) { 10243; GFX900-LABEL: v_shuffle_v2i16_v8i16__8_11: 10244; GFX900: ; %bb.0: 10245; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10246; GFX900-NEXT: ;;#ASMSTART 10247; GFX900-NEXT: ; def v[0:3] 10248; GFX900-NEXT: ;;#ASMEND 10249; GFX900-NEXT: s_mov_b32 s4, 0xffff 10250; GFX900-NEXT: v_mov_b32_e32 v4, 0 10251; GFX900-NEXT: v_bfi_b32 v0, s4, v0, v1 10252; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 10253; GFX900-NEXT: s_waitcnt vmcnt(0) 10254; GFX900-NEXT: s_setpc_b64 s[30:31] 10255; 10256; GFX90A-LABEL: v_shuffle_v2i16_v8i16__8_11: 10257; GFX90A: ; %bb.0: 10258; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10259; GFX90A-NEXT: ;;#ASMSTART 10260; GFX90A-NEXT: ; def v[0:3] 10261; GFX90A-NEXT: ;;#ASMEND 10262; GFX90A-NEXT: s_mov_b32 s4, 0xffff 10263; GFX90A-NEXT: v_mov_b32_e32 v4, 0 10264; GFX90A-NEXT: v_bfi_b32 v0, s4, v0, v1 10265; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 10266; GFX90A-NEXT: s_waitcnt vmcnt(0) 10267; GFX90A-NEXT: s_setpc_b64 s[30:31] 10268; 10269; GFX940-LABEL: 
v_shuffle_v2i16_v8i16__8_11: 10270; GFX940: ; %bb.0: 10271; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10272; GFX940-NEXT: ;;#ASMSTART 10273; GFX940-NEXT: ; def v[0:3] 10274; GFX940-NEXT: ;;#ASMEND 10275; GFX940-NEXT: s_mov_b32 s2, 0xffff 10276; GFX940-NEXT: v_mov_b32_e32 v4, 0 10277; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v1 10278; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 10279; GFX940-NEXT: s_waitcnt vmcnt(0) 10280; GFX940-NEXT: s_setpc_b64 s[30:31] 10281 %vec0 = call <8 x i16> asm "; def $0", "=v"() 10282 %vec1 = call <8 x i16> asm "; def $0", "=v"() 10283 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 8, i32 11> 10284 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 10285 ret void 10286} 10287 10288define void @v_shuffle_v2i16_v8i16__9_11(ptr addrspace(1) inreg %ptr) { 10289; GFX900-LABEL: v_shuffle_v2i16_v8i16__9_11: 10290; GFX900: ; %bb.0: 10291; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10292; GFX900-NEXT: ;;#ASMSTART 10293; GFX900-NEXT: ; def v[0:3] 10294; GFX900-NEXT: ;;#ASMEND 10295; GFX900-NEXT: s_mov_b32 s4, 0x7060302 10296; GFX900-NEXT: v_mov_b32_e32 v4, 0 10297; GFX900-NEXT: v_perm_b32 v0, v1, v0, s4 10298; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 10299; GFX900-NEXT: s_waitcnt vmcnt(0) 10300; GFX900-NEXT: s_setpc_b64 s[30:31] 10301; 10302; GFX90A-LABEL: v_shuffle_v2i16_v8i16__9_11: 10303; GFX90A: ; %bb.0: 10304; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10305; GFX90A-NEXT: ;;#ASMSTART 10306; GFX90A-NEXT: ; def v[0:3] 10307; GFX90A-NEXT: ;;#ASMEND 10308; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 10309; GFX90A-NEXT: v_mov_b32_e32 v4, 0 10310; GFX90A-NEXT: v_perm_b32 v0, v1, v0, s4 10311; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 10312; GFX90A-NEXT: s_waitcnt vmcnt(0) 10313; GFX90A-NEXT: s_setpc_b64 s[30:31] 10314; 10315; GFX940-LABEL: v_shuffle_v2i16_v8i16__9_11: 10316; GFX940: ; %bb.0: 10317; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10318; GFX940-NEXT: 
;;#ASMSTART 10319; GFX940-NEXT: ; def v[0:3] 10320; GFX940-NEXT: ;;#ASMEND 10321; GFX940-NEXT: s_mov_b32 s2, 0x7060302 10322; GFX940-NEXT: v_mov_b32_e32 v4, 0 10323; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2 10324; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 10325; GFX940-NEXT: s_waitcnt vmcnt(0) 10326; GFX940-NEXT: s_setpc_b64 s[30:31] 10327 %vec0 = call <8 x i16> asm "; def $0", "=v"() 10328 %vec1 = call <8 x i16> asm "; def $0", "=v"() 10329 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 9, i32 11> 10330 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 10331 ret void 10332} 10333 10334define void @v_shuffle_v2i16_v8i16__10_11(ptr addrspace(1) inreg %ptr) { 10335; GFX900-LABEL: v_shuffle_v2i16_v8i16__10_11: 10336; GFX900: ; %bb.0: 10337; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10338; GFX900-NEXT: v_mov_b32_e32 v4, 0 10339; GFX900-NEXT: ;;#ASMSTART 10340; GFX900-NEXT: ; def v[0:3] 10341; GFX900-NEXT: ;;#ASMEND 10342; GFX900-NEXT: global_store_dword v4, v1, s[16:17] 10343; GFX900-NEXT: s_waitcnt vmcnt(0) 10344; GFX900-NEXT: s_setpc_b64 s[30:31] 10345; 10346; GFX90A-LABEL: v_shuffle_v2i16_v8i16__10_11: 10347; GFX90A: ; %bb.0: 10348; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10349; GFX90A-NEXT: v_mov_b32_e32 v4, 0 10350; GFX90A-NEXT: ;;#ASMSTART 10351; GFX90A-NEXT: ; def v[0:3] 10352; GFX90A-NEXT: ;;#ASMEND 10353; GFX90A-NEXT: global_store_dword v4, v1, s[16:17] 10354; GFX90A-NEXT: s_waitcnt vmcnt(0) 10355; GFX90A-NEXT: s_setpc_b64 s[30:31] 10356; 10357; GFX940-LABEL: v_shuffle_v2i16_v8i16__10_11: 10358; GFX940: ; %bb.0: 10359; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10360; GFX940-NEXT: v_mov_b32_e32 v4, 0 10361; GFX940-NEXT: ;;#ASMSTART 10362; GFX940-NEXT: ; def v[0:3] 10363; GFX940-NEXT: ;;#ASMEND 10364; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 10365; GFX940-NEXT: s_waitcnt vmcnt(0) 10366; GFX940-NEXT: s_setpc_b64 s[30:31] 10367 %vec0 = call <8 x i16> asm "; def $0", "=v"() 
10368 %vec1 = call <8 x i16> asm "; def $0", "=v"() 10369 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 10, i32 11> 10370 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 10371 ret void 10372} 10373 10374define void @v_shuffle_v2i16_v8i16__11_11(ptr addrspace(1) inreg %ptr) { 10375; GFX900-LABEL: v_shuffle_v2i16_v8i16__11_11: 10376; GFX900: ; %bb.0: 10377; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10378; GFX900-NEXT: ;;#ASMSTART 10379; GFX900-NEXT: ; def v[0:3] 10380; GFX900-NEXT: ;;#ASMEND 10381; GFX900-NEXT: s_mov_b32 s4, 0x7060302 10382; GFX900-NEXT: v_mov_b32_e32 v4, 0 10383; GFX900-NEXT: v_perm_b32 v0, v1, v1, s4 10384; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 10385; GFX900-NEXT: s_waitcnt vmcnt(0) 10386; GFX900-NEXT: s_setpc_b64 s[30:31] 10387; 10388; GFX90A-LABEL: v_shuffle_v2i16_v8i16__11_11: 10389; GFX90A: ; %bb.0: 10390; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10391; GFX90A-NEXT: ;;#ASMSTART 10392; GFX90A-NEXT: ; def v[0:3] 10393; GFX90A-NEXT: ;;#ASMEND 10394; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 10395; GFX90A-NEXT: v_mov_b32_e32 v4, 0 10396; GFX90A-NEXT: v_perm_b32 v0, v1, v1, s4 10397; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 10398; GFX90A-NEXT: s_waitcnt vmcnt(0) 10399; GFX90A-NEXT: s_setpc_b64 s[30:31] 10400; 10401; GFX940-LABEL: v_shuffle_v2i16_v8i16__11_11: 10402; GFX940: ; %bb.0: 10403; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10404; GFX940-NEXT: ;;#ASMSTART 10405; GFX940-NEXT: ; def v[0:3] 10406; GFX940-NEXT: ;;#ASMEND 10407; GFX940-NEXT: s_mov_b32 s2, 0x7060302 10408; GFX940-NEXT: v_mov_b32_e32 v4, 0 10409; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 10410; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 10411; GFX940-NEXT: s_waitcnt vmcnt(0) 10412; GFX940-NEXT: s_setpc_b64 s[30:31] 10413 %vec0 = call <8 x i16> asm "; def $0", "=v"() 10414 %vec1 = call <8 x i16> asm "; def $0", "=v"() 10415 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 11, 
i32 11> 10416 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 10417 ret void 10418} 10419 10420define void @v_shuffle_v2i16_v8i16__12_11(ptr addrspace(1) inreg %ptr) { 10421; GFX900-LABEL: v_shuffle_v2i16_v8i16__12_11: 10422; GFX900: ; %bb.0: 10423; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10424; GFX900-NEXT: ;;#ASMSTART 10425; GFX900-NEXT: ; def v[0:3] 10426; GFX900-NEXT: ;;#ASMEND 10427; GFX900-NEXT: s_mov_b32 s4, 0xffff 10428; GFX900-NEXT: v_mov_b32_e32 v4, 0 10429; GFX900-NEXT: v_bfi_b32 v0, s4, v2, v1 10430; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 10431; GFX900-NEXT: s_waitcnt vmcnt(0) 10432; GFX900-NEXT: s_setpc_b64 s[30:31] 10433; 10434; GFX90A-LABEL: v_shuffle_v2i16_v8i16__12_11: 10435; GFX90A: ; %bb.0: 10436; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10437; GFX90A-NEXT: ;;#ASMSTART 10438; GFX90A-NEXT: ; def v[0:3] 10439; GFX90A-NEXT: ;;#ASMEND 10440; GFX90A-NEXT: s_mov_b32 s4, 0xffff 10441; GFX90A-NEXT: v_mov_b32_e32 v4, 0 10442; GFX90A-NEXT: v_bfi_b32 v0, s4, v2, v1 10443; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 10444; GFX90A-NEXT: s_waitcnt vmcnt(0) 10445; GFX90A-NEXT: s_setpc_b64 s[30:31] 10446; 10447; GFX940-LABEL: v_shuffle_v2i16_v8i16__12_11: 10448; GFX940: ; %bb.0: 10449; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10450; GFX940-NEXT: ;;#ASMSTART 10451; GFX940-NEXT: ; def v[0:3] 10452; GFX940-NEXT: ;;#ASMEND 10453; GFX940-NEXT: s_mov_b32 s2, 0xffff 10454; GFX940-NEXT: v_mov_b32_e32 v4, 0 10455; GFX940-NEXT: v_bfi_b32 v0, s2, v2, v1 10456; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 10457; GFX940-NEXT: s_waitcnt vmcnt(0) 10458; GFX940-NEXT: s_setpc_b64 s[30:31] 10459 %vec0 = call <8 x i16> asm "; def $0", "=v"() 10460 %vec1 = call <8 x i16> asm "; def $0", "=v"() 10461 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 12, i32 11> 10462 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 10463 ret void 10464} 10465 10466define void 
@v_shuffle_v2i16_v8i16__13_11(ptr addrspace(1) inreg %ptr) { 10467; GFX900-LABEL: v_shuffle_v2i16_v8i16__13_11: 10468; GFX900: ; %bb.0: 10469; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10470; GFX900-NEXT: ;;#ASMSTART 10471; GFX900-NEXT: ; def v[0:3] 10472; GFX900-NEXT: ;;#ASMEND 10473; GFX900-NEXT: s_mov_b32 s4, 0x7060302 10474; GFX900-NEXT: v_mov_b32_e32 v4, 0 10475; GFX900-NEXT: v_perm_b32 v0, v1, v2, s4 10476; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 10477; GFX900-NEXT: s_waitcnt vmcnt(0) 10478; GFX900-NEXT: s_setpc_b64 s[30:31] 10479; 10480; GFX90A-LABEL: v_shuffle_v2i16_v8i16__13_11: 10481; GFX90A: ; %bb.0: 10482; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10483; GFX90A-NEXT: ;;#ASMSTART 10484; GFX90A-NEXT: ; def v[0:3] 10485; GFX90A-NEXT: ;;#ASMEND 10486; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 10487; GFX90A-NEXT: v_mov_b32_e32 v4, 0 10488; GFX90A-NEXT: v_perm_b32 v0, v1, v2, s4 10489; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 10490; GFX90A-NEXT: s_waitcnt vmcnt(0) 10491; GFX90A-NEXT: s_setpc_b64 s[30:31] 10492; 10493; GFX940-LABEL: v_shuffle_v2i16_v8i16__13_11: 10494; GFX940: ; %bb.0: 10495; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10496; GFX940-NEXT: ;;#ASMSTART 10497; GFX940-NEXT: ; def v[0:3] 10498; GFX940-NEXT: ;;#ASMEND 10499; GFX940-NEXT: s_mov_b32 s2, 0x7060302 10500; GFX940-NEXT: v_mov_b32_e32 v4, 0 10501; GFX940-NEXT: v_perm_b32 v0, v1, v2, s2 10502; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 10503; GFX940-NEXT: s_waitcnt vmcnt(0) 10504; GFX940-NEXT: s_setpc_b64 s[30:31] 10505 %vec0 = call <8 x i16> asm "; def $0", "=v"() 10506 %vec1 = call <8 x i16> asm "; def $0", "=v"() 10507 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 13, i32 11> 10508 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 10509 ret void 10510} 10511 10512define void @v_shuffle_v2i16_v8i16__14_11(ptr addrspace(1) inreg %ptr) { 10513; GFX900-LABEL: v_shuffle_v2i16_v8i16__14_11: 10514; GFX900: ; 
%bb.0: 10515; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10516; GFX900-NEXT: ;;#ASMSTART 10517; GFX900-NEXT: ; def v[0:3] 10518; GFX900-NEXT: ;;#ASMEND 10519; GFX900-NEXT: s_mov_b32 s4, 0xffff 10520; GFX900-NEXT: v_mov_b32_e32 v4, 0 10521; GFX900-NEXT: v_bfi_b32 v0, s4, v3, v1 10522; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 10523; GFX900-NEXT: s_waitcnt vmcnt(0) 10524; GFX900-NEXT: s_setpc_b64 s[30:31] 10525; 10526; GFX90A-LABEL: v_shuffle_v2i16_v8i16__14_11: 10527; GFX90A: ; %bb.0: 10528; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10529; GFX90A-NEXT: ;;#ASMSTART 10530; GFX90A-NEXT: ; def v[0:3] 10531; GFX90A-NEXT: ;;#ASMEND 10532; GFX90A-NEXT: s_mov_b32 s4, 0xffff 10533; GFX90A-NEXT: v_mov_b32_e32 v4, 0 10534; GFX90A-NEXT: v_bfi_b32 v0, s4, v3, v1 10535; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 10536; GFX90A-NEXT: s_waitcnt vmcnt(0) 10537; GFX90A-NEXT: s_setpc_b64 s[30:31] 10538; 10539; GFX940-LABEL: v_shuffle_v2i16_v8i16__14_11: 10540; GFX940: ; %bb.0: 10541; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10542; GFX940-NEXT: ;;#ASMSTART 10543; GFX940-NEXT: ; def v[0:3] 10544; GFX940-NEXT: ;;#ASMEND 10545; GFX940-NEXT: s_mov_b32 s2, 0xffff 10546; GFX940-NEXT: v_mov_b32_e32 v4, 0 10547; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v1 10548; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 10549; GFX940-NEXT: s_waitcnt vmcnt(0) 10550; GFX940-NEXT: s_setpc_b64 s[30:31] 10551 %vec0 = call <8 x i16> asm "; def $0", "=v"() 10552 %vec1 = call <8 x i16> asm "; def $0", "=v"() 10553 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 14, i32 11> 10554 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 10555 ret void 10556} 10557 10558define void @v_shuffle_v2i16_v8i16__u_12(ptr addrspace(1) inreg %ptr) { 10559; GFX900-LABEL: v_shuffle_v2i16_v8i16__u_12: 10560; GFX900: ; %bb.0: 10561; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10562; GFX900-NEXT: ;;#ASMSTART 10563; GFX900-NEXT: ; def v[0:3] 10564; 
GFX900-NEXT: ;;#ASMEND 10565; GFX900-NEXT: v_mov_b32_e32 v4, 0 10566; GFX900-NEXT: v_lshlrev_b32_e32 v0, 16, v2 10567; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 10568; GFX900-NEXT: s_waitcnt vmcnt(0) 10569; GFX900-NEXT: s_setpc_b64 s[30:31] 10570; 10571; GFX90A-LABEL: v_shuffle_v2i16_v8i16__u_12: 10572; GFX90A: ; %bb.0: 10573; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10574; GFX90A-NEXT: ;;#ASMSTART 10575; GFX90A-NEXT: ; def v[0:3] 10576; GFX90A-NEXT: ;;#ASMEND 10577; GFX90A-NEXT: v_mov_b32_e32 v4, 0 10578; GFX90A-NEXT: v_lshlrev_b32_e32 v0, 16, v2 10579; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 10580; GFX90A-NEXT: s_waitcnt vmcnt(0) 10581; GFX90A-NEXT: s_setpc_b64 s[30:31] 10582; 10583; GFX940-LABEL: v_shuffle_v2i16_v8i16__u_12: 10584; GFX940: ; %bb.0: 10585; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10586; GFX940-NEXT: ;;#ASMSTART 10587; GFX940-NEXT: ; def v[0:3] 10588; GFX940-NEXT: ;;#ASMEND 10589; GFX940-NEXT: v_mov_b32_e32 v4, 0 10590; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v2 10591; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 10592; GFX940-NEXT: s_waitcnt vmcnt(0) 10593; GFX940-NEXT: s_setpc_b64 s[30:31] 10594 %vec0 = call <8 x i16> asm "; def $0", "=v"() 10595 %vec1 = call <8 x i16> asm "; def $0", "=v"() 10596 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 poison, i32 12> 10597 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 10598 ret void 10599} 10600 10601define void @v_shuffle_v2i16_v8i16__0_12(ptr addrspace(1) inreg %ptr) { 10602; GFX900-LABEL: v_shuffle_v2i16_v8i16__0_12: 10603; GFX900: ; %bb.0: 10604; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10605; GFX900-NEXT: ;;#ASMSTART 10606; GFX900-NEXT: ; def v[0:3] 10607; GFX900-NEXT: ;;#ASMEND 10608; GFX900-NEXT: s_mov_b32 s4, 0x5040100 10609; GFX900-NEXT: v_mov_b32_e32 v5, 0 10610; GFX900-NEXT: ;;#ASMSTART 10611; GFX900-NEXT: ; def v[1:4] 10612; GFX900-NEXT: ;;#ASMEND 10613; GFX900-NEXT: v_perm_b32 v0, v3, v0, s4 10614; 
GFX900-NEXT: global_store_dword v5, v0, s[16:17] 10615; GFX900-NEXT: s_waitcnt vmcnt(0) 10616; GFX900-NEXT: s_setpc_b64 s[30:31] 10617; 10618; GFX90A-LABEL: v_shuffle_v2i16_v8i16__0_12: 10619; GFX90A: ; %bb.0: 10620; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10621; GFX90A-NEXT: ;;#ASMSTART 10622; GFX90A-NEXT: ; def v[0:3] 10623; GFX90A-NEXT: ;;#ASMEND 10624; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 10625; GFX90A-NEXT: v_mov_b32_e32 v6, 0 10626; GFX90A-NEXT: ;;#ASMSTART 10627; GFX90A-NEXT: ; def v[2:5] 10628; GFX90A-NEXT: ;;#ASMEND 10629; GFX90A-NEXT: v_perm_b32 v0, v4, v0, s4 10630; GFX90A-NEXT: global_store_dword v6, v0, s[16:17] 10631; GFX90A-NEXT: s_waitcnt vmcnt(0) 10632; GFX90A-NEXT: s_setpc_b64 s[30:31] 10633; 10634; GFX940-LABEL: v_shuffle_v2i16_v8i16__0_12: 10635; GFX940: ; %bb.0: 10636; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10637; GFX940-NEXT: ;;#ASMSTART 10638; GFX940-NEXT: ; def v[0:3] 10639; GFX940-NEXT: ;;#ASMEND 10640; GFX940-NEXT: s_mov_b32 s2, 0x5040100 10641; GFX940-NEXT: v_mov_b32_e32 v6, 0 10642; GFX940-NEXT: ;;#ASMSTART 10643; GFX940-NEXT: ; def v[2:5] 10644; GFX940-NEXT: ;;#ASMEND 10645; GFX940-NEXT: s_nop 0 10646; GFX940-NEXT: v_perm_b32 v0, v4, v0, s2 10647; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 10648; GFX940-NEXT: s_waitcnt vmcnt(0) 10649; GFX940-NEXT: s_setpc_b64 s[30:31] 10650 %vec0 = call <8 x i16> asm "; def $0", "=v"() 10651 %vec1 = call <8 x i16> asm "; def $0", "=v"() 10652 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 0, i32 12> 10653 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 10654 ret void 10655} 10656 10657define void @v_shuffle_v2i16_v8i16__1_12(ptr addrspace(1) inreg %ptr) { 10658; GFX900-LABEL: v_shuffle_v2i16_v8i16__1_12: 10659; GFX900: ; %bb.0: 10660; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10661; GFX900-NEXT: ;;#ASMSTART 10662; GFX900-NEXT: ; def v[0:3] 10663; GFX900-NEXT: ;;#ASMEND 10664; GFX900-NEXT: v_mov_b32_e32 v5, 0 10665; 
GFX900-NEXT: ;;#ASMSTART 10666; GFX900-NEXT: ; def v[1:4] 10667; GFX900-NEXT: ;;#ASMEND 10668; GFX900-NEXT: v_alignbit_b32 v0, v3, v0, 16 10669; GFX900-NEXT: global_store_dword v5, v0, s[16:17] 10670; GFX900-NEXT: s_waitcnt vmcnt(0) 10671; GFX900-NEXT: s_setpc_b64 s[30:31] 10672; 10673; GFX90A-LABEL: v_shuffle_v2i16_v8i16__1_12: 10674; GFX90A: ; %bb.0: 10675; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10676; GFX90A-NEXT: ;;#ASMSTART 10677; GFX90A-NEXT: ; def v[0:3] 10678; GFX90A-NEXT: ;;#ASMEND 10679; GFX90A-NEXT: v_mov_b32_e32 v6, 0 10680; GFX90A-NEXT: ;;#ASMSTART 10681; GFX90A-NEXT: ; def v[2:5] 10682; GFX90A-NEXT: ;;#ASMEND 10683; GFX90A-NEXT: v_alignbit_b32 v0, v4, v0, 16 10684; GFX90A-NEXT: global_store_dword v6, v0, s[16:17] 10685; GFX90A-NEXT: s_waitcnt vmcnt(0) 10686; GFX90A-NEXT: s_setpc_b64 s[30:31] 10687; 10688; GFX940-LABEL: v_shuffle_v2i16_v8i16__1_12: 10689; GFX940: ; %bb.0: 10690; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10691; GFX940-NEXT: ;;#ASMSTART 10692; GFX940-NEXT: ; def v[0:3] 10693; GFX940-NEXT: ;;#ASMEND 10694; GFX940-NEXT: v_mov_b32_e32 v6, 0 10695; GFX940-NEXT: ;;#ASMSTART 10696; GFX940-NEXT: ; def v[2:5] 10697; GFX940-NEXT: ;;#ASMEND 10698; GFX940-NEXT: s_nop 0 10699; GFX940-NEXT: v_alignbit_b32 v0, v4, v0, 16 10700; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 10701; GFX940-NEXT: s_waitcnt vmcnt(0) 10702; GFX940-NEXT: s_setpc_b64 s[30:31] 10703 %vec0 = call <8 x i16> asm "; def $0", "=v"() 10704 %vec1 = call <8 x i16> asm "; def $0", "=v"() 10705 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 1, i32 12> 10706 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 10707 ret void 10708} 10709 10710define void @v_shuffle_v2i16_v8i16__2_12(ptr addrspace(1) inreg %ptr) { 10711; GFX900-LABEL: v_shuffle_v2i16_v8i16__2_12: 10712; GFX900: ; %bb.0: 10713; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10714; GFX900-NEXT: ;;#ASMSTART 10715; GFX900-NEXT: ; def v[0:3] 10716; GFX900-NEXT: 
;;#ASMEND 10717; GFX900-NEXT: s_mov_b32 s4, 0x5040100 10718; GFX900-NEXT: v_mov_b32_e32 v6, 0 10719; GFX900-NEXT: ;;#ASMSTART 10720; GFX900-NEXT: ; def v[2:5] 10721; GFX900-NEXT: ;;#ASMEND 10722; GFX900-NEXT: v_perm_b32 v0, v4, v1, s4 10723; GFX900-NEXT: global_store_dword v6, v0, s[16:17] 10724; GFX900-NEXT: s_waitcnt vmcnt(0) 10725; GFX900-NEXT: s_setpc_b64 s[30:31] 10726; 10727; GFX90A-LABEL: v_shuffle_v2i16_v8i16__2_12: 10728; GFX90A: ; %bb.0: 10729; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10730; GFX90A-NEXT: ;;#ASMSTART 10731; GFX90A-NEXT: ; def v[0:3] 10732; GFX90A-NEXT: ;;#ASMEND 10733; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 10734; GFX90A-NEXT: v_mov_b32_e32 v6, 0 10735; GFX90A-NEXT: ;;#ASMSTART 10736; GFX90A-NEXT: ; def v[2:5] 10737; GFX90A-NEXT: ;;#ASMEND 10738; GFX90A-NEXT: v_perm_b32 v0, v4, v1, s4 10739; GFX90A-NEXT: global_store_dword v6, v0, s[16:17] 10740; GFX90A-NEXT: s_waitcnt vmcnt(0) 10741; GFX90A-NEXT: s_setpc_b64 s[30:31] 10742; 10743; GFX940-LABEL: v_shuffle_v2i16_v8i16__2_12: 10744; GFX940: ; %bb.0: 10745; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10746; GFX940-NEXT: ;;#ASMSTART 10747; GFX940-NEXT: ; def v[0:3] 10748; GFX940-NEXT: ;;#ASMEND 10749; GFX940-NEXT: s_mov_b32 s2, 0x5040100 10750; GFX940-NEXT: v_mov_b32_e32 v6, 0 10751; GFX940-NEXT: ;;#ASMSTART 10752; GFX940-NEXT: ; def v[2:5] 10753; GFX940-NEXT: ;;#ASMEND 10754; GFX940-NEXT: s_nop 0 10755; GFX940-NEXT: v_perm_b32 v0, v4, v1, s2 10756; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 10757; GFX940-NEXT: s_waitcnt vmcnt(0) 10758; GFX940-NEXT: s_setpc_b64 s[30:31] 10759 %vec0 = call <8 x i16> asm "; def $0", "=v"() 10760 %vec1 = call <8 x i16> asm "; def $0", "=v"() 10761 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 2, i32 12> 10762 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 10763 ret void 10764} 10765 10766define void @v_shuffle_v2i16_v8i16__3_12(ptr addrspace(1) inreg %ptr) { 10767; GFX900-LABEL: 
v_shuffle_v2i16_v8i16__3_12: 10768; GFX900: ; %bb.0: 10769; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10770; GFX900-NEXT: ;;#ASMSTART 10771; GFX900-NEXT: ; def v[0:3] 10772; GFX900-NEXT: ;;#ASMEND 10773; GFX900-NEXT: v_mov_b32_e32 v6, 0 10774; GFX900-NEXT: ;;#ASMSTART 10775; GFX900-NEXT: ; def v[2:5] 10776; GFX900-NEXT: ;;#ASMEND 10777; GFX900-NEXT: v_alignbit_b32 v0, v4, v1, 16 10778; GFX900-NEXT: global_store_dword v6, v0, s[16:17] 10779; GFX900-NEXT: s_waitcnt vmcnt(0) 10780; GFX900-NEXT: s_setpc_b64 s[30:31] 10781; 10782; GFX90A-LABEL: v_shuffle_v2i16_v8i16__3_12: 10783; GFX90A: ; %bb.0: 10784; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10785; GFX90A-NEXT: ;;#ASMSTART 10786; GFX90A-NEXT: ; def v[0:3] 10787; GFX90A-NEXT: ;;#ASMEND 10788; GFX90A-NEXT: v_mov_b32_e32 v6, 0 10789; GFX90A-NEXT: ;;#ASMSTART 10790; GFX90A-NEXT: ; def v[2:5] 10791; GFX90A-NEXT: ;;#ASMEND 10792; GFX90A-NEXT: v_alignbit_b32 v0, v4, v1, 16 10793; GFX90A-NEXT: global_store_dword v6, v0, s[16:17] 10794; GFX90A-NEXT: s_waitcnt vmcnt(0) 10795; GFX90A-NEXT: s_setpc_b64 s[30:31] 10796; 10797; GFX940-LABEL: v_shuffle_v2i16_v8i16__3_12: 10798; GFX940: ; %bb.0: 10799; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10800; GFX940-NEXT: ;;#ASMSTART 10801; GFX940-NEXT: ; def v[0:3] 10802; GFX940-NEXT: ;;#ASMEND 10803; GFX940-NEXT: v_mov_b32_e32 v6, 0 10804; GFX940-NEXT: ;;#ASMSTART 10805; GFX940-NEXT: ; def v[2:5] 10806; GFX940-NEXT: ;;#ASMEND 10807; GFX940-NEXT: s_nop 0 10808; GFX940-NEXT: v_alignbit_b32 v0, v4, v1, 16 10809; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 10810; GFX940-NEXT: s_waitcnt vmcnt(0) 10811; GFX940-NEXT: s_setpc_b64 s[30:31] 10812 %vec0 = call <8 x i16> asm "; def $0", "=v"() 10813 %vec1 = call <8 x i16> asm "; def $0", "=v"() 10814 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 3, i32 12> 10815 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 10816 ret void 10817} 10818 10819define void 
@v_shuffle_v2i16_v8i16__4_12(ptr addrspace(1) inreg %ptr) { 10820; GFX900-LABEL: v_shuffle_v2i16_v8i16__4_12: 10821; GFX900: ; %bb.0: 10822; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10823; GFX900-NEXT: ;;#ASMSTART 10824; GFX900-NEXT: ; def v[0:3] 10825; GFX900-NEXT: ;;#ASMEND 10826; GFX900-NEXT: s_mov_b32 s4, 0x5040100 10827; GFX900-NEXT: v_mov_b32_e32 v7, 0 10828; GFX900-NEXT: ;;#ASMSTART 10829; GFX900-NEXT: ; def v[3:6] 10830; GFX900-NEXT: ;;#ASMEND 10831; GFX900-NEXT: v_perm_b32 v0, v5, v2, s4 10832; GFX900-NEXT: global_store_dword v7, v0, s[16:17] 10833; GFX900-NEXT: s_waitcnt vmcnt(0) 10834; GFX900-NEXT: s_setpc_b64 s[30:31] 10835; 10836; GFX90A-LABEL: v_shuffle_v2i16_v8i16__4_12: 10837; GFX90A: ; %bb.0: 10838; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10839; GFX90A-NEXT: ;;#ASMSTART 10840; GFX90A-NEXT: ; def v[0:3] 10841; GFX90A-NEXT: ;;#ASMEND 10842; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 10843; GFX90A-NEXT: v_mov_b32_e32 v8, 0 10844; GFX90A-NEXT: ;;#ASMSTART 10845; GFX90A-NEXT: ; def v[4:7] 10846; GFX90A-NEXT: ;;#ASMEND 10847; GFX90A-NEXT: v_perm_b32 v0, v6, v2, s4 10848; GFX90A-NEXT: global_store_dword v8, v0, s[16:17] 10849; GFX90A-NEXT: s_waitcnt vmcnt(0) 10850; GFX90A-NEXT: s_setpc_b64 s[30:31] 10851; 10852; GFX940-LABEL: v_shuffle_v2i16_v8i16__4_12: 10853; GFX940: ; %bb.0: 10854; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10855; GFX940-NEXT: ;;#ASMSTART 10856; GFX940-NEXT: ; def v[0:3] 10857; GFX940-NEXT: ;;#ASMEND 10858; GFX940-NEXT: s_mov_b32 s2, 0x5040100 10859; GFX940-NEXT: v_mov_b32_e32 v8, 0 10860; GFX940-NEXT: ;;#ASMSTART 10861; GFX940-NEXT: ; def v[4:7] 10862; GFX940-NEXT: ;;#ASMEND 10863; GFX940-NEXT: s_nop 0 10864; GFX940-NEXT: v_perm_b32 v0, v6, v2, s2 10865; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 10866; GFX940-NEXT: s_waitcnt vmcnt(0) 10867; GFX940-NEXT: s_setpc_b64 s[30:31] 10868 %vec0 = call <8 x i16> asm "; def $0", "=v"() 10869 %vec1 = call <8 x i16> asm "; def $0", "=v"() 10870 %shuf = 
shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 4, i32 12> 10871 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 10872 ret void 10873} 10874 10875define void @v_shuffle_v2i16_v8i16__5_12(ptr addrspace(1) inreg %ptr) { 10876; GFX900-LABEL: v_shuffle_v2i16_v8i16__5_12: 10877; GFX900: ; %bb.0: 10878; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10879; GFX900-NEXT: ;;#ASMSTART 10880; GFX900-NEXT: ; def v[0:3] 10881; GFX900-NEXT: ;;#ASMEND 10882; GFX900-NEXT: v_mov_b32_e32 v7, 0 10883; GFX900-NEXT: ;;#ASMSTART 10884; GFX900-NEXT: ; def v[3:6] 10885; GFX900-NEXT: ;;#ASMEND 10886; GFX900-NEXT: v_alignbit_b32 v0, v5, v2, 16 10887; GFX900-NEXT: global_store_dword v7, v0, s[16:17] 10888; GFX900-NEXT: s_waitcnt vmcnt(0) 10889; GFX900-NEXT: s_setpc_b64 s[30:31] 10890; 10891; GFX90A-LABEL: v_shuffle_v2i16_v8i16__5_12: 10892; GFX90A: ; %bb.0: 10893; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10894; GFX90A-NEXT: ;;#ASMSTART 10895; GFX90A-NEXT: ; def v[0:3] 10896; GFX90A-NEXT: ;;#ASMEND 10897; GFX90A-NEXT: v_mov_b32_e32 v8, 0 10898; GFX90A-NEXT: ;;#ASMSTART 10899; GFX90A-NEXT: ; def v[4:7] 10900; GFX90A-NEXT: ;;#ASMEND 10901; GFX90A-NEXT: v_alignbit_b32 v0, v6, v2, 16 10902; GFX90A-NEXT: global_store_dword v8, v0, s[16:17] 10903; GFX90A-NEXT: s_waitcnt vmcnt(0) 10904; GFX90A-NEXT: s_setpc_b64 s[30:31] 10905; 10906; GFX940-LABEL: v_shuffle_v2i16_v8i16__5_12: 10907; GFX940: ; %bb.0: 10908; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10909; GFX940-NEXT: ;;#ASMSTART 10910; GFX940-NEXT: ; def v[0:3] 10911; GFX940-NEXT: ;;#ASMEND 10912; GFX940-NEXT: v_mov_b32_e32 v8, 0 10913; GFX940-NEXT: ;;#ASMSTART 10914; GFX940-NEXT: ; def v[4:7] 10915; GFX940-NEXT: ;;#ASMEND 10916; GFX940-NEXT: s_nop 0 10917; GFX940-NEXT: v_alignbit_b32 v0, v6, v2, 16 10918; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 10919; GFX940-NEXT: s_waitcnt vmcnt(0) 10920; GFX940-NEXT: s_setpc_b64 s[30:31] 10921 %vec0 = call <8 x i16> asm "; def $0", "=v"() 10922 
%vec1 = call <8 x i16> asm "; def $0", "=v"() 10923 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 5, i32 12> 10924 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 10925 ret void 10926} 10927 10928define void @v_shuffle_v2i16_v8i16__6_12(ptr addrspace(1) inreg %ptr) { 10929; GFX900-LABEL: v_shuffle_v2i16_v8i16__6_12: 10930; GFX900: ; %bb.0: 10931; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10932; GFX900-NEXT: ;;#ASMSTART 10933; GFX900-NEXT: ; def v[0:3] 10934; GFX900-NEXT: ;;#ASMEND 10935; GFX900-NEXT: s_mov_b32 s4, 0x5040100 10936; GFX900-NEXT: v_mov_b32_e32 v8, 0 10937; GFX900-NEXT: ;;#ASMSTART 10938; GFX900-NEXT: ; def v[4:7] 10939; GFX900-NEXT: ;;#ASMEND 10940; GFX900-NEXT: v_perm_b32 v0, v6, v3, s4 10941; GFX900-NEXT: global_store_dword v8, v0, s[16:17] 10942; GFX900-NEXT: s_waitcnt vmcnt(0) 10943; GFX900-NEXT: s_setpc_b64 s[30:31] 10944; 10945; GFX90A-LABEL: v_shuffle_v2i16_v8i16__6_12: 10946; GFX90A: ; %bb.0: 10947; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10948; GFX90A-NEXT: ;;#ASMSTART 10949; GFX90A-NEXT: ; def v[0:3] 10950; GFX90A-NEXT: ;;#ASMEND 10951; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 10952; GFX90A-NEXT: v_mov_b32_e32 v8, 0 10953; GFX90A-NEXT: ;;#ASMSTART 10954; GFX90A-NEXT: ; def v[4:7] 10955; GFX90A-NEXT: ;;#ASMEND 10956; GFX90A-NEXT: v_perm_b32 v0, v6, v3, s4 10957; GFX90A-NEXT: global_store_dword v8, v0, s[16:17] 10958; GFX90A-NEXT: s_waitcnt vmcnt(0) 10959; GFX90A-NEXT: s_setpc_b64 s[30:31] 10960; 10961; GFX940-LABEL: v_shuffle_v2i16_v8i16__6_12: 10962; GFX940: ; %bb.0: 10963; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10964; GFX940-NEXT: ;;#ASMSTART 10965; GFX940-NEXT: ; def v[0:3] 10966; GFX940-NEXT: ;;#ASMEND 10967; GFX940-NEXT: s_mov_b32 s2, 0x5040100 10968; GFX940-NEXT: v_mov_b32_e32 v8, 0 10969; GFX940-NEXT: ;;#ASMSTART 10970; GFX940-NEXT: ; def v[4:7] 10971; GFX940-NEXT: ;;#ASMEND 10972; GFX940-NEXT: s_nop 0 10973; GFX940-NEXT: v_perm_b32 v0, v6, v3, s2 10974; GFX940-NEXT: 
global_store_dword v8, v0, s[0:1] sc0 sc1 10975; GFX940-NEXT: s_waitcnt vmcnt(0) 10976; GFX940-NEXT: s_setpc_b64 s[30:31] 10977 %vec0 = call <8 x i16> asm "; def $0", "=v"() 10978 %vec1 = call <8 x i16> asm "; def $0", "=v"() 10979 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 6, i32 12> 10980 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 10981 ret void 10982} 10983 10984define void @v_shuffle_v2i16_v8i16__7_12(ptr addrspace(1) inreg %ptr) { 10985; GFX900-LABEL: v_shuffle_v2i16_v8i16__7_12: 10986; GFX900: ; %bb.0: 10987; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10988; GFX900-NEXT: ;;#ASMSTART 10989; GFX900-NEXT: ; def v[0:3] 10990; GFX900-NEXT: ;;#ASMEND 10991; GFX900-NEXT: v_mov_b32_e32 v8, 0 10992; GFX900-NEXT: ;;#ASMSTART 10993; GFX900-NEXT: ; def v[4:7] 10994; GFX900-NEXT: ;;#ASMEND 10995; GFX900-NEXT: v_alignbit_b32 v0, v6, v3, 16 10996; GFX900-NEXT: global_store_dword v8, v0, s[16:17] 10997; GFX900-NEXT: s_waitcnt vmcnt(0) 10998; GFX900-NEXT: s_setpc_b64 s[30:31] 10999; 11000; GFX90A-LABEL: v_shuffle_v2i16_v8i16__7_12: 11001; GFX90A: ; %bb.0: 11002; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11003; GFX90A-NEXT: ;;#ASMSTART 11004; GFX90A-NEXT: ; def v[0:3] 11005; GFX90A-NEXT: ;;#ASMEND 11006; GFX90A-NEXT: v_mov_b32_e32 v8, 0 11007; GFX90A-NEXT: ;;#ASMSTART 11008; GFX90A-NEXT: ; def v[4:7] 11009; GFX90A-NEXT: ;;#ASMEND 11010; GFX90A-NEXT: v_alignbit_b32 v0, v6, v3, 16 11011; GFX90A-NEXT: global_store_dword v8, v0, s[16:17] 11012; GFX90A-NEXT: s_waitcnt vmcnt(0) 11013; GFX90A-NEXT: s_setpc_b64 s[30:31] 11014; 11015; GFX940-LABEL: v_shuffle_v2i16_v8i16__7_12: 11016; GFX940: ; %bb.0: 11017; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11018; GFX940-NEXT: ;;#ASMSTART 11019; GFX940-NEXT: ; def v[0:3] 11020; GFX940-NEXT: ;;#ASMEND 11021; GFX940-NEXT: v_mov_b32_e32 v8, 0 11022; GFX940-NEXT: ;;#ASMSTART 11023; GFX940-NEXT: ; def v[4:7] 11024; GFX940-NEXT: ;;#ASMEND 11025; GFX940-NEXT: s_nop 0 11026; 
GFX940-NEXT: v_alignbit_b32 v0, v6, v3, 16 11027; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 11028; GFX940-NEXT: s_waitcnt vmcnt(0) 11029; GFX940-NEXT: s_setpc_b64 s[30:31] 11030 %vec0 = call <8 x i16> asm "; def $0", "=v"() 11031 %vec1 = call <8 x i16> asm "; def $0", "=v"() 11032 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 7, i32 12> 11033 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 11034 ret void 11035} 11036 11037define void @v_shuffle_v2i16_v8i16__8_12(ptr addrspace(1) inreg %ptr) { 11038; GFX900-LABEL: v_shuffle_v2i16_v8i16__8_12: 11039; GFX900: ; %bb.0: 11040; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11041; GFX900-NEXT: ;;#ASMSTART 11042; GFX900-NEXT: ; def v[0:3] 11043; GFX900-NEXT: ;;#ASMEND 11044; GFX900-NEXT: s_mov_b32 s4, 0x5040100 11045; GFX900-NEXT: v_mov_b32_e32 v4, 0 11046; GFX900-NEXT: v_perm_b32 v0, v2, v0, s4 11047; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 11048; GFX900-NEXT: s_waitcnt vmcnt(0) 11049; GFX900-NEXT: s_setpc_b64 s[30:31] 11050; 11051; GFX90A-LABEL: v_shuffle_v2i16_v8i16__8_12: 11052; GFX90A: ; %bb.0: 11053; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11054; GFX90A-NEXT: ;;#ASMSTART 11055; GFX90A-NEXT: ; def v[0:3] 11056; GFX90A-NEXT: ;;#ASMEND 11057; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 11058; GFX90A-NEXT: v_mov_b32_e32 v4, 0 11059; GFX90A-NEXT: v_perm_b32 v0, v2, v0, s4 11060; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 11061; GFX90A-NEXT: s_waitcnt vmcnt(0) 11062; GFX90A-NEXT: s_setpc_b64 s[30:31] 11063; 11064; GFX940-LABEL: v_shuffle_v2i16_v8i16__8_12: 11065; GFX940: ; %bb.0: 11066; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11067; GFX940-NEXT: ;;#ASMSTART 11068; GFX940-NEXT: ; def v[0:3] 11069; GFX940-NEXT: ;;#ASMEND 11070; GFX940-NEXT: s_mov_b32 s2, 0x5040100 11071; GFX940-NEXT: v_mov_b32_e32 v4, 0 11072; GFX940-NEXT: v_perm_b32 v0, v2, v0, s2 11073; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 11074; GFX940-NEXT: s_waitcnt 
vmcnt(0) 11075; GFX940-NEXT: s_setpc_b64 s[30:31] 11076 %vec0 = call <8 x i16> asm "; def $0", "=v"() 11077 %vec1 = call <8 x i16> asm "; def $0", "=v"() 11078 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 8, i32 12> 11079 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 11080 ret void 11081} 11082 11083define void @v_shuffle_v2i16_v8i16__9_12(ptr addrspace(1) inreg %ptr) { 11084; GFX900-LABEL: v_shuffle_v2i16_v8i16__9_12: 11085; GFX900: ; %bb.0: 11086; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11087; GFX900-NEXT: ;;#ASMSTART 11088; GFX900-NEXT: ; def v[0:3] 11089; GFX900-NEXT: ;;#ASMEND 11090; GFX900-NEXT: v_mov_b32_e32 v4, 0 11091; GFX900-NEXT: v_alignbit_b32 v0, v2, v0, 16 11092; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 11093; GFX900-NEXT: s_waitcnt vmcnt(0) 11094; GFX900-NEXT: s_setpc_b64 s[30:31] 11095; 11096; GFX90A-LABEL: v_shuffle_v2i16_v8i16__9_12: 11097; GFX90A: ; %bb.0: 11098; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11099; GFX90A-NEXT: ;;#ASMSTART 11100; GFX90A-NEXT: ; def v[0:3] 11101; GFX90A-NEXT: ;;#ASMEND 11102; GFX90A-NEXT: v_mov_b32_e32 v4, 0 11103; GFX90A-NEXT: v_alignbit_b32 v0, v2, v0, 16 11104; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 11105; GFX90A-NEXT: s_waitcnt vmcnt(0) 11106; GFX90A-NEXT: s_setpc_b64 s[30:31] 11107; 11108; GFX940-LABEL: v_shuffle_v2i16_v8i16__9_12: 11109; GFX940: ; %bb.0: 11110; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11111; GFX940-NEXT: ;;#ASMSTART 11112; GFX940-NEXT: ; def v[0:3] 11113; GFX940-NEXT: ;;#ASMEND 11114; GFX940-NEXT: v_mov_b32_e32 v4, 0 11115; GFX940-NEXT: v_alignbit_b32 v0, v2, v0, 16 11116; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 11117; GFX940-NEXT: s_waitcnt vmcnt(0) 11118; GFX940-NEXT: s_setpc_b64 s[30:31] 11119 %vec0 = call <8 x i16> asm "; def $0", "=v"() 11120 %vec1 = call <8 x i16> asm "; def $0", "=v"() 11121 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 9, i32 12> 11122 store <2 
x i16> %shuf, ptr addrspace(1) %ptr, align 4 11123 ret void 11124} 11125 11126define void @v_shuffle_v2i16_v8i16__10_12(ptr addrspace(1) inreg %ptr) { 11127; GFX900-LABEL: v_shuffle_v2i16_v8i16__10_12: 11128; GFX900: ; %bb.0: 11129; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11130; GFX900-NEXT: ;;#ASMSTART 11131; GFX900-NEXT: ; def v[0:3] 11132; GFX900-NEXT: ;;#ASMEND 11133; GFX900-NEXT: s_mov_b32 s4, 0x5040100 11134; GFX900-NEXT: v_mov_b32_e32 v4, 0 11135; GFX900-NEXT: v_perm_b32 v0, v2, v1, s4 11136; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 11137; GFX900-NEXT: s_waitcnt vmcnt(0) 11138; GFX900-NEXT: s_setpc_b64 s[30:31] 11139; 11140; GFX90A-LABEL: v_shuffle_v2i16_v8i16__10_12: 11141; GFX90A: ; %bb.0: 11142; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11143; GFX90A-NEXT: ;;#ASMSTART 11144; GFX90A-NEXT: ; def v[0:3] 11145; GFX90A-NEXT: ;;#ASMEND 11146; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 11147; GFX90A-NEXT: v_mov_b32_e32 v4, 0 11148; GFX90A-NEXT: v_perm_b32 v0, v2, v1, s4 11149; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 11150; GFX90A-NEXT: s_waitcnt vmcnt(0) 11151; GFX90A-NEXT: s_setpc_b64 s[30:31] 11152; 11153; GFX940-LABEL: v_shuffle_v2i16_v8i16__10_12: 11154; GFX940: ; %bb.0: 11155; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11156; GFX940-NEXT: ;;#ASMSTART 11157; GFX940-NEXT: ; def v[0:3] 11158; GFX940-NEXT: ;;#ASMEND 11159; GFX940-NEXT: s_mov_b32 s2, 0x5040100 11160; GFX940-NEXT: v_mov_b32_e32 v4, 0 11161; GFX940-NEXT: v_perm_b32 v0, v2, v1, s2 11162; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 11163; GFX940-NEXT: s_waitcnt vmcnt(0) 11164; GFX940-NEXT: s_setpc_b64 s[30:31] 11165 %vec0 = call <8 x i16> asm "; def $0", "=v"() 11166 %vec1 = call <8 x i16> asm "; def $0", "=v"() 11167 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 10, i32 12> 11168 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 11169 ret void 11170} 11171 11172define void @v_shuffle_v2i16_v8i16__11_12(ptr 
addrspace(1) inreg %ptr) { 11173; GFX900-LABEL: v_shuffle_v2i16_v8i16__11_12: 11174; GFX900: ; %bb.0: 11175; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11176; GFX900-NEXT: ;;#ASMSTART 11177; GFX900-NEXT: ; def v[0:3] 11178; GFX900-NEXT: ;;#ASMEND 11179; GFX900-NEXT: v_mov_b32_e32 v4, 0 11180; GFX900-NEXT: v_alignbit_b32 v0, v2, v1, 16 11181; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 11182; GFX900-NEXT: s_waitcnt vmcnt(0) 11183; GFX900-NEXT: s_setpc_b64 s[30:31] 11184; 11185; GFX90A-LABEL: v_shuffle_v2i16_v8i16__11_12: 11186; GFX90A: ; %bb.0: 11187; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11188; GFX90A-NEXT: ;;#ASMSTART 11189; GFX90A-NEXT: ; def v[0:3] 11190; GFX90A-NEXT: ;;#ASMEND 11191; GFX90A-NEXT: v_mov_b32_e32 v4, 0 11192; GFX90A-NEXT: v_alignbit_b32 v0, v2, v1, 16 11193; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 11194; GFX90A-NEXT: s_waitcnt vmcnt(0) 11195; GFX90A-NEXT: s_setpc_b64 s[30:31] 11196; 11197; GFX940-LABEL: v_shuffle_v2i16_v8i16__11_12: 11198; GFX940: ; %bb.0: 11199; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11200; GFX940-NEXT: ;;#ASMSTART 11201; GFX940-NEXT: ; def v[0:3] 11202; GFX940-NEXT: ;;#ASMEND 11203; GFX940-NEXT: v_mov_b32_e32 v4, 0 11204; GFX940-NEXT: v_alignbit_b32 v0, v2, v1, 16 11205; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 11206; GFX940-NEXT: s_waitcnt vmcnt(0) 11207; GFX940-NEXT: s_setpc_b64 s[30:31] 11208 %vec0 = call <8 x i16> asm "; def $0", "=v"() 11209 %vec1 = call <8 x i16> asm "; def $0", "=v"() 11210 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 11, i32 12> 11211 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 11212 ret void 11213} 11214 11215define void @v_shuffle_v2i16_v8i16__12_12(ptr addrspace(1) inreg %ptr) { 11216; GFX900-LABEL: v_shuffle_v2i16_v8i16__12_12: 11217; GFX900: ; %bb.0: 11218; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11219; GFX900-NEXT: ;;#ASMSTART 11220; GFX900-NEXT: ; def v[0:3] 11221; GFX900-NEXT: 
;;#ASMEND 11222; GFX900-NEXT: s_mov_b32 s4, 0x5040100 11223; GFX900-NEXT: v_mov_b32_e32 v4, 0 11224; GFX900-NEXT: v_perm_b32 v0, v2, v2, s4 11225; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 11226; GFX900-NEXT: s_waitcnt vmcnt(0) 11227; GFX900-NEXT: s_setpc_b64 s[30:31] 11228; 11229; GFX90A-LABEL: v_shuffle_v2i16_v8i16__12_12: 11230; GFX90A: ; %bb.0: 11231; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11232; GFX90A-NEXT: ;;#ASMSTART 11233; GFX90A-NEXT: ; def v[0:3] 11234; GFX90A-NEXT: ;;#ASMEND 11235; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 11236; GFX90A-NEXT: v_mov_b32_e32 v4, 0 11237; GFX90A-NEXT: v_perm_b32 v0, v2, v2, s4 11238; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 11239; GFX90A-NEXT: s_waitcnt vmcnt(0) 11240; GFX90A-NEXT: s_setpc_b64 s[30:31] 11241; 11242; GFX940-LABEL: v_shuffle_v2i16_v8i16__12_12: 11243; GFX940: ; %bb.0: 11244; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11245; GFX940-NEXT: ;;#ASMSTART 11246; GFX940-NEXT: ; def v[0:3] 11247; GFX940-NEXT: ;;#ASMEND 11248; GFX940-NEXT: s_mov_b32 s2, 0x5040100 11249; GFX940-NEXT: v_mov_b32_e32 v4, 0 11250; GFX940-NEXT: v_perm_b32 v0, v2, v2, s2 11251; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 11252; GFX940-NEXT: s_waitcnt vmcnt(0) 11253; GFX940-NEXT: s_setpc_b64 s[30:31] 11254 %vec0 = call <8 x i16> asm "; def $0", "=v"() 11255 %vec1 = call <8 x i16> asm "; def $0", "=v"() 11256 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 12, i32 12> 11257 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 11258 ret void 11259} 11260 11261define void @v_shuffle_v2i16_v8i16__13_12(ptr addrspace(1) inreg %ptr) { 11262; GFX900-LABEL: v_shuffle_v2i16_v8i16__13_12: 11263; GFX900: ; %bb.0: 11264; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11265; GFX900-NEXT: ;;#ASMSTART 11266; GFX900-NEXT: ; def v[0:3] 11267; GFX900-NEXT: ;;#ASMEND 11268; GFX900-NEXT: v_mov_b32_e32 v4, 0 11269; GFX900-NEXT: v_alignbit_b32 v0, v2, v2, 16 11270; GFX900-NEXT: 
global_store_dword v4, v0, s[16:17] 11271; GFX900-NEXT: s_waitcnt vmcnt(0) 11272; GFX900-NEXT: s_setpc_b64 s[30:31] 11273; 11274; GFX90A-LABEL: v_shuffle_v2i16_v8i16__13_12: 11275; GFX90A: ; %bb.0: 11276; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11277; GFX90A-NEXT: ;;#ASMSTART 11278; GFX90A-NEXT: ; def v[0:3] 11279; GFX90A-NEXT: ;;#ASMEND 11280; GFX90A-NEXT: v_mov_b32_e32 v4, 0 11281; GFX90A-NEXT: v_alignbit_b32 v0, v2, v2, 16 11282; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 11283; GFX90A-NEXT: s_waitcnt vmcnt(0) 11284; GFX90A-NEXT: s_setpc_b64 s[30:31] 11285; 11286; GFX940-LABEL: v_shuffle_v2i16_v8i16__13_12: 11287; GFX940: ; %bb.0: 11288; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11289; GFX940-NEXT: ;;#ASMSTART 11290; GFX940-NEXT: ; def v[0:3] 11291; GFX940-NEXT: ;;#ASMEND 11292; GFX940-NEXT: v_mov_b32_e32 v4, 0 11293; GFX940-NEXT: v_alignbit_b32 v0, v2, v2, 16 11294; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 11295; GFX940-NEXT: s_waitcnt vmcnt(0) 11296; GFX940-NEXT: s_setpc_b64 s[30:31] 11297 %vec0 = call <8 x i16> asm "; def $0", "=v"() 11298 %vec1 = call <8 x i16> asm "; def $0", "=v"() 11299 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 13, i32 12> 11300 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 11301 ret void 11302} 11303 11304define void @v_shuffle_v2i16_v8i16__14_12(ptr addrspace(1) inreg %ptr) { 11305; GFX900-LABEL: v_shuffle_v2i16_v8i16__14_12: 11306; GFX900: ; %bb.0: 11307; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11308; GFX900-NEXT: ;;#ASMSTART 11309; GFX900-NEXT: ; def v[0:3] 11310; GFX900-NEXT: ;;#ASMEND 11311; GFX900-NEXT: s_mov_b32 s4, 0x5040100 11312; GFX900-NEXT: v_mov_b32_e32 v4, 0 11313; GFX900-NEXT: v_perm_b32 v0, v2, v3, s4 11314; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 11315; GFX900-NEXT: s_waitcnt vmcnt(0) 11316; GFX900-NEXT: s_setpc_b64 s[30:31] 11317; 11318; GFX90A-LABEL: v_shuffle_v2i16_v8i16__14_12: 11319; GFX90A: ; %bb.0: 11320; 
GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11321; GFX90A-NEXT: ;;#ASMSTART 11322; GFX90A-NEXT: ; def v[0:3] 11323; GFX90A-NEXT: ;;#ASMEND 11324; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 11325; GFX90A-NEXT: v_mov_b32_e32 v4, 0 11326; GFX90A-NEXT: v_perm_b32 v0, v2, v3, s4 11327; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 11328; GFX90A-NEXT: s_waitcnt vmcnt(0) 11329; GFX90A-NEXT: s_setpc_b64 s[30:31] 11330; 11331; GFX940-LABEL: v_shuffle_v2i16_v8i16__14_12: 11332; GFX940: ; %bb.0: 11333; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11334; GFX940-NEXT: ;;#ASMSTART 11335; GFX940-NEXT: ; def v[0:3] 11336; GFX940-NEXT: ;;#ASMEND 11337; GFX940-NEXT: s_mov_b32 s2, 0x5040100 11338; GFX940-NEXT: v_mov_b32_e32 v4, 0 11339; GFX940-NEXT: v_perm_b32 v0, v2, v3, s2 11340; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 11341; GFX940-NEXT: s_waitcnt vmcnt(0) 11342; GFX940-NEXT: s_setpc_b64 s[30:31] 11343 %vec0 = call <8 x i16> asm "; def $0", "=v"() 11344 %vec1 = call <8 x i16> asm "; def $0", "=v"() 11345 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 14, i32 12> 11346 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 11347 ret void 11348} 11349 11350define void @v_shuffle_v2i16_v8i16__u_13(ptr addrspace(1) inreg %ptr) { 11351; GFX900-LABEL: v_shuffle_v2i16_v8i16__u_13: 11352; GFX900: ; %bb.0: 11353; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11354; GFX900-NEXT: v_mov_b32_e32 v4, 0 11355; GFX900-NEXT: ;;#ASMSTART 11356; GFX900-NEXT: ; def v[0:3] 11357; GFX900-NEXT: ;;#ASMEND 11358; GFX900-NEXT: global_store_dword v4, v2, s[16:17] 11359; GFX900-NEXT: s_waitcnt vmcnt(0) 11360; GFX900-NEXT: s_setpc_b64 s[30:31] 11361; 11362; GFX90A-LABEL: v_shuffle_v2i16_v8i16__u_13: 11363; GFX90A: ; %bb.0: 11364; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11365; GFX90A-NEXT: v_mov_b32_e32 v4, 0 11366; GFX90A-NEXT: ;;#ASMSTART 11367; GFX90A-NEXT: ; def v[0:3] 11368; GFX90A-NEXT: ;;#ASMEND 11369; GFX90A-NEXT: 
global_store_dword v4, v2, s[16:17] 11370; GFX90A-NEXT: s_waitcnt vmcnt(0) 11371; GFX90A-NEXT: s_setpc_b64 s[30:31] 11372; 11373; GFX940-LABEL: v_shuffle_v2i16_v8i16__u_13: 11374; GFX940: ; %bb.0: 11375; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11376; GFX940-NEXT: v_mov_b32_e32 v4, 0 11377; GFX940-NEXT: ;;#ASMSTART 11378; GFX940-NEXT: ; def v[0:3] 11379; GFX940-NEXT: ;;#ASMEND 11380; GFX940-NEXT: global_store_dword v4, v2, s[0:1] sc0 sc1 11381; GFX940-NEXT: s_waitcnt vmcnt(0) 11382; GFX940-NEXT: s_setpc_b64 s[30:31] 11383 %vec0 = call <8 x i16> asm "; def $0", "=v"() 11384 %vec1 = call <8 x i16> asm "; def $0", "=v"() 11385 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 poison, i32 13> 11386 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 11387 ret void 11388} 11389 11390define void @v_shuffle_v2i16_v8i16__0_13(ptr addrspace(1) inreg %ptr) { 11391; GFX900-LABEL: v_shuffle_v2i16_v8i16__0_13: 11392; GFX900: ; %bb.0: 11393; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11394; GFX900-NEXT: ;;#ASMSTART 11395; GFX900-NEXT: ; def v[0:3] 11396; GFX900-NEXT: ;;#ASMEND 11397; GFX900-NEXT: s_mov_b32 s4, 0xffff 11398; GFX900-NEXT: v_mov_b32_e32 v5, 0 11399; GFX900-NEXT: ;;#ASMSTART 11400; GFX900-NEXT: ; def v[1:4] 11401; GFX900-NEXT: ;;#ASMEND 11402; GFX900-NEXT: v_bfi_b32 v0, s4, v0, v3 11403; GFX900-NEXT: global_store_dword v5, v0, s[16:17] 11404; GFX900-NEXT: s_waitcnt vmcnt(0) 11405; GFX900-NEXT: s_setpc_b64 s[30:31] 11406; 11407; GFX90A-LABEL: v_shuffle_v2i16_v8i16__0_13: 11408; GFX90A: ; %bb.0: 11409; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11410; GFX90A-NEXT: ;;#ASMSTART 11411; GFX90A-NEXT: ; def v[0:3] 11412; GFX90A-NEXT: ;;#ASMEND 11413; GFX90A-NEXT: s_mov_b32 s4, 0xffff 11414; GFX90A-NEXT: v_mov_b32_e32 v6, 0 11415; GFX90A-NEXT: ;;#ASMSTART 11416; GFX90A-NEXT: ; def v[2:5] 11417; GFX90A-NEXT: ;;#ASMEND 11418; GFX90A-NEXT: v_bfi_b32 v0, s4, v0, v4 11419; GFX90A-NEXT: global_store_dword v6, v0, s[16:17] 
11420; GFX90A-NEXT: s_waitcnt vmcnt(0) 11421; GFX90A-NEXT: s_setpc_b64 s[30:31] 11422; 11423; GFX940-LABEL: v_shuffle_v2i16_v8i16__0_13: 11424; GFX940: ; %bb.0: 11425; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11426; GFX940-NEXT: ;;#ASMSTART 11427; GFX940-NEXT: ; def v[0:3] 11428; GFX940-NEXT: ;;#ASMEND 11429; GFX940-NEXT: s_mov_b32 s2, 0xffff 11430; GFX940-NEXT: v_mov_b32_e32 v6, 0 11431; GFX940-NEXT: ;;#ASMSTART 11432; GFX940-NEXT: ; def v[2:5] 11433; GFX940-NEXT: ;;#ASMEND 11434; GFX940-NEXT: s_nop 0 11435; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v4 11436; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 11437; GFX940-NEXT: s_waitcnt vmcnt(0) 11438; GFX940-NEXT: s_setpc_b64 s[30:31] 11439 %vec0 = call <8 x i16> asm "; def $0", "=v"() 11440 %vec1 = call <8 x i16> asm "; def $0", "=v"() 11441 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 0, i32 13> 11442 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 11443 ret void 11444} 11445 11446define void @v_shuffle_v2i16_v8i16__1_13(ptr addrspace(1) inreg %ptr) { 11447; GFX900-LABEL: v_shuffle_v2i16_v8i16__1_13: 11448; GFX900: ; %bb.0: 11449; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11450; GFX900-NEXT: ;;#ASMSTART 11451; GFX900-NEXT: ; def v[0:3] 11452; GFX900-NEXT: ;;#ASMEND 11453; GFX900-NEXT: s_mov_b32 s4, 0x7060302 11454; GFX900-NEXT: v_mov_b32_e32 v5, 0 11455; GFX900-NEXT: ;;#ASMSTART 11456; GFX900-NEXT: ; def v[1:4] 11457; GFX900-NEXT: ;;#ASMEND 11458; GFX900-NEXT: v_perm_b32 v0, v3, v0, s4 11459; GFX900-NEXT: global_store_dword v5, v0, s[16:17] 11460; GFX900-NEXT: s_waitcnt vmcnt(0) 11461; GFX900-NEXT: s_setpc_b64 s[30:31] 11462; 11463; GFX90A-LABEL: v_shuffle_v2i16_v8i16__1_13: 11464; GFX90A: ; %bb.0: 11465; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11466; GFX90A-NEXT: ;;#ASMSTART 11467; GFX90A-NEXT: ; def v[0:3] 11468; GFX90A-NEXT: ;;#ASMEND 11469; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 11470; GFX90A-NEXT: v_mov_b32_e32 v6, 0 11471; GFX90A-NEXT: 
;;#ASMSTART 11472; GFX90A-NEXT: ; def v[2:5] 11473; GFX90A-NEXT: ;;#ASMEND 11474; GFX90A-NEXT: v_perm_b32 v0, v4, v0, s4 11475; GFX90A-NEXT: global_store_dword v6, v0, s[16:17] 11476; GFX90A-NEXT: s_waitcnt vmcnt(0) 11477; GFX90A-NEXT: s_setpc_b64 s[30:31] 11478; 11479; GFX940-LABEL: v_shuffle_v2i16_v8i16__1_13: 11480; GFX940: ; %bb.0: 11481; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11482; GFX940-NEXT: ;;#ASMSTART 11483; GFX940-NEXT: ; def v[0:3] 11484; GFX940-NEXT: ;;#ASMEND 11485; GFX940-NEXT: s_mov_b32 s2, 0x7060302 11486; GFX940-NEXT: v_mov_b32_e32 v6, 0 11487; GFX940-NEXT: ;;#ASMSTART 11488; GFX940-NEXT: ; def v[2:5] 11489; GFX940-NEXT: ;;#ASMEND 11490; GFX940-NEXT: s_nop 0 11491; GFX940-NEXT: v_perm_b32 v0, v4, v0, s2 11492; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 11493; GFX940-NEXT: s_waitcnt vmcnt(0) 11494; GFX940-NEXT: s_setpc_b64 s[30:31] 11495 %vec0 = call <8 x i16> asm "; def $0", "=v"() 11496 %vec1 = call <8 x i16> asm "; def $0", "=v"() 11497 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 1, i32 13> 11498 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 11499 ret void 11500} 11501 11502define void @v_shuffle_v2i16_v8i16__2_13(ptr addrspace(1) inreg %ptr) { 11503; GFX900-LABEL: v_shuffle_v2i16_v8i16__2_13: 11504; GFX900: ; %bb.0: 11505; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11506; GFX900-NEXT: ;;#ASMSTART 11507; GFX900-NEXT: ; def v[0:3] 11508; GFX900-NEXT: ;;#ASMEND 11509; GFX900-NEXT: s_mov_b32 s4, 0xffff 11510; GFX900-NEXT: v_mov_b32_e32 v6, 0 11511; GFX900-NEXT: ;;#ASMSTART 11512; GFX900-NEXT: ; def v[2:5] 11513; GFX900-NEXT: ;;#ASMEND 11514; GFX900-NEXT: v_bfi_b32 v0, s4, v1, v4 11515; GFX900-NEXT: global_store_dword v6, v0, s[16:17] 11516; GFX900-NEXT: s_waitcnt vmcnt(0) 11517; GFX900-NEXT: s_setpc_b64 s[30:31] 11518; 11519; GFX90A-LABEL: v_shuffle_v2i16_v8i16__2_13: 11520; GFX90A: ; %bb.0: 11521; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11522; GFX90A-NEXT: 
;;#ASMSTART 11523; GFX90A-NEXT: ; def v[0:3] 11524; GFX90A-NEXT: ;;#ASMEND 11525; GFX90A-NEXT: s_mov_b32 s4, 0xffff 11526; GFX90A-NEXT: v_mov_b32_e32 v6, 0 11527; GFX90A-NEXT: ;;#ASMSTART 11528; GFX90A-NEXT: ; def v[2:5] 11529; GFX90A-NEXT: ;;#ASMEND 11530; GFX90A-NEXT: v_bfi_b32 v0, s4, v1, v4 11531; GFX90A-NEXT: global_store_dword v6, v0, s[16:17] 11532; GFX90A-NEXT: s_waitcnt vmcnt(0) 11533; GFX90A-NEXT: s_setpc_b64 s[30:31] 11534; 11535; GFX940-LABEL: v_shuffle_v2i16_v8i16__2_13: 11536; GFX940: ; %bb.0: 11537; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11538; GFX940-NEXT: ;;#ASMSTART 11539; GFX940-NEXT: ; def v[0:3] 11540; GFX940-NEXT: ;;#ASMEND 11541; GFX940-NEXT: s_mov_b32 s2, 0xffff 11542; GFX940-NEXT: v_mov_b32_e32 v6, 0 11543; GFX940-NEXT: ;;#ASMSTART 11544; GFX940-NEXT: ; def v[2:5] 11545; GFX940-NEXT: ;;#ASMEND 11546; GFX940-NEXT: s_nop 0 11547; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v4 11548; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 11549; GFX940-NEXT: s_waitcnt vmcnt(0) 11550; GFX940-NEXT: s_setpc_b64 s[30:31] 11551 %vec0 = call <8 x i16> asm "; def $0", "=v"() 11552 %vec1 = call <8 x i16> asm "; def $0", "=v"() 11553 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 2, i32 13> 11554 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 11555 ret void 11556} 11557 11558define void @v_shuffle_v2i16_v8i16__3_13(ptr addrspace(1) inreg %ptr) { 11559; GFX900-LABEL: v_shuffle_v2i16_v8i16__3_13: 11560; GFX900: ; %bb.0: 11561; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11562; GFX900-NEXT: ;;#ASMSTART 11563; GFX900-NEXT: ; def v[0:3] 11564; GFX900-NEXT: ;;#ASMEND 11565; GFX900-NEXT: s_mov_b32 s4, 0x7060302 11566; GFX900-NEXT: v_mov_b32_e32 v6, 0 11567; GFX900-NEXT: ;;#ASMSTART 11568; GFX900-NEXT: ; def v[2:5] 11569; GFX900-NEXT: ;;#ASMEND 11570; GFX900-NEXT: v_perm_b32 v0, v4, v1, s4 11571; GFX900-NEXT: global_store_dword v6, v0, s[16:17] 11572; GFX900-NEXT: s_waitcnt vmcnt(0) 11573; GFX900-NEXT: s_setpc_b64 
s[30:31] 11574; 11575; GFX90A-LABEL: v_shuffle_v2i16_v8i16__3_13: 11576; GFX90A: ; %bb.0: 11577; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11578; GFX90A-NEXT: ;;#ASMSTART 11579; GFX90A-NEXT: ; def v[0:3] 11580; GFX90A-NEXT: ;;#ASMEND 11581; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 11582; GFX90A-NEXT: v_mov_b32_e32 v6, 0 11583; GFX90A-NEXT: ;;#ASMSTART 11584; GFX90A-NEXT: ; def v[2:5] 11585; GFX90A-NEXT: ;;#ASMEND 11586; GFX90A-NEXT: v_perm_b32 v0, v4, v1, s4 11587; GFX90A-NEXT: global_store_dword v6, v0, s[16:17] 11588; GFX90A-NEXT: s_waitcnt vmcnt(0) 11589; GFX90A-NEXT: s_setpc_b64 s[30:31] 11590; 11591; GFX940-LABEL: v_shuffle_v2i16_v8i16__3_13: 11592; GFX940: ; %bb.0: 11593; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11594; GFX940-NEXT: ;;#ASMSTART 11595; GFX940-NEXT: ; def v[0:3] 11596; GFX940-NEXT: ;;#ASMEND 11597; GFX940-NEXT: s_mov_b32 s2, 0x7060302 11598; GFX940-NEXT: v_mov_b32_e32 v6, 0 11599; GFX940-NEXT: ;;#ASMSTART 11600; GFX940-NEXT: ; def v[2:5] 11601; GFX940-NEXT: ;;#ASMEND 11602; GFX940-NEXT: s_nop 0 11603; GFX940-NEXT: v_perm_b32 v0, v4, v1, s2 11604; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 11605; GFX940-NEXT: s_waitcnt vmcnt(0) 11606; GFX940-NEXT: s_setpc_b64 s[30:31] 11607 %vec0 = call <8 x i16> asm "; def $0", "=v"() 11608 %vec1 = call <8 x i16> asm "; def $0", "=v"() 11609 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 3, i32 13> 11610 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 11611 ret void 11612} 11613 11614define void @v_shuffle_v2i16_v8i16__4_13(ptr addrspace(1) inreg %ptr) { 11615; GFX900-LABEL: v_shuffle_v2i16_v8i16__4_13: 11616; GFX900: ; %bb.0: 11617; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11618; GFX900-NEXT: ;;#ASMSTART 11619; GFX900-NEXT: ; def v[0:3] 11620; GFX900-NEXT: ;;#ASMEND 11621; GFX900-NEXT: s_mov_b32 s4, 0xffff 11622; GFX900-NEXT: v_mov_b32_e32 v7, 0 11623; GFX900-NEXT: ;;#ASMSTART 11624; GFX900-NEXT: ; def v[3:6] 11625; GFX900-NEXT: ;;#ASMEND 
11626; GFX900-NEXT: v_bfi_b32 v0, s4, v2, v5 11627; GFX900-NEXT: global_store_dword v7, v0, s[16:17] 11628; GFX900-NEXT: s_waitcnt vmcnt(0) 11629; GFX900-NEXT: s_setpc_b64 s[30:31] 11630; 11631; GFX90A-LABEL: v_shuffle_v2i16_v8i16__4_13: 11632; GFX90A: ; %bb.0: 11633; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11634; GFX90A-NEXT: ;;#ASMSTART 11635; GFX90A-NEXT: ; def v[0:3] 11636; GFX90A-NEXT: ;;#ASMEND 11637; GFX90A-NEXT: s_mov_b32 s4, 0xffff 11638; GFX90A-NEXT: v_mov_b32_e32 v8, 0 11639; GFX90A-NEXT: ;;#ASMSTART 11640; GFX90A-NEXT: ; def v[4:7] 11641; GFX90A-NEXT: ;;#ASMEND 11642; GFX90A-NEXT: v_bfi_b32 v0, s4, v2, v6 11643; GFX90A-NEXT: global_store_dword v8, v0, s[16:17] 11644; GFX90A-NEXT: s_waitcnt vmcnt(0) 11645; GFX90A-NEXT: s_setpc_b64 s[30:31] 11646; 11647; GFX940-LABEL: v_shuffle_v2i16_v8i16__4_13: 11648; GFX940: ; %bb.0: 11649; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11650; GFX940-NEXT: ;;#ASMSTART 11651; GFX940-NEXT: ; def v[0:3] 11652; GFX940-NEXT: ;;#ASMEND 11653; GFX940-NEXT: s_mov_b32 s2, 0xffff 11654; GFX940-NEXT: v_mov_b32_e32 v8, 0 11655; GFX940-NEXT: ;;#ASMSTART 11656; GFX940-NEXT: ; def v[4:7] 11657; GFX940-NEXT: ;;#ASMEND 11658; GFX940-NEXT: s_nop 0 11659; GFX940-NEXT: v_bfi_b32 v0, s2, v2, v6 11660; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 11661; GFX940-NEXT: s_waitcnt vmcnt(0) 11662; GFX940-NEXT: s_setpc_b64 s[30:31] 11663 %vec0 = call <8 x i16> asm "; def $0", "=v"() 11664 %vec1 = call <8 x i16> asm "; def $0", "=v"() 11665 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 4, i32 13> 11666 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 11667 ret void 11668} 11669 11670define void @v_shuffle_v2i16_v8i16__5_13(ptr addrspace(1) inreg %ptr) { 11671; GFX900-LABEL: v_shuffle_v2i16_v8i16__5_13: 11672; GFX900: ; %bb.0: 11673; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11674; GFX900-NEXT: ;;#ASMSTART 11675; GFX900-NEXT: ; def v[0:3] 11676; GFX900-NEXT: ;;#ASMEND 11677; 
GFX900-NEXT: s_mov_b32 s4, 0x7060302 11678; GFX900-NEXT: v_mov_b32_e32 v7, 0 11679; GFX900-NEXT: ;;#ASMSTART 11680; GFX900-NEXT: ; def v[3:6] 11681; GFX900-NEXT: ;;#ASMEND 11682; GFX900-NEXT: v_perm_b32 v0, v5, v2, s4 11683; GFX900-NEXT: global_store_dword v7, v0, s[16:17] 11684; GFX900-NEXT: s_waitcnt vmcnt(0) 11685; GFX900-NEXT: s_setpc_b64 s[30:31] 11686; 11687; GFX90A-LABEL: v_shuffle_v2i16_v8i16__5_13: 11688; GFX90A: ; %bb.0: 11689; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11690; GFX90A-NEXT: ;;#ASMSTART 11691; GFX90A-NEXT: ; def v[0:3] 11692; GFX90A-NEXT: ;;#ASMEND 11693; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 11694; GFX90A-NEXT: v_mov_b32_e32 v8, 0 11695; GFX90A-NEXT: ;;#ASMSTART 11696; GFX90A-NEXT: ; def v[4:7] 11697; GFX90A-NEXT: ;;#ASMEND 11698; GFX90A-NEXT: v_perm_b32 v0, v6, v2, s4 11699; GFX90A-NEXT: global_store_dword v8, v0, s[16:17] 11700; GFX90A-NEXT: s_waitcnt vmcnt(0) 11701; GFX90A-NEXT: s_setpc_b64 s[30:31] 11702; 11703; GFX940-LABEL: v_shuffle_v2i16_v8i16__5_13: 11704; GFX940: ; %bb.0: 11705; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11706; GFX940-NEXT: ;;#ASMSTART 11707; GFX940-NEXT: ; def v[0:3] 11708; GFX940-NEXT: ;;#ASMEND 11709; GFX940-NEXT: s_mov_b32 s2, 0x7060302 11710; GFX940-NEXT: v_mov_b32_e32 v8, 0 11711; GFX940-NEXT: ;;#ASMSTART 11712; GFX940-NEXT: ; def v[4:7] 11713; GFX940-NEXT: ;;#ASMEND 11714; GFX940-NEXT: s_nop 0 11715; GFX940-NEXT: v_perm_b32 v0, v6, v2, s2 11716; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 11717; GFX940-NEXT: s_waitcnt vmcnt(0) 11718; GFX940-NEXT: s_setpc_b64 s[30:31] 11719 %vec0 = call <8 x i16> asm "; def $0", "=v"() 11720 %vec1 = call <8 x i16> asm "; def $0", "=v"() 11721 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 5, i32 13> 11722 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 11723 ret void 11724} 11725 11726define void @v_shuffle_v2i16_v8i16__6_13(ptr addrspace(1) inreg %ptr) { 11727; GFX900-LABEL: v_shuffle_v2i16_v8i16__6_13: 11728; 
GFX900: ; %bb.0: 11729; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11730; GFX900-NEXT: ;;#ASMSTART 11731; GFX900-NEXT: ; def v[0:3] 11732; GFX900-NEXT: ;;#ASMEND 11733; GFX900-NEXT: s_mov_b32 s4, 0xffff 11734; GFX900-NEXT: v_mov_b32_e32 v8, 0 11735; GFX900-NEXT: ;;#ASMSTART 11736; GFX900-NEXT: ; def v[4:7] 11737; GFX900-NEXT: ;;#ASMEND 11738; GFX900-NEXT: v_bfi_b32 v0, s4, v3, v6 11739; GFX900-NEXT: global_store_dword v8, v0, s[16:17] 11740; GFX900-NEXT: s_waitcnt vmcnt(0) 11741; GFX900-NEXT: s_setpc_b64 s[30:31] 11742; 11743; GFX90A-LABEL: v_shuffle_v2i16_v8i16__6_13: 11744; GFX90A: ; %bb.0: 11745; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11746; GFX90A-NEXT: ;;#ASMSTART 11747; GFX90A-NEXT: ; def v[0:3] 11748; GFX90A-NEXT: ;;#ASMEND 11749; GFX90A-NEXT: s_mov_b32 s4, 0xffff 11750; GFX90A-NEXT: v_mov_b32_e32 v8, 0 11751; GFX90A-NEXT: ;;#ASMSTART 11752; GFX90A-NEXT: ; def v[4:7] 11753; GFX90A-NEXT: ;;#ASMEND 11754; GFX90A-NEXT: v_bfi_b32 v0, s4, v3, v6 11755; GFX90A-NEXT: global_store_dword v8, v0, s[16:17] 11756; GFX90A-NEXT: s_waitcnt vmcnt(0) 11757; GFX90A-NEXT: s_setpc_b64 s[30:31] 11758; 11759; GFX940-LABEL: v_shuffle_v2i16_v8i16__6_13: 11760; GFX940: ; %bb.0: 11761; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11762; GFX940-NEXT: ;;#ASMSTART 11763; GFX940-NEXT: ; def v[0:3] 11764; GFX940-NEXT: ;;#ASMEND 11765; GFX940-NEXT: s_mov_b32 s2, 0xffff 11766; GFX940-NEXT: v_mov_b32_e32 v8, 0 11767; GFX940-NEXT: ;;#ASMSTART 11768; GFX940-NEXT: ; def v[4:7] 11769; GFX940-NEXT: ;;#ASMEND 11770; GFX940-NEXT: s_nop 0 11771; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v6 11772; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 11773; GFX940-NEXT: s_waitcnt vmcnt(0) 11774; GFX940-NEXT: s_setpc_b64 s[30:31] 11775 %vec0 = call <8 x i16> asm "; def $0", "=v"() 11776 %vec1 = call <8 x i16> asm "; def $0", "=v"() 11777 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 6, i32 13> 11778 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 
4 11779 ret void 11780} 11781 11782define void @v_shuffle_v2i16_v8i16__7_13(ptr addrspace(1) inreg %ptr) { 11783; GFX900-LABEL: v_shuffle_v2i16_v8i16__7_13: 11784; GFX900: ; %bb.0: 11785; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11786; GFX900-NEXT: ;;#ASMSTART 11787; GFX900-NEXT: ; def v[0:3] 11788; GFX900-NEXT: ;;#ASMEND 11789; GFX900-NEXT: s_mov_b32 s4, 0x7060302 11790; GFX900-NEXT: v_mov_b32_e32 v8, 0 11791; GFX900-NEXT: ;;#ASMSTART 11792; GFX900-NEXT: ; def v[4:7] 11793; GFX900-NEXT: ;;#ASMEND 11794; GFX900-NEXT: v_perm_b32 v0, v6, v3, s4 11795; GFX900-NEXT: global_store_dword v8, v0, s[16:17] 11796; GFX900-NEXT: s_waitcnt vmcnt(0) 11797; GFX900-NEXT: s_setpc_b64 s[30:31] 11798; 11799; GFX90A-LABEL: v_shuffle_v2i16_v8i16__7_13: 11800; GFX90A: ; %bb.0: 11801; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11802; GFX90A-NEXT: ;;#ASMSTART 11803; GFX90A-NEXT: ; def v[0:3] 11804; GFX90A-NEXT: ;;#ASMEND 11805; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 11806; GFX90A-NEXT: v_mov_b32_e32 v8, 0 11807; GFX90A-NEXT: ;;#ASMSTART 11808; GFX90A-NEXT: ; def v[4:7] 11809; GFX90A-NEXT: ;;#ASMEND 11810; GFX90A-NEXT: v_perm_b32 v0, v6, v3, s4 11811; GFX90A-NEXT: global_store_dword v8, v0, s[16:17] 11812; GFX90A-NEXT: s_waitcnt vmcnt(0) 11813; GFX90A-NEXT: s_setpc_b64 s[30:31] 11814; 11815; GFX940-LABEL: v_shuffle_v2i16_v8i16__7_13: 11816; GFX940: ; %bb.0: 11817; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11818; GFX940-NEXT: ;;#ASMSTART 11819; GFX940-NEXT: ; def v[0:3] 11820; GFX940-NEXT: ;;#ASMEND 11821; GFX940-NEXT: s_mov_b32 s2, 0x7060302 11822; GFX940-NEXT: v_mov_b32_e32 v8, 0 11823; GFX940-NEXT: ;;#ASMSTART 11824; GFX940-NEXT: ; def v[4:7] 11825; GFX940-NEXT: ;;#ASMEND 11826; GFX940-NEXT: s_nop 0 11827; GFX940-NEXT: v_perm_b32 v0, v6, v3, s2 11828; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 11829; GFX940-NEXT: s_waitcnt vmcnt(0) 11830; GFX940-NEXT: s_setpc_b64 s[30:31] 11831 %vec0 = call <8 x i16> asm "; def $0", "=v"() 11832 %vec1 = call <8 
x i16> asm "; def $0", "=v"() 11833 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 7, i32 13> 11834 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 11835 ret void 11836} 11837 11838define void @v_shuffle_v2i16_v8i16__8_13(ptr addrspace(1) inreg %ptr) { 11839; GFX900-LABEL: v_shuffle_v2i16_v8i16__8_13: 11840; GFX900: ; %bb.0: 11841; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11842; GFX900-NEXT: ;;#ASMSTART 11843; GFX900-NEXT: ; def v[0:3] 11844; GFX900-NEXT: ;;#ASMEND 11845; GFX900-NEXT: s_mov_b32 s4, 0xffff 11846; GFX900-NEXT: v_mov_b32_e32 v4, 0 11847; GFX900-NEXT: v_bfi_b32 v0, s4, v0, v2 11848; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 11849; GFX900-NEXT: s_waitcnt vmcnt(0) 11850; GFX900-NEXT: s_setpc_b64 s[30:31] 11851; 11852; GFX90A-LABEL: v_shuffle_v2i16_v8i16__8_13: 11853; GFX90A: ; %bb.0: 11854; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11855; GFX90A-NEXT: ;;#ASMSTART 11856; GFX90A-NEXT: ; def v[0:3] 11857; GFX90A-NEXT: ;;#ASMEND 11858; GFX90A-NEXT: s_mov_b32 s4, 0xffff 11859; GFX90A-NEXT: v_mov_b32_e32 v4, 0 11860; GFX90A-NEXT: v_bfi_b32 v0, s4, v0, v2 11861; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 11862; GFX90A-NEXT: s_waitcnt vmcnt(0) 11863; GFX90A-NEXT: s_setpc_b64 s[30:31] 11864; 11865; GFX940-LABEL: v_shuffle_v2i16_v8i16__8_13: 11866; GFX940: ; %bb.0: 11867; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11868; GFX940-NEXT: ;;#ASMSTART 11869; GFX940-NEXT: ; def v[0:3] 11870; GFX940-NEXT: ;;#ASMEND 11871; GFX940-NEXT: s_mov_b32 s2, 0xffff 11872; GFX940-NEXT: v_mov_b32_e32 v4, 0 11873; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v2 11874; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 11875; GFX940-NEXT: s_waitcnt vmcnt(0) 11876; GFX940-NEXT: s_setpc_b64 s[30:31] 11877 %vec0 = call <8 x i16> asm "; def $0", "=v"() 11878 %vec1 = call <8 x i16> asm "; def $0", "=v"() 11879 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 8, i32 13> 11880 store <2 x i16> %shuf, 
ptr addrspace(1) %ptr, align 4 11881 ret void 11882} 11883 11884define void @v_shuffle_v2i16_v8i16__9_13(ptr addrspace(1) inreg %ptr) { 11885; GFX900-LABEL: v_shuffle_v2i16_v8i16__9_13: 11886; GFX900: ; %bb.0: 11887; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11888; GFX900-NEXT: ;;#ASMSTART 11889; GFX900-NEXT: ; def v[0:3] 11890; GFX900-NEXT: ;;#ASMEND 11891; GFX900-NEXT: s_mov_b32 s4, 0x7060302 11892; GFX900-NEXT: v_mov_b32_e32 v4, 0 11893; GFX900-NEXT: v_perm_b32 v0, v2, v0, s4 11894; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 11895; GFX900-NEXT: s_waitcnt vmcnt(0) 11896; GFX900-NEXT: s_setpc_b64 s[30:31] 11897; 11898; GFX90A-LABEL: v_shuffle_v2i16_v8i16__9_13: 11899; GFX90A: ; %bb.0: 11900; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11901; GFX90A-NEXT: ;;#ASMSTART 11902; GFX90A-NEXT: ; def v[0:3] 11903; GFX90A-NEXT: ;;#ASMEND 11904; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 11905; GFX90A-NEXT: v_mov_b32_e32 v4, 0 11906; GFX90A-NEXT: v_perm_b32 v0, v2, v0, s4 11907; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 11908; GFX90A-NEXT: s_waitcnt vmcnt(0) 11909; GFX90A-NEXT: s_setpc_b64 s[30:31] 11910; 11911; GFX940-LABEL: v_shuffle_v2i16_v8i16__9_13: 11912; GFX940: ; %bb.0: 11913; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11914; GFX940-NEXT: ;;#ASMSTART 11915; GFX940-NEXT: ; def v[0:3] 11916; GFX940-NEXT: ;;#ASMEND 11917; GFX940-NEXT: s_mov_b32 s2, 0x7060302 11918; GFX940-NEXT: v_mov_b32_e32 v4, 0 11919; GFX940-NEXT: v_perm_b32 v0, v2, v0, s2 11920; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 11921; GFX940-NEXT: s_waitcnt vmcnt(0) 11922; GFX940-NEXT: s_setpc_b64 s[30:31] 11923 %vec0 = call <8 x i16> asm "; def $0", "=v"() 11924 %vec1 = call <8 x i16> asm "; def $0", "=v"() 11925 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 9, i32 13> 11926 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 11927 ret void 11928} 11929 11930define void @v_shuffle_v2i16_v8i16__10_13(ptr addrspace(1) inreg %ptr) { 
11931; GFX900-LABEL: v_shuffle_v2i16_v8i16__10_13: 11932; GFX900: ; %bb.0: 11933; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11934; GFX900-NEXT: ;;#ASMSTART 11935; GFX900-NEXT: ; def v[0:3] 11936; GFX900-NEXT: ;;#ASMEND 11937; GFX900-NEXT: s_mov_b32 s4, 0xffff 11938; GFX900-NEXT: v_mov_b32_e32 v4, 0 11939; GFX900-NEXT: v_bfi_b32 v0, s4, v1, v2 11940; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 11941; GFX900-NEXT: s_waitcnt vmcnt(0) 11942; GFX900-NEXT: s_setpc_b64 s[30:31] 11943; 11944; GFX90A-LABEL: v_shuffle_v2i16_v8i16__10_13: 11945; GFX90A: ; %bb.0: 11946; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11947; GFX90A-NEXT: ;;#ASMSTART 11948; GFX90A-NEXT: ; def v[0:3] 11949; GFX90A-NEXT: ;;#ASMEND 11950; GFX90A-NEXT: s_mov_b32 s4, 0xffff 11951; GFX90A-NEXT: v_mov_b32_e32 v4, 0 11952; GFX90A-NEXT: v_bfi_b32 v0, s4, v1, v2 11953; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 11954; GFX90A-NEXT: s_waitcnt vmcnt(0) 11955; GFX90A-NEXT: s_setpc_b64 s[30:31] 11956; 11957; GFX940-LABEL: v_shuffle_v2i16_v8i16__10_13: 11958; GFX940: ; %bb.0: 11959; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11960; GFX940-NEXT: ;;#ASMSTART 11961; GFX940-NEXT: ; def v[0:3] 11962; GFX940-NEXT: ;;#ASMEND 11963; GFX940-NEXT: s_mov_b32 s2, 0xffff 11964; GFX940-NEXT: v_mov_b32_e32 v4, 0 11965; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v2 11966; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 11967; GFX940-NEXT: s_waitcnt vmcnt(0) 11968; GFX940-NEXT: s_setpc_b64 s[30:31] 11969 %vec0 = call <8 x i16> asm "; def $0", "=v"() 11970 %vec1 = call <8 x i16> asm "; def $0", "=v"() 11971 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 10, i32 13> 11972 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 11973 ret void 11974} 11975 11976define void @v_shuffle_v2i16_v8i16__11_13(ptr addrspace(1) inreg %ptr) { 11977; GFX900-LABEL: v_shuffle_v2i16_v8i16__11_13: 11978; GFX900: ; %bb.0: 11979; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11980; 
GFX900-NEXT: ;;#ASMSTART 11981; GFX900-NEXT: ; def v[0:3] 11982; GFX900-NEXT: ;;#ASMEND 11983; GFX900-NEXT: s_mov_b32 s4, 0x7060302 11984; GFX900-NEXT: v_mov_b32_e32 v4, 0 11985; GFX900-NEXT: v_perm_b32 v0, v2, v1, s4 11986; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 11987; GFX900-NEXT: s_waitcnt vmcnt(0) 11988; GFX900-NEXT: s_setpc_b64 s[30:31] 11989; 11990; GFX90A-LABEL: v_shuffle_v2i16_v8i16__11_13: 11991; GFX90A: ; %bb.0: 11992; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11993; GFX90A-NEXT: ;;#ASMSTART 11994; GFX90A-NEXT: ; def v[0:3] 11995; GFX90A-NEXT: ;;#ASMEND 11996; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 11997; GFX90A-NEXT: v_mov_b32_e32 v4, 0 11998; GFX90A-NEXT: v_perm_b32 v0, v2, v1, s4 11999; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 12000; GFX90A-NEXT: s_waitcnt vmcnt(0) 12001; GFX90A-NEXT: s_setpc_b64 s[30:31] 12002; 12003; GFX940-LABEL: v_shuffle_v2i16_v8i16__11_13: 12004; GFX940: ; %bb.0: 12005; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12006; GFX940-NEXT: ;;#ASMSTART 12007; GFX940-NEXT: ; def v[0:3] 12008; GFX940-NEXT: ;;#ASMEND 12009; GFX940-NEXT: s_mov_b32 s2, 0x7060302 12010; GFX940-NEXT: v_mov_b32_e32 v4, 0 12011; GFX940-NEXT: v_perm_b32 v0, v2, v1, s2 12012; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 12013; GFX940-NEXT: s_waitcnt vmcnt(0) 12014; GFX940-NEXT: s_setpc_b64 s[30:31] 12015 %vec0 = call <8 x i16> asm "; def $0", "=v"() 12016 %vec1 = call <8 x i16> asm "; def $0", "=v"() 12017 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 11, i32 13> 12018 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 12019 ret void 12020} 12021 12022define void @v_shuffle_v2i16_v8i16__12_13(ptr addrspace(1) inreg %ptr) { 12023; GFX900-LABEL: v_shuffle_v2i16_v8i16__12_13: 12024; GFX900: ; %bb.0: 12025; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12026; GFX900-NEXT: v_mov_b32_e32 v4, 0 12027; GFX900-NEXT: ;;#ASMSTART 12028; GFX900-NEXT: ; def v[0:3] 12029; GFX900-NEXT: ;;#ASMEND 
12030; GFX900-NEXT: global_store_dword v4, v2, s[16:17] 12031; GFX900-NEXT: s_waitcnt vmcnt(0) 12032; GFX900-NEXT: s_setpc_b64 s[30:31] 12033; 12034; GFX90A-LABEL: v_shuffle_v2i16_v8i16__12_13: 12035; GFX90A: ; %bb.0: 12036; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12037; GFX90A-NEXT: v_mov_b32_e32 v4, 0 12038; GFX90A-NEXT: ;;#ASMSTART 12039; GFX90A-NEXT: ; def v[0:3] 12040; GFX90A-NEXT: ;;#ASMEND 12041; GFX90A-NEXT: global_store_dword v4, v2, s[16:17] 12042; GFX90A-NEXT: s_waitcnt vmcnt(0) 12043; GFX90A-NEXT: s_setpc_b64 s[30:31] 12044; 12045; GFX940-LABEL: v_shuffle_v2i16_v8i16__12_13: 12046; GFX940: ; %bb.0: 12047; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12048; GFX940-NEXT: v_mov_b32_e32 v4, 0 12049; GFX940-NEXT: ;;#ASMSTART 12050; GFX940-NEXT: ; def v[0:3] 12051; GFX940-NEXT: ;;#ASMEND 12052; GFX940-NEXT: global_store_dword v4, v2, s[0:1] sc0 sc1 12053; GFX940-NEXT: s_waitcnt vmcnt(0) 12054; GFX940-NEXT: s_setpc_b64 s[30:31] 12055 %vec0 = call <8 x i16> asm "; def $0", "=v"() 12056 %vec1 = call <8 x i16> asm "; def $0", "=v"() 12057 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 12, i32 13> 12058 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 12059 ret void 12060} 12061 12062define void @v_shuffle_v2i16_v8i16__13_13(ptr addrspace(1) inreg %ptr) { 12063; GFX900-LABEL: v_shuffle_v2i16_v8i16__13_13: 12064; GFX900: ; %bb.0: 12065; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12066; GFX900-NEXT: ;;#ASMSTART 12067; GFX900-NEXT: ; def v[0:3] 12068; GFX900-NEXT: ;;#ASMEND 12069; GFX900-NEXT: s_mov_b32 s4, 0x7060302 12070; GFX900-NEXT: v_mov_b32_e32 v4, 0 12071; GFX900-NEXT: v_perm_b32 v0, v2, v2, s4 12072; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 12073; GFX900-NEXT: s_waitcnt vmcnt(0) 12074; GFX900-NEXT: s_setpc_b64 s[30:31] 12075; 12076; GFX90A-LABEL: v_shuffle_v2i16_v8i16__13_13: 12077; GFX90A: ; %bb.0: 12078; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12079; GFX90A-NEXT: ;;#ASMSTART 
12080; GFX90A-NEXT: ; def v[0:3] 12081; GFX90A-NEXT: ;;#ASMEND 12082; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 12083; GFX90A-NEXT: v_mov_b32_e32 v4, 0 12084; GFX90A-NEXT: v_perm_b32 v0, v2, v2, s4 12085; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 12086; GFX90A-NEXT: s_waitcnt vmcnt(0) 12087; GFX90A-NEXT: s_setpc_b64 s[30:31] 12088; 12089; GFX940-LABEL: v_shuffle_v2i16_v8i16__13_13: 12090; GFX940: ; %bb.0: 12091; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12092; GFX940-NEXT: ;;#ASMSTART 12093; GFX940-NEXT: ; def v[0:3] 12094; GFX940-NEXT: ;;#ASMEND 12095; GFX940-NEXT: s_mov_b32 s2, 0x7060302 12096; GFX940-NEXT: v_mov_b32_e32 v4, 0 12097; GFX940-NEXT: v_perm_b32 v0, v2, v2, s2 12098; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 12099; GFX940-NEXT: s_waitcnt vmcnt(0) 12100; GFX940-NEXT: s_setpc_b64 s[30:31] 12101 %vec0 = call <8 x i16> asm "; def $0", "=v"() 12102 %vec1 = call <8 x i16> asm "; def $0", "=v"() 12103 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 13, i32 13> 12104 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 12105 ret void 12106} 12107 12108define void @v_shuffle_v2i16_v8i16__14_13(ptr addrspace(1) inreg %ptr) { 12109; GFX900-LABEL: v_shuffle_v2i16_v8i16__14_13: 12110; GFX900: ; %bb.0: 12111; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12112; GFX900-NEXT: ;;#ASMSTART 12113; GFX900-NEXT: ; def v[0:3] 12114; GFX900-NEXT: ;;#ASMEND 12115; GFX900-NEXT: s_mov_b32 s4, 0xffff 12116; GFX900-NEXT: v_mov_b32_e32 v4, 0 12117; GFX900-NEXT: v_bfi_b32 v0, s4, v3, v2 12118; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 12119; GFX900-NEXT: s_waitcnt vmcnt(0) 12120; GFX900-NEXT: s_setpc_b64 s[30:31] 12121; 12122; GFX90A-LABEL: v_shuffle_v2i16_v8i16__14_13: 12123; GFX90A: ; %bb.0: 12124; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12125; GFX90A-NEXT: ;;#ASMSTART 12126; GFX90A-NEXT: ; def v[0:3] 12127; GFX90A-NEXT: ;;#ASMEND 12128; GFX90A-NEXT: s_mov_b32 s4, 0xffff 12129; GFX90A-NEXT: 
v_mov_b32_e32 v4, 0 12130; GFX90A-NEXT: v_bfi_b32 v0, s4, v3, v2 12131; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 12132; GFX90A-NEXT: s_waitcnt vmcnt(0) 12133; GFX90A-NEXT: s_setpc_b64 s[30:31] 12134; 12135; GFX940-LABEL: v_shuffle_v2i16_v8i16__14_13: 12136; GFX940: ; %bb.0: 12137; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12138; GFX940-NEXT: ;;#ASMSTART 12139; GFX940-NEXT: ; def v[0:3] 12140; GFX940-NEXT: ;;#ASMEND 12141; GFX940-NEXT: s_mov_b32 s2, 0xffff 12142; GFX940-NEXT: v_mov_b32_e32 v4, 0 12143; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v2 12144; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 12145; GFX940-NEXT: s_waitcnt vmcnt(0) 12146; GFX940-NEXT: s_setpc_b64 s[30:31] 12147 %vec0 = call <8 x i16> asm "; def $0", "=v"() 12148 %vec1 = call <8 x i16> asm "; def $0", "=v"() 12149 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 14, i32 13> 12150 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 12151 ret void 12152} 12153 12154define void @v_shuffle_v2i16_v8i16__u_14(ptr addrspace(1) inreg %ptr) { 12155; GFX900-LABEL: v_shuffle_v2i16_v8i16__u_14: 12156; GFX900: ; %bb.0: 12157; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12158; GFX900-NEXT: ;;#ASMSTART 12159; GFX900-NEXT: ; def v[0:3] 12160; GFX900-NEXT: ;;#ASMEND 12161; GFX900-NEXT: v_mov_b32_e32 v4, 0 12162; GFX900-NEXT: v_lshlrev_b32_e32 v0, 16, v3 12163; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 12164; GFX900-NEXT: s_waitcnt vmcnt(0) 12165; GFX900-NEXT: s_setpc_b64 s[30:31] 12166; 12167; GFX90A-LABEL: v_shuffle_v2i16_v8i16__u_14: 12168; GFX90A: ; %bb.0: 12169; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12170; GFX90A-NEXT: ;;#ASMSTART 12171; GFX90A-NEXT: ; def v[0:3] 12172; GFX90A-NEXT: ;;#ASMEND 12173; GFX90A-NEXT: v_mov_b32_e32 v4, 0 12174; GFX90A-NEXT: v_lshlrev_b32_e32 v0, 16, v3 12175; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 12176; GFX90A-NEXT: s_waitcnt vmcnt(0) 12177; GFX90A-NEXT: s_setpc_b64 s[30:31] 12178; 12179; 
GFX940-LABEL: v_shuffle_v2i16_v8i16__u_14: 12180; GFX940: ; %bb.0: 12181; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12182; GFX940-NEXT: ;;#ASMSTART 12183; GFX940-NEXT: ; def v[0:3] 12184; GFX940-NEXT: ;;#ASMEND 12185; GFX940-NEXT: v_mov_b32_e32 v4, 0 12186; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v3 12187; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 12188; GFX940-NEXT: s_waitcnt vmcnt(0) 12189; GFX940-NEXT: s_setpc_b64 s[30:31] 12190 %vec0 = call <8 x i16> asm "; def $0", "=v"() 12191 %vec1 = call <8 x i16> asm "; def $0", "=v"() 12192 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 poison, i32 14> 12193 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 12194 ret void 12195} 12196 12197define void @v_shuffle_v2i16_v8i16__0_14(ptr addrspace(1) inreg %ptr) { 12198; GFX900-LABEL: v_shuffle_v2i16_v8i16__0_14: 12199; GFX900: ; %bb.0: 12200; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12201; GFX900-NEXT: ;;#ASMSTART 12202; GFX900-NEXT: ; def v[0:3] 12203; GFX900-NEXT: ;;#ASMEND 12204; GFX900-NEXT: s_mov_b32 s4, 0x5040100 12205; GFX900-NEXT: v_mov_b32_e32 v5, 0 12206; GFX900-NEXT: ;;#ASMSTART 12207; GFX900-NEXT: ; def v[1:4] 12208; GFX900-NEXT: ;;#ASMEND 12209; GFX900-NEXT: v_perm_b32 v0, v4, v0, s4 12210; GFX900-NEXT: global_store_dword v5, v0, s[16:17] 12211; GFX900-NEXT: s_waitcnt vmcnt(0) 12212; GFX900-NEXT: s_setpc_b64 s[30:31] 12213; 12214; GFX90A-LABEL: v_shuffle_v2i16_v8i16__0_14: 12215; GFX90A: ; %bb.0: 12216; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12217; GFX90A-NEXT: ;;#ASMSTART 12218; GFX90A-NEXT: ; def v[0:3] 12219; GFX90A-NEXT: ;;#ASMEND 12220; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 12221; GFX90A-NEXT: v_mov_b32_e32 v6, 0 12222; GFX90A-NEXT: ;;#ASMSTART 12223; GFX90A-NEXT: ; def v[2:5] 12224; GFX90A-NEXT: ;;#ASMEND 12225; GFX90A-NEXT: v_perm_b32 v0, v5, v0, s4 12226; GFX90A-NEXT: global_store_dword v6, v0, s[16:17] 12227; GFX90A-NEXT: s_waitcnt vmcnt(0) 12228; GFX90A-NEXT: s_setpc_b64 
s[30:31] 12229; 12230; GFX940-LABEL: v_shuffle_v2i16_v8i16__0_14: 12231; GFX940: ; %bb.0: 12232; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12233; GFX940-NEXT: ;;#ASMSTART 12234; GFX940-NEXT: ; def v[0:3] 12235; GFX940-NEXT: ;;#ASMEND 12236; GFX940-NEXT: s_mov_b32 s2, 0x5040100 12237; GFX940-NEXT: v_mov_b32_e32 v6, 0 12238; GFX940-NEXT: ;;#ASMSTART 12239; GFX940-NEXT: ; def v[2:5] 12240; GFX940-NEXT: ;;#ASMEND 12241; GFX940-NEXT: s_nop 0 12242; GFX940-NEXT: v_perm_b32 v0, v5, v0, s2 12243; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 12244; GFX940-NEXT: s_waitcnt vmcnt(0) 12245; GFX940-NEXT: s_setpc_b64 s[30:31] 12246 %vec0 = call <8 x i16> asm "; def $0", "=v"() 12247 %vec1 = call <8 x i16> asm "; def $0", "=v"() 12248 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 0, i32 14> 12249 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 12250 ret void 12251} 12252 12253define void @v_shuffle_v2i16_v8i16__1_14(ptr addrspace(1) inreg %ptr) { 12254; GFX900-LABEL: v_shuffle_v2i16_v8i16__1_14: 12255; GFX900: ; %bb.0: 12256; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12257; GFX900-NEXT: ;;#ASMSTART 12258; GFX900-NEXT: ; def v[0:3] 12259; GFX900-NEXT: ;;#ASMEND 12260; GFX900-NEXT: v_mov_b32_e32 v5, 0 12261; GFX900-NEXT: ;;#ASMSTART 12262; GFX900-NEXT: ; def v[1:4] 12263; GFX900-NEXT: ;;#ASMEND 12264; GFX900-NEXT: v_alignbit_b32 v0, v4, v0, 16 12265; GFX900-NEXT: global_store_dword v5, v0, s[16:17] 12266; GFX900-NEXT: s_waitcnt vmcnt(0) 12267; GFX900-NEXT: s_setpc_b64 s[30:31] 12268; 12269; GFX90A-LABEL: v_shuffle_v2i16_v8i16__1_14: 12270; GFX90A: ; %bb.0: 12271; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12272; GFX90A-NEXT: ;;#ASMSTART 12273; GFX90A-NEXT: ; def v[0:3] 12274; GFX90A-NEXT: ;;#ASMEND 12275; GFX90A-NEXT: v_mov_b32_e32 v6, 0 12276; GFX90A-NEXT: ;;#ASMSTART 12277; GFX90A-NEXT: ; def v[2:5] 12278; GFX90A-NEXT: ;;#ASMEND 12279; GFX90A-NEXT: v_alignbit_b32 v0, v5, v0, 16 12280; GFX90A-NEXT: 
global_store_dword v6, v0, s[16:17] 12281; GFX90A-NEXT: s_waitcnt vmcnt(0) 12282; GFX90A-NEXT: s_setpc_b64 s[30:31] 12283; 12284; GFX940-LABEL: v_shuffle_v2i16_v8i16__1_14: 12285; GFX940: ; %bb.0: 12286; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12287; GFX940-NEXT: ;;#ASMSTART 12288; GFX940-NEXT: ; def v[0:3] 12289; GFX940-NEXT: ;;#ASMEND 12290; GFX940-NEXT: v_mov_b32_e32 v6, 0 12291; GFX940-NEXT: ;;#ASMSTART 12292; GFX940-NEXT: ; def v[2:5] 12293; GFX940-NEXT: ;;#ASMEND 12294; GFX940-NEXT: s_nop 0 12295; GFX940-NEXT: v_alignbit_b32 v0, v5, v0, 16 12296; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 12297; GFX940-NEXT: s_waitcnt vmcnt(0) 12298; GFX940-NEXT: s_setpc_b64 s[30:31] 12299 %vec0 = call <8 x i16> asm "; def $0", "=v"() 12300 %vec1 = call <8 x i16> asm "; def $0", "=v"() 12301 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 1, i32 14> 12302 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 12303 ret void 12304} 12305 12306define void @v_shuffle_v2i16_v8i16__2_14(ptr addrspace(1) inreg %ptr) { 12307; GFX900-LABEL: v_shuffle_v2i16_v8i16__2_14: 12308; GFX900: ; %bb.0: 12309; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12310; GFX900-NEXT: ;;#ASMSTART 12311; GFX900-NEXT: ; def v[0:3] 12312; GFX900-NEXT: ;;#ASMEND 12313; GFX900-NEXT: s_mov_b32 s4, 0x5040100 12314; GFX900-NEXT: v_mov_b32_e32 v6, 0 12315; GFX900-NEXT: ;;#ASMSTART 12316; GFX900-NEXT: ; def v[2:5] 12317; GFX900-NEXT: ;;#ASMEND 12318; GFX900-NEXT: v_perm_b32 v0, v5, v1, s4 12319; GFX900-NEXT: global_store_dword v6, v0, s[16:17] 12320; GFX900-NEXT: s_waitcnt vmcnt(0) 12321; GFX900-NEXT: s_setpc_b64 s[30:31] 12322; 12323; GFX90A-LABEL: v_shuffle_v2i16_v8i16__2_14: 12324; GFX90A: ; %bb.0: 12325; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12326; GFX90A-NEXT: ;;#ASMSTART 12327; GFX90A-NEXT: ; def v[0:3] 12328; GFX90A-NEXT: ;;#ASMEND 12329; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 12330; GFX90A-NEXT: v_mov_b32_e32 v6, 0 12331; GFX90A-NEXT: 
;;#ASMSTART 12332; GFX90A-NEXT: ; def v[2:5] 12333; GFX90A-NEXT: ;;#ASMEND 12334; GFX90A-NEXT: v_perm_b32 v0, v5, v1, s4 12335; GFX90A-NEXT: global_store_dword v6, v0, s[16:17] 12336; GFX90A-NEXT: s_waitcnt vmcnt(0) 12337; GFX90A-NEXT: s_setpc_b64 s[30:31] 12338; 12339; GFX940-LABEL: v_shuffle_v2i16_v8i16__2_14: 12340; GFX940: ; %bb.0: 12341; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12342; GFX940-NEXT: ;;#ASMSTART 12343; GFX940-NEXT: ; def v[0:3] 12344; GFX940-NEXT: ;;#ASMEND 12345; GFX940-NEXT: s_mov_b32 s2, 0x5040100 12346; GFX940-NEXT: v_mov_b32_e32 v6, 0 12347; GFX940-NEXT: ;;#ASMSTART 12348; GFX940-NEXT: ; def v[2:5] 12349; GFX940-NEXT: ;;#ASMEND 12350; GFX940-NEXT: s_nop 0 12351; GFX940-NEXT: v_perm_b32 v0, v5, v1, s2 12352; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 12353; GFX940-NEXT: s_waitcnt vmcnt(0) 12354; GFX940-NEXT: s_setpc_b64 s[30:31] 12355 %vec0 = call <8 x i16> asm "; def $0", "=v"() 12356 %vec1 = call <8 x i16> asm "; def $0", "=v"() 12357 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 2, i32 14> 12358 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 12359 ret void 12360} 12361 12362define void @v_shuffle_v2i16_v8i16__3_14(ptr addrspace(1) inreg %ptr) { 12363; GFX900-LABEL: v_shuffle_v2i16_v8i16__3_14: 12364; GFX900: ; %bb.0: 12365; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12366; GFX900-NEXT: ;;#ASMSTART 12367; GFX900-NEXT: ; def v[0:3] 12368; GFX900-NEXT: ;;#ASMEND 12369; GFX900-NEXT: v_mov_b32_e32 v6, 0 12370; GFX900-NEXT: ;;#ASMSTART 12371; GFX900-NEXT: ; def v[2:5] 12372; GFX900-NEXT: ;;#ASMEND 12373; GFX900-NEXT: v_alignbit_b32 v0, v5, v1, 16 12374; GFX900-NEXT: global_store_dword v6, v0, s[16:17] 12375; GFX900-NEXT: s_waitcnt vmcnt(0) 12376; GFX900-NEXT: s_setpc_b64 s[30:31] 12377; 12378; GFX90A-LABEL: v_shuffle_v2i16_v8i16__3_14: 12379; GFX90A: ; %bb.0: 12380; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12381; GFX90A-NEXT: ;;#ASMSTART 12382; GFX90A-NEXT: ; def 
v[0:3] 12383; GFX90A-NEXT: ;;#ASMEND 12384; GFX90A-NEXT: v_mov_b32_e32 v6, 0 12385; GFX90A-NEXT: ;;#ASMSTART 12386; GFX90A-NEXT: ; def v[2:5] 12387; GFX90A-NEXT: ;;#ASMEND 12388; GFX90A-NEXT: v_alignbit_b32 v0, v5, v1, 16 12389; GFX90A-NEXT: global_store_dword v6, v0, s[16:17] 12390; GFX90A-NEXT: s_waitcnt vmcnt(0) 12391; GFX90A-NEXT: s_setpc_b64 s[30:31] 12392; 12393; GFX940-LABEL: v_shuffle_v2i16_v8i16__3_14: 12394; GFX940: ; %bb.0: 12395; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12396; GFX940-NEXT: ;;#ASMSTART 12397; GFX940-NEXT: ; def v[0:3] 12398; GFX940-NEXT: ;;#ASMEND 12399; GFX940-NEXT: v_mov_b32_e32 v6, 0 12400; GFX940-NEXT: ;;#ASMSTART 12401; GFX940-NEXT: ; def v[2:5] 12402; GFX940-NEXT: ;;#ASMEND 12403; GFX940-NEXT: s_nop 0 12404; GFX940-NEXT: v_alignbit_b32 v0, v5, v1, 16 12405; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 12406; GFX940-NEXT: s_waitcnt vmcnt(0) 12407; GFX940-NEXT: s_setpc_b64 s[30:31] 12408 %vec0 = call <8 x i16> asm "; def $0", "=v"() 12409 %vec1 = call <8 x i16> asm "; def $0", "=v"() 12410 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 3, i32 14> 12411 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 12412 ret void 12413} 12414 12415define void @v_shuffle_v2i16_v8i16__4_14(ptr addrspace(1) inreg %ptr) { 12416; GFX900-LABEL: v_shuffle_v2i16_v8i16__4_14: 12417; GFX900: ; %bb.0: 12418; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12419; GFX900-NEXT: ;;#ASMSTART 12420; GFX900-NEXT: ; def v[0:3] 12421; GFX900-NEXT: ;;#ASMEND 12422; GFX900-NEXT: s_mov_b32 s4, 0x5040100 12423; GFX900-NEXT: v_mov_b32_e32 v7, 0 12424; GFX900-NEXT: ;;#ASMSTART 12425; GFX900-NEXT: ; def v[3:6] 12426; GFX900-NEXT: ;;#ASMEND 12427; GFX900-NEXT: v_perm_b32 v0, v6, v2, s4 12428; GFX900-NEXT: global_store_dword v7, v0, s[16:17] 12429; GFX900-NEXT: s_waitcnt vmcnt(0) 12430; GFX900-NEXT: s_setpc_b64 s[30:31] 12431; 12432; GFX90A-LABEL: v_shuffle_v2i16_v8i16__4_14: 12433; GFX90A: ; %bb.0: 12434; GFX90A-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12435; GFX90A-NEXT: ;;#ASMSTART 12436; GFX90A-NEXT: ; def v[0:3] 12437; GFX90A-NEXT: ;;#ASMEND 12438; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 12439; GFX90A-NEXT: v_mov_b32_e32 v8, 0 12440; GFX90A-NEXT: ;;#ASMSTART 12441; GFX90A-NEXT: ; def v[4:7] 12442; GFX90A-NEXT: ;;#ASMEND 12443; GFX90A-NEXT: v_perm_b32 v0, v7, v2, s4 12444; GFX90A-NEXT: global_store_dword v8, v0, s[16:17] 12445; GFX90A-NEXT: s_waitcnt vmcnt(0) 12446; GFX90A-NEXT: s_setpc_b64 s[30:31] 12447; 12448; GFX940-LABEL: v_shuffle_v2i16_v8i16__4_14: 12449; GFX940: ; %bb.0: 12450; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12451; GFX940-NEXT: ;;#ASMSTART 12452; GFX940-NEXT: ; def v[0:3] 12453; GFX940-NEXT: ;;#ASMEND 12454; GFX940-NEXT: s_mov_b32 s2, 0x5040100 12455; GFX940-NEXT: v_mov_b32_e32 v8, 0 12456; GFX940-NEXT: ;;#ASMSTART 12457; GFX940-NEXT: ; def v[4:7] 12458; GFX940-NEXT: ;;#ASMEND 12459; GFX940-NEXT: s_nop 0 12460; GFX940-NEXT: v_perm_b32 v0, v7, v2, s2 12461; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 12462; GFX940-NEXT: s_waitcnt vmcnt(0) 12463; GFX940-NEXT: s_setpc_b64 s[30:31] 12464 %vec0 = call <8 x i16> asm "; def $0", "=v"() 12465 %vec1 = call <8 x i16> asm "; def $0", "=v"() 12466 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 4, i32 14> 12467 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 12468 ret void 12469} 12470 12471define void @v_shuffle_v2i16_v8i16__5_14(ptr addrspace(1) inreg %ptr) { 12472; GFX900-LABEL: v_shuffle_v2i16_v8i16__5_14: 12473; GFX900: ; %bb.0: 12474; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12475; GFX900-NEXT: ;;#ASMSTART 12476; GFX900-NEXT: ; def v[0:3] 12477; GFX900-NEXT: ;;#ASMEND 12478; GFX900-NEXT: v_mov_b32_e32 v7, 0 12479; GFX900-NEXT: ;;#ASMSTART 12480; GFX900-NEXT: ; def v[3:6] 12481; GFX900-NEXT: ;;#ASMEND 12482; GFX900-NEXT: v_alignbit_b32 v0, v6, v2, 16 12483; GFX900-NEXT: global_store_dword v7, v0, s[16:17] 12484; GFX900-NEXT: s_waitcnt vmcnt(0) 12485; 
GFX900-NEXT: s_setpc_b64 s[30:31] 12486; 12487; GFX90A-LABEL: v_shuffle_v2i16_v8i16__5_14: 12488; GFX90A: ; %bb.0: 12489; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12490; GFX90A-NEXT: ;;#ASMSTART 12491; GFX90A-NEXT: ; def v[0:3] 12492; GFX90A-NEXT: ;;#ASMEND 12493; GFX90A-NEXT: v_mov_b32_e32 v8, 0 12494; GFX90A-NEXT: ;;#ASMSTART 12495; GFX90A-NEXT: ; def v[4:7] 12496; GFX90A-NEXT: ;;#ASMEND 12497; GFX90A-NEXT: v_alignbit_b32 v0, v7, v2, 16 12498; GFX90A-NEXT: global_store_dword v8, v0, s[16:17] 12499; GFX90A-NEXT: s_waitcnt vmcnt(0) 12500; GFX90A-NEXT: s_setpc_b64 s[30:31] 12501; 12502; GFX940-LABEL: v_shuffle_v2i16_v8i16__5_14: 12503; GFX940: ; %bb.0: 12504; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12505; GFX940-NEXT: ;;#ASMSTART 12506; GFX940-NEXT: ; def v[0:3] 12507; GFX940-NEXT: ;;#ASMEND 12508; GFX940-NEXT: v_mov_b32_e32 v8, 0 12509; GFX940-NEXT: ;;#ASMSTART 12510; GFX940-NEXT: ; def v[4:7] 12511; GFX940-NEXT: ;;#ASMEND 12512; GFX940-NEXT: s_nop 0 12513; GFX940-NEXT: v_alignbit_b32 v0, v7, v2, 16 12514; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 12515; GFX940-NEXT: s_waitcnt vmcnt(0) 12516; GFX940-NEXT: s_setpc_b64 s[30:31] 12517 %vec0 = call <8 x i16> asm "; def $0", "=v"() 12518 %vec1 = call <8 x i16> asm "; def $0", "=v"() 12519 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 5, i32 14> 12520 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 12521 ret void 12522} 12523 12524define void @v_shuffle_v2i16_v8i16__6_14(ptr addrspace(1) inreg %ptr) { 12525; GFX900-LABEL: v_shuffle_v2i16_v8i16__6_14: 12526; GFX900: ; %bb.0: 12527; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12528; GFX900-NEXT: ;;#ASMSTART 12529; GFX900-NEXT: ; def v[0:3] 12530; GFX900-NEXT: ;;#ASMEND 12531; GFX900-NEXT: s_mov_b32 s4, 0x5040100 12532; GFX900-NEXT: v_mov_b32_e32 v8, 0 12533; GFX900-NEXT: ;;#ASMSTART 12534; GFX900-NEXT: ; def v[4:7] 12535; GFX900-NEXT: ;;#ASMEND 12536; GFX900-NEXT: v_perm_b32 v0, v7, v3, s4 12537; 
GFX900-NEXT: global_store_dword v8, v0, s[16:17] 12538; GFX900-NEXT: s_waitcnt vmcnt(0) 12539; GFX900-NEXT: s_setpc_b64 s[30:31] 12540; 12541; GFX90A-LABEL: v_shuffle_v2i16_v8i16__6_14: 12542; GFX90A: ; %bb.0: 12543; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12544; GFX90A-NEXT: ;;#ASMSTART 12545; GFX90A-NEXT: ; def v[0:3] 12546; GFX90A-NEXT: ;;#ASMEND 12547; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 12548; GFX90A-NEXT: v_mov_b32_e32 v8, 0 12549; GFX90A-NEXT: ;;#ASMSTART 12550; GFX90A-NEXT: ; def v[4:7] 12551; GFX90A-NEXT: ;;#ASMEND 12552; GFX90A-NEXT: v_perm_b32 v0, v7, v3, s4 12553; GFX90A-NEXT: global_store_dword v8, v0, s[16:17] 12554; GFX90A-NEXT: s_waitcnt vmcnt(0) 12555; GFX90A-NEXT: s_setpc_b64 s[30:31] 12556; 12557; GFX940-LABEL: v_shuffle_v2i16_v8i16__6_14: 12558; GFX940: ; %bb.0: 12559; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12560; GFX940-NEXT: ;;#ASMSTART 12561; GFX940-NEXT: ; def v[0:3] 12562; GFX940-NEXT: ;;#ASMEND 12563; GFX940-NEXT: s_mov_b32 s2, 0x5040100 12564; GFX940-NEXT: v_mov_b32_e32 v8, 0 12565; GFX940-NEXT: ;;#ASMSTART 12566; GFX940-NEXT: ; def v[4:7] 12567; GFX940-NEXT: ;;#ASMEND 12568; GFX940-NEXT: s_nop 0 12569; GFX940-NEXT: v_perm_b32 v0, v7, v3, s2 12570; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 12571; GFX940-NEXT: s_waitcnt vmcnt(0) 12572; GFX940-NEXT: s_setpc_b64 s[30:31] 12573 %vec0 = call <8 x i16> asm "; def $0", "=v"() 12574 %vec1 = call <8 x i16> asm "; def $0", "=v"() 12575 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 6, i32 14> 12576 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 12577 ret void 12578} 12579 12580define void @v_shuffle_v2i16_v8i16__7_14(ptr addrspace(1) inreg %ptr) { 12581; GFX900-LABEL: v_shuffle_v2i16_v8i16__7_14: 12582; GFX900: ; %bb.0: 12583; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12584; GFX900-NEXT: ;;#ASMSTART 12585; GFX900-NEXT: ; def v[0:3] 12586; GFX900-NEXT: ;;#ASMEND 12587; GFX900-NEXT: v_mov_b32_e32 v8, 0 12588; 
GFX900-NEXT: ;;#ASMSTART 12589; GFX900-NEXT: ; def v[4:7] 12590; GFX900-NEXT: ;;#ASMEND 12591; GFX900-NEXT: v_alignbit_b32 v0, v7, v3, 16 12592; GFX900-NEXT: global_store_dword v8, v0, s[16:17] 12593; GFX900-NEXT: s_waitcnt vmcnt(0) 12594; GFX900-NEXT: s_setpc_b64 s[30:31] 12595; 12596; GFX90A-LABEL: v_shuffle_v2i16_v8i16__7_14: 12597; GFX90A: ; %bb.0: 12598; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12599; GFX90A-NEXT: ;;#ASMSTART 12600; GFX90A-NEXT: ; def v[0:3] 12601; GFX90A-NEXT: ;;#ASMEND 12602; GFX90A-NEXT: v_mov_b32_e32 v8, 0 12603; GFX90A-NEXT: ;;#ASMSTART 12604; GFX90A-NEXT: ; def v[4:7] 12605; GFX90A-NEXT: ;;#ASMEND 12606; GFX90A-NEXT: v_alignbit_b32 v0, v7, v3, 16 12607; GFX90A-NEXT: global_store_dword v8, v0, s[16:17] 12608; GFX90A-NEXT: s_waitcnt vmcnt(0) 12609; GFX90A-NEXT: s_setpc_b64 s[30:31] 12610; 12611; GFX940-LABEL: v_shuffle_v2i16_v8i16__7_14: 12612; GFX940: ; %bb.0: 12613; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12614; GFX940-NEXT: ;;#ASMSTART 12615; GFX940-NEXT: ; def v[0:3] 12616; GFX940-NEXT: ;;#ASMEND 12617; GFX940-NEXT: v_mov_b32_e32 v8, 0 12618; GFX940-NEXT: ;;#ASMSTART 12619; GFX940-NEXT: ; def v[4:7] 12620; GFX940-NEXT: ;;#ASMEND 12621; GFX940-NEXT: s_nop 0 12622; GFX940-NEXT: v_alignbit_b32 v0, v7, v3, 16 12623; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 12624; GFX940-NEXT: s_waitcnt vmcnt(0) 12625; GFX940-NEXT: s_setpc_b64 s[30:31] 12626 %vec0 = call <8 x i16> asm "; def $0", "=v"() 12627 %vec1 = call <8 x i16> asm "; def $0", "=v"() 12628 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 7, i32 14> 12629 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 12630 ret void 12631} 12632 12633define void @v_shuffle_v2i16_v8i16__8_14(ptr addrspace(1) inreg %ptr) { 12634; GFX900-LABEL: v_shuffle_v2i16_v8i16__8_14: 12635; GFX900: ; %bb.0: 12636; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12637; GFX900-NEXT: ;;#ASMSTART 12638; GFX900-NEXT: ; def v[0:3] 12639; GFX900-NEXT: 
;;#ASMEND 12640; GFX900-NEXT: s_mov_b32 s4, 0x5040100 12641; GFX900-NEXT: v_mov_b32_e32 v4, 0 12642; GFX900-NEXT: v_perm_b32 v0, v3, v0, s4 12643; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 12644; GFX900-NEXT: s_waitcnt vmcnt(0) 12645; GFX900-NEXT: s_setpc_b64 s[30:31] 12646; 12647; GFX90A-LABEL: v_shuffle_v2i16_v8i16__8_14: 12648; GFX90A: ; %bb.0: 12649; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12650; GFX90A-NEXT: ;;#ASMSTART 12651; GFX90A-NEXT: ; def v[0:3] 12652; GFX90A-NEXT: ;;#ASMEND 12653; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 12654; GFX90A-NEXT: v_mov_b32_e32 v4, 0 12655; GFX90A-NEXT: v_perm_b32 v0, v3, v0, s4 12656; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 12657; GFX90A-NEXT: s_waitcnt vmcnt(0) 12658; GFX90A-NEXT: s_setpc_b64 s[30:31] 12659; 12660; GFX940-LABEL: v_shuffle_v2i16_v8i16__8_14: 12661; GFX940: ; %bb.0: 12662; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12663; GFX940-NEXT: ;;#ASMSTART 12664; GFX940-NEXT: ; def v[0:3] 12665; GFX940-NEXT: ;;#ASMEND 12666; GFX940-NEXT: s_mov_b32 s2, 0x5040100 12667; GFX940-NEXT: v_mov_b32_e32 v4, 0 12668; GFX940-NEXT: v_perm_b32 v0, v3, v0, s2 12669; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 12670; GFX940-NEXT: s_waitcnt vmcnt(0) 12671; GFX940-NEXT: s_setpc_b64 s[30:31] 12672 %vec0 = call <8 x i16> asm "; def $0", "=v"() 12673 %vec1 = call <8 x i16> asm "; def $0", "=v"() 12674 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 8, i32 14> 12675 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 12676 ret void 12677} 12678 12679define void @v_shuffle_v2i16_v8i16__9_14(ptr addrspace(1) inreg %ptr) { 12680; GFX900-LABEL: v_shuffle_v2i16_v8i16__9_14: 12681; GFX900: ; %bb.0: 12682; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12683; GFX900-NEXT: ;;#ASMSTART 12684; GFX900-NEXT: ; def v[0:3] 12685; GFX900-NEXT: ;;#ASMEND 12686; GFX900-NEXT: v_mov_b32_e32 v4, 0 12687; GFX900-NEXT: v_alignbit_b32 v0, v3, v0, 16 12688; GFX900-NEXT: 
global_store_dword v4, v0, s[16:17] 12689; GFX900-NEXT: s_waitcnt vmcnt(0) 12690; GFX900-NEXT: s_setpc_b64 s[30:31] 12691; 12692; GFX90A-LABEL: v_shuffle_v2i16_v8i16__9_14: 12693; GFX90A: ; %bb.0: 12694; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12695; GFX90A-NEXT: ;;#ASMSTART 12696; GFX90A-NEXT: ; def v[0:3] 12697; GFX90A-NEXT: ;;#ASMEND 12698; GFX90A-NEXT: v_mov_b32_e32 v4, 0 12699; GFX90A-NEXT: v_alignbit_b32 v0, v3, v0, 16 12700; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 12701; GFX90A-NEXT: s_waitcnt vmcnt(0) 12702; GFX90A-NEXT: s_setpc_b64 s[30:31] 12703; 12704; GFX940-LABEL: v_shuffle_v2i16_v8i16__9_14: 12705; GFX940: ; %bb.0: 12706; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12707; GFX940-NEXT: ;;#ASMSTART 12708; GFX940-NEXT: ; def v[0:3] 12709; GFX940-NEXT: ;;#ASMEND 12710; GFX940-NEXT: v_mov_b32_e32 v4, 0 12711; GFX940-NEXT: v_alignbit_b32 v0, v3, v0, 16 12712; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 12713; GFX940-NEXT: s_waitcnt vmcnt(0) 12714; GFX940-NEXT: s_setpc_b64 s[30:31] 12715 %vec0 = call <8 x i16> asm "; def $0", "=v"() 12716 %vec1 = call <8 x i16> asm "; def $0", "=v"() 12717 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 9, i32 14> 12718 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 12719 ret void 12720} 12721 12722define void @v_shuffle_v2i16_v8i16__10_14(ptr addrspace(1) inreg %ptr) { 12723; GFX900-LABEL: v_shuffle_v2i16_v8i16__10_14: 12724; GFX900: ; %bb.0: 12725; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12726; GFX900-NEXT: ;;#ASMSTART 12727; GFX900-NEXT: ; def v[0:3] 12728; GFX900-NEXT: ;;#ASMEND 12729; GFX900-NEXT: s_mov_b32 s4, 0x5040100 12730; GFX900-NEXT: v_mov_b32_e32 v4, 0 12731; GFX900-NEXT: v_perm_b32 v0, v3, v1, s4 12732; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 12733; GFX900-NEXT: s_waitcnt vmcnt(0) 12734; GFX900-NEXT: s_setpc_b64 s[30:31] 12735; 12736; GFX90A-LABEL: v_shuffle_v2i16_v8i16__10_14: 12737; GFX90A: ; %bb.0: 12738; 
GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12739; GFX90A-NEXT: ;;#ASMSTART 12740; GFX90A-NEXT: ; def v[0:3] 12741; GFX90A-NEXT: ;;#ASMEND 12742; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 12743; GFX90A-NEXT: v_mov_b32_e32 v4, 0 12744; GFX90A-NEXT: v_perm_b32 v0, v3, v1, s4 12745; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 12746; GFX90A-NEXT: s_waitcnt vmcnt(0) 12747; GFX90A-NEXT: s_setpc_b64 s[30:31] 12748; 12749; GFX940-LABEL: v_shuffle_v2i16_v8i16__10_14: 12750; GFX940: ; %bb.0: 12751; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12752; GFX940-NEXT: ;;#ASMSTART 12753; GFX940-NEXT: ; def v[0:3] 12754; GFX940-NEXT: ;;#ASMEND 12755; GFX940-NEXT: s_mov_b32 s2, 0x5040100 12756; GFX940-NEXT: v_mov_b32_e32 v4, 0 12757; GFX940-NEXT: v_perm_b32 v0, v3, v1, s2 12758; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 12759; GFX940-NEXT: s_waitcnt vmcnt(0) 12760; GFX940-NEXT: s_setpc_b64 s[30:31] 12761 %vec0 = call <8 x i16> asm "; def $0", "=v"() 12762 %vec1 = call <8 x i16> asm "; def $0", "=v"() 12763 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 10, i32 14> 12764 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 12765 ret void 12766} 12767 12768define void @v_shuffle_v2i16_v8i16__11_14(ptr addrspace(1) inreg %ptr) { 12769; GFX900-LABEL: v_shuffle_v2i16_v8i16__11_14: 12770; GFX900: ; %bb.0: 12771; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12772; GFX900-NEXT: ;;#ASMSTART 12773; GFX900-NEXT: ; def v[0:3] 12774; GFX900-NEXT: ;;#ASMEND 12775; GFX900-NEXT: v_mov_b32_e32 v4, 0 12776; GFX900-NEXT: v_alignbit_b32 v0, v3, v1, 16 12777; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 12778; GFX900-NEXT: s_waitcnt vmcnt(0) 12779; GFX900-NEXT: s_setpc_b64 s[30:31] 12780; 12781; GFX90A-LABEL: v_shuffle_v2i16_v8i16__11_14: 12782; GFX90A: ; %bb.0: 12783; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12784; GFX90A-NEXT: ;;#ASMSTART 12785; GFX90A-NEXT: ; def v[0:3] 12786; GFX90A-NEXT: ;;#ASMEND 12787; GFX90A-NEXT: 
v_mov_b32_e32 v4, 0 12788; GFX90A-NEXT: v_alignbit_b32 v0, v3, v1, 16 12789; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 12790; GFX90A-NEXT: s_waitcnt vmcnt(0) 12791; GFX90A-NEXT: s_setpc_b64 s[30:31] 12792; 12793; GFX940-LABEL: v_shuffle_v2i16_v8i16__11_14: 12794; GFX940: ; %bb.0: 12795; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12796; GFX940-NEXT: ;;#ASMSTART 12797; GFX940-NEXT: ; def v[0:3] 12798; GFX940-NEXT: ;;#ASMEND 12799; GFX940-NEXT: v_mov_b32_e32 v4, 0 12800; GFX940-NEXT: v_alignbit_b32 v0, v3, v1, 16 12801; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 12802; GFX940-NEXT: s_waitcnt vmcnt(0) 12803; GFX940-NEXT: s_setpc_b64 s[30:31] 12804 %vec0 = call <8 x i16> asm "; def $0", "=v"() 12805 %vec1 = call <8 x i16> asm "; def $0", "=v"() 12806 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 11, i32 14> 12807 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 12808 ret void 12809} 12810 12811define void @v_shuffle_v2i16_v8i16__12_14(ptr addrspace(1) inreg %ptr) { 12812; GFX900-LABEL: v_shuffle_v2i16_v8i16__12_14: 12813; GFX900: ; %bb.0: 12814; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12815; GFX900-NEXT: ;;#ASMSTART 12816; GFX900-NEXT: ; def v[0:3] 12817; GFX900-NEXT: ;;#ASMEND 12818; GFX900-NEXT: s_mov_b32 s4, 0x5040100 12819; GFX900-NEXT: v_mov_b32_e32 v4, 0 12820; GFX900-NEXT: v_perm_b32 v0, v3, v2, s4 12821; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 12822; GFX900-NEXT: s_waitcnt vmcnt(0) 12823; GFX900-NEXT: s_setpc_b64 s[30:31] 12824; 12825; GFX90A-LABEL: v_shuffle_v2i16_v8i16__12_14: 12826; GFX90A: ; %bb.0: 12827; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12828; GFX90A-NEXT: ;;#ASMSTART 12829; GFX90A-NEXT: ; def v[0:3] 12830; GFX90A-NEXT: ;;#ASMEND 12831; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 12832; GFX90A-NEXT: v_mov_b32_e32 v4, 0 12833; GFX90A-NEXT: v_perm_b32 v0, v3, v2, s4 12834; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 12835; GFX90A-NEXT: s_waitcnt vmcnt(0) 12836; 
GFX90A-NEXT: s_setpc_b64 s[30:31] 12837; 12838; GFX940-LABEL: v_shuffle_v2i16_v8i16__12_14: 12839; GFX940: ; %bb.0: 12840; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12841; GFX940-NEXT: ;;#ASMSTART 12842; GFX940-NEXT: ; def v[0:3] 12843; GFX940-NEXT: ;;#ASMEND 12844; GFX940-NEXT: s_mov_b32 s2, 0x5040100 12845; GFX940-NEXT: v_mov_b32_e32 v4, 0 12846; GFX940-NEXT: v_perm_b32 v0, v3, v2, s2 12847; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 12848; GFX940-NEXT: s_waitcnt vmcnt(0) 12849; GFX940-NEXT: s_setpc_b64 s[30:31] 12850 %vec0 = call <8 x i16> asm "; def $0", "=v"() 12851 %vec1 = call <8 x i16> asm "; def $0", "=v"() 12852 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 12, i32 14> 12853 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 12854 ret void 12855} 12856 12857define void @v_shuffle_v2i16_v8i16__13_14(ptr addrspace(1) inreg %ptr) { 12858; GFX900-LABEL: v_shuffle_v2i16_v8i16__13_14: 12859; GFX900: ; %bb.0: 12860; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12861; GFX900-NEXT: ;;#ASMSTART 12862; GFX900-NEXT: ; def v[0:3] 12863; GFX900-NEXT: ;;#ASMEND 12864; GFX900-NEXT: v_mov_b32_e32 v4, 0 12865; GFX900-NEXT: v_alignbit_b32 v0, v3, v2, 16 12866; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 12867; GFX900-NEXT: s_waitcnt vmcnt(0) 12868; GFX900-NEXT: s_setpc_b64 s[30:31] 12869; 12870; GFX90A-LABEL: v_shuffle_v2i16_v8i16__13_14: 12871; GFX90A: ; %bb.0: 12872; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12873; GFX90A-NEXT: ;;#ASMSTART 12874; GFX90A-NEXT: ; def v[0:3] 12875; GFX90A-NEXT: ;;#ASMEND 12876; GFX90A-NEXT: v_mov_b32_e32 v4, 0 12877; GFX90A-NEXT: v_alignbit_b32 v0, v3, v2, 16 12878; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 12879; GFX90A-NEXT: s_waitcnt vmcnt(0) 12880; GFX90A-NEXT: s_setpc_b64 s[30:31] 12881; 12882; GFX940-LABEL: v_shuffle_v2i16_v8i16__13_14: 12883; GFX940: ; %bb.0: 12884; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12885; GFX940-NEXT: ;;#ASMSTART 
12886; GFX940-NEXT: ; def v[0:3] 12887; GFX940-NEXT: ;;#ASMEND 12888; GFX940-NEXT: v_mov_b32_e32 v4, 0 12889; GFX940-NEXT: v_alignbit_b32 v0, v3, v2, 16 12890; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 12891; GFX940-NEXT: s_waitcnt vmcnt(0) 12892; GFX940-NEXT: s_setpc_b64 s[30:31] 12893 %vec0 = call <8 x i16> asm "; def $0", "=v"() 12894 %vec1 = call <8 x i16> asm "; def $0", "=v"() 12895 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 13, i32 14> 12896 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 12897 ret void 12898} 12899 12900define void @v_shuffle_v2i16_v8i16__14_14(ptr addrspace(1) inreg %ptr) { 12901; GFX900-LABEL: v_shuffle_v2i16_v8i16__14_14: 12902; GFX900: ; %bb.0: 12903; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12904; GFX900-NEXT: ;;#ASMSTART 12905; GFX900-NEXT: ; def v[0:3] 12906; GFX900-NEXT: ;;#ASMEND 12907; GFX900-NEXT: s_mov_b32 s4, 0x5040100 12908; GFX900-NEXT: v_mov_b32_e32 v4, 0 12909; GFX900-NEXT: v_perm_b32 v0, v3, v3, s4 12910; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 12911; GFX900-NEXT: s_waitcnt vmcnt(0) 12912; GFX900-NEXT: s_setpc_b64 s[30:31] 12913; 12914; GFX90A-LABEL: v_shuffle_v2i16_v8i16__14_14: 12915; GFX90A: ; %bb.0: 12916; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12917; GFX90A-NEXT: ;;#ASMSTART 12918; GFX90A-NEXT: ; def v[0:3] 12919; GFX90A-NEXT: ;;#ASMEND 12920; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 12921; GFX90A-NEXT: v_mov_b32_e32 v4, 0 12922; GFX90A-NEXT: v_perm_b32 v0, v3, v3, s4 12923; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 12924; GFX90A-NEXT: s_waitcnt vmcnt(0) 12925; GFX90A-NEXT: s_setpc_b64 s[30:31] 12926; 12927; GFX940-LABEL: v_shuffle_v2i16_v8i16__14_14: 12928; GFX940: ; %bb.0: 12929; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12930; GFX940-NEXT: ;;#ASMSTART 12931; GFX940-NEXT: ; def v[0:3] 12932; GFX940-NEXT: ;;#ASMEND 12933; GFX940-NEXT: s_mov_b32 s2, 0x5040100 12934; GFX940-NEXT: v_mov_b32_e32 v4, 0 12935; GFX940-NEXT: 
v_perm_b32 v0, v3, v3, s2 12936; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 12937; GFX940-NEXT: s_waitcnt vmcnt(0) 12938; GFX940-NEXT: s_setpc_b64 s[30:31] 12939 %vec0 = call <8 x i16> asm "; def $0", "=v"() 12940 %vec1 = call <8 x i16> asm "; def $0", "=v"() 12941 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 14, i32 14> 12942 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 12943 ret void 12944} 12945 12946define void @v_shuffle_v2i16_v8i16__u_15(ptr addrspace(1) inreg %ptr) { 12947; GFX900-LABEL: v_shuffle_v2i16_v8i16__u_15: 12948; GFX900: ; %bb.0: 12949; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12950; GFX900-NEXT: v_mov_b32_e32 v4, 0 12951; GFX900-NEXT: ;;#ASMSTART 12952; GFX900-NEXT: ; def v[0:3] 12953; GFX900-NEXT: ;;#ASMEND 12954; GFX900-NEXT: global_store_dword v4, v3, s[16:17] 12955; GFX900-NEXT: s_waitcnt vmcnt(0) 12956; GFX900-NEXT: s_setpc_b64 s[30:31] 12957; 12958; GFX90A-LABEL: v_shuffle_v2i16_v8i16__u_15: 12959; GFX90A: ; %bb.0: 12960; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12961; GFX90A-NEXT: v_mov_b32_e32 v4, 0 12962; GFX90A-NEXT: ;;#ASMSTART 12963; GFX90A-NEXT: ; def v[0:3] 12964; GFX90A-NEXT: ;;#ASMEND 12965; GFX90A-NEXT: global_store_dword v4, v3, s[16:17] 12966; GFX90A-NEXT: s_waitcnt vmcnt(0) 12967; GFX90A-NEXT: s_setpc_b64 s[30:31] 12968; 12969; GFX940-LABEL: v_shuffle_v2i16_v8i16__u_15: 12970; GFX940: ; %bb.0: 12971; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12972; GFX940-NEXT: v_mov_b32_e32 v4, 0 12973; GFX940-NEXT: ;;#ASMSTART 12974; GFX940-NEXT: ; def v[0:3] 12975; GFX940-NEXT: ;;#ASMEND 12976; GFX940-NEXT: global_store_dword v4, v3, s[0:1] sc0 sc1 12977; GFX940-NEXT: s_waitcnt vmcnt(0) 12978; GFX940-NEXT: s_setpc_b64 s[30:31] 12979 %vec0 = call <8 x i16> asm "; def $0", "=v"() 12980 %vec1 = call <8 x i16> asm "; def $0", "=v"() 12981 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 poison, i32 15> 12982 store <2 x i16> %shuf, ptr 
addrspace(1) %ptr, align 4 12983 ret void 12984} 12985 12986define void @v_shuffle_v2i16_v8i16__0_15(ptr addrspace(1) inreg %ptr) { 12987; GFX900-LABEL: v_shuffle_v2i16_v8i16__0_15: 12988; GFX900: ; %bb.0: 12989; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12990; GFX900-NEXT: ;;#ASMSTART 12991; GFX900-NEXT: ; def v[0:3] 12992; GFX900-NEXT: ;;#ASMEND 12993; GFX900-NEXT: s_mov_b32 s4, 0xffff 12994; GFX900-NEXT: v_mov_b32_e32 v5, 0 12995; GFX900-NEXT: ;;#ASMSTART 12996; GFX900-NEXT: ; def v[1:4] 12997; GFX900-NEXT: ;;#ASMEND 12998; GFX900-NEXT: v_bfi_b32 v0, s4, v0, v4 12999; GFX900-NEXT: global_store_dword v5, v0, s[16:17] 13000; GFX900-NEXT: s_waitcnt vmcnt(0) 13001; GFX900-NEXT: s_setpc_b64 s[30:31] 13002; 13003; GFX90A-LABEL: v_shuffle_v2i16_v8i16__0_15: 13004; GFX90A: ; %bb.0: 13005; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13006; GFX90A-NEXT: ;;#ASMSTART 13007; GFX90A-NEXT: ; def v[0:3] 13008; GFX90A-NEXT: ;;#ASMEND 13009; GFX90A-NEXT: s_mov_b32 s4, 0xffff 13010; GFX90A-NEXT: v_mov_b32_e32 v6, 0 13011; GFX90A-NEXT: ;;#ASMSTART 13012; GFX90A-NEXT: ; def v[2:5] 13013; GFX90A-NEXT: ;;#ASMEND 13014; GFX90A-NEXT: v_bfi_b32 v0, s4, v0, v5 13015; GFX90A-NEXT: global_store_dword v6, v0, s[16:17] 13016; GFX90A-NEXT: s_waitcnt vmcnt(0) 13017; GFX90A-NEXT: s_setpc_b64 s[30:31] 13018; 13019; GFX940-LABEL: v_shuffle_v2i16_v8i16__0_15: 13020; GFX940: ; %bb.0: 13021; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13022; GFX940-NEXT: ;;#ASMSTART 13023; GFX940-NEXT: ; def v[0:3] 13024; GFX940-NEXT: ;;#ASMEND 13025; GFX940-NEXT: s_mov_b32 s2, 0xffff 13026; GFX940-NEXT: v_mov_b32_e32 v6, 0 13027; GFX940-NEXT: ;;#ASMSTART 13028; GFX940-NEXT: ; def v[2:5] 13029; GFX940-NEXT: ;;#ASMEND 13030; GFX940-NEXT: s_nop 0 13031; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v5 13032; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 13033; GFX940-NEXT: s_waitcnt vmcnt(0) 13034; GFX940-NEXT: s_setpc_b64 s[30:31] 13035 %vec0 = call <8 x i16> asm "; def $0", "=v"() 13036 
%vec1 = call <8 x i16> asm "; def $0", "=v"() 13037 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 0, i32 15> 13038 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 13039 ret void 13040} 13041 13042define void @v_shuffle_v2i16_v8i16__1_15(ptr addrspace(1) inreg %ptr) { 13043; GFX900-LABEL: v_shuffle_v2i16_v8i16__1_15: 13044; GFX900: ; %bb.0: 13045; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13046; GFX900-NEXT: ;;#ASMSTART 13047; GFX900-NEXT: ; def v[0:3] 13048; GFX900-NEXT: ;;#ASMEND 13049; GFX900-NEXT: s_mov_b32 s4, 0x7060302 13050; GFX900-NEXT: v_mov_b32_e32 v5, 0 13051; GFX900-NEXT: ;;#ASMSTART 13052; GFX900-NEXT: ; def v[1:4] 13053; GFX900-NEXT: ;;#ASMEND 13054; GFX900-NEXT: v_perm_b32 v0, v4, v0, s4 13055; GFX900-NEXT: global_store_dword v5, v0, s[16:17] 13056; GFX900-NEXT: s_waitcnt vmcnt(0) 13057; GFX900-NEXT: s_setpc_b64 s[30:31] 13058; 13059; GFX90A-LABEL: v_shuffle_v2i16_v8i16__1_15: 13060; GFX90A: ; %bb.0: 13061; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13062; GFX90A-NEXT: ;;#ASMSTART 13063; GFX90A-NEXT: ; def v[0:3] 13064; GFX90A-NEXT: ;;#ASMEND 13065; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 13066; GFX90A-NEXT: v_mov_b32_e32 v6, 0 13067; GFX90A-NEXT: ;;#ASMSTART 13068; GFX90A-NEXT: ; def v[2:5] 13069; GFX90A-NEXT: ;;#ASMEND 13070; GFX90A-NEXT: v_perm_b32 v0, v5, v0, s4 13071; GFX90A-NEXT: global_store_dword v6, v0, s[16:17] 13072; GFX90A-NEXT: s_waitcnt vmcnt(0) 13073; GFX90A-NEXT: s_setpc_b64 s[30:31] 13074; 13075; GFX940-LABEL: v_shuffle_v2i16_v8i16__1_15: 13076; GFX940: ; %bb.0: 13077; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13078; GFX940-NEXT: ;;#ASMSTART 13079; GFX940-NEXT: ; def v[0:3] 13080; GFX940-NEXT: ;;#ASMEND 13081; GFX940-NEXT: s_mov_b32 s2, 0x7060302 13082; GFX940-NEXT: v_mov_b32_e32 v6, 0 13083; GFX940-NEXT: ;;#ASMSTART 13084; GFX940-NEXT: ; def v[2:5] 13085; GFX940-NEXT: ;;#ASMEND 13086; GFX940-NEXT: s_nop 0 13087; GFX940-NEXT: v_perm_b32 v0, v5, v0, s2 13088; GFX940-NEXT: 
global_store_dword v6, v0, s[0:1] sc0 sc1 13089; GFX940-NEXT: s_waitcnt vmcnt(0) 13090; GFX940-NEXT: s_setpc_b64 s[30:31] 13091 %vec0 = call <8 x i16> asm "; def $0", "=v"() 13092 %vec1 = call <8 x i16> asm "; def $0", "=v"() 13093 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 1, i32 15> 13094 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 13095 ret void 13096} 13097 13098define void @v_shuffle_v2i16_v8i16__2_15(ptr addrspace(1) inreg %ptr) { 13099; GFX900-LABEL: v_shuffle_v2i16_v8i16__2_15: 13100; GFX900: ; %bb.0: 13101; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13102; GFX900-NEXT: ;;#ASMSTART 13103; GFX900-NEXT: ; def v[0:3] 13104; GFX900-NEXT: ;;#ASMEND 13105; GFX900-NEXT: s_mov_b32 s4, 0xffff 13106; GFX900-NEXT: v_mov_b32_e32 v6, 0 13107; GFX900-NEXT: ;;#ASMSTART 13108; GFX900-NEXT: ; def v[2:5] 13109; GFX900-NEXT: ;;#ASMEND 13110; GFX900-NEXT: v_bfi_b32 v0, s4, v1, v5 13111; GFX900-NEXT: global_store_dword v6, v0, s[16:17] 13112; GFX900-NEXT: s_waitcnt vmcnt(0) 13113; GFX900-NEXT: s_setpc_b64 s[30:31] 13114; 13115; GFX90A-LABEL: v_shuffle_v2i16_v8i16__2_15: 13116; GFX90A: ; %bb.0: 13117; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13118; GFX90A-NEXT: ;;#ASMSTART 13119; GFX90A-NEXT: ; def v[0:3] 13120; GFX90A-NEXT: ;;#ASMEND 13121; GFX90A-NEXT: s_mov_b32 s4, 0xffff 13122; GFX90A-NEXT: v_mov_b32_e32 v6, 0 13123; GFX90A-NEXT: ;;#ASMSTART 13124; GFX90A-NEXT: ; def v[2:5] 13125; GFX90A-NEXT: ;;#ASMEND 13126; GFX90A-NEXT: v_bfi_b32 v0, s4, v1, v5 13127; GFX90A-NEXT: global_store_dword v6, v0, s[16:17] 13128; GFX90A-NEXT: s_waitcnt vmcnt(0) 13129; GFX90A-NEXT: s_setpc_b64 s[30:31] 13130; 13131; GFX940-LABEL: v_shuffle_v2i16_v8i16__2_15: 13132; GFX940: ; %bb.0: 13133; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13134; GFX940-NEXT: ;;#ASMSTART 13135; GFX940-NEXT: ; def v[0:3] 13136; GFX940-NEXT: ;;#ASMEND 13137; GFX940-NEXT: s_mov_b32 s2, 0xffff 13138; GFX940-NEXT: v_mov_b32_e32 v6, 0 13139; GFX940-NEXT: 
;;#ASMSTART 13140; GFX940-NEXT: ; def v[2:5] 13141; GFX940-NEXT: ;;#ASMEND 13142; GFX940-NEXT: s_nop 0 13143; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v5 13144; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 13145; GFX940-NEXT: s_waitcnt vmcnt(0) 13146; GFX940-NEXT: s_setpc_b64 s[30:31] 13147 %vec0 = call <8 x i16> asm "; def $0", "=v"() 13148 %vec1 = call <8 x i16> asm "; def $0", "=v"() 13149 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 2, i32 15> 13150 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 13151 ret void 13152} 13153 13154define void @v_shuffle_v2i16_v8i16__3_15(ptr addrspace(1) inreg %ptr) { 13155; GFX900-LABEL: v_shuffle_v2i16_v8i16__3_15: 13156; GFX900: ; %bb.0: 13157; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13158; GFX900-NEXT: ;;#ASMSTART 13159; GFX900-NEXT: ; def v[0:3] 13160; GFX900-NEXT: ;;#ASMEND 13161; GFX900-NEXT: s_mov_b32 s4, 0x7060302 13162; GFX900-NEXT: v_mov_b32_e32 v6, 0 13163; GFX900-NEXT: ;;#ASMSTART 13164; GFX900-NEXT: ; def v[2:5] 13165; GFX900-NEXT: ;;#ASMEND 13166; GFX900-NEXT: v_perm_b32 v0, v5, v1, s4 13167; GFX900-NEXT: global_store_dword v6, v0, s[16:17] 13168; GFX900-NEXT: s_waitcnt vmcnt(0) 13169; GFX900-NEXT: s_setpc_b64 s[30:31] 13170; 13171; GFX90A-LABEL: v_shuffle_v2i16_v8i16__3_15: 13172; GFX90A: ; %bb.0: 13173; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13174; GFX90A-NEXT: ;;#ASMSTART 13175; GFX90A-NEXT: ; def v[0:3] 13176; GFX90A-NEXT: ;;#ASMEND 13177; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 13178; GFX90A-NEXT: v_mov_b32_e32 v6, 0 13179; GFX90A-NEXT: ;;#ASMSTART 13180; GFX90A-NEXT: ; def v[2:5] 13181; GFX90A-NEXT: ;;#ASMEND 13182; GFX90A-NEXT: v_perm_b32 v0, v5, v1, s4 13183; GFX90A-NEXT: global_store_dword v6, v0, s[16:17] 13184; GFX90A-NEXT: s_waitcnt vmcnt(0) 13185; GFX90A-NEXT: s_setpc_b64 s[30:31] 13186; 13187; GFX940-LABEL: v_shuffle_v2i16_v8i16__3_15: 13188; GFX940: ; %bb.0: 13189; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13190; GFX940-NEXT: 
;;#ASMSTART 13191; GFX940-NEXT: ; def v[0:3] 13192; GFX940-NEXT: ;;#ASMEND 13193; GFX940-NEXT: s_mov_b32 s2, 0x7060302 13194; GFX940-NEXT: v_mov_b32_e32 v6, 0 13195; GFX940-NEXT: ;;#ASMSTART 13196; GFX940-NEXT: ; def v[2:5] 13197; GFX940-NEXT: ;;#ASMEND 13198; GFX940-NEXT: s_nop 0 13199; GFX940-NEXT: v_perm_b32 v0, v5, v1, s2 13200; GFX940-NEXT: global_store_dword v6, v0, s[0:1] sc0 sc1 13201; GFX940-NEXT: s_waitcnt vmcnt(0) 13202; GFX940-NEXT: s_setpc_b64 s[30:31] 13203 %vec0 = call <8 x i16> asm "; def $0", "=v"() 13204 %vec1 = call <8 x i16> asm "; def $0", "=v"() 13205 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 3, i32 15> 13206 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 13207 ret void 13208} 13209 13210define void @v_shuffle_v2i16_v8i16__4_15(ptr addrspace(1) inreg %ptr) { 13211; GFX900-LABEL: v_shuffle_v2i16_v8i16__4_15: 13212; GFX900: ; %bb.0: 13213; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13214; GFX900-NEXT: ;;#ASMSTART 13215; GFX900-NEXT: ; def v[0:3] 13216; GFX900-NEXT: ;;#ASMEND 13217; GFX900-NEXT: s_mov_b32 s4, 0xffff 13218; GFX900-NEXT: v_mov_b32_e32 v7, 0 13219; GFX900-NEXT: ;;#ASMSTART 13220; GFX900-NEXT: ; def v[3:6] 13221; GFX900-NEXT: ;;#ASMEND 13222; GFX900-NEXT: v_bfi_b32 v0, s4, v2, v6 13223; GFX900-NEXT: global_store_dword v7, v0, s[16:17] 13224; GFX900-NEXT: s_waitcnt vmcnt(0) 13225; GFX900-NEXT: s_setpc_b64 s[30:31] 13226; 13227; GFX90A-LABEL: v_shuffle_v2i16_v8i16__4_15: 13228; GFX90A: ; %bb.0: 13229; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13230; GFX90A-NEXT: ;;#ASMSTART 13231; GFX90A-NEXT: ; def v[0:3] 13232; GFX90A-NEXT: ;;#ASMEND 13233; GFX90A-NEXT: s_mov_b32 s4, 0xffff 13234; GFX90A-NEXT: v_mov_b32_e32 v8, 0 13235; GFX90A-NEXT: ;;#ASMSTART 13236; GFX90A-NEXT: ; def v[4:7] 13237; GFX90A-NEXT: ;;#ASMEND 13238; GFX90A-NEXT: v_bfi_b32 v0, s4, v2, v7 13239; GFX90A-NEXT: global_store_dword v8, v0, s[16:17] 13240; GFX90A-NEXT: s_waitcnt vmcnt(0) 13241; GFX90A-NEXT: s_setpc_b64 
s[30:31] 13242; 13243; GFX940-LABEL: v_shuffle_v2i16_v8i16__4_15: 13244; GFX940: ; %bb.0: 13245; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13246; GFX940-NEXT: ;;#ASMSTART 13247; GFX940-NEXT: ; def v[0:3] 13248; GFX940-NEXT: ;;#ASMEND 13249; GFX940-NEXT: s_mov_b32 s2, 0xffff 13250; GFX940-NEXT: v_mov_b32_e32 v8, 0 13251; GFX940-NEXT: ;;#ASMSTART 13252; GFX940-NEXT: ; def v[4:7] 13253; GFX940-NEXT: ;;#ASMEND 13254; GFX940-NEXT: s_nop 0 13255; GFX940-NEXT: v_bfi_b32 v0, s2, v2, v7 13256; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 13257; GFX940-NEXT: s_waitcnt vmcnt(0) 13258; GFX940-NEXT: s_setpc_b64 s[30:31] 13259 %vec0 = call <8 x i16> asm "; def $0", "=v"() 13260 %vec1 = call <8 x i16> asm "; def $0", "=v"() 13261 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 4, i32 15> 13262 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 13263 ret void 13264} 13265 13266define void @v_shuffle_v2i16_v8i16__5_15(ptr addrspace(1) inreg %ptr) { 13267; GFX900-LABEL: v_shuffle_v2i16_v8i16__5_15: 13268; GFX900: ; %bb.0: 13269; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13270; GFX900-NEXT: ;;#ASMSTART 13271; GFX900-NEXT: ; def v[0:3] 13272; GFX900-NEXT: ;;#ASMEND 13273; GFX900-NEXT: s_mov_b32 s4, 0x7060302 13274; GFX900-NEXT: v_mov_b32_e32 v7, 0 13275; GFX900-NEXT: ;;#ASMSTART 13276; GFX900-NEXT: ; def v[3:6] 13277; GFX900-NEXT: ;;#ASMEND 13278; GFX900-NEXT: v_perm_b32 v0, v6, v2, s4 13279; GFX900-NEXT: global_store_dword v7, v0, s[16:17] 13280; GFX900-NEXT: s_waitcnt vmcnt(0) 13281; GFX900-NEXT: s_setpc_b64 s[30:31] 13282; 13283; GFX90A-LABEL: v_shuffle_v2i16_v8i16__5_15: 13284; GFX90A: ; %bb.0: 13285; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13286; GFX90A-NEXT: ;;#ASMSTART 13287; GFX90A-NEXT: ; def v[0:3] 13288; GFX90A-NEXT: ;;#ASMEND 13289; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 13290; GFX90A-NEXT: v_mov_b32_e32 v8, 0 13291; GFX90A-NEXT: ;;#ASMSTART 13292; GFX90A-NEXT: ; def v[4:7] 13293; GFX90A-NEXT: ;;#ASMEND 
13294; GFX90A-NEXT: v_perm_b32 v0, v7, v2, s4 13295; GFX90A-NEXT: global_store_dword v8, v0, s[16:17] 13296; GFX90A-NEXT: s_waitcnt vmcnt(0) 13297; GFX90A-NEXT: s_setpc_b64 s[30:31] 13298; 13299; GFX940-LABEL: v_shuffle_v2i16_v8i16__5_15: 13300; GFX940: ; %bb.0: 13301; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13302; GFX940-NEXT: ;;#ASMSTART 13303; GFX940-NEXT: ; def v[0:3] 13304; GFX940-NEXT: ;;#ASMEND 13305; GFX940-NEXT: s_mov_b32 s2, 0x7060302 13306; GFX940-NEXT: v_mov_b32_e32 v8, 0 13307; GFX940-NEXT: ;;#ASMSTART 13308; GFX940-NEXT: ; def v[4:7] 13309; GFX940-NEXT: ;;#ASMEND 13310; GFX940-NEXT: s_nop 0 13311; GFX940-NEXT: v_perm_b32 v0, v7, v2, s2 13312; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 13313; GFX940-NEXT: s_waitcnt vmcnt(0) 13314; GFX940-NEXT: s_setpc_b64 s[30:31] 13315 %vec0 = call <8 x i16> asm "; def $0", "=v"() 13316 %vec1 = call <8 x i16> asm "; def $0", "=v"() 13317 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 5, i32 15> 13318 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 13319 ret void 13320} 13321 13322define void @v_shuffle_v2i16_v8i16__6_15(ptr addrspace(1) inreg %ptr) { 13323; GFX900-LABEL: v_shuffle_v2i16_v8i16__6_15: 13324; GFX900: ; %bb.0: 13325; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13326; GFX900-NEXT: ;;#ASMSTART 13327; GFX900-NEXT: ; def v[0:3] 13328; GFX900-NEXT: ;;#ASMEND 13329; GFX900-NEXT: s_mov_b32 s4, 0xffff 13330; GFX900-NEXT: v_mov_b32_e32 v8, 0 13331; GFX900-NEXT: ;;#ASMSTART 13332; GFX900-NEXT: ; def v[4:7] 13333; GFX900-NEXT: ;;#ASMEND 13334; GFX900-NEXT: v_bfi_b32 v0, s4, v3, v7 13335; GFX900-NEXT: global_store_dword v8, v0, s[16:17] 13336; GFX900-NEXT: s_waitcnt vmcnt(0) 13337; GFX900-NEXT: s_setpc_b64 s[30:31] 13338; 13339; GFX90A-LABEL: v_shuffle_v2i16_v8i16__6_15: 13340; GFX90A: ; %bb.0: 13341; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13342; GFX90A-NEXT: ;;#ASMSTART 13343; GFX90A-NEXT: ; def v[0:3] 13344; GFX90A-NEXT: ;;#ASMEND 13345; 
GFX90A-NEXT: s_mov_b32 s4, 0xffff 13346; GFX90A-NEXT: v_mov_b32_e32 v8, 0 13347; GFX90A-NEXT: ;;#ASMSTART 13348; GFX90A-NEXT: ; def v[4:7] 13349; GFX90A-NEXT: ;;#ASMEND 13350; GFX90A-NEXT: v_bfi_b32 v0, s4, v3, v7 13351; GFX90A-NEXT: global_store_dword v8, v0, s[16:17] 13352; GFX90A-NEXT: s_waitcnt vmcnt(0) 13353; GFX90A-NEXT: s_setpc_b64 s[30:31] 13354; 13355; GFX940-LABEL: v_shuffle_v2i16_v8i16__6_15: 13356; GFX940: ; %bb.0: 13357; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13358; GFX940-NEXT: ;;#ASMSTART 13359; GFX940-NEXT: ; def v[0:3] 13360; GFX940-NEXT: ;;#ASMEND 13361; GFX940-NEXT: s_mov_b32 s2, 0xffff 13362; GFX940-NEXT: v_mov_b32_e32 v8, 0 13363; GFX940-NEXT: ;;#ASMSTART 13364; GFX940-NEXT: ; def v[4:7] 13365; GFX940-NEXT: ;;#ASMEND 13366; GFX940-NEXT: s_nop 0 13367; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v7 13368; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 13369; GFX940-NEXT: s_waitcnt vmcnt(0) 13370; GFX940-NEXT: s_setpc_b64 s[30:31] 13371 %vec0 = call <8 x i16> asm "; def $0", "=v"() 13372 %vec1 = call <8 x i16> asm "; def $0", "=v"() 13373 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 6, i32 15> 13374 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 13375 ret void 13376} 13377 13378define void @v_shuffle_v2i16_v8i16__7_15(ptr addrspace(1) inreg %ptr) { 13379; GFX900-LABEL: v_shuffle_v2i16_v8i16__7_15: 13380; GFX900: ; %bb.0: 13381; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13382; GFX900-NEXT: ;;#ASMSTART 13383; GFX900-NEXT: ; def v[0:3] 13384; GFX900-NEXT: ;;#ASMEND 13385; GFX900-NEXT: s_mov_b32 s4, 0x7060302 13386; GFX900-NEXT: v_mov_b32_e32 v8, 0 13387; GFX900-NEXT: ;;#ASMSTART 13388; GFX900-NEXT: ; def v[4:7] 13389; GFX900-NEXT: ;;#ASMEND 13390; GFX900-NEXT: v_perm_b32 v0, v7, v3, s4 13391; GFX900-NEXT: global_store_dword v8, v0, s[16:17] 13392; GFX900-NEXT: s_waitcnt vmcnt(0) 13393; GFX900-NEXT: s_setpc_b64 s[30:31] 13394; 13395; GFX90A-LABEL: v_shuffle_v2i16_v8i16__7_15: 13396; GFX90A: ; 
%bb.0: 13397; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13398; GFX90A-NEXT: ;;#ASMSTART 13399; GFX90A-NEXT: ; def v[0:3] 13400; GFX90A-NEXT: ;;#ASMEND 13401; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 13402; GFX90A-NEXT: v_mov_b32_e32 v8, 0 13403; GFX90A-NEXT: ;;#ASMSTART 13404; GFX90A-NEXT: ; def v[4:7] 13405; GFX90A-NEXT: ;;#ASMEND 13406; GFX90A-NEXT: v_perm_b32 v0, v7, v3, s4 13407; GFX90A-NEXT: global_store_dword v8, v0, s[16:17] 13408; GFX90A-NEXT: s_waitcnt vmcnt(0) 13409; GFX90A-NEXT: s_setpc_b64 s[30:31] 13410; 13411; GFX940-LABEL: v_shuffle_v2i16_v8i16__7_15: 13412; GFX940: ; %bb.0: 13413; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13414; GFX940-NEXT: ;;#ASMSTART 13415; GFX940-NEXT: ; def v[0:3] 13416; GFX940-NEXT: ;;#ASMEND 13417; GFX940-NEXT: s_mov_b32 s2, 0x7060302 13418; GFX940-NEXT: v_mov_b32_e32 v8, 0 13419; GFX940-NEXT: ;;#ASMSTART 13420; GFX940-NEXT: ; def v[4:7] 13421; GFX940-NEXT: ;;#ASMEND 13422; GFX940-NEXT: s_nop 0 13423; GFX940-NEXT: v_perm_b32 v0, v7, v3, s2 13424; GFX940-NEXT: global_store_dword v8, v0, s[0:1] sc0 sc1 13425; GFX940-NEXT: s_waitcnt vmcnt(0) 13426; GFX940-NEXT: s_setpc_b64 s[30:31] 13427 %vec0 = call <8 x i16> asm "; def $0", "=v"() 13428 %vec1 = call <8 x i16> asm "; def $0", "=v"() 13429 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 7, i32 15> 13430 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 13431 ret void 13432} 13433 13434define void @v_shuffle_v2i16_v8i16__8_15(ptr addrspace(1) inreg %ptr) { 13435; GFX900-LABEL: v_shuffle_v2i16_v8i16__8_15: 13436; GFX900: ; %bb.0: 13437; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13438; GFX900-NEXT: ;;#ASMSTART 13439; GFX900-NEXT: ; def v[0:3] 13440; GFX900-NEXT: ;;#ASMEND 13441; GFX900-NEXT: s_mov_b32 s4, 0xffff 13442; GFX900-NEXT: v_mov_b32_e32 v4, 0 13443; GFX900-NEXT: v_bfi_b32 v0, s4, v0, v3 13444; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 13445; GFX900-NEXT: s_waitcnt vmcnt(0) 13446; GFX900-NEXT: s_setpc_b64 
s[30:31] 13447; 13448; GFX90A-LABEL: v_shuffle_v2i16_v8i16__8_15: 13449; GFX90A: ; %bb.0: 13450; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13451; GFX90A-NEXT: ;;#ASMSTART 13452; GFX90A-NEXT: ; def v[0:3] 13453; GFX90A-NEXT: ;;#ASMEND 13454; GFX90A-NEXT: s_mov_b32 s4, 0xffff 13455; GFX90A-NEXT: v_mov_b32_e32 v4, 0 13456; GFX90A-NEXT: v_bfi_b32 v0, s4, v0, v3 13457; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 13458; GFX90A-NEXT: s_waitcnt vmcnt(0) 13459; GFX90A-NEXT: s_setpc_b64 s[30:31] 13460; 13461; GFX940-LABEL: v_shuffle_v2i16_v8i16__8_15: 13462; GFX940: ; %bb.0: 13463; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13464; GFX940-NEXT: ;;#ASMSTART 13465; GFX940-NEXT: ; def v[0:3] 13466; GFX940-NEXT: ;;#ASMEND 13467; GFX940-NEXT: s_mov_b32 s2, 0xffff 13468; GFX940-NEXT: v_mov_b32_e32 v4, 0 13469; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v3 13470; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 13471; GFX940-NEXT: s_waitcnt vmcnt(0) 13472; GFX940-NEXT: s_setpc_b64 s[30:31] 13473 %vec0 = call <8 x i16> asm "; def $0", "=v"() 13474 %vec1 = call <8 x i16> asm "; def $0", "=v"() 13475 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 8, i32 15> 13476 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 13477 ret void 13478} 13479 13480define void @v_shuffle_v2i16_v8i16__9_15(ptr addrspace(1) inreg %ptr) { 13481; GFX900-LABEL: v_shuffle_v2i16_v8i16__9_15: 13482; GFX900: ; %bb.0: 13483; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13484; GFX900-NEXT: ;;#ASMSTART 13485; GFX900-NEXT: ; def v[0:3] 13486; GFX900-NEXT: ;;#ASMEND 13487; GFX900-NEXT: s_mov_b32 s4, 0x7060302 13488; GFX900-NEXT: v_mov_b32_e32 v4, 0 13489; GFX900-NEXT: v_perm_b32 v0, v3, v0, s4 13490; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 13491; GFX900-NEXT: s_waitcnt vmcnt(0) 13492; GFX900-NEXT: s_setpc_b64 s[30:31] 13493; 13494; GFX90A-LABEL: v_shuffle_v2i16_v8i16__9_15: 13495; GFX90A: ; %bb.0: 13496; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) 13497; GFX90A-NEXT: ;;#ASMSTART 13498; GFX90A-NEXT: ; def v[0:3] 13499; GFX90A-NEXT: ;;#ASMEND 13500; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 13501; GFX90A-NEXT: v_mov_b32_e32 v4, 0 13502; GFX90A-NEXT: v_perm_b32 v0, v3, v0, s4 13503; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 13504; GFX90A-NEXT: s_waitcnt vmcnt(0) 13505; GFX90A-NEXT: s_setpc_b64 s[30:31] 13506; 13507; GFX940-LABEL: v_shuffle_v2i16_v8i16__9_15: 13508; GFX940: ; %bb.0: 13509; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13510; GFX940-NEXT: ;;#ASMSTART 13511; GFX940-NEXT: ; def v[0:3] 13512; GFX940-NEXT: ;;#ASMEND 13513; GFX940-NEXT: s_mov_b32 s2, 0x7060302 13514; GFX940-NEXT: v_mov_b32_e32 v4, 0 13515; GFX940-NEXT: v_perm_b32 v0, v3, v0, s2 13516; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 13517; GFX940-NEXT: s_waitcnt vmcnt(0) 13518; GFX940-NEXT: s_setpc_b64 s[30:31] 13519 %vec0 = call <8 x i16> asm "; def $0", "=v"() 13520 %vec1 = call <8 x i16> asm "; def $0", "=v"() 13521 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 9, i32 15> 13522 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 13523 ret void 13524} 13525 13526define void @v_shuffle_v2i16_v8i16__10_15(ptr addrspace(1) inreg %ptr) { 13527; GFX900-LABEL: v_shuffle_v2i16_v8i16__10_15: 13528; GFX900: ; %bb.0: 13529; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13530; GFX900-NEXT: ;;#ASMSTART 13531; GFX900-NEXT: ; def v[0:3] 13532; GFX900-NEXT: ;;#ASMEND 13533; GFX900-NEXT: s_mov_b32 s4, 0xffff 13534; GFX900-NEXT: v_mov_b32_e32 v4, 0 13535; GFX900-NEXT: v_bfi_b32 v0, s4, v1, v3 13536; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 13537; GFX900-NEXT: s_waitcnt vmcnt(0) 13538; GFX900-NEXT: s_setpc_b64 s[30:31] 13539; 13540; GFX90A-LABEL: v_shuffle_v2i16_v8i16__10_15: 13541; GFX90A: ; %bb.0: 13542; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13543; GFX90A-NEXT: ;;#ASMSTART 13544; GFX90A-NEXT: ; def v[0:3] 13545; GFX90A-NEXT: ;;#ASMEND 13546; GFX90A-NEXT: s_mov_b32 
s4, 0xffff 13547; GFX90A-NEXT: v_mov_b32_e32 v4, 0 13548; GFX90A-NEXT: v_bfi_b32 v0, s4, v1, v3 13549; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 13550; GFX90A-NEXT: s_waitcnt vmcnt(0) 13551; GFX90A-NEXT: s_setpc_b64 s[30:31] 13552; 13553; GFX940-LABEL: v_shuffle_v2i16_v8i16__10_15: 13554; GFX940: ; %bb.0: 13555; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13556; GFX940-NEXT: ;;#ASMSTART 13557; GFX940-NEXT: ; def v[0:3] 13558; GFX940-NEXT: ;;#ASMEND 13559; GFX940-NEXT: s_mov_b32 s2, 0xffff 13560; GFX940-NEXT: v_mov_b32_e32 v4, 0 13561; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v3 13562; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 13563; GFX940-NEXT: s_waitcnt vmcnt(0) 13564; GFX940-NEXT: s_setpc_b64 s[30:31] 13565 %vec0 = call <8 x i16> asm "; def $0", "=v"() 13566 %vec1 = call <8 x i16> asm "; def $0", "=v"() 13567 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 10, i32 15> 13568 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 13569 ret void 13570} 13571 13572define void @v_shuffle_v2i16_v8i16__11_15(ptr addrspace(1) inreg %ptr) { 13573; GFX900-LABEL: v_shuffle_v2i16_v8i16__11_15: 13574; GFX900: ; %bb.0: 13575; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13576; GFX900-NEXT: ;;#ASMSTART 13577; GFX900-NEXT: ; def v[0:3] 13578; GFX900-NEXT: ;;#ASMEND 13579; GFX900-NEXT: s_mov_b32 s4, 0x7060302 13580; GFX900-NEXT: v_mov_b32_e32 v4, 0 13581; GFX900-NEXT: v_perm_b32 v0, v3, v1, s4 13582; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 13583; GFX900-NEXT: s_waitcnt vmcnt(0) 13584; GFX900-NEXT: s_setpc_b64 s[30:31] 13585; 13586; GFX90A-LABEL: v_shuffle_v2i16_v8i16__11_15: 13587; GFX90A: ; %bb.0: 13588; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13589; GFX90A-NEXT: ;;#ASMSTART 13590; GFX90A-NEXT: ; def v[0:3] 13591; GFX90A-NEXT: ;;#ASMEND 13592; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 13593; GFX90A-NEXT: v_mov_b32_e32 v4, 0 13594; GFX90A-NEXT: v_perm_b32 v0, v3, v1, s4 13595; GFX90A-NEXT: global_store_dword 
v4, v0, s[16:17] 13596; GFX90A-NEXT: s_waitcnt vmcnt(0) 13597; GFX90A-NEXT: s_setpc_b64 s[30:31] 13598; 13599; GFX940-LABEL: v_shuffle_v2i16_v8i16__11_15: 13600; GFX940: ; %bb.0: 13601; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13602; GFX940-NEXT: ;;#ASMSTART 13603; GFX940-NEXT: ; def v[0:3] 13604; GFX940-NEXT: ;;#ASMEND 13605; GFX940-NEXT: s_mov_b32 s2, 0x7060302 13606; GFX940-NEXT: v_mov_b32_e32 v4, 0 13607; GFX940-NEXT: v_perm_b32 v0, v3, v1, s2 13608; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 13609; GFX940-NEXT: s_waitcnt vmcnt(0) 13610; GFX940-NEXT: s_setpc_b64 s[30:31] 13611 %vec0 = call <8 x i16> asm "; def $0", "=v"() 13612 %vec1 = call <8 x i16> asm "; def $0", "=v"() 13613 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 11, i32 15> 13614 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 13615 ret void 13616} 13617 13618define void @v_shuffle_v2i16_v8i16__12_15(ptr addrspace(1) inreg %ptr) { 13619; GFX900-LABEL: v_shuffle_v2i16_v8i16__12_15: 13620; GFX900: ; %bb.0: 13621; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13622; GFX900-NEXT: ;;#ASMSTART 13623; GFX900-NEXT: ; def v[0:3] 13624; GFX900-NEXT: ;;#ASMEND 13625; GFX900-NEXT: s_mov_b32 s4, 0xffff 13626; GFX900-NEXT: v_mov_b32_e32 v4, 0 13627; GFX900-NEXT: v_bfi_b32 v0, s4, v2, v3 13628; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 13629; GFX900-NEXT: s_waitcnt vmcnt(0) 13630; GFX900-NEXT: s_setpc_b64 s[30:31] 13631; 13632; GFX90A-LABEL: v_shuffle_v2i16_v8i16__12_15: 13633; GFX90A: ; %bb.0: 13634; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13635; GFX90A-NEXT: ;;#ASMSTART 13636; GFX90A-NEXT: ; def v[0:3] 13637; GFX90A-NEXT: ;;#ASMEND 13638; GFX90A-NEXT: s_mov_b32 s4, 0xffff 13639; GFX90A-NEXT: v_mov_b32_e32 v4, 0 13640; GFX90A-NEXT: v_bfi_b32 v0, s4, v2, v3 13641; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 13642; GFX90A-NEXT: s_waitcnt vmcnt(0) 13643; GFX90A-NEXT: s_setpc_b64 s[30:31] 13644; 13645; GFX940-LABEL: 
v_shuffle_v2i16_v8i16__12_15: 13646; GFX940: ; %bb.0: 13647; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13648; GFX940-NEXT: ;;#ASMSTART 13649; GFX940-NEXT: ; def v[0:3] 13650; GFX940-NEXT: ;;#ASMEND 13651; GFX940-NEXT: s_mov_b32 s2, 0xffff 13652; GFX940-NEXT: v_mov_b32_e32 v4, 0 13653; GFX940-NEXT: v_bfi_b32 v0, s2, v2, v3 13654; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 13655; GFX940-NEXT: s_waitcnt vmcnt(0) 13656; GFX940-NEXT: s_setpc_b64 s[30:31] 13657 %vec0 = call <8 x i16> asm "; def $0", "=v"() 13658 %vec1 = call <8 x i16> asm "; def $0", "=v"() 13659 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 12, i32 15> 13660 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 13661 ret void 13662} 13663 13664define void @v_shuffle_v2i16_v8i16__13_15(ptr addrspace(1) inreg %ptr) { 13665; GFX900-LABEL: v_shuffle_v2i16_v8i16__13_15: 13666; GFX900: ; %bb.0: 13667; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13668; GFX900-NEXT: ;;#ASMSTART 13669; GFX900-NEXT: ; def v[0:3] 13670; GFX900-NEXT: ;;#ASMEND 13671; GFX900-NEXT: s_mov_b32 s4, 0x7060302 13672; GFX900-NEXT: v_mov_b32_e32 v4, 0 13673; GFX900-NEXT: v_perm_b32 v0, v3, v2, s4 13674; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 13675; GFX900-NEXT: s_waitcnt vmcnt(0) 13676; GFX900-NEXT: s_setpc_b64 s[30:31] 13677; 13678; GFX90A-LABEL: v_shuffle_v2i16_v8i16__13_15: 13679; GFX90A: ; %bb.0: 13680; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13681; GFX90A-NEXT: ;;#ASMSTART 13682; GFX90A-NEXT: ; def v[0:3] 13683; GFX90A-NEXT: ;;#ASMEND 13684; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 13685; GFX90A-NEXT: v_mov_b32_e32 v4, 0 13686; GFX90A-NEXT: v_perm_b32 v0, v3, v2, s4 13687; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 13688; GFX90A-NEXT: s_waitcnt vmcnt(0) 13689; GFX90A-NEXT: s_setpc_b64 s[30:31] 13690; 13691; GFX940-LABEL: v_shuffle_v2i16_v8i16__13_15: 13692; GFX940: ; %bb.0: 13693; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13694; GFX940-NEXT: 
;;#ASMSTART 13695; GFX940-NEXT: ; def v[0:3] 13696; GFX940-NEXT: ;;#ASMEND 13697; GFX940-NEXT: s_mov_b32 s2, 0x7060302 13698; GFX940-NEXT: v_mov_b32_e32 v4, 0 13699; GFX940-NEXT: v_perm_b32 v0, v3, v2, s2 13700; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 13701; GFX940-NEXT: s_waitcnt vmcnt(0) 13702; GFX940-NEXT: s_setpc_b64 s[30:31] 13703 %vec0 = call <8 x i16> asm "; def $0", "=v"() 13704 %vec1 = call <8 x i16> asm "; def $0", "=v"() 13705 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 13, i32 15> 13706 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 13707 ret void 13708} 13709 13710define void @v_shuffle_v2i16_v8i16__14_15(ptr addrspace(1) inreg %ptr) { 13711; GFX900-LABEL: v_shuffle_v2i16_v8i16__14_15: 13712; GFX900: ; %bb.0: 13713; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13714; GFX900-NEXT: v_mov_b32_e32 v4, 0 13715; GFX900-NEXT: ;;#ASMSTART 13716; GFX900-NEXT: ; def v[0:3] 13717; GFX900-NEXT: ;;#ASMEND 13718; GFX900-NEXT: global_store_dword v4, v3, s[16:17] 13719; GFX900-NEXT: s_waitcnt vmcnt(0) 13720; GFX900-NEXT: s_setpc_b64 s[30:31] 13721; 13722; GFX90A-LABEL: v_shuffle_v2i16_v8i16__14_15: 13723; GFX90A: ; %bb.0: 13724; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13725; GFX90A-NEXT: v_mov_b32_e32 v4, 0 13726; GFX90A-NEXT: ;;#ASMSTART 13727; GFX90A-NEXT: ; def v[0:3] 13728; GFX90A-NEXT: ;;#ASMEND 13729; GFX90A-NEXT: global_store_dword v4, v3, s[16:17] 13730; GFX90A-NEXT: s_waitcnt vmcnt(0) 13731; GFX90A-NEXT: s_setpc_b64 s[30:31] 13732; 13733; GFX940-LABEL: v_shuffle_v2i16_v8i16__14_15: 13734; GFX940: ; %bb.0: 13735; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13736; GFX940-NEXT: v_mov_b32_e32 v4, 0 13737; GFX940-NEXT: ;;#ASMSTART 13738; GFX940-NEXT: ; def v[0:3] 13739; GFX940-NEXT: ;;#ASMEND 13740; GFX940-NEXT: global_store_dword v4, v3, s[0:1] sc0 sc1 13741; GFX940-NEXT: s_waitcnt vmcnt(0) 13742; GFX940-NEXT: s_setpc_b64 s[30:31] 13743 %vec0 = call <8 x i16> asm "; def $0", "=v"() 
13744 %vec1 = call <8 x i16> asm "; def $0", "=v"() 13745 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 14, i32 15> 13746 store <2 x i16> %shuf, ptr addrspace(1) %ptr, align 4 13747 ret void 13748} 13749 13750define void @s_shuffle_v2i16_v8i16__u_u() { 13751; GFX9-LABEL: s_shuffle_v2i16_v8i16__u_u: 13752; GFX9: ; %bb.0: 13753; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13754; GFX9-NEXT: ;;#ASMSTART 13755; GFX9-NEXT: ; use s8 13756; GFX9-NEXT: ;;#ASMEND 13757; GFX9-NEXT: s_setpc_b64 s[30:31] 13758 %vec0 = call <8 x i16> asm "; def $0", "=s"() 13759 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> poison 13760 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 13761 ret void 13762} 13763 13764define void @s_shuffle_v2i16_v8i16__0_u() { 13765; GFX900-LABEL: s_shuffle_v2i16_v8i16__0_u: 13766; GFX900: ; %bb.0: 13767; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13768; GFX900-NEXT: ;;#ASMSTART 13769; GFX900-NEXT: ; def s[8:11] 13770; GFX900-NEXT: ;;#ASMEND 13771; GFX900-NEXT: ;;#ASMSTART 13772; GFX900-NEXT: ; use s8 13773; GFX900-NEXT: ;;#ASMEND 13774; GFX900-NEXT: s_setpc_b64 s[30:31] 13775; 13776; GFX90A-LABEL: s_shuffle_v2i16_v8i16__0_u: 13777; GFX90A: ; %bb.0: 13778; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13779; GFX90A-NEXT: ;;#ASMSTART 13780; GFX90A-NEXT: ; def s[8:11] 13781; GFX90A-NEXT: ;;#ASMEND 13782; GFX90A-NEXT: ;;#ASMSTART 13783; GFX90A-NEXT: ; use s8 13784; GFX90A-NEXT: ;;#ASMEND 13785; GFX90A-NEXT: s_setpc_b64 s[30:31] 13786; 13787; GFX940-LABEL: s_shuffle_v2i16_v8i16__0_u: 13788; GFX940: ; %bb.0: 13789; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13790; GFX940-NEXT: ;;#ASMSTART 13791; GFX940-NEXT: ; def s[8:11] 13792; GFX940-NEXT: ;;#ASMEND 13793; GFX940-NEXT: s_nop 0 13794; GFX940-NEXT: ;;#ASMSTART 13795; GFX940-NEXT: ; use s8 13796; GFX940-NEXT: ;;#ASMEND 13797; GFX940-NEXT: s_setpc_b64 s[30:31] 13798 %vec0 = call <8 x i16> asm "; def $0", "=s"() 13799 %shuf = 
shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 0, i32 poison> 13800 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 13801 ret void 13802} 13803 13804define void @s_shuffle_v2i16_v8i16__1_u() { 13805; GFX900-LABEL: s_shuffle_v2i16_v8i16__1_u: 13806; GFX900: ; %bb.0: 13807; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13808; GFX900-NEXT: ;;#ASMSTART 13809; GFX900-NEXT: ; def s[4:7] 13810; GFX900-NEXT: ;;#ASMEND 13811; GFX900-NEXT: s_lshr_b32 s8, s4, 16 13812; GFX900-NEXT: ;;#ASMSTART 13813; GFX900-NEXT: ; use s8 13814; GFX900-NEXT: ;;#ASMEND 13815; GFX900-NEXT: s_setpc_b64 s[30:31] 13816; 13817; GFX90A-LABEL: s_shuffle_v2i16_v8i16__1_u: 13818; GFX90A: ; %bb.0: 13819; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13820; GFX90A-NEXT: ;;#ASMSTART 13821; GFX90A-NEXT: ; def s[4:7] 13822; GFX90A-NEXT: ;;#ASMEND 13823; GFX90A-NEXT: s_lshr_b32 s8, s4, 16 13824; GFX90A-NEXT: ;;#ASMSTART 13825; GFX90A-NEXT: ; use s8 13826; GFX90A-NEXT: ;;#ASMEND 13827; GFX90A-NEXT: s_setpc_b64 s[30:31] 13828; 13829; GFX940-LABEL: s_shuffle_v2i16_v8i16__1_u: 13830; GFX940: ; %bb.0: 13831; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13832; GFX940-NEXT: ;;#ASMSTART 13833; GFX940-NEXT: ; def s[0:3] 13834; GFX940-NEXT: ;;#ASMEND 13835; GFX940-NEXT: s_lshr_b32 s8, s0, 16 13836; GFX940-NEXT: ;;#ASMSTART 13837; GFX940-NEXT: ; use s8 13838; GFX940-NEXT: ;;#ASMEND 13839; GFX940-NEXT: s_setpc_b64 s[30:31] 13840 %vec0 = call <8 x i16> asm "; def $0", "=s"() 13841 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 1, i32 poison> 13842 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 13843 ret void 13844} 13845 13846define void @s_shuffle_v2i16_v8i16__2_u() { 13847; GFX900-LABEL: s_shuffle_v2i16_v8i16__2_u: 13848; GFX900: ; %bb.0: 13849; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13850; GFX900-NEXT: ;;#ASMSTART 13851; GFX900-NEXT: ; def s[4:7] 13852; GFX900-NEXT: ;;#ASMEND 13853; GFX900-NEXT: s_mov_b32 s8, s5 
13854; GFX900-NEXT: ;;#ASMSTART 13855; GFX900-NEXT: ; use s8 13856; GFX900-NEXT: ;;#ASMEND 13857; GFX900-NEXT: s_setpc_b64 s[30:31] 13858; 13859; GFX90A-LABEL: s_shuffle_v2i16_v8i16__2_u: 13860; GFX90A: ; %bb.0: 13861; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13862; GFX90A-NEXT: ;;#ASMSTART 13863; GFX90A-NEXT: ; def s[4:7] 13864; GFX90A-NEXT: ;;#ASMEND 13865; GFX90A-NEXT: s_mov_b32 s8, s5 13866; GFX90A-NEXT: ;;#ASMSTART 13867; GFX90A-NEXT: ; use s8 13868; GFX90A-NEXT: ;;#ASMEND 13869; GFX90A-NEXT: s_setpc_b64 s[30:31] 13870; 13871; GFX940-LABEL: s_shuffle_v2i16_v8i16__2_u: 13872; GFX940: ; %bb.0: 13873; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13874; GFX940-NEXT: ;;#ASMSTART 13875; GFX940-NEXT: ; def s[0:3] 13876; GFX940-NEXT: ;;#ASMEND 13877; GFX940-NEXT: s_mov_b32 s8, s1 13878; GFX940-NEXT: ;;#ASMSTART 13879; GFX940-NEXT: ; use s8 13880; GFX940-NEXT: ;;#ASMEND 13881; GFX940-NEXT: s_setpc_b64 s[30:31] 13882 %vec0 = call <8 x i16> asm "; def $0", "=s"() 13883 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 2, i32 poison> 13884 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 13885 ret void 13886} 13887 ; SGPR case: lane 0 = element 3 of %vec0, lane 1 = poison. Expected code shifts the source dword right by 16 into s8. 13888define void @s_shuffle_v2i16_v8i16__3_u() { 13889; GFX900-LABEL: s_shuffle_v2i16_v8i16__3_u: 13890; GFX900: ; %bb.0: 13891; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13892; GFX900-NEXT: ;;#ASMSTART 13893; GFX900-NEXT: ; def s[4:7] 13894; GFX900-NEXT: ;;#ASMEND 13895; GFX900-NEXT: s_lshr_b32 s8, s5, 16 13896; GFX900-NEXT: ;;#ASMSTART 13897; GFX900-NEXT: ; use s8 13898; GFX900-NEXT: ;;#ASMEND 13899; GFX900-NEXT: s_setpc_b64 s[30:31] 13900; 13901; GFX90A-LABEL: s_shuffle_v2i16_v8i16__3_u: 13902; GFX90A: ; %bb.0: 13903; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13904; GFX90A-NEXT: ;;#ASMSTART 13905; GFX90A-NEXT: ; def s[4:7] 13906; GFX90A-NEXT: ;;#ASMEND 13907; GFX90A-NEXT: s_lshr_b32 s8, s5, 16 13908; GFX90A-NEXT: ;;#ASMSTART 13909; GFX90A-NEXT: ; use s8 13910; GFX90A-NEXT: ;;#ASMEND 
13911; GFX90A-NEXT: s_setpc_b64 s[30:31] 13912; 13913; GFX940-LABEL: s_shuffle_v2i16_v8i16__3_u: 13914; GFX940: ; %bb.0: 13915; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13916; GFX940-NEXT: ;;#ASMSTART 13917; GFX940-NEXT: ; def s[0:3] 13918; GFX940-NEXT: ;;#ASMEND 13919; GFX940-NEXT: s_lshr_b32 s8, s1, 16 13920; GFX940-NEXT: ;;#ASMSTART 13921; GFX940-NEXT: ; use s8 13922; GFX940-NEXT: ;;#ASMEND 13923; GFX940-NEXT: s_setpc_b64 s[30:31] 13924 %vec0 = call <8 x i16> asm "; def $0", "=s"() 13925 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 3, i32 poison> 13926 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 13927 ret void 13928} 13929 ; SGPR case: lane 0 = element 4 of %vec0, lane 1 = poison. Expected code is a single s_mov_b32 into s8. 13930define void @s_shuffle_v2i16_v8i16__4_u() { 13931; GFX900-LABEL: s_shuffle_v2i16_v8i16__4_u: 13932; GFX900: ; %bb.0: 13933; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13934; GFX900-NEXT: ;;#ASMSTART 13935; GFX900-NEXT: ; def s[4:7] 13936; GFX900-NEXT: ;;#ASMEND 13937; GFX900-NEXT: s_mov_b32 s8, s6 13938; GFX900-NEXT: ;;#ASMSTART 13939; GFX900-NEXT: ; use s8 13940; GFX900-NEXT: ;;#ASMEND 13941; GFX900-NEXT: s_setpc_b64 s[30:31] 13942; 13943; GFX90A-LABEL: s_shuffle_v2i16_v8i16__4_u: 13944; GFX90A: ; %bb.0: 13945; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13946; GFX90A-NEXT: ;;#ASMSTART 13947; GFX90A-NEXT: ; def s[4:7] 13948; GFX90A-NEXT: ;;#ASMEND 13949; GFX90A-NEXT: s_mov_b32 s8, s6 13950; GFX90A-NEXT: ;;#ASMSTART 13951; GFX90A-NEXT: ; use s8 13952; GFX90A-NEXT: ;;#ASMEND 13953; GFX90A-NEXT: s_setpc_b64 s[30:31] 13954; 13955; GFX940-LABEL: s_shuffle_v2i16_v8i16__4_u: 13956; GFX940: ; %bb.0: 13957; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13958; GFX940-NEXT: ;;#ASMSTART 13959; GFX940-NEXT: ; def s[0:3] 13960; GFX940-NEXT: ;;#ASMEND 13961; GFX940-NEXT: s_mov_b32 s8, s2 13962; GFX940-NEXT: ;;#ASMSTART 13963; GFX940-NEXT: ; use s8 13964; GFX940-NEXT: ;;#ASMEND 13965; GFX940-NEXT: s_setpc_b64 s[30:31] 13966 %vec0 = call <8 x i16> asm "; def $0", "=s"() 
13967 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 4, i32 poison> 13968 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 13969 ret void 13970} 13971 ; SGPR case: lane 0 = element 5 of %vec0, lane 1 = poison. Expected code shifts the source dword right by 16 into s8. 13972define void @s_shuffle_v2i16_v8i16__5_u() { 13973; GFX900-LABEL: s_shuffle_v2i16_v8i16__5_u: 13974; GFX900: ; %bb.0: 13975; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13976; GFX900-NEXT: ;;#ASMSTART 13977; GFX900-NEXT: ; def s[4:7] 13978; GFX900-NEXT: ;;#ASMEND 13979; GFX900-NEXT: s_lshr_b32 s8, s6, 16 13980; GFX900-NEXT: ;;#ASMSTART 13981; GFX900-NEXT: ; use s8 13982; GFX900-NEXT: ;;#ASMEND 13983; GFX900-NEXT: s_setpc_b64 s[30:31] 13984; 13985; GFX90A-LABEL: s_shuffle_v2i16_v8i16__5_u: 13986; GFX90A: ; %bb.0: 13987; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13988; GFX90A-NEXT: ;;#ASMSTART 13989; GFX90A-NEXT: ; def s[4:7] 13990; GFX90A-NEXT: ;;#ASMEND 13991; GFX90A-NEXT: s_lshr_b32 s8, s6, 16 13992; GFX90A-NEXT: ;;#ASMSTART 13993; GFX90A-NEXT: ; use s8 13994; GFX90A-NEXT: ;;#ASMEND 13995; GFX90A-NEXT: s_setpc_b64 s[30:31] 13996; 13997; GFX940-LABEL: s_shuffle_v2i16_v8i16__5_u: 13998; GFX940: ; %bb.0: 13999; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14000; GFX940-NEXT: ;;#ASMSTART 14001; GFX940-NEXT: ; def s[0:3] 14002; GFX940-NEXT: ;;#ASMEND 14003; GFX940-NEXT: s_lshr_b32 s8, s2, 16 14004; GFX940-NEXT: ;;#ASMSTART 14005; GFX940-NEXT: ; use s8 14006; GFX940-NEXT: ;;#ASMEND 14007; GFX940-NEXT: s_setpc_b64 s[30:31] 14008 %vec0 = call <8 x i16> asm "; def $0", "=s"() 14009 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 5, i32 poison> 14010 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 14011 ret void 14012} 14013 ; SGPR case: lane 0 = element 6 of %vec0, lane 1 = poison. Expected code is a single s_mov_b32 into s8. 14014define void @s_shuffle_v2i16_v8i16__6_u() { 14015; GFX900-LABEL: s_shuffle_v2i16_v8i16__6_u: 14016; GFX900: ; %bb.0: 14017; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14018; GFX900-NEXT: ;;#ASMSTART 14019; GFX900-NEXT: ; def s[4:7] 14020; GFX900-NEXT: ;;#ASMEND 14021; GFX900-NEXT: 
s_mov_b32 s8, s7 14022; GFX900-NEXT: ;;#ASMSTART 14023; GFX900-NEXT: ; use s8 14024; GFX900-NEXT: ;;#ASMEND 14025; GFX900-NEXT: s_setpc_b64 s[30:31] 14026; 14027; GFX90A-LABEL: s_shuffle_v2i16_v8i16__6_u: 14028; GFX90A: ; %bb.0: 14029; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14030; GFX90A-NEXT: ;;#ASMSTART 14031; GFX90A-NEXT: ; def s[4:7] 14032; GFX90A-NEXT: ;;#ASMEND 14033; GFX90A-NEXT: s_mov_b32 s8, s7 14034; GFX90A-NEXT: ;;#ASMSTART 14035; GFX90A-NEXT: ; use s8 14036; GFX90A-NEXT: ;;#ASMEND 14037; GFX90A-NEXT: s_setpc_b64 s[30:31] 14038; 14039; GFX940-LABEL: s_shuffle_v2i16_v8i16__6_u: 14040; GFX940: ; %bb.0: 14041; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14042; GFX940-NEXT: ;;#ASMSTART 14043; GFX940-NEXT: ; def s[0:3] 14044; GFX940-NEXT: ;;#ASMEND 14045; GFX940-NEXT: s_mov_b32 s8, s3 14046; GFX940-NEXT: ;;#ASMSTART 14047; GFX940-NEXT: ; use s8 14048; GFX940-NEXT: ;;#ASMEND 14049; GFX940-NEXT: s_setpc_b64 s[30:31] 14050 %vec0 = call <8 x i16> asm "; def $0", "=s"() 14051 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 6, i32 poison> 14052 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 14053 ret void 14054} 14055 ; SGPR case: lane 0 = element 7 of %vec0, lane 1 = poison. Expected code shifts the source dword right by 16 into s8. 14056define void @s_shuffle_v2i16_v8i16__7_u() { 14057; GFX900-LABEL: s_shuffle_v2i16_v8i16__7_u: 14058; GFX900: ; %bb.0: 14059; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14060; GFX900-NEXT: ;;#ASMSTART 14061; GFX900-NEXT: ; def s[4:7] 14062; GFX900-NEXT: ;;#ASMEND 14063; GFX900-NEXT: s_lshr_b32 s8, s7, 16 14064; GFX900-NEXT: ;;#ASMSTART 14065; GFX900-NEXT: ; use s8 14066; GFX900-NEXT: ;;#ASMEND 14067; GFX900-NEXT: s_setpc_b64 s[30:31] 14068; 14069; GFX90A-LABEL: s_shuffle_v2i16_v8i16__7_u: 14070; GFX90A: ; %bb.0: 14071; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14072; GFX90A-NEXT: ;;#ASMSTART 14073; GFX90A-NEXT: ; def s[4:7] 14074; GFX90A-NEXT: ;;#ASMEND 14075; GFX90A-NEXT: s_lshr_b32 s8, s7, 16 14076; GFX90A-NEXT: ;;#ASMSTART 14077; GFX90A-NEXT: ; use s8 14078; 
GFX90A-NEXT: ;;#ASMEND 14079; GFX90A-NEXT: s_setpc_b64 s[30:31] 14080; 14081; GFX940-LABEL: s_shuffle_v2i16_v8i16__7_u: 14082; GFX940: ; %bb.0: 14083; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14084; GFX940-NEXT: ;;#ASMSTART 14085; GFX940-NEXT: ; def s[0:3] 14086; GFX940-NEXT: ;;#ASMEND 14087; GFX940-NEXT: s_lshr_b32 s8, s3, 16 14088; GFX940-NEXT: ;;#ASMSTART 14089; GFX940-NEXT: ; use s8 14090; GFX940-NEXT: ;;#ASMEND 14091; GFX940-NEXT: s_setpc_b64 s[30:31] 14092 %vec0 = call <8 x i16> asm "; def $0", "=s"() 14093 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 7, i32 poison> 14094 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 14095 ret void 14096} 14097 ; SGPR case: mask index 8 refers to the second shuffle operand, which is poison in this function, so the expected code has no def and no copy before the use of s8. NOTE(review): the 9..15 variants pass %vec1 as the second operand instead; confirm poison is intended here. 14098define void @s_shuffle_v2i16_v8i16__8_u() { 14099; GFX9-LABEL: s_shuffle_v2i16_v8i16__8_u: 14100; GFX9: ; %bb.0: 14101; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14102; GFX9-NEXT: ;;#ASMSTART 14103; GFX9-NEXT: ; use s8 14104; GFX9-NEXT: ;;#ASMEND 14105; GFX9-NEXT: s_setpc_b64 s[30:31] 14106 %vec0 = call <8 x i16> asm "; def $0", "=s"() 14107 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 8, i32 poison> 14108 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 14109 ret void 14110} 14111 ; SGPR case: lane 0 = element 1 of %vec1 (mask index 9), lane 1 = poison. NOTE(review): the expected code shows only one def block although the IR has two identical asm defs; presumably they are merged, confirm. 14112define void @s_shuffle_v2i16_v8i16__9_u() { 14113; GFX900-LABEL: s_shuffle_v2i16_v8i16__9_u: 14114; GFX900: ; %bb.0: 14115; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14116; GFX900-NEXT: ;;#ASMSTART 14117; GFX900-NEXT: ; def s[4:7] 14118; GFX900-NEXT: ;;#ASMEND 14119; GFX900-NEXT: s_lshr_b32 s8, s4, 16 14120; GFX900-NEXT: ;;#ASMSTART 14121; GFX900-NEXT: ; use s8 14122; GFX900-NEXT: ;;#ASMEND 14123; GFX900-NEXT: s_setpc_b64 s[30:31] 14124; 14125; GFX90A-LABEL: s_shuffle_v2i16_v8i16__9_u: 14126; GFX90A: ; %bb.0: 14127; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14128; GFX90A-NEXT: ;;#ASMSTART 14129; GFX90A-NEXT: ; def s[4:7] 14130; GFX90A-NEXT: ;;#ASMEND 14131; GFX90A-NEXT: s_lshr_b32 s8, s4, 16 14132; 
GFX90A-NEXT: ;;#ASMSTART 14133; GFX90A-NEXT: ; use s8 14134; GFX90A-NEXT: ;;#ASMEND 14135; GFX90A-NEXT: s_setpc_b64 s[30:31] 14136; 14137; GFX940-LABEL: s_shuffle_v2i16_v8i16__9_u: 14138; GFX940: ; %bb.0: 14139; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14140; GFX940-NEXT: ;;#ASMSTART 14141; GFX940-NEXT: ; def s[0:3] 14142; GFX940-NEXT: ;;#ASMEND 14143; GFX940-NEXT: s_lshr_b32 s8, s0, 16 14144; GFX940-NEXT: ;;#ASMSTART 14145; GFX940-NEXT: ; use s8 14146; GFX940-NEXT: ;;#ASMEND 14147; GFX940-NEXT: s_setpc_b64 s[30:31] 14148 %vec0 = call <8 x i16> asm "; def $0", "=s"() 14149 %vec1 = call <8 x i16> asm "; def $0", "=s"() 14150 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 9, i32 poison> 14151 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 14152 ret void 14153} 14154 ; SGPR case: lane 0 = element 2 of %vec1 (mask index 10), lane 1 = poison. Expected code is a single s_mov_b32 into s8. 14155define void @s_shuffle_v2i16_v8i16__10_u() { 14156; GFX900-LABEL: s_shuffle_v2i16_v8i16__10_u: 14157; GFX900: ; %bb.0: 14158; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14159; GFX900-NEXT: ;;#ASMSTART 14160; GFX900-NEXT: ; def s[4:7] 14161; GFX900-NEXT: ;;#ASMEND 14162; GFX900-NEXT: s_mov_b32 s8, s5 14163; GFX900-NEXT: ;;#ASMSTART 14164; GFX900-NEXT: ; use s8 14165; GFX900-NEXT: ;;#ASMEND 14166; GFX900-NEXT: s_setpc_b64 s[30:31] 14167; 14168; GFX90A-LABEL: s_shuffle_v2i16_v8i16__10_u: 14169; GFX90A: ; %bb.0: 14170; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14171; GFX90A-NEXT: ;;#ASMSTART 14172; GFX90A-NEXT: ; def s[4:7] 14173; GFX90A-NEXT: ;;#ASMEND 14174; GFX90A-NEXT: s_mov_b32 s8, s5 14175; GFX90A-NEXT: ;;#ASMSTART 14176; GFX90A-NEXT: ; use s8 14177; GFX90A-NEXT: ;;#ASMEND 14178; GFX90A-NEXT: s_setpc_b64 s[30:31] 14179; 14180; GFX940-LABEL: s_shuffle_v2i16_v8i16__10_u: 14181; GFX940: ; %bb.0: 14182; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14183; GFX940-NEXT: ;;#ASMSTART 14184; GFX940-NEXT: ; def s[0:3] 14185; GFX940-NEXT: ;;#ASMEND 14186; GFX940-NEXT: s_mov_b32 s8, s1 14187; GFX940-NEXT: ;;#ASMSTART 14188; 
GFX940-NEXT: ; use s8 14189; GFX940-NEXT: ;;#ASMEND 14190; GFX940-NEXT: s_setpc_b64 s[30:31] 14191 %vec0 = call <8 x i16> asm "; def $0", "=s"() 14192 %vec1 = call <8 x i16> asm "; def $0", "=s"() 14193 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 10, i32 poison> 14194 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 14195 ret void 14196} 14197 ; SGPR case: lane 0 = element 3 of %vec1 (mask index 11), lane 1 = poison. Expected code shifts the source dword right by 16 into s8. 14198define void @s_shuffle_v2i16_v8i16__11_u() { 14199; GFX900-LABEL: s_shuffle_v2i16_v8i16__11_u: 14200; GFX900: ; %bb.0: 14201; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14202; GFX900-NEXT: ;;#ASMSTART 14203; GFX900-NEXT: ; def s[4:7] 14204; GFX900-NEXT: ;;#ASMEND 14205; GFX900-NEXT: s_lshr_b32 s8, s5, 16 14206; GFX900-NEXT: ;;#ASMSTART 14207; GFX900-NEXT: ; use s8 14208; GFX900-NEXT: ;;#ASMEND 14209; GFX900-NEXT: s_setpc_b64 s[30:31] 14210; 14211; GFX90A-LABEL: s_shuffle_v2i16_v8i16__11_u: 14212; GFX90A: ; %bb.0: 14213; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14214; GFX90A-NEXT: ;;#ASMSTART 14215; GFX90A-NEXT: ; def s[4:7] 14216; GFX90A-NEXT: ;;#ASMEND 14217; GFX90A-NEXT: s_lshr_b32 s8, s5, 16 14218; GFX90A-NEXT: ;;#ASMSTART 14219; GFX90A-NEXT: ; use s8 14220; GFX90A-NEXT: ;;#ASMEND 14221; GFX90A-NEXT: s_setpc_b64 s[30:31] 14222; 14223; GFX940-LABEL: s_shuffle_v2i16_v8i16__11_u: 14224; GFX940: ; %bb.0: 14225; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14226; GFX940-NEXT: ;;#ASMSTART 14227; GFX940-NEXT: ; def s[0:3] 14228; GFX940-NEXT: ;;#ASMEND 14229; GFX940-NEXT: s_lshr_b32 s8, s1, 16 14230; GFX940-NEXT: ;;#ASMSTART 14231; GFX940-NEXT: ; use s8 14232; GFX940-NEXT: ;;#ASMEND 14233; GFX940-NEXT: s_setpc_b64 s[30:31] 14234 %vec0 = call <8 x i16> asm "; def $0", "=s"() 14235 %vec1 = call <8 x i16> asm "; def $0", "=s"() 14236 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 11, i32 poison> 14237 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 14238 ret void 14239} 14240 ; SGPR case: lane 0 = element 4 of %vec1 (mask index 12), lane 1 = poison. Expected code is a single s_mov_b32 into s8. 14241define void @s_shuffle_v2i16_v8i16__12_u() { 
14242; GFX900-LABEL: s_shuffle_v2i16_v8i16__12_u: 14243; GFX900: ; %bb.0: 14244; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14245; GFX900-NEXT: ;;#ASMSTART 14246; GFX900-NEXT: ; def s[4:7] 14247; GFX900-NEXT: ;;#ASMEND 14248; GFX900-NEXT: s_mov_b32 s8, s6 14249; GFX900-NEXT: ;;#ASMSTART 14250; GFX900-NEXT: ; use s8 14251; GFX900-NEXT: ;;#ASMEND 14252; GFX900-NEXT: s_setpc_b64 s[30:31] 14253; 14254; GFX90A-LABEL: s_shuffle_v2i16_v8i16__12_u: 14255; GFX90A: ; %bb.0: 14256; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14257; GFX90A-NEXT: ;;#ASMSTART 14258; GFX90A-NEXT: ; def s[4:7] 14259; GFX90A-NEXT: ;;#ASMEND 14260; GFX90A-NEXT: s_mov_b32 s8, s6 14261; GFX90A-NEXT: ;;#ASMSTART 14262; GFX90A-NEXT: ; use s8 14263; GFX90A-NEXT: ;;#ASMEND 14264; GFX90A-NEXT: s_setpc_b64 s[30:31] 14265; 14266; GFX940-LABEL: s_shuffle_v2i16_v8i16__12_u: 14267; GFX940: ; %bb.0: 14268; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14269; GFX940-NEXT: ;;#ASMSTART 14270; GFX940-NEXT: ; def s[0:3] 14271; GFX940-NEXT: ;;#ASMEND 14272; GFX940-NEXT: s_mov_b32 s8, s2 14273; GFX940-NEXT: ;;#ASMSTART 14274; GFX940-NEXT: ; use s8 14275; GFX940-NEXT: ;;#ASMEND 14276; GFX940-NEXT: s_setpc_b64 s[30:31] 14277 %vec0 = call <8 x i16> asm "; def $0", "=s"() 14278 %vec1 = call <8 x i16> asm "; def $0", "=s"() 14279 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 12, i32 poison> 14280 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 14281 ret void 14282} 14283 ; SGPR case: lane 0 = element 5 of %vec1 (mask index 13), lane 1 = poison. Expected code shifts the source dword right by 16 into s8. 14284define void @s_shuffle_v2i16_v8i16__13_u() { 14285; GFX900-LABEL: s_shuffle_v2i16_v8i16__13_u: 14286; GFX900: ; %bb.0: 14287; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14288; GFX900-NEXT: ;;#ASMSTART 14289; GFX900-NEXT: ; def s[4:7] 14290; GFX900-NEXT: ;;#ASMEND 14291; GFX900-NEXT: s_lshr_b32 s8, s6, 16 14292; GFX900-NEXT: ;;#ASMSTART 14293; GFX900-NEXT: ; use s8 14294; GFX900-NEXT: ;;#ASMEND 14295; GFX900-NEXT: s_setpc_b64 s[30:31] 14296; 14297; GFX90A-LABEL: 
s_shuffle_v2i16_v8i16__13_u: 14298; GFX90A: ; %bb.0: 14299; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14300; GFX90A-NEXT: ;;#ASMSTART 14301; GFX90A-NEXT: ; def s[4:7] 14302; GFX90A-NEXT: ;;#ASMEND 14303; GFX90A-NEXT: s_lshr_b32 s8, s6, 16 14304; GFX90A-NEXT: ;;#ASMSTART 14305; GFX90A-NEXT: ; use s8 14306; GFX90A-NEXT: ;;#ASMEND 14307; GFX90A-NEXT: s_setpc_b64 s[30:31] 14308; 14309; GFX940-LABEL: s_shuffle_v2i16_v8i16__13_u: 14310; GFX940: ; %bb.0: 14311; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14312; GFX940-NEXT: ;;#ASMSTART 14313; GFX940-NEXT: ; def s[0:3] 14314; GFX940-NEXT: ;;#ASMEND 14315; GFX940-NEXT: s_lshr_b32 s8, s2, 16 14316; GFX940-NEXT: ;;#ASMSTART 14317; GFX940-NEXT: ; use s8 14318; GFX940-NEXT: ;;#ASMEND 14319; GFX940-NEXT: s_setpc_b64 s[30:31] 14320 %vec0 = call <8 x i16> asm "; def $0", "=s"() 14321 %vec1 = call <8 x i16> asm "; def $0", "=s"() 14322 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 13, i32 poison> 14323 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 14324 ret void 14325} 14326 ; SGPR case: lane 0 = element 6 of %vec1 (mask index 14), lane 1 = poison. Expected code is a single s_mov_b32 into s8. 14327define void @s_shuffle_v2i16_v8i16__14_u() { 14328; GFX900-LABEL: s_shuffle_v2i16_v8i16__14_u: 14329; GFX900: ; %bb.0: 14330; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14331; GFX900-NEXT: ;;#ASMSTART 14332; GFX900-NEXT: ; def s[4:7] 14333; GFX900-NEXT: ;;#ASMEND 14334; GFX900-NEXT: s_mov_b32 s8, s7 14335; GFX900-NEXT: ;;#ASMSTART 14336; GFX900-NEXT: ; use s8 14337; GFX900-NEXT: ;;#ASMEND 14338; GFX900-NEXT: s_setpc_b64 s[30:31] 14339; 14340; GFX90A-LABEL: s_shuffle_v2i16_v8i16__14_u: 14341; GFX90A: ; %bb.0: 14342; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14343; GFX90A-NEXT: ;;#ASMSTART 14344; GFX90A-NEXT: ; def s[4:7] 14345; GFX90A-NEXT: ;;#ASMEND 14346; GFX90A-NEXT: s_mov_b32 s8, s7 14347; GFX90A-NEXT: ;;#ASMSTART 14348; GFX90A-NEXT: ; use s8 14349; GFX90A-NEXT: ;;#ASMEND 14350; GFX90A-NEXT: s_setpc_b64 s[30:31] 14351; 14352; GFX940-LABEL: s_shuffle_v2i16_v8i16__14_u: 
14353; GFX940: ; %bb.0: 14354; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14355; GFX940-NEXT: ;;#ASMSTART 14356; GFX940-NEXT: ; def s[0:3] 14357; GFX940-NEXT: ;;#ASMEND 14358; GFX940-NEXT: s_mov_b32 s8, s3 14359; GFX940-NEXT: ;;#ASMSTART 14360; GFX940-NEXT: ; use s8 14361; GFX940-NEXT: ;;#ASMEND 14362; GFX940-NEXT: s_setpc_b64 s[30:31] 14363 %vec0 = call <8 x i16> asm "; def $0", "=s"() 14364 %vec1 = call <8 x i16> asm "; def $0", "=s"() 14365 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 14, i32 poison> 14366 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 14367 ret void 14368} 14369 ; SGPR case: lane 0 = element 7 of %vec1 (mask index 15), lane 1 = poison. Expected code shifts the source dword right by 16 into s8. 14370define void @s_shuffle_v2i16_v8i16__15_u() { 14371; GFX900-LABEL: s_shuffle_v2i16_v8i16__15_u: 14372; GFX900: ; %bb.0: 14373; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14374; GFX900-NEXT: ;;#ASMSTART 14375; GFX900-NEXT: ; def s[4:7] 14376; GFX900-NEXT: ;;#ASMEND 14377; GFX900-NEXT: s_lshr_b32 s8, s7, 16 14378; GFX900-NEXT: ;;#ASMSTART 14379; GFX900-NEXT: ; use s8 14380; GFX900-NEXT: ;;#ASMEND 14381; GFX900-NEXT: s_setpc_b64 s[30:31] 14382; 14383; GFX90A-LABEL: s_shuffle_v2i16_v8i16__15_u: 14384; GFX90A: ; %bb.0: 14385; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14386; GFX90A-NEXT: ;;#ASMSTART 14387; GFX90A-NEXT: ; def s[4:7] 14388; GFX90A-NEXT: ;;#ASMEND 14389; GFX90A-NEXT: s_lshr_b32 s8, s7, 16 14390; GFX90A-NEXT: ;;#ASMSTART 14391; GFX90A-NEXT: ; use s8 14392; GFX90A-NEXT: ;;#ASMEND 14393; GFX90A-NEXT: s_setpc_b64 s[30:31] 14394; 14395; GFX940-LABEL: s_shuffle_v2i16_v8i16__15_u: 14396; GFX940: ; %bb.0: 14397; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14398; GFX940-NEXT: ;;#ASMSTART 14399; GFX940-NEXT: ; def s[0:3] 14400; GFX940-NEXT: ;;#ASMEND 14401; GFX940-NEXT: s_lshr_b32 s8, s3, 16 14402; GFX940-NEXT: ;;#ASMSTART 14403; GFX940-NEXT: ; use s8 14404; GFX940-NEXT: ;;#ASMEND 14405; GFX940-NEXT: s_setpc_b64 s[30:31] 14406 %vec0 = call <8 x i16> asm "; def $0", "=s"() 14407 %vec1 = call <8 x i16> asm 
"; def $0", "=s"() 14408 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 15, i32 poison> 14409 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 14410 ret void 14411} 14412 ; SGPR case: lane 0 = element 7 of %vec1 (mask index 15), lane 1 = element 0 of %vec0. Expected code is s_lshr_b32 followed by s_pack_ll_b32_b16 into s8. 14413define void @s_shuffle_v2i16_v8i16__15_0() { 14414; GFX900-LABEL: s_shuffle_v2i16_v8i16__15_0: 14415; GFX900: ; %bb.0: 14416; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14417; GFX900-NEXT: ;;#ASMSTART 14418; GFX900-NEXT: ; def s[4:7] 14419; GFX900-NEXT: ;;#ASMEND 14420; GFX900-NEXT: ;;#ASMSTART 14421; GFX900-NEXT: ; def s[8:11] 14422; GFX900-NEXT: ;;#ASMEND 14423; GFX900-NEXT: s_lshr_b32 s5, s11, 16 14424; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4 14425; GFX900-NEXT: ;;#ASMSTART 14426; GFX900-NEXT: ; use s8 14427; GFX900-NEXT: ;;#ASMEND 14428; GFX900-NEXT: s_setpc_b64 s[30:31] 14429; 14430; GFX90A-LABEL: s_shuffle_v2i16_v8i16__15_0: 14431; GFX90A: ; %bb.0: 14432; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14433; GFX90A-NEXT: ;;#ASMSTART 14434; GFX90A-NEXT: ; def s[4:7] 14435; GFX90A-NEXT: ;;#ASMEND 14436; GFX90A-NEXT: ;;#ASMSTART 14437; GFX90A-NEXT: ; def s[8:11] 14438; GFX90A-NEXT: ;;#ASMEND 14439; GFX90A-NEXT: s_lshr_b32 s5, s11, 16 14440; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4 14441; GFX90A-NEXT: ;;#ASMSTART 14442; GFX90A-NEXT: ; use s8 14443; GFX90A-NEXT: ;;#ASMEND 14444; GFX90A-NEXT: s_setpc_b64 s[30:31] 14445; 14446; GFX940-LABEL: s_shuffle_v2i16_v8i16__15_0: 14447; GFX940: ; %bb.0: 14448; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14449; GFX940-NEXT: ;;#ASMSTART 14450; GFX940-NEXT: ; def s[0:3] 14451; GFX940-NEXT: ;;#ASMEND 14452; GFX940-NEXT: ;;#ASMSTART 14453; GFX940-NEXT: ; def s[4:7] 14454; GFX940-NEXT: ;;#ASMEND 14455; GFX940-NEXT: s_lshr_b32 s1, s7, 16 14456; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 14457; GFX940-NEXT: ;;#ASMSTART 14458; GFX940-NEXT: ; use s8 14459; GFX940-NEXT: ;;#ASMEND 14460; GFX940-NEXT: s_setpc_b64 s[30:31] 14461 %vec0 = call <8 x i16> asm "; def $0", "=s"() 14462 %vec1 = call <8 x i16> 
asm "; def $0", "=s"() 14463 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 15, i32 0> 14464 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 14465 ret void 14466} 14467 ; SGPR case: lane 0 = element 7 of %vec1 (mask index 15), lane 1 = element 1 of %vec0. Expected code is a single s_pack_hh_b32_b16 into s8. 14468define void @s_shuffle_v2i16_v8i16__15_1() { 14469; GFX900-LABEL: s_shuffle_v2i16_v8i16__15_1: 14470; GFX900: ; %bb.0: 14471; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14472; GFX900-NEXT: ;;#ASMSTART 14473; GFX900-NEXT: ; def s[8:11] 14474; GFX900-NEXT: ;;#ASMEND 14475; GFX900-NEXT: ;;#ASMSTART 14476; GFX900-NEXT: ; def s[4:7] 14477; GFX900-NEXT: ;;#ASMEND 14478; GFX900-NEXT: s_pack_hh_b32_b16 s8, s11, s4 14479; GFX900-NEXT: ;;#ASMSTART 14480; GFX900-NEXT: ; use s8 14481; GFX900-NEXT: ;;#ASMEND 14482; GFX900-NEXT: s_setpc_b64 s[30:31] 14483; 14484; GFX90A-LABEL: s_shuffle_v2i16_v8i16__15_1: 14485; GFX90A: ; %bb.0: 14486; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14487; GFX90A-NEXT: ;;#ASMSTART 14488; GFX90A-NEXT: ; def s[8:11] 14489; GFX90A-NEXT: ;;#ASMEND 14490; GFX90A-NEXT: ;;#ASMSTART 14491; GFX90A-NEXT: ; def s[4:7] 14492; GFX90A-NEXT: ;;#ASMEND 14493; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s11, s4 14494; GFX90A-NEXT: ;;#ASMSTART 14495; GFX90A-NEXT: ; use s8 14496; GFX90A-NEXT: ;;#ASMEND 14497; GFX90A-NEXT: s_setpc_b64 s[30:31] 14498; 14499; GFX940-LABEL: s_shuffle_v2i16_v8i16__15_1: 14500; GFX940: ; %bb.0: 14501; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14502; GFX940-NEXT: ;;#ASMSTART 14503; GFX940-NEXT: ; def s[0:3] 14504; GFX940-NEXT: ;;#ASMEND 14505; GFX940-NEXT: ;;#ASMSTART 14506; GFX940-NEXT: ; def s[4:7] 14507; GFX940-NEXT: ;;#ASMEND 14508; GFX940-NEXT: s_pack_hh_b32_b16 s8, s7, s0 14509; GFX940-NEXT: ;;#ASMSTART 14510; GFX940-NEXT: ; use s8 14511; GFX940-NEXT: ;;#ASMEND 14512; GFX940-NEXT: s_setpc_b64 s[30:31] 14513 %vec0 = call <8 x i16> asm "; def $0", "=s"() 14514 %vec1 = call <8 x i16> asm "; def $0", "=s"() 14515 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 15, i32 1> 14516 call void 
asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 14517 ret void 14518} 14519 ; SGPR case: lane 0 = element 7 of %vec1 (mask index 15), lane 1 = element 2 of %vec0. Expected code is s_lshr_b32 followed by s_pack_ll_b32_b16 into s8. 14520define void @s_shuffle_v2i16_v8i16__15_2() { 14521; GFX900-LABEL: s_shuffle_v2i16_v8i16__15_2: 14522; GFX900: ; %bb.0: 14523; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14524; GFX900-NEXT: ;;#ASMSTART 14525; GFX900-NEXT: ; def s[4:7] 14526; GFX900-NEXT: ;;#ASMEND 14527; GFX900-NEXT: ;;#ASMSTART 14528; GFX900-NEXT: ; def s[8:11] 14529; GFX900-NEXT: ;;#ASMEND 14530; GFX900-NEXT: s_lshr_b32 s4, s11, 16 14531; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s5 14532; GFX900-NEXT: ;;#ASMSTART 14533; GFX900-NEXT: ; use s8 14534; GFX900-NEXT: ;;#ASMEND 14535; GFX900-NEXT: s_setpc_b64 s[30:31] 14536; 14537; GFX90A-LABEL: s_shuffle_v2i16_v8i16__15_2: 14538; GFX90A: ; %bb.0: 14539; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14540; GFX90A-NEXT: ;;#ASMSTART 14541; GFX90A-NEXT: ; def s[4:7] 14542; GFX90A-NEXT: ;;#ASMEND 14543; GFX90A-NEXT: ;;#ASMSTART 14544; GFX90A-NEXT: ; def s[8:11] 14545; GFX90A-NEXT: ;;#ASMEND 14546; GFX90A-NEXT: s_lshr_b32 s4, s11, 16 14547; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s5 14548; GFX90A-NEXT: ;;#ASMSTART 14549; GFX90A-NEXT: ; use s8 14550; GFX90A-NEXT: ;;#ASMEND 14551; GFX90A-NEXT: s_setpc_b64 s[30:31] 14552; 14553; GFX940-LABEL: s_shuffle_v2i16_v8i16__15_2: 14554; GFX940: ; %bb.0: 14555; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14556; GFX940-NEXT: ;;#ASMSTART 14557; GFX940-NEXT: ; def s[0:3] 14558; GFX940-NEXT: ;;#ASMEND 14559; GFX940-NEXT: ;;#ASMSTART 14560; GFX940-NEXT: ; def s[4:7] 14561; GFX940-NEXT: ;;#ASMEND 14562; GFX940-NEXT: s_lshr_b32 s0, s7, 16 14563; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 14564; GFX940-NEXT: ;;#ASMSTART 14565; GFX940-NEXT: ; use s8 14566; GFX940-NEXT: ;;#ASMEND 14567; GFX940-NEXT: s_setpc_b64 s[30:31] 14568 %vec0 = call <8 x i16> asm "; def $0", "=s"() 14569 %vec1 = call <8 x i16> asm "; def $0", "=s"() 14570 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 15, i32 2> 14571 call void asm 
sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 14572 ret void 14573} 14574 ; SGPR case: lane 0 = element 7 of %vec1 (mask index 15), lane 1 = element 3 of %vec0. Expected code is a single s_pack_hh_b32_b16 into s8. 14575define void @s_shuffle_v2i16_v8i16__15_3() { 14576; GFX900-LABEL: s_shuffle_v2i16_v8i16__15_3: 14577; GFX900: ; %bb.0: 14578; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14579; GFX900-NEXT: ;;#ASMSTART 14580; GFX900-NEXT: ; def s[8:11] 14581; GFX900-NEXT: ;;#ASMEND 14582; GFX900-NEXT: ;;#ASMSTART 14583; GFX900-NEXT: ; def s[4:7] 14584; GFX900-NEXT: ;;#ASMEND 14585; GFX900-NEXT: s_pack_hh_b32_b16 s8, s11, s5 14586; GFX900-NEXT: ;;#ASMSTART 14587; GFX900-NEXT: ; use s8 14588; GFX900-NEXT: ;;#ASMEND 14589; GFX900-NEXT: s_setpc_b64 s[30:31] 14590; 14591; GFX90A-LABEL: s_shuffle_v2i16_v8i16__15_3: 14592; GFX90A: ; %bb.0: 14593; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14594; GFX90A-NEXT: ;;#ASMSTART 14595; GFX90A-NEXT: ; def s[8:11] 14596; GFX90A-NEXT: ;;#ASMEND 14597; GFX90A-NEXT: ;;#ASMSTART 14598; GFX90A-NEXT: ; def s[4:7] 14599; GFX90A-NEXT: ;;#ASMEND 14600; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s11, s5 14601; GFX90A-NEXT: ;;#ASMSTART 14602; GFX90A-NEXT: ; use s8 14603; GFX90A-NEXT: ;;#ASMEND 14604; GFX90A-NEXT: s_setpc_b64 s[30:31] 14605; 14606; GFX940-LABEL: s_shuffle_v2i16_v8i16__15_3: 14607; GFX940: ; %bb.0: 14608; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14609; GFX940-NEXT: ;;#ASMSTART 14610; GFX940-NEXT: ; def s[0:3] 14611; GFX940-NEXT: ;;#ASMEND 14612; GFX940-NEXT: ;;#ASMSTART 14613; GFX940-NEXT: ; def s[4:7] 14614; GFX940-NEXT: ;;#ASMEND 14615; GFX940-NEXT: s_pack_hh_b32_b16 s8, s7, s1 14616; GFX940-NEXT: ;;#ASMSTART 14617; GFX940-NEXT: ; use s8 14618; GFX940-NEXT: ;;#ASMEND 14619; GFX940-NEXT: s_setpc_b64 s[30:31] 14620 %vec0 = call <8 x i16> asm "; def $0", "=s"() 14621 %vec1 = call <8 x i16> asm "; def $0", "=s"() 14622 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 15, i32 3> 14623 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 14624 ret void 14625} 14626 ; SGPR case: lane 0 = element 7 of %vec1 (mask index 15), lane 1 = element 4 of %vec0. Expected code is s_lshr_b32 followed by s_pack_ll_b32_b16 into s8. 14627define void @s_shuffle_v2i16_v8i16__15_4() { 
14628; GFX900-LABEL: s_shuffle_v2i16_v8i16__15_4: 14629; GFX900: ; %bb.0: 14630; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14631; GFX900-NEXT: ;;#ASMSTART 14632; GFX900-NEXT: ; def s[4:7] 14633; GFX900-NEXT: ;;#ASMEND 14634; GFX900-NEXT: ;;#ASMSTART 14635; GFX900-NEXT: ; def s[8:11] 14636; GFX900-NEXT: ;;#ASMEND 14637; GFX900-NEXT: s_lshr_b32 s4, s11, 16 14638; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s6 14639; GFX900-NEXT: ;;#ASMSTART 14640; GFX900-NEXT: ; use s8 14641; GFX900-NEXT: ;;#ASMEND 14642; GFX900-NEXT: s_setpc_b64 s[30:31] 14643; 14644; GFX90A-LABEL: s_shuffle_v2i16_v8i16__15_4: 14645; GFX90A: ; %bb.0: 14646; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14647; GFX90A-NEXT: ;;#ASMSTART 14648; GFX90A-NEXT: ; def s[4:7] 14649; GFX90A-NEXT: ;;#ASMEND 14650; GFX90A-NEXT: ;;#ASMSTART 14651; GFX90A-NEXT: ; def s[8:11] 14652; GFX90A-NEXT: ;;#ASMEND 14653; GFX90A-NEXT: s_lshr_b32 s4, s11, 16 14654; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s6 14655; GFX90A-NEXT: ;;#ASMSTART 14656; GFX90A-NEXT: ; use s8 14657; GFX90A-NEXT: ;;#ASMEND 14658; GFX90A-NEXT: s_setpc_b64 s[30:31] 14659; 14660; GFX940-LABEL: s_shuffle_v2i16_v8i16__15_4: 14661; GFX940: ; %bb.0: 14662; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14663; GFX940-NEXT: ;;#ASMSTART 14664; GFX940-NEXT: ; def s[0:3] 14665; GFX940-NEXT: ;;#ASMEND 14666; GFX940-NEXT: ;;#ASMSTART 14667; GFX940-NEXT: ; def s[4:7] 14668; GFX940-NEXT: ;;#ASMEND 14669; GFX940-NEXT: s_lshr_b32 s0, s7, 16 14670; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s2 14671; GFX940-NEXT: ;;#ASMSTART 14672; GFX940-NEXT: ; use s8 14673; GFX940-NEXT: ;;#ASMEND 14674; GFX940-NEXT: s_setpc_b64 s[30:31] 14675 %vec0 = call <8 x i16> asm "; def $0", "=s"() 14676 %vec1 = call <8 x i16> asm "; def $0", "=s"() 14677 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 15, i32 4> 14678 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 14679 ret void 14680} 14681 ; SGPR case: lane 0 = element 7 of %vec1 (mask index 15), lane 1 = element 5 of %vec0. Expected code is a single s_pack_hh_b32_b16 into s8. 14682define void @s_shuffle_v2i16_v8i16__15_5() { 
14683; GFX900-LABEL: s_shuffle_v2i16_v8i16__15_5: 14684; GFX900: ; %bb.0: 14685; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14686; GFX900-NEXT: ;;#ASMSTART 14687; GFX900-NEXT: ; def s[8:11] 14688; GFX900-NEXT: ;;#ASMEND 14689; GFX900-NEXT: ;;#ASMSTART 14690; GFX900-NEXT: ; def s[4:7] 14691; GFX900-NEXT: ;;#ASMEND 14692; GFX900-NEXT: s_pack_hh_b32_b16 s8, s11, s6 14693; GFX900-NEXT: ;;#ASMSTART 14694; GFX900-NEXT: ; use s8 14695; GFX900-NEXT: ;;#ASMEND 14696; GFX900-NEXT: s_setpc_b64 s[30:31] 14697; 14698; GFX90A-LABEL: s_shuffle_v2i16_v8i16__15_5: 14699; GFX90A: ; %bb.0: 14700; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14701; GFX90A-NEXT: ;;#ASMSTART 14702; GFX90A-NEXT: ; def s[8:11] 14703; GFX90A-NEXT: ;;#ASMEND 14704; GFX90A-NEXT: ;;#ASMSTART 14705; GFX90A-NEXT: ; def s[4:7] 14706; GFX90A-NEXT: ;;#ASMEND 14707; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s11, s6 14708; GFX90A-NEXT: ;;#ASMSTART 14709; GFX90A-NEXT: ; use s8 14710; GFX90A-NEXT: ;;#ASMEND 14711; GFX90A-NEXT: s_setpc_b64 s[30:31] 14712; 14713; GFX940-LABEL: s_shuffle_v2i16_v8i16__15_5: 14714; GFX940: ; %bb.0: 14715; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14716; GFX940-NEXT: ;;#ASMSTART 14717; GFX940-NEXT: ; def s[0:3] 14718; GFX940-NEXT: ;;#ASMEND 14719; GFX940-NEXT: ;;#ASMSTART 14720; GFX940-NEXT: ; def s[4:7] 14721; GFX940-NEXT: ;;#ASMEND 14722; GFX940-NEXT: s_pack_hh_b32_b16 s8, s7, s2 14723; GFX940-NEXT: ;;#ASMSTART 14724; GFX940-NEXT: ; use s8 14725; GFX940-NEXT: ;;#ASMEND 14726; GFX940-NEXT: s_setpc_b64 s[30:31] 14727 %vec0 = call <8 x i16> asm "; def $0", "=s"() 14728 %vec1 = call <8 x i16> asm "; def $0", "=s"() 14729 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 15, i32 5> 14730 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 14731 ret void 14732} 14733 ; SGPR case: lane 0 = element 7 of %vec1 (mask index 15), lane 1 = element 6 of %vec0. Expected code is s_lshr_b32 followed by s_pack_ll_b32_b16 into s8. 14734define void @s_shuffle_v2i16_v8i16__15_6() { 14735; GFX900-LABEL: s_shuffle_v2i16_v8i16__15_6: 14736; GFX900: ; %bb.0: 14737; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) 14738; GFX900-NEXT: ;;#ASMSTART 14739; GFX900-NEXT: ; def s[4:7] 14740; GFX900-NEXT: ;;#ASMEND 14741; GFX900-NEXT: ;;#ASMSTART 14742; GFX900-NEXT: ; def s[8:11] 14743; GFX900-NEXT: ;;#ASMEND 14744; GFX900-NEXT: s_lshr_b32 s4, s11, 16 14745; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s7 14746; GFX900-NEXT: ;;#ASMSTART 14747; GFX900-NEXT: ; use s8 14748; GFX900-NEXT: ;;#ASMEND 14749; GFX900-NEXT: s_setpc_b64 s[30:31] 14750; 14751; GFX90A-LABEL: s_shuffle_v2i16_v8i16__15_6: 14752; GFX90A: ; %bb.0: 14753; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14754; GFX90A-NEXT: ;;#ASMSTART 14755; GFX90A-NEXT: ; def s[4:7] 14756; GFX90A-NEXT: ;;#ASMEND 14757; GFX90A-NEXT: ;;#ASMSTART 14758; GFX90A-NEXT: ; def s[8:11] 14759; GFX90A-NEXT: ;;#ASMEND 14760; GFX90A-NEXT: s_lshr_b32 s4, s11, 16 14761; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s7 14762; GFX90A-NEXT: ;;#ASMSTART 14763; GFX90A-NEXT: ; use s8 14764; GFX90A-NEXT: ;;#ASMEND 14765; GFX90A-NEXT: s_setpc_b64 s[30:31] 14766; 14767; GFX940-LABEL: s_shuffle_v2i16_v8i16__15_6: 14768; GFX940: ; %bb.0: 14769; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14770; GFX940-NEXT: ;;#ASMSTART 14771; GFX940-NEXT: ; def s[0:3] 14772; GFX940-NEXT: ;;#ASMEND 14773; GFX940-NEXT: ;;#ASMSTART 14774; GFX940-NEXT: ; def s[4:7] 14775; GFX940-NEXT: ;;#ASMEND 14776; GFX940-NEXT: s_lshr_b32 s0, s7, 16 14777; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3 14778; GFX940-NEXT: ;;#ASMSTART 14779; GFX940-NEXT: ; use s8 14780; GFX940-NEXT: ;;#ASMEND 14781; GFX940-NEXT: s_setpc_b64 s[30:31] 14782 %vec0 = call <8 x i16> asm "; def $0", "=s"() 14783 %vec1 = call <8 x i16> asm "; def $0", "=s"() 14784 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 15, i32 6> 14785 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 14786 ret void 14787} 14788 ; SGPR case: lane 0 = element 7 of %vec1 (mask index 15), lane 1 = element 7 of %vec0. Expected code is a single s_pack_hh_b32_b16 into s8. 14789define void @s_shuffle_v2i16_v8i16__15_7() { 14790; GFX900-LABEL: s_shuffle_v2i16_v8i16__15_7: 14791; GFX900: ; %bb.0: 14792; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) 14793; GFX900-NEXT: ;;#ASMSTART 14794; GFX900-NEXT: ; def s[8:11] 14795; GFX900-NEXT: ;;#ASMEND 14796; GFX900-NEXT: ;;#ASMSTART 14797; GFX900-NEXT: ; def s[4:7] 14798; GFX900-NEXT: ;;#ASMEND 14799; GFX900-NEXT: s_pack_hh_b32_b16 s8, s11, s7 14800; GFX900-NEXT: ;;#ASMSTART 14801; GFX900-NEXT: ; use s8 14802; GFX900-NEXT: ;;#ASMEND 14803; GFX900-NEXT: s_setpc_b64 s[30:31] 14804; 14805; GFX90A-LABEL: s_shuffle_v2i16_v8i16__15_7: 14806; GFX90A: ; %bb.0: 14807; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14808; GFX90A-NEXT: ;;#ASMSTART 14809; GFX90A-NEXT: ; def s[8:11] 14810; GFX90A-NEXT: ;;#ASMEND 14811; GFX90A-NEXT: ;;#ASMSTART 14812; GFX90A-NEXT: ; def s[4:7] 14813; GFX90A-NEXT: ;;#ASMEND 14814; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s11, s7 14815; GFX90A-NEXT: ;;#ASMSTART 14816; GFX90A-NEXT: ; use s8 14817; GFX90A-NEXT: ;;#ASMEND 14818; GFX90A-NEXT: s_setpc_b64 s[30:31] 14819; 14820; GFX940-LABEL: s_shuffle_v2i16_v8i16__15_7: 14821; GFX940: ; %bb.0: 14822; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14823; GFX940-NEXT: ;;#ASMSTART 14824; GFX940-NEXT: ; def s[0:3] 14825; GFX940-NEXT: ;;#ASMEND 14826; GFX940-NEXT: ;;#ASMSTART 14827; GFX940-NEXT: ; def s[4:7] 14828; GFX940-NEXT: ;;#ASMEND 14829; GFX940-NEXT: s_pack_hh_b32_b16 s8, s7, s3 14830; GFX940-NEXT: ;;#ASMSTART 14831; GFX940-NEXT: ; use s8 14832; GFX940-NEXT: ;;#ASMEND 14833; GFX940-NEXT: s_setpc_b64 s[30:31] 14834 %vec0 = call <8 x i16> asm "; def $0", "=s"() 14835 %vec1 = call <8 x i16> asm "; def $0", "=s"() 14836 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 15, i32 7> 14837 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 14838 ret void 14839} 14840 ; SGPR case: both lanes come from %vec1 (mask indices 15 and 8, i.e. its elements 7 and 0). NOTE(review): the expected code shows only one def block although the IR has two identical asm defs; presumably they are merged, confirm. 14841define void @s_shuffle_v2i16_v8i16__15_8() { 14842; GFX900-LABEL: s_shuffle_v2i16_v8i16__15_8: 14843; GFX900: ; %bb.0: 14844; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14845; GFX900-NEXT: ;;#ASMSTART 14846; GFX900-NEXT: ; def s[4:7] 14847; GFX900-NEXT: ;;#ASMEND 14848; GFX900-NEXT: 
s_lshr_b32 s5, s7, 16 14849; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4 14850; GFX900-NEXT: ;;#ASMSTART 14851; GFX900-NEXT: ; use s8 14852; GFX900-NEXT: ;;#ASMEND 14853; GFX900-NEXT: s_setpc_b64 s[30:31] 14854; 14855; GFX90A-LABEL: s_shuffle_v2i16_v8i16__15_8: 14856; GFX90A: ; %bb.0: 14857; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14858; GFX90A-NEXT: ;;#ASMSTART 14859; GFX90A-NEXT: ; def s[4:7] 14860; GFX90A-NEXT: ;;#ASMEND 14861; GFX90A-NEXT: s_lshr_b32 s5, s7, 16 14862; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4 14863; GFX90A-NEXT: ;;#ASMSTART 14864; GFX90A-NEXT: ; use s8 14865; GFX90A-NEXT: ;;#ASMEND 14866; GFX90A-NEXT: s_setpc_b64 s[30:31] 14867; 14868; GFX940-LABEL: s_shuffle_v2i16_v8i16__15_8: 14869; GFX940: ; %bb.0: 14870; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14871; GFX940-NEXT: ;;#ASMSTART 14872; GFX940-NEXT: ; def s[0:3] 14873; GFX940-NEXT: ;;#ASMEND 14874; GFX940-NEXT: s_lshr_b32 s1, s3, 16 14875; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 14876; GFX940-NEXT: ;;#ASMSTART 14877; GFX940-NEXT: ; use s8 14878; GFX940-NEXT: ;;#ASMEND 14879; GFX940-NEXT: s_setpc_b64 s[30:31] 14880 %vec0 = call <8 x i16> asm "; def $0", "=s"() 14881 %vec1 = call <8 x i16> asm "; def $0", "=s"() 14882 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 15, i32 8> 14883 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 14884 ret void 14885} 14886 ; SGPR case: both lanes come from %vec1 (mask indices 15 and 9, i.e. its elements 7 and 1). Expected code is a single s_pack_hh_b32_b16 into s8. 14887define void @s_shuffle_v2i16_v8i16__15_9() { 14888; GFX900-LABEL: s_shuffle_v2i16_v8i16__15_9: 14889; GFX900: ; %bb.0: 14890; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14891; GFX900-NEXT: ;;#ASMSTART 14892; GFX900-NEXT: ; def s[4:7] 14893; GFX900-NEXT: ;;#ASMEND 14894; GFX900-NEXT: s_pack_hh_b32_b16 s8, s7, s4 14895; GFX900-NEXT: ;;#ASMSTART 14896; GFX900-NEXT: ; use s8 14897; GFX900-NEXT: ;;#ASMEND 14898; GFX900-NEXT: s_setpc_b64 s[30:31] 14899; 14900; GFX90A-LABEL: s_shuffle_v2i16_v8i16__15_9: 14901; GFX90A: ; %bb.0: 14902; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) 14903; GFX90A-NEXT: ;;#ASMSTART 14904; GFX90A-NEXT: ; def s[4:7] 14905; GFX90A-NEXT: ;;#ASMEND 14906; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s7, s4 14907; GFX90A-NEXT: ;;#ASMSTART 14908; GFX90A-NEXT: ; use s8 14909; GFX90A-NEXT: ;;#ASMEND 14910; GFX90A-NEXT: s_setpc_b64 s[30:31] 14911; 14912; GFX940-LABEL: s_shuffle_v2i16_v8i16__15_9: 14913; GFX940: ; %bb.0: 14914; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14915; GFX940-NEXT: ;;#ASMSTART 14916; GFX940-NEXT: ; def s[0:3] 14917; GFX940-NEXT: ;;#ASMEND 14918; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s0 14919; GFX940-NEXT: ;;#ASMSTART 14920; GFX940-NEXT: ; use s8 14921; GFX940-NEXT: ;;#ASMEND 14922; GFX940-NEXT: s_setpc_b64 s[30:31] 14923 %vec0 = call <8 x i16> asm "; def $0", "=s"() 14924 %vec1 = call <8 x i16> asm "; def $0", "=s"() 14925 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 15, i32 9> 14926 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 14927 ret void 14928} 14929 14930define void @s_shuffle_v2i16_v8i16__15_10() { 14931; GFX900-LABEL: s_shuffle_v2i16_v8i16__15_10: 14932; GFX900: ; %bb.0: 14933; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14934; GFX900-NEXT: ;;#ASMSTART 14935; GFX900-NEXT: ; def s[4:7] 14936; GFX900-NEXT: ;;#ASMEND 14937; GFX900-NEXT: s_lshr_b32 s4, s7, 16 14938; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s5 14939; GFX900-NEXT: ;;#ASMSTART 14940; GFX900-NEXT: ; use s8 14941; GFX900-NEXT: ;;#ASMEND 14942; GFX900-NEXT: s_setpc_b64 s[30:31] 14943; 14944; GFX90A-LABEL: s_shuffle_v2i16_v8i16__15_10: 14945; GFX90A: ; %bb.0: 14946; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14947; GFX90A-NEXT: ;;#ASMSTART 14948; GFX90A-NEXT: ; def s[4:7] 14949; GFX90A-NEXT: ;;#ASMEND 14950; GFX90A-NEXT: s_lshr_b32 s4, s7, 16 14951; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s5 14952; GFX90A-NEXT: ;;#ASMSTART 14953; GFX90A-NEXT: ; use s8 14954; GFX90A-NEXT: ;;#ASMEND 14955; GFX90A-NEXT: s_setpc_b64 s[30:31] 14956; 14957; GFX940-LABEL: 
s_shuffle_v2i16_v8i16__15_10: 14958; GFX940: ; %bb.0: 14959; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14960; GFX940-NEXT: ;;#ASMSTART 14961; GFX940-NEXT: ; def s[0:3] 14962; GFX940-NEXT: ;;#ASMEND 14963; GFX940-NEXT: s_lshr_b32 s0, s3, 16 14964; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 14965; GFX940-NEXT: ;;#ASMSTART 14966; GFX940-NEXT: ; use s8 14967; GFX940-NEXT: ;;#ASMEND 14968; GFX940-NEXT: s_setpc_b64 s[30:31] 14969 %vec0 = call <8 x i16> asm "; def $0", "=s"() 14970 %vec1 = call <8 x i16> asm "; def $0", "=s"() 14971 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 15, i32 10> 14972 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 14973 ret void 14974} 14975 14976define void @s_shuffle_v2i16_v8i16__15_11() { 14977; GFX900-LABEL: s_shuffle_v2i16_v8i16__15_11: 14978; GFX900: ; %bb.0: 14979; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14980; GFX900-NEXT: ;;#ASMSTART 14981; GFX900-NEXT: ; def s[4:7] 14982; GFX900-NEXT: ;;#ASMEND 14983; GFX900-NEXT: s_pack_hh_b32_b16 s8, s7, s5 14984; GFX900-NEXT: ;;#ASMSTART 14985; GFX900-NEXT: ; use s8 14986; GFX900-NEXT: ;;#ASMEND 14987; GFX900-NEXT: s_setpc_b64 s[30:31] 14988; 14989; GFX90A-LABEL: s_shuffle_v2i16_v8i16__15_11: 14990; GFX90A: ; %bb.0: 14991; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14992; GFX90A-NEXT: ;;#ASMSTART 14993; GFX90A-NEXT: ; def s[4:7] 14994; GFX90A-NEXT: ;;#ASMEND 14995; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s7, s5 14996; GFX90A-NEXT: ;;#ASMSTART 14997; GFX90A-NEXT: ; use s8 14998; GFX90A-NEXT: ;;#ASMEND 14999; GFX90A-NEXT: s_setpc_b64 s[30:31] 15000; 15001; GFX940-LABEL: s_shuffle_v2i16_v8i16__15_11: 15002; GFX940: ; %bb.0: 15003; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15004; GFX940-NEXT: ;;#ASMSTART 15005; GFX940-NEXT: ; def s[0:3] 15006; GFX940-NEXT: ;;#ASMEND 15007; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s1 15008; GFX940-NEXT: ;;#ASMSTART 15009; GFX940-NEXT: ; use s8 15010; GFX940-NEXT: ;;#ASMEND 15011; GFX940-NEXT: 
s_setpc_b64 s[30:31] 15012 %vec0 = call <8 x i16> asm "; def $0", "=s"() 15013 %vec1 = call <8 x i16> asm "; def $0", "=s"() 15014 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 15, i32 11> 15015 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 15016 ret void 15017} 15018 15019define void @s_shuffle_v2i16_v8i16__15_12() { 15020; GFX900-LABEL: s_shuffle_v2i16_v8i16__15_12: 15021; GFX900: ; %bb.0: 15022; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15023; GFX900-NEXT: ;;#ASMSTART 15024; GFX900-NEXT: ; def s[4:7] 15025; GFX900-NEXT: ;;#ASMEND 15026; GFX900-NEXT: s_lshr_b32 s4, s7, 16 15027; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s6 15028; GFX900-NEXT: ;;#ASMSTART 15029; GFX900-NEXT: ; use s8 15030; GFX900-NEXT: ;;#ASMEND 15031; GFX900-NEXT: s_setpc_b64 s[30:31] 15032; 15033; GFX90A-LABEL: s_shuffle_v2i16_v8i16__15_12: 15034; GFX90A: ; %bb.0: 15035; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15036; GFX90A-NEXT: ;;#ASMSTART 15037; GFX90A-NEXT: ; def s[4:7] 15038; GFX90A-NEXT: ;;#ASMEND 15039; GFX90A-NEXT: s_lshr_b32 s4, s7, 16 15040; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s6 15041; GFX90A-NEXT: ;;#ASMSTART 15042; GFX90A-NEXT: ; use s8 15043; GFX90A-NEXT: ;;#ASMEND 15044; GFX90A-NEXT: s_setpc_b64 s[30:31] 15045; 15046; GFX940-LABEL: s_shuffle_v2i16_v8i16__15_12: 15047; GFX940: ; %bb.0: 15048; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15049; GFX940-NEXT: ;;#ASMSTART 15050; GFX940-NEXT: ; def s[0:3] 15051; GFX940-NEXT: ;;#ASMEND 15052; GFX940-NEXT: s_lshr_b32 s0, s3, 16 15053; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s2 15054; GFX940-NEXT: ;;#ASMSTART 15055; GFX940-NEXT: ; use s8 15056; GFX940-NEXT: ;;#ASMEND 15057; GFX940-NEXT: s_setpc_b64 s[30:31] 15058 %vec0 = call <8 x i16> asm "; def $0", "=s"() 15059 %vec1 = call <8 x i16> asm "; def $0", "=s"() 15060 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 15, i32 12> 15061 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 15062 
ret void 15063} 15064 15065define void @s_shuffle_v2i16_v8i16__15_13() { 15066; GFX900-LABEL: s_shuffle_v2i16_v8i16__15_13: 15067; GFX900: ; %bb.0: 15068; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15069; GFX900-NEXT: ;;#ASMSTART 15070; GFX900-NEXT: ; def s[4:7] 15071; GFX900-NEXT: ;;#ASMEND 15072; GFX900-NEXT: s_pack_hh_b32_b16 s8, s7, s6 15073; GFX900-NEXT: ;;#ASMSTART 15074; GFX900-NEXT: ; use s8 15075; GFX900-NEXT: ;;#ASMEND 15076; GFX900-NEXT: s_setpc_b64 s[30:31] 15077; 15078; GFX90A-LABEL: s_shuffle_v2i16_v8i16__15_13: 15079; GFX90A: ; %bb.0: 15080; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15081; GFX90A-NEXT: ;;#ASMSTART 15082; GFX90A-NEXT: ; def s[4:7] 15083; GFX90A-NEXT: ;;#ASMEND 15084; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s7, s6 15085; GFX90A-NEXT: ;;#ASMSTART 15086; GFX90A-NEXT: ; use s8 15087; GFX90A-NEXT: ;;#ASMEND 15088; GFX90A-NEXT: s_setpc_b64 s[30:31] 15089; 15090; GFX940-LABEL: s_shuffle_v2i16_v8i16__15_13: 15091; GFX940: ; %bb.0: 15092; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15093; GFX940-NEXT: ;;#ASMSTART 15094; GFX940-NEXT: ; def s[0:3] 15095; GFX940-NEXT: ;;#ASMEND 15096; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s2 15097; GFX940-NEXT: ;;#ASMSTART 15098; GFX940-NEXT: ; use s8 15099; GFX940-NEXT: ;;#ASMEND 15100; GFX940-NEXT: s_setpc_b64 s[30:31] 15101 %vec0 = call <8 x i16> asm "; def $0", "=s"() 15102 %vec1 = call <8 x i16> asm "; def $0", "=s"() 15103 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 15, i32 13> 15104 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 15105 ret void 15106} 15107 15108define void @s_shuffle_v2i16_v8i16__15_14() { 15109; GFX900-LABEL: s_shuffle_v2i16_v8i16__15_14: 15110; GFX900: ; %bb.0: 15111; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15112; GFX900-NEXT: ;;#ASMSTART 15113; GFX900-NEXT: ; def s[4:7] 15114; GFX900-NEXT: ;;#ASMEND 15115; GFX900-NEXT: s_lshr_b32 s4, s7, 16 15116; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s7 15117; GFX900-NEXT: 
;;#ASMSTART 15118; GFX900-NEXT: ; use s8 15119; GFX900-NEXT: ;;#ASMEND 15120; GFX900-NEXT: s_setpc_b64 s[30:31] 15121; 15122; GFX90A-LABEL: s_shuffle_v2i16_v8i16__15_14: 15123; GFX90A: ; %bb.0: 15124; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15125; GFX90A-NEXT: ;;#ASMSTART 15126; GFX90A-NEXT: ; def s[4:7] 15127; GFX90A-NEXT: ;;#ASMEND 15128; GFX90A-NEXT: s_lshr_b32 s4, s7, 16 15129; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s7 15130; GFX90A-NEXT: ;;#ASMSTART 15131; GFX90A-NEXT: ; use s8 15132; GFX90A-NEXT: ;;#ASMEND 15133; GFX90A-NEXT: s_setpc_b64 s[30:31] 15134; 15135; GFX940-LABEL: s_shuffle_v2i16_v8i16__15_14: 15136; GFX940: ; %bb.0: 15137; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15138; GFX940-NEXT: ;;#ASMSTART 15139; GFX940-NEXT: ; def s[0:3] 15140; GFX940-NEXT: ;;#ASMEND 15141; GFX940-NEXT: s_lshr_b32 s0, s3, 16 15142; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3 15143; GFX940-NEXT: ;;#ASMSTART 15144; GFX940-NEXT: ; use s8 15145; GFX940-NEXT: ;;#ASMEND 15146; GFX940-NEXT: s_setpc_b64 s[30:31] 15147 %vec0 = call <8 x i16> asm "; def $0", "=s"() 15148 %vec1 = call <8 x i16> asm "; def $0", "=s"() 15149 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 15, i32 14> 15150 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 15151 ret void 15152} 15153 15154define void @s_shuffle_v2i16_v8i16__15_15() { 15155; GFX900-LABEL: s_shuffle_v2i16_v8i16__15_15: 15156; GFX900: ; %bb.0: 15157; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15158; GFX900-NEXT: ;;#ASMSTART 15159; GFX900-NEXT: ; def s[4:7] 15160; GFX900-NEXT: ;;#ASMEND 15161; GFX900-NEXT: s_pack_hh_b32_b16 s8, s7, s7 15162; GFX900-NEXT: ;;#ASMSTART 15163; GFX900-NEXT: ; use s8 15164; GFX900-NEXT: ;;#ASMEND 15165; GFX900-NEXT: s_setpc_b64 s[30:31] 15166; 15167; GFX90A-LABEL: s_shuffle_v2i16_v8i16__15_15: 15168; GFX90A: ; %bb.0: 15169; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15170; GFX90A-NEXT: ;;#ASMSTART 15171; GFX90A-NEXT: ; def s[4:7] 15172; 
GFX90A-NEXT: ;;#ASMEND 15173; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s7, s7 15174; GFX90A-NEXT: ;;#ASMSTART 15175; GFX90A-NEXT: ; use s8 15176; GFX90A-NEXT: ;;#ASMEND 15177; GFX90A-NEXT: s_setpc_b64 s[30:31] 15178; 15179; GFX940-LABEL: s_shuffle_v2i16_v8i16__15_15: 15180; GFX940: ; %bb.0: 15181; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15182; GFX940-NEXT: ;;#ASMSTART 15183; GFX940-NEXT: ; def s[0:3] 15184; GFX940-NEXT: ;;#ASMEND 15185; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3 15186; GFX940-NEXT: ;;#ASMSTART 15187; GFX940-NEXT: ; use s8 15188; GFX940-NEXT: ;;#ASMEND 15189; GFX940-NEXT: s_setpc_b64 s[30:31] 15190 %vec0 = call <8 x i16> asm "; def $0", "=s"() 15191 %vec1 = call <8 x i16> asm "; def $0", "=s"() 15192 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 15, i32 15> 15193 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 15194 ret void 15195} 15196 15197define void @s_shuffle_v2i16_v8i16__u_0() { 15198; GFX900-LABEL: s_shuffle_v2i16_v8i16__u_0: 15199; GFX900: ; %bb.0: 15200; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15201; GFX900-NEXT: ;;#ASMSTART 15202; GFX900-NEXT: ; def s[4:7] 15203; GFX900-NEXT: ;;#ASMEND 15204; GFX900-NEXT: s_lshl_b32 s8, s4, 16 15205; GFX900-NEXT: ;;#ASMSTART 15206; GFX900-NEXT: ; use s8 15207; GFX900-NEXT: ;;#ASMEND 15208; GFX900-NEXT: s_setpc_b64 s[30:31] 15209; 15210; GFX90A-LABEL: s_shuffle_v2i16_v8i16__u_0: 15211; GFX90A: ; %bb.0: 15212; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15213; GFX90A-NEXT: ;;#ASMSTART 15214; GFX90A-NEXT: ; def s[4:7] 15215; GFX90A-NEXT: ;;#ASMEND 15216; GFX90A-NEXT: s_lshl_b32 s8, s4, 16 15217; GFX90A-NEXT: ;;#ASMSTART 15218; GFX90A-NEXT: ; use s8 15219; GFX90A-NEXT: ;;#ASMEND 15220; GFX90A-NEXT: s_setpc_b64 s[30:31] 15221; 15222; GFX940-LABEL: s_shuffle_v2i16_v8i16__u_0: 15223; GFX940: ; %bb.0: 15224; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15225; GFX940-NEXT: ;;#ASMSTART 15226; GFX940-NEXT: ; def s[0:3] 15227; GFX940-NEXT: 
;;#ASMEND 15228; GFX940-NEXT: s_lshl_b32 s8, s0, 16 15229; GFX940-NEXT: ;;#ASMSTART 15230; GFX940-NEXT: ; use s8 15231; GFX940-NEXT: ;;#ASMEND 15232; GFX940-NEXT: s_setpc_b64 s[30:31] 15233 %vec0 = call <8 x i16> asm "; def $0", "=s"() 15234 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 poison, i32 0> 15235 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 15236 ret void 15237} 15238 15239define void @s_shuffle_v2i16_v8i16__0_0() { 15240; GFX900-LABEL: s_shuffle_v2i16_v8i16__0_0: 15241; GFX900: ; %bb.0: 15242; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15243; GFX900-NEXT: ;;#ASMSTART 15244; GFX900-NEXT: ; def s[4:7] 15245; GFX900-NEXT: ;;#ASMEND 15246; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s4 15247; GFX900-NEXT: ;;#ASMSTART 15248; GFX900-NEXT: ; use s8 15249; GFX900-NEXT: ;;#ASMEND 15250; GFX900-NEXT: s_setpc_b64 s[30:31] 15251; 15252; GFX90A-LABEL: s_shuffle_v2i16_v8i16__0_0: 15253; GFX90A: ; %bb.0: 15254; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15255; GFX90A-NEXT: ;;#ASMSTART 15256; GFX90A-NEXT: ; def s[4:7] 15257; GFX90A-NEXT: ;;#ASMEND 15258; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s4 15259; GFX90A-NEXT: ;;#ASMSTART 15260; GFX90A-NEXT: ; use s8 15261; GFX90A-NEXT: ;;#ASMEND 15262; GFX90A-NEXT: s_setpc_b64 s[30:31] 15263; 15264; GFX940-LABEL: s_shuffle_v2i16_v8i16__0_0: 15265; GFX940: ; %bb.0: 15266; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15267; GFX940-NEXT: ;;#ASMSTART 15268; GFX940-NEXT: ; def s[0:3] 15269; GFX940-NEXT: ;;#ASMEND 15270; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 15271; GFX940-NEXT: ;;#ASMSTART 15272; GFX940-NEXT: ; use s8 15273; GFX940-NEXT: ;;#ASMEND 15274; GFX940-NEXT: s_setpc_b64 s[30:31] 15275 %vec0 = call <8 x i16> asm "; def $0", "=s"() 15276 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> zeroinitializer 15277 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 15278 ret void 15279} 15280 15281define void @s_shuffle_v2i16_v8i16__1_0() { 
15282; GFX900-LABEL: s_shuffle_v2i16_v8i16__1_0: 15283; GFX900: ; %bb.0: 15284; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15285; GFX900-NEXT: ;;#ASMSTART 15286; GFX900-NEXT: ; def s[4:7] 15287; GFX900-NEXT: ;;#ASMEND 15288; GFX900-NEXT: s_lshr_b32 s5, s4, 16 15289; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4 15290; GFX900-NEXT: ;;#ASMSTART 15291; GFX900-NEXT: ; use s8 15292; GFX900-NEXT: ;;#ASMEND 15293; GFX900-NEXT: s_setpc_b64 s[30:31] 15294; 15295; GFX90A-LABEL: s_shuffle_v2i16_v8i16__1_0: 15296; GFX90A: ; %bb.0: 15297; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15298; GFX90A-NEXT: ;;#ASMSTART 15299; GFX90A-NEXT: ; def s[4:7] 15300; GFX90A-NEXT: ;;#ASMEND 15301; GFX90A-NEXT: s_lshr_b32 s5, s4, 16 15302; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4 15303; GFX90A-NEXT: ;;#ASMSTART 15304; GFX90A-NEXT: ; use s8 15305; GFX90A-NEXT: ;;#ASMEND 15306; GFX90A-NEXT: s_setpc_b64 s[30:31] 15307; 15308; GFX940-LABEL: s_shuffle_v2i16_v8i16__1_0: 15309; GFX940: ; %bb.0: 15310; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15311; GFX940-NEXT: ;;#ASMSTART 15312; GFX940-NEXT: ; def s[0:3] 15313; GFX940-NEXT: ;;#ASMEND 15314; GFX940-NEXT: s_lshr_b32 s1, s0, 16 15315; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 15316; GFX940-NEXT: ;;#ASMSTART 15317; GFX940-NEXT: ; use s8 15318; GFX940-NEXT: ;;#ASMEND 15319; GFX940-NEXT: s_setpc_b64 s[30:31] 15320 %vec0 = call <8 x i16> asm "; def $0", "=s"() 15321 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 1, i32 0> 15322 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 15323 ret void 15324} 15325 15326define void @s_shuffle_v2i16_v8i16__2_0() { 15327; GFX900-LABEL: s_shuffle_v2i16_v8i16__2_0: 15328; GFX900: ; %bb.0: 15329; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15330; GFX900-NEXT: ;;#ASMSTART 15331; GFX900-NEXT: ; def s[4:7] 15332; GFX900-NEXT: ;;#ASMEND 15333; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4 15334; GFX900-NEXT: ;;#ASMSTART 15335; GFX900-NEXT: ; use s8 15336; 
GFX900-NEXT: ;;#ASMEND 15337; GFX900-NEXT: s_setpc_b64 s[30:31] 15338; 15339; GFX90A-LABEL: s_shuffle_v2i16_v8i16__2_0: 15340; GFX90A: ; %bb.0: 15341; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15342; GFX90A-NEXT: ;;#ASMSTART 15343; GFX90A-NEXT: ; def s[4:7] 15344; GFX90A-NEXT: ;;#ASMEND 15345; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4 15346; GFX90A-NEXT: ;;#ASMSTART 15347; GFX90A-NEXT: ; use s8 15348; GFX90A-NEXT: ;;#ASMEND 15349; GFX90A-NEXT: s_setpc_b64 s[30:31] 15350; 15351; GFX940-LABEL: s_shuffle_v2i16_v8i16__2_0: 15352; GFX940: ; %bb.0: 15353; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15354; GFX940-NEXT: ;;#ASMSTART 15355; GFX940-NEXT: ; def s[0:3] 15356; GFX940-NEXT: ;;#ASMEND 15357; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 15358; GFX940-NEXT: ;;#ASMSTART 15359; GFX940-NEXT: ; use s8 15360; GFX940-NEXT: ;;#ASMEND 15361; GFX940-NEXT: s_setpc_b64 s[30:31] 15362 %vec0 = call <8 x i16> asm "; def $0", "=s"() 15363 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 2, i32 0> 15364 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 15365 ret void 15366} 15367 15368define void @s_shuffle_v2i16_v8i16__3_0() { 15369; GFX900-LABEL: s_shuffle_v2i16_v8i16__3_0: 15370; GFX900: ; %bb.0: 15371; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15372; GFX900-NEXT: ;;#ASMSTART 15373; GFX900-NEXT: ; def s[4:7] 15374; GFX900-NEXT: ;;#ASMEND 15375; GFX900-NEXT: s_lshr_b32 s5, s5, 16 15376; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4 15377; GFX900-NEXT: ;;#ASMSTART 15378; GFX900-NEXT: ; use s8 15379; GFX900-NEXT: ;;#ASMEND 15380; GFX900-NEXT: s_setpc_b64 s[30:31] 15381; 15382; GFX90A-LABEL: s_shuffle_v2i16_v8i16__3_0: 15383; GFX90A: ; %bb.0: 15384; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15385; GFX90A-NEXT: ;;#ASMSTART 15386; GFX90A-NEXT: ; def s[4:7] 15387; GFX90A-NEXT: ;;#ASMEND 15388; GFX90A-NEXT: s_lshr_b32 s5, s5, 16 15389; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4 15390; GFX90A-NEXT: ;;#ASMSTART 15391; 
GFX90A-NEXT: ; use s8 15392; GFX90A-NEXT: ;;#ASMEND 15393; GFX90A-NEXT: s_setpc_b64 s[30:31] 15394; 15395; GFX940-LABEL: s_shuffle_v2i16_v8i16__3_0: 15396; GFX940: ; %bb.0: 15397; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15398; GFX940-NEXT: ;;#ASMSTART 15399; GFX940-NEXT: ; def s[0:3] 15400; GFX940-NEXT: ;;#ASMEND 15401; GFX940-NEXT: s_lshr_b32 s1, s1, 16 15402; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 15403; GFX940-NEXT: ;;#ASMSTART 15404; GFX940-NEXT: ; use s8 15405; GFX940-NEXT: ;;#ASMEND 15406; GFX940-NEXT: s_setpc_b64 s[30:31] 15407 %vec0 = call <8 x i16> asm "; def $0", "=s"() 15408 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 3, i32 0> 15409 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 15410 ret void 15411} 15412 15413define void @s_shuffle_v2i16_v8i16__4_0() { 15414; GFX900-LABEL: s_shuffle_v2i16_v8i16__4_0: 15415; GFX900: ; %bb.0: 15416; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15417; GFX900-NEXT: ;;#ASMSTART 15418; GFX900-NEXT: ; def s[4:7] 15419; GFX900-NEXT: ;;#ASMEND 15420; GFX900-NEXT: s_pack_ll_b32_b16 s8, s6, s4 15421; GFX900-NEXT: ;;#ASMSTART 15422; GFX900-NEXT: ; use s8 15423; GFX900-NEXT: ;;#ASMEND 15424; GFX900-NEXT: s_setpc_b64 s[30:31] 15425; 15426; GFX90A-LABEL: s_shuffle_v2i16_v8i16__4_0: 15427; GFX90A: ; %bb.0: 15428; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15429; GFX90A-NEXT: ;;#ASMSTART 15430; GFX90A-NEXT: ; def s[4:7] 15431; GFX90A-NEXT: ;;#ASMEND 15432; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s6, s4 15433; GFX90A-NEXT: ;;#ASMSTART 15434; GFX90A-NEXT: ; use s8 15435; GFX90A-NEXT: ;;#ASMEND 15436; GFX90A-NEXT: s_setpc_b64 s[30:31] 15437; 15438; GFX940-LABEL: s_shuffle_v2i16_v8i16__4_0: 15439; GFX940: ; %bb.0: 15440; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15441; GFX940-NEXT: ;;#ASMSTART 15442; GFX940-NEXT: ; def s[0:3] 15443; GFX940-NEXT: ;;#ASMEND 15444; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s0 15445; GFX940-NEXT: ;;#ASMSTART 15446; GFX940-NEXT: ; 
use s8 15447; GFX940-NEXT: ;;#ASMEND 15448; GFX940-NEXT: s_setpc_b64 s[30:31] 15449 %vec0 = call <8 x i16> asm "; def $0", "=s"() 15450 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 4, i32 0> 15451 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 15452 ret void 15453} 15454 15455define void @s_shuffle_v2i16_v8i16__5_0() { 15456; GFX900-LABEL: s_shuffle_v2i16_v8i16__5_0: 15457; GFX900: ; %bb.0: 15458; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15459; GFX900-NEXT: ;;#ASMSTART 15460; GFX900-NEXT: ; def s[4:7] 15461; GFX900-NEXT: ;;#ASMEND 15462; GFX900-NEXT: s_lshr_b32 s5, s6, 16 15463; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4 15464; GFX900-NEXT: ;;#ASMSTART 15465; GFX900-NEXT: ; use s8 15466; GFX900-NEXT: ;;#ASMEND 15467; GFX900-NEXT: s_setpc_b64 s[30:31] 15468; 15469; GFX90A-LABEL: s_shuffle_v2i16_v8i16__5_0: 15470; GFX90A: ; %bb.0: 15471; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15472; GFX90A-NEXT: ;;#ASMSTART 15473; GFX90A-NEXT: ; def s[4:7] 15474; GFX90A-NEXT: ;;#ASMEND 15475; GFX90A-NEXT: s_lshr_b32 s5, s6, 16 15476; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4 15477; GFX90A-NEXT: ;;#ASMSTART 15478; GFX90A-NEXT: ; use s8 15479; GFX90A-NEXT: ;;#ASMEND 15480; GFX90A-NEXT: s_setpc_b64 s[30:31] 15481; 15482; GFX940-LABEL: s_shuffle_v2i16_v8i16__5_0: 15483; GFX940: ; %bb.0: 15484; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15485; GFX940-NEXT: ;;#ASMSTART 15486; GFX940-NEXT: ; def s[0:3] 15487; GFX940-NEXT: ;;#ASMEND 15488; GFX940-NEXT: s_lshr_b32 s1, s2, 16 15489; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 15490; GFX940-NEXT: ;;#ASMSTART 15491; GFX940-NEXT: ; use s8 15492; GFX940-NEXT: ;;#ASMEND 15493; GFX940-NEXT: s_setpc_b64 s[30:31] 15494 %vec0 = call <8 x i16> asm "; def $0", "=s"() 15495 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 5, i32 0> 15496 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 15497 ret void 15498} 15499 15500define void 
@s_shuffle_v2i16_v8i16__6_0() { 15501; GFX900-LABEL: s_shuffle_v2i16_v8i16__6_0: 15502; GFX900: ; %bb.0: 15503; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15504; GFX900-NEXT: ;;#ASMSTART 15505; GFX900-NEXT: ; def s[4:7] 15506; GFX900-NEXT: ;;#ASMEND 15507; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s4 15508; GFX900-NEXT: ;;#ASMSTART 15509; GFX900-NEXT: ; use s8 15510; GFX900-NEXT: ;;#ASMEND 15511; GFX900-NEXT: s_setpc_b64 s[30:31] 15512; 15513; GFX90A-LABEL: s_shuffle_v2i16_v8i16__6_0: 15514; GFX90A: ; %bb.0: 15515; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15516; GFX90A-NEXT: ;;#ASMSTART 15517; GFX90A-NEXT: ; def s[4:7] 15518; GFX90A-NEXT: ;;#ASMEND 15519; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s4 15520; GFX90A-NEXT: ;;#ASMSTART 15521; GFX90A-NEXT: ; use s8 15522; GFX90A-NEXT: ;;#ASMEND 15523; GFX90A-NEXT: s_setpc_b64 s[30:31] 15524; 15525; GFX940-LABEL: s_shuffle_v2i16_v8i16__6_0: 15526; GFX940: ; %bb.0: 15527; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15528; GFX940-NEXT: ;;#ASMSTART 15529; GFX940-NEXT: ; def s[0:3] 15530; GFX940-NEXT: ;;#ASMEND 15531; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 15532; GFX940-NEXT: ;;#ASMSTART 15533; GFX940-NEXT: ; use s8 15534; GFX940-NEXT: ;;#ASMEND 15535; GFX940-NEXT: s_setpc_b64 s[30:31] 15536 %vec0 = call <8 x i16> asm "; def $0", "=s"() 15537 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 6, i32 0> 15538 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 15539 ret void 15540} 15541 15542define void @s_shuffle_v2i16_v8i16__7_0() { 15543; GFX900-LABEL: s_shuffle_v2i16_v8i16__7_0: 15544; GFX900: ; %bb.0: 15545; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15546; GFX900-NEXT: ;;#ASMSTART 15547; GFX900-NEXT: ; def s[4:7] 15548; GFX900-NEXT: ;;#ASMEND 15549; GFX900-NEXT: s_lshr_b32 s5, s7, 16 15550; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4 15551; GFX900-NEXT: ;;#ASMSTART 15552; GFX900-NEXT: ; use s8 15553; GFX900-NEXT: ;;#ASMEND 15554; GFX900-NEXT: s_setpc_b64 
s[30:31] 15555; 15556; GFX90A-LABEL: s_shuffle_v2i16_v8i16__7_0: 15557; GFX90A: ; %bb.0: 15558; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15559; GFX90A-NEXT: ;;#ASMSTART 15560; GFX90A-NEXT: ; def s[4:7] 15561; GFX90A-NEXT: ;;#ASMEND 15562; GFX90A-NEXT: s_lshr_b32 s5, s7, 16 15563; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4 15564; GFX90A-NEXT: ;;#ASMSTART 15565; GFX90A-NEXT: ; use s8 15566; GFX90A-NEXT: ;;#ASMEND 15567; GFX90A-NEXT: s_setpc_b64 s[30:31] 15568; 15569; GFX940-LABEL: s_shuffle_v2i16_v8i16__7_0: 15570; GFX940: ; %bb.0: 15571; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15572; GFX940-NEXT: ;;#ASMSTART 15573; GFX940-NEXT: ; def s[0:3] 15574; GFX940-NEXT: ;;#ASMEND 15575; GFX940-NEXT: s_lshr_b32 s1, s3, 16 15576; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 15577; GFX940-NEXT: ;;#ASMSTART 15578; GFX940-NEXT: ; use s8 15579; GFX940-NEXT: ;;#ASMEND 15580; GFX940-NEXT: s_setpc_b64 s[30:31] 15581 %vec0 = call <8 x i16> asm "; def $0", "=s"() 15582 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 7, i32 0> 15583 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 15584 ret void 15585} 15586 15587define void @s_shuffle_v2i16_v8i16__8_0() { 15588; GFX900-LABEL: s_shuffle_v2i16_v8i16__8_0: 15589; GFX900: ; %bb.0: 15590; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15591; GFX900-NEXT: ;;#ASMSTART 15592; GFX900-NEXT: ; def s[4:7] 15593; GFX900-NEXT: ;;#ASMEND 15594; GFX900-NEXT: s_lshl_b32 s8, s4, 16 15595; GFX900-NEXT: ;;#ASMSTART 15596; GFX900-NEXT: ; use s8 15597; GFX900-NEXT: ;;#ASMEND 15598; GFX900-NEXT: s_setpc_b64 s[30:31] 15599; 15600; GFX90A-LABEL: s_shuffle_v2i16_v8i16__8_0: 15601; GFX90A: ; %bb.0: 15602; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15603; GFX90A-NEXT: ;;#ASMSTART 15604; GFX90A-NEXT: ; def s[4:7] 15605; GFX90A-NEXT: ;;#ASMEND 15606; GFX90A-NEXT: s_lshl_b32 s8, s4, 16 15607; GFX90A-NEXT: ;;#ASMSTART 15608; GFX90A-NEXT: ; use s8 15609; GFX90A-NEXT: ;;#ASMEND 15610; GFX90A-NEXT: 
s_setpc_b64 s[30:31] 15611; 15612; GFX940-LABEL: s_shuffle_v2i16_v8i16__8_0: 15613; GFX940: ; %bb.0: 15614; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15615; GFX940-NEXT: ;;#ASMSTART 15616; GFX940-NEXT: ; def s[0:3] 15617; GFX940-NEXT: ;;#ASMEND 15618; GFX940-NEXT: s_lshl_b32 s8, s0, 16 15619; GFX940-NEXT: ;;#ASMSTART 15620; GFX940-NEXT: ; use s8 15621; GFX940-NEXT: ;;#ASMEND 15622; GFX940-NEXT: s_setpc_b64 s[30:31] 15623 %vec0 = call <8 x i16> asm "; def $0", "=s"() 15624 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 8, i32 0> 15625 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 15626 ret void 15627} 15628 15629define void @s_shuffle_v2i16_v8i16__9_0() { 15630; GFX900-LABEL: s_shuffle_v2i16_v8i16__9_0: 15631; GFX900: ; %bb.0: 15632; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15633; GFX900-NEXT: ;;#ASMSTART 15634; GFX900-NEXT: ; def s[4:7] 15635; GFX900-NEXT: ;;#ASMEND 15636; GFX900-NEXT: ;;#ASMSTART 15637; GFX900-NEXT: ; def s[8:11] 15638; GFX900-NEXT: ;;#ASMEND 15639; GFX900-NEXT: s_lshr_b32 s5, s8, 16 15640; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4 15641; GFX900-NEXT: ;;#ASMSTART 15642; GFX900-NEXT: ; use s8 15643; GFX900-NEXT: ;;#ASMEND 15644; GFX900-NEXT: s_setpc_b64 s[30:31] 15645; 15646; GFX90A-LABEL: s_shuffle_v2i16_v8i16__9_0: 15647; GFX90A: ; %bb.0: 15648; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15649; GFX90A-NEXT: ;;#ASMSTART 15650; GFX90A-NEXT: ; def s[4:7] 15651; GFX90A-NEXT: ;;#ASMEND 15652; GFX90A-NEXT: ;;#ASMSTART 15653; GFX90A-NEXT: ; def s[8:11] 15654; GFX90A-NEXT: ;;#ASMEND 15655; GFX90A-NEXT: s_lshr_b32 s5, s8, 16 15656; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4 15657; GFX90A-NEXT: ;;#ASMSTART 15658; GFX90A-NEXT: ; use s8 15659; GFX90A-NEXT: ;;#ASMEND 15660; GFX90A-NEXT: s_setpc_b64 s[30:31] 15661; 15662; GFX940-LABEL: s_shuffle_v2i16_v8i16__9_0: 15663; GFX940: ; %bb.0: 15664; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15665; GFX940-NEXT: ;;#ASMSTART 15666; 
GFX940-NEXT: ; def s[0:3] 15667; GFX940-NEXT: ;;#ASMEND 15668; GFX940-NEXT: ;;#ASMSTART 15669; GFX940-NEXT: ; def s[4:7] 15670; GFX940-NEXT: ;;#ASMEND 15671; GFX940-NEXT: s_lshr_b32 s1, s4, 16 15672; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 15673; GFX940-NEXT: ;;#ASMSTART 15674; GFX940-NEXT: ; use s8 15675; GFX940-NEXT: ;;#ASMEND 15676; GFX940-NEXT: s_setpc_b64 s[30:31] 15677 %vec0 = call <8 x i16> asm "; def $0", "=s"() 15678 %vec1 = call <8 x i16> asm "; def $0", "=s"() 15679 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 9, i32 0> 15680 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 15681 ret void 15682} 15683 15684define void @s_shuffle_v2i16_v8i16__10_0() { 15685; GFX900-LABEL: s_shuffle_v2i16_v8i16__10_0: 15686; GFX900: ; %bb.0: 15687; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15688; GFX900-NEXT: ;;#ASMSTART 15689; GFX900-NEXT: ; def s[8:11] 15690; GFX900-NEXT: ;;#ASMEND 15691; GFX900-NEXT: ;;#ASMSTART 15692; GFX900-NEXT: ; def s[4:7] 15693; GFX900-NEXT: ;;#ASMEND 15694; GFX900-NEXT: s_pack_ll_b32_b16 s8, s9, s4 15695; GFX900-NEXT: ;;#ASMSTART 15696; GFX900-NEXT: ; use s8 15697; GFX900-NEXT: ;;#ASMEND 15698; GFX900-NEXT: s_setpc_b64 s[30:31] 15699; 15700; GFX90A-LABEL: s_shuffle_v2i16_v8i16__10_0: 15701; GFX90A: ; %bb.0: 15702; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15703; GFX90A-NEXT: ;;#ASMSTART 15704; GFX90A-NEXT: ; def s[8:11] 15705; GFX90A-NEXT: ;;#ASMEND 15706; GFX90A-NEXT: ;;#ASMSTART 15707; GFX90A-NEXT: ; def s[4:7] 15708; GFX90A-NEXT: ;;#ASMEND 15709; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s9, s4 15710; GFX90A-NEXT: ;;#ASMSTART 15711; GFX90A-NEXT: ; use s8 15712; GFX90A-NEXT: ;;#ASMEND 15713; GFX90A-NEXT: s_setpc_b64 s[30:31] 15714; 15715; GFX940-LABEL: s_shuffle_v2i16_v8i16__10_0: 15716; GFX940: ; %bb.0: 15717; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15718; GFX940-NEXT: ;;#ASMSTART 15719; GFX940-NEXT: ; def s[0:3] 15720; GFX940-NEXT: ;;#ASMEND 15721; GFX940-NEXT: ;;#ASMSTART 
15722; GFX940-NEXT: ; def s[4:7] 15723; GFX940-NEXT: ;;#ASMEND 15724; GFX940-NEXT: s_pack_ll_b32_b16 s8, s5, s0 15725; GFX940-NEXT: ;;#ASMSTART 15726; GFX940-NEXT: ; use s8 15727; GFX940-NEXT: ;;#ASMEND 15728; GFX940-NEXT: s_setpc_b64 s[30:31] 15729 %vec0 = call <8 x i16> asm "; def $0", "=s"() 15730 %vec1 = call <8 x i16> asm "; def $0", "=s"() 15731 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 10, i32 0> 15732 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 15733 ret void 15734} 15735 15736define void @s_shuffle_v2i16_v8i16__11_0() { 15737; GFX900-LABEL: s_shuffle_v2i16_v8i16__11_0: 15738; GFX900: ; %bb.0: 15739; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15740; GFX900-NEXT: ;;#ASMSTART 15741; GFX900-NEXT: ; def s[4:7] 15742; GFX900-NEXT: ;;#ASMEND 15743; GFX900-NEXT: ;;#ASMSTART 15744; GFX900-NEXT: ; def s[8:11] 15745; GFX900-NEXT: ;;#ASMEND 15746; GFX900-NEXT: s_lshr_b32 s5, s9, 16 15747; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4 15748; GFX900-NEXT: ;;#ASMSTART 15749; GFX900-NEXT: ; use s8 15750; GFX900-NEXT: ;;#ASMEND 15751; GFX900-NEXT: s_setpc_b64 s[30:31] 15752; 15753; GFX90A-LABEL: s_shuffle_v2i16_v8i16__11_0: 15754; GFX90A: ; %bb.0: 15755; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15756; GFX90A-NEXT: ;;#ASMSTART 15757; GFX90A-NEXT: ; def s[4:7] 15758; GFX90A-NEXT: ;;#ASMEND 15759; GFX90A-NEXT: ;;#ASMSTART 15760; GFX90A-NEXT: ; def s[8:11] 15761; GFX90A-NEXT: ;;#ASMEND 15762; GFX90A-NEXT: s_lshr_b32 s5, s9, 16 15763; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4 15764; GFX90A-NEXT: ;;#ASMSTART 15765; GFX90A-NEXT: ; use s8 15766; GFX90A-NEXT: ;;#ASMEND 15767; GFX90A-NEXT: s_setpc_b64 s[30:31] 15768; 15769; GFX940-LABEL: s_shuffle_v2i16_v8i16__11_0: 15770; GFX940: ; %bb.0: 15771; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15772; GFX940-NEXT: ;;#ASMSTART 15773; GFX940-NEXT: ; def s[0:3] 15774; GFX940-NEXT: ;;#ASMEND 15775; GFX940-NEXT: ;;#ASMSTART 15776; GFX940-NEXT: ; def s[4:7] 15777; 
GFX940-NEXT: ;;#ASMEND 15778; GFX940-NEXT: s_lshr_b32 s1, s5, 16 15779; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 15780; GFX940-NEXT: ;;#ASMSTART 15781; GFX940-NEXT: ; use s8 15782; GFX940-NEXT: ;;#ASMEND 15783; GFX940-NEXT: s_setpc_b64 s[30:31] 15784 %vec0 = call <8 x i16> asm "; def $0", "=s"() 15785 %vec1 = call <8 x i16> asm "; def $0", "=s"() 15786 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 11, i32 0> 15787 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 15788 ret void 15789} 15790 15791define void @s_shuffle_v2i16_v8i16__12_0() { 15792; GFX900-LABEL: s_shuffle_v2i16_v8i16__12_0: 15793; GFX900: ; %bb.0: 15794; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15795; GFX900-NEXT: ;;#ASMSTART 15796; GFX900-NEXT: ; def s[8:11] 15797; GFX900-NEXT: ;;#ASMEND 15798; GFX900-NEXT: ;;#ASMSTART 15799; GFX900-NEXT: ; def s[4:7] 15800; GFX900-NEXT: ;;#ASMEND 15801; GFX900-NEXT: s_pack_ll_b32_b16 s8, s10, s4 15802; GFX900-NEXT: ;;#ASMSTART 15803; GFX900-NEXT: ; use s8 15804; GFX900-NEXT: ;;#ASMEND 15805; GFX900-NEXT: s_setpc_b64 s[30:31] 15806; 15807; GFX90A-LABEL: s_shuffle_v2i16_v8i16__12_0: 15808; GFX90A: ; %bb.0: 15809; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15810; GFX90A-NEXT: ;;#ASMSTART 15811; GFX90A-NEXT: ; def s[8:11] 15812; GFX90A-NEXT: ;;#ASMEND 15813; GFX90A-NEXT: ;;#ASMSTART 15814; GFX90A-NEXT: ; def s[4:7] 15815; GFX90A-NEXT: ;;#ASMEND 15816; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s10, s4 15817; GFX90A-NEXT: ;;#ASMSTART 15818; GFX90A-NEXT: ; use s8 15819; GFX90A-NEXT: ;;#ASMEND 15820; GFX90A-NEXT: s_setpc_b64 s[30:31] 15821; 15822; GFX940-LABEL: s_shuffle_v2i16_v8i16__12_0: 15823; GFX940: ; %bb.0: 15824; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15825; GFX940-NEXT: ;;#ASMSTART 15826; GFX940-NEXT: ; def s[0:3] 15827; GFX940-NEXT: ;;#ASMEND 15828; GFX940-NEXT: ;;#ASMSTART 15829; GFX940-NEXT: ; def s[4:7] 15830; GFX940-NEXT: ;;#ASMEND 15831; GFX940-NEXT: s_pack_ll_b32_b16 s8, s6, s0 15832; 
GFX940-NEXT: ;;#ASMSTART 15833; GFX940-NEXT: ; use s8 15834; GFX940-NEXT: ;;#ASMEND 15835; GFX940-NEXT: s_setpc_b64 s[30:31] 15836 %vec0 = call <8 x i16> asm "; def $0", "=s"() 15837 %vec1 = call <8 x i16> asm "; def $0", "=s"() 15838 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 12, i32 0> 15839 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 15840 ret void 15841} 15842 15843define void @s_shuffle_v2i16_v8i16__13_0() { 15844; GFX900-LABEL: s_shuffle_v2i16_v8i16__13_0: 15845; GFX900: ; %bb.0: 15846; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15847; GFX900-NEXT: ;;#ASMSTART 15848; GFX900-NEXT: ; def s[4:7] 15849; GFX900-NEXT: ;;#ASMEND 15850; GFX900-NEXT: ;;#ASMSTART 15851; GFX900-NEXT: ; def s[8:11] 15852; GFX900-NEXT: ;;#ASMEND 15853; GFX900-NEXT: s_lshr_b32 s5, s10, 16 15854; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4 15855; GFX900-NEXT: ;;#ASMSTART 15856; GFX900-NEXT: ; use s8 15857; GFX900-NEXT: ;;#ASMEND 15858; GFX900-NEXT: s_setpc_b64 s[30:31] 15859; 15860; GFX90A-LABEL: s_shuffle_v2i16_v8i16__13_0: 15861; GFX90A: ; %bb.0: 15862; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15863; GFX90A-NEXT: ;;#ASMSTART 15864; GFX90A-NEXT: ; def s[4:7] 15865; GFX90A-NEXT: ;;#ASMEND 15866; GFX90A-NEXT: ;;#ASMSTART 15867; GFX90A-NEXT: ; def s[8:11] 15868; GFX90A-NEXT: ;;#ASMEND 15869; GFX90A-NEXT: s_lshr_b32 s5, s10, 16 15870; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4 15871; GFX90A-NEXT: ;;#ASMSTART 15872; GFX90A-NEXT: ; use s8 15873; GFX90A-NEXT: ;;#ASMEND 15874; GFX90A-NEXT: s_setpc_b64 s[30:31] 15875; 15876; GFX940-LABEL: s_shuffle_v2i16_v8i16__13_0: 15877; GFX940: ; %bb.0: 15878; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15879; GFX940-NEXT: ;;#ASMSTART 15880; GFX940-NEXT: ; def s[0:3] 15881; GFX940-NEXT: ;;#ASMEND 15882; GFX940-NEXT: ;;#ASMSTART 15883; GFX940-NEXT: ; def s[4:7] 15884; GFX940-NEXT: ;;#ASMEND 15885; GFX940-NEXT: s_lshr_b32 s1, s6, 16 15886; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 15887; 
GFX940-NEXT: ;;#ASMSTART 15888; GFX940-NEXT: ; use s8 15889; GFX940-NEXT: ;;#ASMEND 15890; GFX940-NEXT: s_setpc_b64 s[30:31] 15891 %vec0 = call <8 x i16> asm "; def $0", "=s"() 15892 %vec1 = call <8 x i16> asm "; def $0", "=s"() 15893 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 13, i32 0> 15894 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 15895 ret void 15896} 15897 15898define void @s_shuffle_v2i16_v8i16__14_0() { 15899; GFX900-LABEL: s_shuffle_v2i16_v8i16__14_0: 15900; GFX900: ; %bb.0: 15901; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15902; GFX900-NEXT: ;;#ASMSTART 15903; GFX900-NEXT: ; def s[8:11] 15904; GFX900-NEXT: ;;#ASMEND 15905; GFX900-NEXT: ;;#ASMSTART 15906; GFX900-NEXT: ; def s[4:7] 15907; GFX900-NEXT: ;;#ASMEND 15908; GFX900-NEXT: s_pack_ll_b32_b16 s8, s11, s4 15909; GFX900-NEXT: ;;#ASMSTART 15910; GFX900-NEXT: ; use s8 15911; GFX900-NEXT: ;;#ASMEND 15912; GFX900-NEXT: s_setpc_b64 s[30:31] 15913; 15914; GFX90A-LABEL: s_shuffle_v2i16_v8i16__14_0: 15915; GFX90A: ; %bb.0: 15916; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15917; GFX90A-NEXT: ;;#ASMSTART 15918; GFX90A-NEXT: ; def s[8:11] 15919; GFX90A-NEXT: ;;#ASMEND 15920; GFX90A-NEXT: ;;#ASMSTART 15921; GFX90A-NEXT: ; def s[4:7] 15922; GFX90A-NEXT: ;;#ASMEND 15923; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s11, s4 15924; GFX90A-NEXT: ;;#ASMSTART 15925; GFX90A-NEXT: ; use s8 15926; GFX90A-NEXT: ;;#ASMEND 15927; GFX90A-NEXT: s_setpc_b64 s[30:31] 15928; 15929; GFX940-LABEL: s_shuffle_v2i16_v8i16__14_0: 15930; GFX940: ; %bb.0: 15931; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15932; GFX940-NEXT: ;;#ASMSTART 15933; GFX940-NEXT: ; def s[0:3] 15934; GFX940-NEXT: ;;#ASMEND 15935; GFX940-NEXT: ;;#ASMSTART 15936; GFX940-NEXT: ; def s[4:7] 15937; GFX940-NEXT: ;;#ASMEND 15938; GFX940-NEXT: s_pack_ll_b32_b16 s8, s7, s0 15939; GFX940-NEXT: ;;#ASMSTART 15940; GFX940-NEXT: ; use s8 15941; GFX940-NEXT: ;;#ASMEND 15942; GFX940-NEXT: s_setpc_b64 s[30:31] 
15943 %vec0 = call <8 x i16> asm "; def $0", "=s"() 15944 %vec1 = call <8 x i16> asm "; def $0", "=s"() 15945 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 14, i32 0> 15946 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 15947 ret void 15948} 15949 15950define void @s_shuffle_v2i16_v8i16__u_1() { 15951; GFX900-LABEL: s_shuffle_v2i16_v8i16__u_1: 15952; GFX900: ; %bb.0: 15953; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15954; GFX900-NEXT: ;;#ASMSTART 15955; GFX900-NEXT: ; def s[8:11] 15956; GFX900-NEXT: ;;#ASMEND 15957; GFX900-NEXT: ;;#ASMSTART 15958; GFX900-NEXT: ; use s8 15959; GFX900-NEXT: ;;#ASMEND 15960; GFX900-NEXT: s_setpc_b64 s[30:31] 15961; 15962; GFX90A-LABEL: s_shuffle_v2i16_v8i16__u_1: 15963; GFX90A: ; %bb.0: 15964; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15965; GFX90A-NEXT: ;;#ASMSTART 15966; GFX90A-NEXT: ; def s[8:11] 15967; GFX90A-NEXT: ;;#ASMEND 15968; GFX90A-NEXT: ;;#ASMSTART 15969; GFX90A-NEXT: ; use s8 15970; GFX90A-NEXT: ;;#ASMEND 15971; GFX90A-NEXT: s_setpc_b64 s[30:31] 15972; 15973; GFX940-LABEL: s_shuffle_v2i16_v8i16__u_1: 15974; GFX940: ; %bb.0: 15975; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15976; GFX940-NEXT: ;;#ASMSTART 15977; GFX940-NEXT: ; def s[8:11] 15978; GFX940-NEXT: ;;#ASMEND 15979; GFX940-NEXT: s_nop 0 15980; GFX940-NEXT: ;;#ASMSTART 15981; GFX940-NEXT: ; use s8 15982; GFX940-NEXT: ;;#ASMEND 15983; GFX940-NEXT: s_setpc_b64 s[30:31] 15984 %vec0 = call <8 x i16> asm "; def $0", "=s"() 15985 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 poison, i32 1> 15986 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 15987 ret void 15988} 15989 15990define void @s_shuffle_v2i16_v8i16__0_1() { 15991; GFX900-LABEL: s_shuffle_v2i16_v8i16__0_1: 15992; GFX900: ; %bb.0: 15993; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15994; GFX900-NEXT: ;;#ASMSTART 15995; GFX900-NEXT: ; def s[8:11] 15996; GFX900-NEXT: ;;#ASMEND 15997; GFX900-NEXT: 
;;#ASMSTART 15998; GFX900-NEXT: ; use s8 15999; GFX900-NEXT: ;;#ASMEND 16000; GFX900-NEXT: s_setpc_b64 s[30:31] 16001; 16002; GFX90A-LABEL: s_shuffle_v2i16_v8i16__0_1: 16003; GFX90A: ; %bb.0: 16004; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16005; GFX90A-NEXT: ;;#ASMSTART 16006; GFX90A-NEXT: ; def s[8:11] 16007; GFX90A-NEXT: ;;#ASMEND 16008; GFX90A-NEXT: ;;#ASMSTART 16009; GFX90A-NEXT: ; use s8 16010; GFX90A-NEXT: ;;#ASMEND 16011; GFX90A-NEXT: s_setpc_b64 s[30:31] 16012; 16013; GFX940-LABEL: s_shuffle_v2i16_v8i16__0_1: 16014; GFX940: ; %bb.0: 16015; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16016; GFX940-NEXT: ;;#ASMSTART 16017; GFX940-NEXT: ; def s[8:11] 16018; GFX940-NEXT: ;;#ASMEND 16019; GFX940-NEXT: s_nop 0 16020; GFX940-NEXT: ;;#ASMSTART 16021; GFX940-NEXT: ; use s8 16022; GFX940-NEXT: ;;#ASMEND 16023; GFX940-NEXT: s_setpc_b64 s[30:31] 16024 %vec0 = call <8 x i16> asm "; def $0", "=s"() 16025 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 0, i32 1> 16026 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 16027 ret void 16028} 16029 16030define void @s_shuffle_v2i16_v8i16__1_1() { 16031; GFX900-LABEL: s_shuffle_v2i16_v8i16__1_1: 16032; GFX900: ; %bb.0: 16033; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16034; GFX900-NEXT: ;;#ASMSTART 16035; GFX900-NEXT: ; def s[4:7] 16036; GFX900-NEXT: ;;#ASMEND 16037; GFX900-NEXT: s_pack_hh_b32_b16 s8, s4, s4 16038; GFX900-NEXT: ;;#ASMSTART 16039; GFX900-NEXT: ; use s8 16040; GFX900-NEXT: ;;#ASMEND 16041; GFX900-NEXT: s_setpc_b64 s[30:31] 16042; 16043; GFX90A-LABEL: s_shuffle_v2i16_v8i16__1_1: 16044; GFX90A: ; %bb.0: 16045; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16046; GFX90A-NEXT: ;;#ASMSTART 16047; GFX90A-NEXT: ; def s[4:7] 16048; GFX90A-NEXT: ;;#ASMEND 16049; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s4, s4 16050; GFX90A-NEXT: ;;#ASMSTART 16051; GFX90A-NEXT: ; use s8 16052; GFX90A-NEXT: ;;#ASMEND 16053; GFX90A-NEXT: s_setpc_b64 s[30:31] 16054; 16055; 
GFX940-LABEL: s_shuffle_v2i16_v8i16__1_1: 16056; GFX940: ; %bb.0: 16057; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16058; GFX940-NEXT: ;;#ASMSTART 16059; GFX940-NEXT: ; def s[0:3] 16060; GFX940-NEXT: ;;#ASMEND 16061; GFX940-NEXT: s_pack_hh_b32_b16 s8, s0, s0 16062; GFX940-NEXT: ;;#ASMSTART 16063; GFX940-NEXT: ; use s8 16064; GFX940-NEXT: ;;#ASMEND 16065; GFX940-NEXT: s_setpc_b64 s[30:31] 16066 %vec0 = call <8 x i16> asm "; def $0", "=s"() 16067 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 1, i32 1> 16068 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 16069 ret void 16070} 16071 16072define void @s_shuffle_v2i16_v8i16__2_1() { 16073; GFX900-LABEL: s_shuffle_v2i16_v8i16__2_1: 16074; GFX900: ; %bb.0: 16075; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16076; GFX900-NEXT: ;;#ASMSTART 16077; GFX900-NEXT: ; def s[4:7] 16078; GFX900-NEXT: ;;#ASMEND 16079; GFX900-NEXT: s_pack_lh_b32_b16 s8, s5, s4 16080; GFX900-NEXT: ;;#ASMSTART 16081; GFX900-NEXT: ; use s8 16082; GFX900-NEXT: ;;#ASMEND 16083; GFX900-NEXT: s_setpc_b64 s[30:31] 16084; 16085; GFX90A-LABEL: s_shuffle_v2i16_v8i16__2_1: 16086; GFX90A: ; %bb.0: 16087; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16088; GFX90A-NEXT: ;;#ASMSTART 16089; GFX90A-NEXT: ; def s[4:7] 16090; GFX90A-NEXT: ;;#ASMEND 16091; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s5, s4 16092; GFX90A-NEXT: ;;#ASMSTART 16093; GFX90A-NEXT: ; use s8 16094; GFX90A-NEXT: ;;#ASMEND 16095; GFX90A-NEXT: s_setpc_b64 s[30:31] 16096; 16097; GFX940-LABEL: s_shuffle_v2i16_v8i16__2_1: 16098; GFX940: ; %bb.0: 16099; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16100; GFX940-NEXT: ;;#ASMSTART 16101; GFX940-NEXT: ; def s[0:3] 16102; GFX940-NEXT: ;;#ASMEND 16103; GFX940-NEXT: s_pack_lh_b32_b16 s8, s1, s0 16104; GFX940-NEXT: ;;#ASMSTART 16105; GFX940-NEXT: ; use s8 16106; GFX940-NEXT: ;;#ASMEND 16107; GFX940-NEXT: s_setpc_b64 s[30:31] 16108 %vec0 = call <8 x i16> asm "; def $0", "=s"() 16109 %shuf = 
shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 2, i32 1> 16110 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 16111 ret void 16112} 16113 16114define void @s_shuffle_v2i16_v8i16__3_1() { 16115; GFX900-LABEL: s_shuffle_v2i16_v8i16__3_1: 16116; GFX900: ; %bb.0: 16117; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16118; GFX900-NEXT: ;;#ASMSTART 16119; GFX900-NEXT: ; def s[4:7] 16120; GFX900-NEXT: ;;#ASMEND 16121; GFX900-NEXT: s_pack_hh_b32_b16 s8, s5, s4 16122; GFX900-NEXT: ;;#ASMSTART 16123; GFX900-NEXT: ; use s8 16124; GFX900-NEXT: ;;#ASMEND 16125; GFX900-NEXT: s_setpc_b64 s[30:31] 16126; 16127; GFX90A-LABEL: s_shuffle_v2i16_v8i16__3_1: 16128; GFX90A: ; %bb.0: 16129; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16130; GFX90A-NEXT: ;;#ASMSTART 16131; GFX90A-NEXT: ; def s[4:7] 16132; GFX90A-NEXT: ;;#ASMEND 16133; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s5, s4 16134; GFX90A-NEXT: ;;#ASMSTART 16135; GFX90A-NEXT: ; use s8 16136; GFX90A-NEXT: ;;#ASMEND 16137; GFX90A-NEXT: s_setpc_b64 s[30:31] 16138; 16139; GFX940-LABEL: s_shuffle_v2i16_v8i16__3_1: 16140; GFX940: ; %bb.0: 16141; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16142; GFX940-NEXT: ;;#ASMSTART 16143; GFX940-NEXT: ; def s[0:3] 16144; GFX940-NEXT: ;;#ASMEND 16145; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s0 16146; GFX940-NEXT: ;;#ASMSTART 16147; GFX940-NEXT: ; use s8 16148; GFX940-NEXT: ;;#ASMEND 16149; GFX940-NEXT: s_setpc_b64 s[30:31] 16150 %vec0 = call <8 x i16> asm "; def $0", "=s"() 16151 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 3, i32 1> 16152 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 16153 ret void 16154} 16155 16156define void @s_shuffle_v2i16_v8i16__4_1() { 16157; GFX900-LABEL: s_shuffle_v2i16_v8i16__4_1: 16158; GFX900: ; %bb.0: 16159; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16160; GFX900-NEXT: ;;#ASMSTART 16161; GFX900-NEXT: ; def s[4:7] 16162; GFX900-NEXT: ;;#ASMEND 16163; GFX900-NEXT: 
s_pack_lh_b32_b16 s8, s6, s4 16164; GFX900-NEXT: ;;#ASMSTART 16165; GFX900-NEXT: ; use s8 16166; GFX900-NEXT: ;;#ASMEND 16167; GFX900-NEXT: s_setpc_b64 s[30:31] 16168; 16169; GFX90A-LABEL: s_shuffle_v2i16_v8i16__4_1: 16170; GFX90A: ; %bb.0: 16171; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16172; GFX90A-NEXT: ;;#ASMSTART 16173; GFX90A-NEXT: ; def s[4:7] 16174; GFX90A-NEXT: ;;#ASMEND 16175; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s6, s4 16176; GFX90A-NEXT: ;;#ASMSTART 16177; GFX90A-NEXT: ; use s8 16178; GFX90A-NEXT: ;;#ASMEND 16179; GFX90A-NEXT: s_setpc_b64 s[30:31] 16180; 16181; GFX940-LABEL: s_shuffle_v2i16_v8i16__4_1: 16182; GFX940: ; %bb.0: 16183; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16184; GFX940-NEXT: ;;#ASMSTART 16185; GFX940-NEXT: ; def s[0:3] 16186; GFX940-NEXT: ;;#ASMEND 16187; GFX940-NEXT: s_pack_lh_b32_b16 s8, s2, s0 16188; GFX940-NEXT: ;;#ASMSTART 16189; GFX940-NEXT: ; use s8 16190; GFX940-NEXT: ;;#ASMEND 16191; GFX940-NEXT: s_setpc_b64 s[30:31] 16192 %vec0 = call <8 x i16> asm "; def $0", "=s"() 16193 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 4, i32 1> 16194 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 16195 ret void 16196} 16197 16198define void @s_shuffle_v2i16_v8i16__5_1() { 16199; GFX900-LABEL: s_shuffle_v2i16_v8i16__5_1: 16200; GFX900: ; %bb.0: 16201; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16202; GFX900-NEXT: ;;#ASMSTART 16203; GFX900-NEXT: ; def s[4:7] 16204; GFX900-NEXT: ;;#ASMEND 16205; GFX900-NEXT: s_pack_hh_b32_b16 s8, s6, s4 16206; GFX900-NEXT: ;;#ASMSTART 16207; GFX900-NEXT: ; use s8 16208; GFX900-NEXT: ;;#ASMEND 16209; GFX900-NEXT: s_setpc_b64 s[30:31] 16210; 16211; GFX90A-LABEL: s_shuffle_v2i16_v8i16__5_1: 16212; GFX90A: ; %bb.0: 16213; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16214; GFX90A-NEXT: ;;#ASMSTART 16215; GFX90A-NEXT: ; def s[4:7] 16216; GFX90A-NEXT: ;;#ASMEND 16217; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s6, s4 16218; GFX90A-NEXT: 
;;#ASMSTART 16219; GFX90A-NEXT: ; use s8 16220; GFX90A-NEXT: ;;#ASMEND 16221; GFX90A-NEXT: s_setpc_b64 s[30:31] 16222; 16223; GFX940-LABEL: s_shuffle_v2i16_v8i16__5_1: 16224; GFX940: ; %bb.0: 16225; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16226; GFX940-NEXT: ;;#ASMSTART 16227; GFX940-NEXT: ; def s[0:3] 16228; GFX940-NEXT: ;;#ASMEND 16229; GFX940-NEXT: s_pack_hh_b32_b16 s8, s2, s0 16230; GFX940-NEXT: ;;#ASMSTART 16231; GFX940-NEXT: ; use s8 16232; GFX940-NEXT: ;;#ASMEND 16233; GFX940-NEXT: s_setpc_b64 s[30:31] 16234 %vec0 = call <8 x i16> asm "; def $0", "=s"() 16235 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 5, i32 1> 16236 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 16237 ret void 16238} 16239 16240define void @s_shuffle_v2i16_v8i16__6_1() { 16241; GFX900-LABEL: s_shuffle_v2i16_v8i16__6_1: 16242; GFX900: ; %bb.0: 16243; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16244; GFX900-NEXT: ;;#ASMSTART 16245; GFX900-NEXT: ; def s[4:7] 16246; GFX900-NEXT: ;;#ASMEND 16247; GFX900-NEXT: s_pack_lh_b32_b16 s8, s7, s4 16248; GFX900-NEXT: ;;#ASMSTART 16249; GFX900-NEXT: ; use s8 16250; GFX900-NEXT: ;;#ASMEND 16251; GFX900-NEXT: s_setpc_b64 s[30:31] 16252; 16253; GFX90A-LABEL: s_shuffle_v2i16_v8i16__6_1: 16254; GFX90A: ; %bb.0: 16255; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16256; GFX90A-NEXT: ;;#ASMSTART 16257; GFX90A-NEXT: ; def s[4:7] 16258; GFX90A-NEXT: ;;#ASMEND 16259; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s7, s4 16260; GFX90A-NEXT: ;;#ASMSTART 16261; GFX90A-NEXT: ; use s8 16262; GFX90A-NEXT: ;;#ASMEND 16263; GFX90A-NEXT: s_setpc_b64 s[30:31] 16264; 16265; GFX940-LABEL: s_shuffle_v2i16_v8i16__6_1: 16266; GFX940: ; %bb.0: 16267; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16268; GFX940-NEXT: ;;#ASMSTART 16269; GFX940-NEXT: ; def s[0:3] 16270; GFX940-NEXT: ;;#ASMEND 16271; GFX940-NEXT: s_pack_lh_b32_b16 s8, s3, s0 16272; GFX940-NEXT: ;;#ASMSTART 16273; GFX940-NEXT: ; use s8 16274; 
GFX940-NEXT: ;;#ASMEND 16275; GFX940-NEXT: s_setpc_b64 s[30:31] 16276 %vec0 = call <8 x i16> asm "; def $0", "=s"() 16277 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 6, i32 1> 16278 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 16279 ret void 16280} 16281 16282define void @s_shuffle_v2i16_v8i16__7_1() { 16283; GFX900-LABEL: s_shuffle_v2i16_v8i16__7_1: 16284; GFX900: ; %bb.0: 16285; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16286; GFX900-NEXT: ;;#ASMSTART 16287; GFX900-NEXT: ; def s[4:7] 16288; GFX900-NEXT: ;;#ASMEND 16289; GFX900-NEXT: s_pack_hh_b32_b16 s8, s7, s4 16290; GFX900-NEXT: ;;#ASMSTART 16291; GFX900-NEXT: ; use s8 16292; GFX900-NEXT: ;;#ASMEND 16293; GFX900-NEXT: s_setpc_b64 s[30:31] 16294; 16295; GFX90A-LABEL: s_shuffle_v2i16_v8i16__7_1: 16296; GFX90A: ; %bb.0: 16297; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16298; GFX90A-NEXT: ;;#ASMSTART 16299; GFX90A-NEXT: ; def s[4:7] 16300; GFX90A-NEXT: ;;#ASMEND 16301; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s7, s4 16302; GFX90A-NEXT: ;;#ASMSTART 16303; GFX90A-NEXT: ; use s8 16304; GFX90A-NEXT: ;;#ASMEND 16305; GFX90A-NEXT: s_setpc_b64 s[30:31] 16306; 16307; GFX940-LABEL: s_shuffle_v2i16_v8i16__7_1: 16308; GFX940: ; %bb.0: 16309; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16310; GFX940-NEXT: ;;#ASMSTART 16311; GFX940-NEXT: ; def s[0:3] 16312; GFX940-NEXT: ;;#ASMEND 16313; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s0 16314; GFX940-NEXT: ;;#ASMSTART 16315; GFX940-NEXT: ; use s8 16316; GFX940-NEXT: ;;#ASMEND 16317; GFX940-NEXT: s_setpc_b64 s[30:31] 16318 %vec0 = call <8 x i16> asm "; def $0", "=s"() 16319 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 7, i32 1> 16320 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 16321 ret void 16322} 16323 16324define void @s_shuffle_v2i16_v8i16__8_1() { 16325; GFX900-LABEL: s_shuffle_v2i16_v8i16__8_1: 16326; GFX900: ; %bb.0: 16327; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) 16328; GFX900-NEXT: ;;#ASMSTART 16329; GFX900-NEXT: ; def s[8:11] 16330; GFX900-NEXT: ;;#ASMEND 16331; GFX900-NEXT: ;;#ASMSTART 16332; GFX900-NEXT: ; use s8 16333; GFX900-NEXT: ;;#ASMEND 16334; GFX900-NEXT: s_setpc_b64 s[30:31] 16335; 16336; GFX90A-LABEL: s_shuffle_v2i16_v8i16__8_1: 16337; GFX90A: ; %bb.0: 16338; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16339; GFX90A-NEXT: ;;#ASMSTART 16340; GFX90A-NEXT: ; def s[8:11] 16341; GFX90A-NEXT: ;;#ASMEND 16342; GFX90A-NEXT: ;;#ASMSTART 16343; GFX90A-NEXT: ; use s8 16344; GFX90A-NEXT: ;;#ASMEND 16345; GFX90A-NEXT: s_setpc_b64 s[30:31] 16346; 16347; GFX940-LABEL: s_shuffle_v2i16_v8i16__8_1: 16348; GFX940: ; %bb.0: 16349; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16350; GFX940-NEXT: ;;#ASMSTART 16351; GFX940-NEXT: ; def s[8:11] 16352; GFX940-NEXT: ;;#ASMEND 16353; GFX940-NEXT: s_nop 0 16354; GFX940-NEXT: ;;#ASMSTART 16355; GFX940-NEXT: ; use s8 16356; GFX940-NEXT: ;;#ASMEND 16357; GFX940-NEXT: s_setpc_b64 s[30:31] 16358 %vec0 = call <8 x i16> asm "; def $0", "=s"() 16359 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 8, i32 1> 16360 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 16361 ret void 16362} 16363 16364define void @s_shuffle_v2i16_v8i16__9_1() { 16365; GFX900-LABEL: s_shuffle_v2i16_v8i16__9_1: 16366; GFX900: ; %bb.0: 16367; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16368; GFX900-NEXT: ;;#ASMSTART 16369; GFX900-NEXT: ; def s[8:11] 16370; GFX900-NEXT: ;;#ASMEND 16371; GFX900-NEXT: ;;#ASMSTART 16372; GFX900-NEXT: ; def s[4:7] 16373; GFX900-NEXT: ;;#ASMEND 16374; GFX900-NEXT: s_pack_hh_b32_b16 s8, s8, s4 16375; GFX900-NEXT: ;;#ASMSTART 16376; GFX900-NEXT: ; use s8 16377; GFX900-NEXT: ;;#ASMEND 16378; GFX900-NEXT: s_setpc_b64 s[30:31] 16379; 16380; GFX90A-LABEL: s_shuffle_v2i16_v8i16__9_1: 16381; GFX90A: ; %bb.0: 16382; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16383; GFX90A-NEXT: ;;#ASMSTART 16384; GFX90A-NEXT: ; def s[8:11] 
16385; GFX90A-NEXT: ;;#ASMEND 16386; GFX90A-NEXT: ;;#ASMSTART 16387; GFX90A-NEXT: ; def s[4:7] 16388; GFX90A-NEXT: ;;#ASMEND 16389; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s8, s4 16390; GFX90A-NEXT: ;;#ASMSTART 16391; GFX90A-NEXT: ; use s8 16392; GFX90A-NEXT: ;;#ASMEND 16393; GFX90A-NEXT: s_setpc_b64 s[30:31] 16394; 16395; GFX940-LABEL: s_shuffle_v2i16_v8i16__9_1: 16396; GFX940: ; %bb.0: 16397; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16398; GFX940-NEXT: ;;#ASMSTART 16399; GFX940-NEXT: ; def s[0:3] 16400; GFX940-NEXT: ;;#ASMEND 16401; GFX940-NEXT: ;;#ASMSTART 16402; GFX940-NEXT: ; def s[4:7] 16403; GFX940-NEXT: ;;#ASMEND 16404; GFX940-NEXT: s_pack_hh_b32_b16 s8, s4, s0 16405; GFX940-NEXT: ;;#ASMSTART 16406; GFX940-NEXT: ; use s8 16407; GFX940-NEXT: ;;#ASMEND 16408; GFX940-NEXT: s_setpc_b64 s[30:31] 16409 %vec0 = call <8 x i16> asm "; def $0", "=s"() 16410 %vec1 = call <8 x i16> asm "; def $0", "=s"() 16411 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 9, i32 1> 16412 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 16413 ret void 16414} 16415 16416define void @s_shuffle_v2i16_v8i16__10_1() { 16417; GFX900-LABEL: s_shuffle_v2i16_v8i16__10_1: 16418; GFX900: ; %bb.0: 16419; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16420; GFX900-NEXT: ;;#ASMSTART 16421; GFX900-NEXT: ; def s[8:11] 16422; GFX900-NEXT: ;;#ASMEND 16423; GFX900-NEXT: ;;#ASMSTART 16424; GFX900-NEXT: ; def s[4:7] 16425; GFX900-NEXT: ;;#ASMEND 16426; GFX900-NEXT: s_pack_lh_b32_b16 s8, s9, s4 16427; GFX900-NEXT: ;;#ASMSTART 16428; GFX900-NEXT: ; use s8 16429; GFX900-NEXT: ;;#ASMEND 16430; GFX900-NEXT: s_setpc_b64 s[30:31] 16431; 16432; GFX90A-LABEL: s_shuffle_v2i16_v8i16__10_1: 16433; GFX90A: ; %bb.0: 16434; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16435; GFX90A-NEXT: ;;#ASMSTART 16436; GFX90A-NEXT: ; def s[8:11] 16437; GFX90A-NEXT: ;;#ASMEND 16438; GFX90A-NEXT: ;;#ASMSTART 16439; GFX90A-NEXT: ; def s[4:7] 16440; GFX90A-NEXT: ;;#ASMEND 16441; 
GFX90A-NEXT: s_pack_lh_b32_b16 s8, s9, s4 16442; GFX90A-NEXT: ;;#ASMSTART 16443; GFX90A-NEXT: ; use s8 16444; GFX90A-NEXT: ;;#ASMEND 16445; GFX90A-NEXT: s_setpc_b64 s[30:31] 16446; 16447; GFX940-LABEL: s_shuffle_v2i16_v8i16__10_1: 16448; GFX940: ; %bb.0: 16449; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16450; GFX940-NEXT: ;;#ASMSTART 16451; GFX940-NEXT: ; def s[0:3] 16452; GFX940-NEXT: ;;#ASMEND 16453; GFX940-NEXT: ;;#ASMSTART 16454; GFX940-NEXT: ; def s[4:7] 16455; GFX940-NEXT: ;;#ASMEND 16456; GFX940-NEXT: s_pack_lh_b32_b16 s8, s5, s0 16457; GFX940-NEXT: ;;#ASMSTART 16458; GFX940-NEXT: ; use s8 16459; GFX940-NEXT: ;;#ASMEND 16460; GFX940-NEXT: s_setpc_b64 s[30:31] 16461 %vec0 = call <8 x i16> asm "; def $0", "=s"() 16462 %vec1 = call <8 x i16> asm "; def $0", "=s"() 16463 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 10, i32 1> 16464 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 16465 ret void 16466} 16467 16468define void @s_shuffle_v2i16_v8i16__11_1() { 16469; GFX900-LABEL: s_shuffle_v2i16_v8i16__11_1: 16470; GFX900: ; %bb.0: 16471; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16472; GFX900-NEXT: ;;#ASMSTART 16473; GFX900-NEXT: ; def s[8:11] 16474; GFX900-NEXT: ;;#ASMEND 16475; GFX900-NEXT: ;;#ASMSTART 16476; GFX900-NEXT: ; def s[4:7] 16477; GFX900-NEXT: ;;#ASMEND 16478; GFX900-NEXT: s_pack_hh_b32_b16 s8, s9, s4 16479; GFX900-NEXT: ;;#ASMSTART 16480; GFX900-NEXT: ; use s8 16481; GFX900-NEXT: ;;#ASMEND 16482; GFX900-NEXT: s_setpc_b64 s[30:31] 16483; 16484; GFX90A-LABEL: s_shuffle_v2i16_v8i16__11_1: 16485; GFX90A: ; %bb.0: 16486; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16487; GFX90A-NEXT: ;;#ASMSTART 16488; GFX90A-NEXT: ; def s[8:11] 16489; GFX90A-NEXT: ;;#ASMEND 16490; GFX90A-NEXT: ;;#ASMSTART 16491; GFX90A-NEXT: ; def s[4:7] 16492; GFX90A-NEXT: ;;#ASMEND 16493; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s9, s4 16494; GFX90A-NEXT: ;;#ASMSTART 16495; GFX90A-NEXT: ; use s8 16496; GFX90A-NEXT: 
;;#ASMEND 16497; GFX90A-NEXT: s_setpc_b64 s[30:31] 16498; 16499; GFX940-LABEL: s_shuffle_v2i16_v8i16__11_1: 16500; GFX940: ; %bb.0: 16501; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16502; GFX940-NEXT: ;;#ASMSTART 16503; GFX940-NEXT: ; def s[0:3] 16504; GFX940-NEXT: ;;#ASMEND 16505; GFX940-NEXT: ;;#ASMSTART 16506; GFX940-NEXT: ; def s[4:7] 16507; GFX940-NEXT: ;;#ASMEND 16508; GFX940-NEXT: s_pack_hh_b32_b16 s8, s5, s0 16509; GFX940-NEXT: ;;#ASMSTART 16510; GFX940-NEXT: ; use s8 16511; GFX940-NEXT: ;;#ASMEND 16512; GFX940-NEXT: s_setpc_b64 s[30:31] 16513 %vec0 = call <8 x i16> asm "; def $0", "=s"() 16514 %vec1 = call <8 x i16> asm "; def $0", "=s"() 16515 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 11, i32 1> 16516 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 16517 ret void 16518} 16519 16520define void @s_shuffle_v2i16_v8i16__12_1() { 16521; GFX900-LABEL: s_shuffle_v2i16_v8i16__12_1: 16522; GFX900: ; %bb.0: 16523; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16524; GFX900-NEXT: ;;#ASMSTART 16525; GFX900-NEXT: ; def s[8:11] 16526; GFX900-NEXT: ;;#ASMEND 16527; GFX900-NEXT: ;;#ASMSTART 16528; GFX900-NEXT: ; def s[4:7] 16529; GFX900-NEXT: ;;#ASMEND 16530; GFX900-NEXT: s_pack_lh_b32_b16 s8, s10, s4 16531; GFX900-NEXT: ;;#ASMSTART 16532; GFX900-NEXT: ; use s8 16533; GFX900-NEXT: ;;#ASMEND 16534; GFX900-NEXT: s_setpc_b64 s[30:31] 16535; 16536; GFX90A-LABEL: s_shuffle_v2i16_v8i16__12_1: 16537; GFX90A: ; %bb.0: 16538; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16539; GFX90A-NEXT: ;;#ASMSTART 16540; GFX90A-NEXT: ; def s[8:11] 16541; GFX90A-NEXT: ;;#ASMEND 16542; GFX90A-NEXT: ;;#ASMSTART 16543; GFX90A-NEXT: ; def s[4:7] 16544; GFX90A-NEXT: ;;#ASMEND 16545; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s10, s4 16546; GFX90A-NEXT: ;;#ASMSTART 16547; GFX90A-NEXT: ; use s8 16548; GFX90A-NEXT: ;;#ASMEND 16549; GFX90A-NEXT: s_setpc_b64 s[30:31] 16550; 16551; GFX940-LABEL: s_shuffle_v2i16_v8i16__12_1: 16552; GFX940: ; 
%bb.0: 16553; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16554; GFX940-NEXT: ;;#ASMSTART 16555; GFX940-NEXT: ; def s[0:3] 16556; GFX940-NEXT: ;;#ASMEND 16557; GFX940-NEXT: ;;#ASMSTART 16558; GFX940-NEXT: ; def s[4:7] 16559; GFX940-NEXT: ;;#ASMEND 16560; GFX940-NEXT: s_pack_lh_b32_b16 s8, s6, s0 16561; GFX940-NEXT: ;;#ASMSTART 16562; GFX940-NEXT: ; use s8 16563; GFX940-NEXT: ;;#ASMEND 16564; GFX940-NEXT: s_setpc_b64 s[30:31] 16565 %vec0 = call <8 x i16> asm "; def $0", "=s"() 16566 %vec1 = call <8 x i16> asm "; def $0", "=s"() 16567 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 12, i32 1> 16568 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 16569 ret void 16570} 16571 16572define void @s_shuffle_v2i16_v8i16__13_1() { 16573; GFX900-LABEL: s_shuffle_v2i16_v8i16__13_1: 16574; GFX900: ; %bb.0: 16575; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16576; GFX900-NEXT: ;;#ASMSTART 16577; GFX900-NEXT: ; def s[8:11] 16578; GFX900-NEXT: ;;#ASMEND 16579; GFX900-NEXT: ;;#ASMSTART 16580; GFX900-NEXT: ; def s[4:7] 16581; GFX900-NEXT: ;;#ASMEND 16582; GFX900-NEXT: s_pack_hh_b32_b16 s8, s10, s4 16583; GFX900-NEXT: ;;#ASMSTART 16584; GFX900-NEXT: ; use s8 16585; GFX900-NEXT: ;;#ASMEND 16586; GFX900-NEXT: s_setpc_b64 s[30:31] 16587; 16588; GFX90A-LABEL: s_shuffle_v2i16_v8i16__13_1: 16589; GFX90A: ; %bb.0: 16590; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16591; GFX90A-NEXT: ;;#ASMSTART 16592; GFX90A-NEXT: ; def s[8:11] 16593; GFX90A-NEXT: ;;#ASMEND 16594; GFX90A-NEXT: ;;#ASMSTART 16595; GFX90A-NEXT: ; def s[4:7] 16596; GFX90A-NEXT: ;;#ASMEND 16597; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s10, s4 16598; GFX90A-NEXT: ;;#ASMSTART 16599; GFX90A-NEXT: ; use s8 16600; GFX90A-NEXT: ;;#ASMEND 16601; GFX90A-NEXT: s_setpc_b64 s[30:31] 16602; 16603; GFX940-LABEL: s_shuffle_v2i16_v8i16__13_1: 16604; GFX940: ; %bb.0: 16605; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16606; GFX940-NEXT: ;;#ASMSTART 16607; GFX940-NEXT: ; def 
s[0:3] 16608; GFX940-NEXT: ;;#ASMEND 16609; GFX940-NEXT: ;;#ASMSTART 16610; GFX940-NEXT: ; def s[4:7] 16611; GFX940-NEXT: ;;#ASMEND 16612; GFX940-NEXT: s_pack_hh_b32_b16 s8, s6, s0 16613; GFX940-NEXT: ;;#ASMSTART 16614; GFX940-NEXT: ; use s8 16615; GFX940-NEXT: ;;#ASMEND 16616; GFX940-NEXT: s_setpc_b64 s[30:31] 16617 %vec0 = call <8 x i16> asm "; def $0", "=s"() 16618 %vec1 = call <8 x i16> asm "; def $0", "=s"() 16619 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 13, i32 1> 16620 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 16621 ret void 16622} 16623 16624define void @s_shuffle_v2i16_v8i16__14_1() { 16625; GFX900-LABEL: s_shuffle_v2i16_v8i16__14_1: 16626; GFX900: ; %bb.0: 16627; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16628; GFX900-NEXT: ;;#ASMSTART 16629; GFX900-NEXT: ; def s[8:11] 16630; GFX900-NEXT: ;;#ASMEND 16631; GFX900-NEXT: ;;#ASMSTART 16632; GFX900-NEXT: ; def s[4:7] 16633; GFX900-NEXT: ;;#ASMEND 16634; GFX900-NEXT: s_pack_lh_b32_b16 s8, s11, s4 16635; GFX900-NEXT: ;;#ASMSTART 16636; GFX900-NEXT: ; use s8 16637; GFX900-NEXT: ;;#ASMEND 16638; GFX900-NEXT: s_setpc_b64 s[30:31] 16639; 16640; GFX90A-LABEL: s_shuffle_v2i16_v8i16__14_1: 16641; GFX90A: ; %bb.0: 16642; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16643; GFX90A-NEXT: ;;#ASMSTART 16644; GFX90A-NEXT: ; def s[8:11] 16645; GFX90A-NEXT: ;;#ASMEND 16646; GFX90A-NEXT: ;;#ASMSTART 16647; GFX90A-NEXT: ; def s[4:7] 16648; GFX90A-NEXT: ;;#ASMEND 16649; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s11, s4 16650; GFX90A-NEXT: ;;#ASMSTART 16651; GFX90A-NEXT: ; use s8 16652; GFX90A-NEXT: ;;#ASMEND 16653; GFX90A-NEXT: s_setpc_b64 s[30:31] 16654; 16655; GFX940-LABEL: s_shuffle_v2i16_v8i16__14_1: 16656; GFX940: ; %bb.0: 16657; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16658; GFX940-NEXT: ;;#ASMSTART 16659; GFX940-NEXT: ; def s[0:3] 16660; GFX940-NEXT: ;;#ASMEND 16661; GFX940-NEXT: ;;#ASMSTART 16662; GFX940-NEXT: ; def s[4:7] 16663; GFX940-NEXT: 
;;#ASMEND 16664; GFX940-NEXT: s_pack_lh_b32_b16 s8, s7, s0 16665; GFX940-NEXT: ;;#ASMSTART 16666; GFX940-NEXT: ; use s8 16667; GFX940-NEXT: ;;#ASMEND 16668; GFX940-NEXT: s_setpc_b64 s[30:31] 16669 %vec0 = call <8 x i16> asm "; def $0", "=s"() 16670 %vec1 = call <8 x i16> asm "; def $0", "=s"() 16671 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 14, i32 1> 16672 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 16673 ret void 16674} 16675 16676define void @s_shuffle_v2i16_v8i16__u_2() { 16677; GFX900-LABEL: s_shuffle_v2i16_v8i16__u_2: 16678; GFX900: ; %bb.0: 16679; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16680; GFX900-NEXT: ;;#ASMSTART 16681; GFX900-NEXT: ; def s[4:7] 16682; GFX900-NEXT: ;;#ASMEND 16683; GFX900-NEXT: s_lshl_b32 s8, s5, 16 16684; GFX900-NEXT: ;;#ASMSTART 16685; GFX900-NEXT: ; use s8 16686; GFX900-NEXT: ;;#ASMEND 16687; GFX900-NEXT: s_setpc_b64 s[30:31] 16688; 16689; GFX90A-LABEL: s_shuffle_v2i16_v8i16__u_2: 16690; GFX90A: ; %bb.0: 16691; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16692; GFX90A-NEXT: ;;#ASMSTART 16693; GFX90A-NEXT: ; def s[4:7] 16694; GFX90A-NEXT: ;;#ASMEND 16695; GFX90A-NEXT: s_lshl_b32 s8, s5, 16 16696; GFX90A-NEXT: ;;#ASMSTART 16697; GFX90A-NEXT: ; use s8 16698; GFX90A-NEXT: ;;#ASMEND 16699; GFX90A-NEXT: s_setpc_b64 s[30:31] 16700; 16701; GFX940-LABEL: s_shuffle_v2i16_v8i16__u_2: 16702; GFX940: ; %bb.0: 16703; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16704; GFX940-NEXT: ;;#ASMSTART 16705; GFX940-NEXT: ; def s[0:3] 16706; GFX940-NEXT: ;;#ASMEND 16707; GFX940-NEXT: s_lshl_b32 s8, s1, 16 16708; GFX940-NEXT: ;;#ASMSTART 16709; GFX940-NEXT: ; use s8 16710; GFX940-NEXT: ;;#ASMEND 16711; GFX940-NEXT: s_setpc_b64 s[30:31] 16712 %vec0 = call <8 x i16> asm "; def $0", "=s"() 16713 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 poison, i32 2> 16714 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 16715 ret void 16716} 16717 16718define 
void @s_shuffle_v2i16_v8i16__0_2() { 16719; GFX900-LABEL: s_shuffle_v2i16_v8i16__0_2: 16720; GFX900: ; %bb.0: 16721; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16722; GFX900-NEXT: ;;#ASMSTART 16723; GFX900-NEXT: ; def s[4:7] 16724; GFX900-NEXT: ;;#ASMEND 16725; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s5 16726; GFX900-NEXT: ;;#ASMSTART 16727; GFX900-NEXT: ; use s8 16728; GFX900-NEXT: ;;#ASMEND 16729; GFX900-NEXT: s_setpc_b64 s[30:31] 16730; 16731; GFX90A-LABEL: s_shuffle_v2i16_v8i16__0_2: 16732; GFX90A: ; %bb.0: 16733; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16734; GFX90A-NEXT: ;;#ASMSTART 16735; GFX90A-NEXT: ; def s[4:7] 16736; GFX90A-NEXT: ;;#ASMEND 16737; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s5 16738; GFX90A-NEXT: ;;#ASMSTART 16739; GFX90A-NEXT: ; use s8 16740; GFX90A-NEXT: ;;#ASMEND 16741; GFX90A-NEXT: s_setpc_b64 s[30:31] 16742; 16743; GFX940-LABEL: s_shuffle_v2i16_v8i16__0_2: 16744; GFX940: ; %bb.0: 16745; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16746; GFX940-NEXT: ;;#ASMSTART 16747; GFX940-NEXT: ; def s[0:3] 16748; GFX940-NEXT: ;;#ASMEND 16749; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 16750; GFX940-NEXT: ;;#ASMSTART 16751; GFX940-NEXT: ; use s8 16752; GFX940-NEXT: ;;#ASMEND 16753; GFX940-NEXT: s_setpc_b64 s[30:31] 16754 %vec0 = call <8 x i16> asm "; def $0", "=s"() 16755 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 0, i32 2> 16756 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 16757 ret void 16758} 16759 16760define void @s_shuffle_v2i16_v8i16__1_2() { 16761; GFX900-LABEL: s_shuffle_v2i16_v8i16__1_2: 16762; GFX900: ; %bb.0: 16763; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16764; GFX900-NEXT: ;;#ASMSTART 16765; GFX900-NEXT: ; def s[4:7] 16766; GFX900-NEXT: ;;#ASMEND 16767; GFX900-NEXT: s_lshr_b32 s4, s4, 16 16768; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s5 16769; GFX900-NEXT: ;;#ASMSTART 16770; GFX900-NEXT: ; use s8 16771; GFX900-NEXT: ;;#ASMEND 16772; GFX900-NEXT: 
s_setpc_b64 s[30:31] 16773; 16774; GFX90A-LABEL: s_shuffle_v2i16_v8i16__1_2: 16775; GFX90A: ; %bb.0: 16776; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16777; GFX90A-NEXT: ;;#ASMSTART 16778; GFX90A-NEXT: ; def s[4:7] 16779; GFX90A-NEXT: ;;#ASMEND 16780; GFX90A-NEXT: s_lshr_b32 s4, s4, 16 16781; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s5 16782; GFX90A-NEXT: ;;#ASMSTART 16783; GFX90A-NEXT: ; use s8 16784; GFX90A-NEXT: ;;#ASMEND 16785; GFX90A-NEXT: s_setpc_b64 s[30:31] 16786; 16787; GFX940-LABEL: s_shuffle_v2i16_v8i16__1_2: 16788; GFX940: ; %bb.0: 16789; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16790; GFX940-NEXT: ;;#ASMSTART 16791; GFX940-NEXT: ; def s[0:3] 16792; GFX940-NEXT: ;;#ASMEND 16793; GFX940-NEXT: s_lshr_b32 s0, s0, 16 16794; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 16795; GFX940-NEXT: ;;#ASMSTART 16796; GFX940-NEXT: ; use s8 16797; GFX940-NEXT: ;;#ASMEND 16798; GFX940-NEXT: s_setpc_b64 s[30:31] 16799 %vec0 = call <8 x i16> asm "; def $0", "=s"() 16800 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 1, i32 2> 16801 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 16802 ret void 16803} 16804 16805define void @s_shuffle_v2i16_v8i16__2_2() { 16806; GFX900-LABEL: s_shuffle_v2i16_v8i16__2_2: 16807; GFX900: ; %bb.0: 16808; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16809; GFX900-NEXT: ;;#ASMSTART 16810; GFX900-NEXT: ; def s[4:7] 16811; GFX900-NEXT: ;;#ASMEND 16812; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s5 16813; GFX900-NEXT: ;;#ASMSTART 16814; GFX900-NEXT: ; use s8 16815; GFX900-NEXT: ;;#ASMEND 16816; GFX900-NEXT: s_setpc_b64 s[30:31] 16817; 16818; GFX90A-LABEL: s_shuffle_v2i16_v8i16__2_2: 16819; GFX90A: ; %bb.0: 16820; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16821; GFX90A-NEXT: ;;#ASMSTART 16822; GFX90A-NEXT: ; def s[4:7] 16823; GFX90A-NEXT: ;;#ASMEND 16824; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s5 16825; GFX90A-NEXT: ;;#ASMSTART 16826; GFX90A-NEXT: ; use s8 16827; GFX90A-NEXT: 
;;#ASMEND 16828; GFX90A-NEXT: s_setpc_b64 s[30:31] 16829; 16830; GFX940-LABEL: s_shuffle_v2i16_v8i16__2_2: 16831; GFX940: ; %bb.0: 16832; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16833; GFX940-NEXT: ;;#ASMSTART 16834; GFX940-NEXT: ; def s[0:3] 16835; GFX940-NEXT: ;;#ASMEND 16836; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 16837; GFX940-NEXT: ;;#ASMSTART 16838; GFX940-NEXT: ; use s8 16839; GFX940-NEXT: ;;#ASMEND 16840; GFX940-NEXT: s_setpc_b64 s[30:31] 16841 %vec0 = call <8 x i16> asm "; def $0", "=s"() 16842 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 2, i32 2> 16843 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 16844 ret void 16845} 16846 16847define void @s_shuffle_v2i16_v8i16__3_2() { 16848; GFX900-LABEL: s_shuffle_v2i16_v8i16__3_2: 16849; GFX900: ; %bb.0: 16850; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16851; GFX900-NEXT: ;;#ASMSTART 16852; GFX900-NEXT: ; def s[4:7] 16853; GFX900-NEXT: ;;#ASMEND 16854; GFX900-NEXT: s_lshr_b32 s4, s5, 16 16855; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s5 16856; GFX900-NEXT: ;;#ASMSTART 16857; GFX900-NEXT: ; use s8 16858; GFX900-NEXT: ;;#ASMEND 16859; GFX900-NEXT: s_setpc_b64 s[30:31] 16860; 16861; GFX90A-LABEL: s_shuffle_v2i16_v8i16__3_2: 16862; GFX90A: ; %bb.0: 16863; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16864; GFX90A-NEXT: ;;#ASMSTART 16865; GFX90A-NEXT: ; def s[4:7] 16866; GFX90A-NEXT: ;;#ASMEND 16867; GFX90A-NEXT: s_lshr_b32 s4, s5, 16 16868; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s5 16869; GFX90A-NEXT: ;;#ASMSTART 16870; GFX90A-NEXT: ; use s8 16871; GFX90A-NEXT: ;;#ASMEND 16872; GFX90A-NEXT: s_setpc_b64 s[30:31] 16873; 16874; GFX940-LABEL: s_shuffle_v2i16_v8i16__3_2: 16875; GFX940: ; %bb.0: 16876; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16877; GFX940-NEXT: ;;#ASMSTART 16878; GFX940-NEXT: ; def s[0:3] 16879; GFX940-NEXT: ;;#ASMEND 16880; GFX940-NEXT: s_lshr_b32 s0, s1, 16 16881; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 16882; 
GFX940-NEXT: ;;#ASMSTART 16883; GFX940-NEXT: ; use s8 16884; GFX940-NEXT: ;;#ASMEND 16885; GFX940-NEXT: s_setpc_b64 s[30:31] 16886 %vec0 = call <8 x i16> asm "; def $0", "=s"() 16887 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 3, i32 2> 16888 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 16889 ret void 16890} 16891 16892define void @s_shuffle_v2i16_v8i16__4_2() { 16893; GFX900-LABEL: s_shuffle_v2i16_v8i16__4_2: 16894; GFX900: ; %bb.0: 16895; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16896; GFX900-NEXT: ;;#ASMSTART 16897; GFX900-NEXT: ; def s[4:7] 16898; GFX900-NEXT: ;;#ASMEND 16899; GFX900-NEXT: s_pack_ll_b32_b16 s8, s6, s5 16900; GFX900-NEXT: ;;#ASMSTART 16901; GFX900-NEXT: ; use s8 16902; GFX900-NEXT: ;;#ASMEND 16903; GFX900-NEXT: s_setpc_b64 s[30:31] 16904; 16905; GFX90A-LABEL: s_shuffle_v2i16_v8i16__4_2: 16906; GFX90A: ; %bb.0: 16907; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16908; GFX90A-NEXT: ;;#ASMSTART 16909; GFX90A-NEXT: ; def s[4:7] 16910; GFX90A-NEXT: ;;#ASMEND 16911; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s6, s5 16912; GFX90A-NEXT: ;;#ASMSTART 16913; GFX90A-NEXT: ; use s8 16914; GFX90A-NEXT: ;;#ASMEND 16915; GFX90A-NEXT: s_setpc_b64 s[30:31] 16916; 16917; GFX940-LABEL: s_shuffle_v2i16_v8i16__4_2: 16918; GFX940: ; %bb.0: 16919; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16920; GFX940-NEXT: ;;#ASMSTART 16921; GFX940-NEXT: ; def s[0:3] 16922; GFX940-NEXT: ;;#ASMEND 16923; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s1 16924; GFX940-NEXT: ;;#ASMSTART 16925; GFX940-NEXT: ; use s8 16926; GFX940-NEXT: ;;#ASMEND 16927; GFX940-NEXT: s_setpc_b64 s[30:31] 16928 %vec0 = call <8 x i16> asm "; def $0", "=s"() 16929 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 4, i32 2> 16930 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 16931 ret void 16932} 16933 16934define void @s_shuffle_v2i16_v8i16__5_2() { 16935; GFX900-LABEL: s_shuffle_v2i16_v8i16__5_2: 16936; GFX900: ; 
%bb.0: 16937; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16938; GFX900-NEXT: ;;#ASMSTART 16939; GFX900-NEXT: ; def s[4:7] 16940; GFX900-NEXT: ;;#ASMEND 16941; GFX900-NEXT: s_lshr_b32 s4, s6, 16 16942; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s5 16943; GFX900-NEXT: ;;#ASMSTART 16944; GFX900-NEXT: ; use s8 16945; GFX900-NEXT: ;;#ASMEND 16946; GFX900-NEXT: s_setpc_b64 s[30:31] 16947; 16948; GFX90A-LABEL: s_shuffle_v2i16_v8i16__5_2: 16949; GFX90A: ; %bb.0: 16950; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16951; GFX90A-NEXT: ;;#ASMSTART 16952; GFX90A-NEXT: ; def s[4:7] 16953; GFX90A-NEXT: ;;#ASMEND 16954; GFX90A-NEXT: s_lshr_b32 s4, s6, 16 16955; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s5 16956; GFX90A-NEXT: ;;#ASMSTART 16957; GFX90A-NEXT: ; use s8 16958; GFX90A-NEXT: ;;#ASMEND 16959; GFX90A-NEXT: s_setpc_b64 s[30:31] 16960; 16961; GFX940-LABEL: s_shuffle_v2i16_v8i16__5_2: 16962; GFX940: ; %bb.0: 16963; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16964; GFX940-NEXT: ;;#ASMSTART 16965; GFX940-NEXT: ; def s[0:3] 16966; GFX940-NEXT: ;;#ASMEND 16967; GFX940-NEXT: s_lshr_b32 s0, s2, 16 16968; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 16969; GFX940-NEXT: ;;#ASMSTART 16970; GFX940-NEXT: ; use s8 16971; GFX940-NEXT: ;;#ASMEND 16972; GFX940-NEXT: s_setpc_b64 s[30:31] 16973 %vec0 = call <8 x i16> asm "; def $0", "=s"() 16974 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 5, i32 2> 16975 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 16976 ret void 16977} 16978 16979define void @s_shuffle_v2i16_v8i16__6_2() { 16980; GFX900-LABEL: s_shuffle_v2i16_v8i16__6_2: 16981; GFX900: ; %bb.0: 16982; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16983; GFX900-NEXT: ;;#ASMSTART 16984; GFX900-NEXT: ; def s[4:7] 16985; GFX900-NEXT: ;;#ASMEND 16986; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s5 16987; GFX900-NEXT: ;;#ASMSTART 16988; GFX900-NEXT: ; use s8 16989; GFX900-NEXT: ;;#ASMEND 16990; GFX900-NEXT: s_setpc_b64 s[30:31] 
16991; 16992; GFX90A-LABEL: s_shuffle_v2i16_v8i16__6_2: 16993; GFX90A: ; %bb.0: 16994; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16995; GFX90A-NEXT: ;;#ASMSTART 16996; GFX90A-NEXT: ; def s[4:7] 16997; GFX90A-NEXT: ;;#ASMEND 16998; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s5 16999; GFX90A-NEXT: ;;#ASMSTART 17000; GFX90A-NEXT: ; use s8 17001; GFX90A-NEXT: ;;#ASMEND 17002; GFX90A-NEXT: s_setpc_b64 s[30:31] 17003; 17004; GFX940-LABEL: s_shuffle_v2i16_v8i16__6_2: 17005; GFX940: ; %bb.0: 17006; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 17007; GFX940-NEXT: ;;#ASMSTART 17008; GFX940-NEXT: ; def s[0:3] 17009; GFX940-NEXT: ;;#ASMEND 17010; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s1 17011; GFX940-NEXT: ;;#ASMSTART 17012; GFX940-NEXT: ; use s8 17013; GFX940-NEXT: ;;#ASMEND 17014; GFX940-NEXT: s_setpc_b64 s[30:31] 17015 %vec0 = call <8 x i16> asm "; def $0", "=s"() 17016 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 6, i32 2> 17017 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 17018 ret void 17019} 17020 17021define void @s_shuffle_v2i16_v8i16__7_2() { 17022; GFX900-LABEL: s_shuffle_v2i16_v8i16__7_2: 17023; GFX900: ; %bb.0: 17024; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 17025; GFX900-NEXT: ;;#ASMSTART 17026; GFX900-NEXT: ; def s[4:7] 17027; GFX900-NEXT: ;;#ASMEND 17028; GFX900-NEXT: s_lshr_b32 s4, s7, 16 17029; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s5 17030; GFX900-NEXT: ;;#ASMSTART 17031; GFX900-NEXT: ; use s8 17032; GFX900-NEXT: ;;#ASMEND 17033; GFX900-NEXT: s_setpc_b64 s[30:31] 17034; 17035; GFX90A-LABEL: s_shuffle_v2i16_v8i16__7_2: 17036; GFX90A: ; %bb.0: 17037; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 17038; GFX90A-NEXT: ;;#ASMSTART 17039; GFX90A-NEXT: ; def s[4:7] 17040; GFX90A-NEXT: ;;#ASMEND 17041; GFX90A-NEXT: s_lshr_b32 s4, s7, 16 17042; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s5 17043; GFX90A-NEXT: ;;#ASMSTART 17044; GFX90A-NEXT: ; use s8 17045; GFX90A-NEXT: ;;#ASMEND 17046; 
GFX90A-NEXT: s_setpc_b64 s[30:31] 17047; 17048; GFX940-LABEL: s_shuffle_v2i16_v8i16__7_2: 17049; GFX940: ; %bb.0: 17050; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 17051; GFX940-NEXT: ;;#ASMSTART 17052; GFX940-NEXT: ; def s[0:3] 17053; GFX940-NEXT: ;;#ASMEND 17054; GFX940-NEXT: s_lshr_b32 s0, s3, 16 17055; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 17056; GFX940-NEXT: ;;#ASMSTART 17057; GFX940-NEXT: ; use s8 17058; GFX940-NEXT: ;;#ASMEND 17059; GFX940-NEXT: s_setpc_b64 s[30:31] 17060 %vec0 = call <8 x i16> asm "; def $0", "=s"() 17061 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 7, i32 2> 17062 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 17063 ret void 17064} 17065 17066define void @s_shuffle_v2i16_v8i16__8_2() { 17067; GFX900-LABEL: s_shuffle_v2i16_v8i16__8_2: 17068; GFX900: ; %bb.0: 17069; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 17070; GFX900-NEXT: ;;#ASMSTART 17071; GFX900-NEXT: ; def s[4:7] 17072; GFX900-NEXT: ;;#ASMEND 17073; GFX900-NEXT: s_lshl_b32 s8, s5, 16 17074; GFX900-NEXT: ;;#ASMSTART 17075; GFX900-NEXT: ; use s8 17076; GFX900-NEXT: ;;#ASMEND 17077; GFX900-NEXT: s_setpc_b64 s[30:31] 17078; 17079; GFX90A-LABEL: s_shuffle_v2i16_v8i16__8_2: 17080; GFX90A: ; %bb.0: 17081; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 17082; GFX90A-NEXT: ;;#ASMSTART 17083; GFX90A-NEXT: ; def s[4:7] 17084; GFX90A-NEXT: ;;#ASMEND 17085; GFX90A-NEXT: s_lshl_b32 s8, s5, 16 17086; GFX90A-NEXT: ;;#ASMSTART 17087; GFX90A-NEXT: ; use s8 17088; GFX90A-NEXT: ;;#ASMEND 17089; GFX90A-NEXT: s_setpc_b64 s[30:31] 17090; 17091; GFX940-LABEL: s_shuffle_v2i16_v8i16__8_2: 17092; GFX940: ; %bb.0: 17093; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 17094; GFX940-NEXT: ;;#ASMSTART 17095; GFX940-NEXT: ; def s[0:3] 17096; GFX940-NEXT: ;;#ASMEND 17097; GFX940-NEXT: s_lshl_b32 s8, s1, 16 17098; GFX940-NEXT: ;;#ASMSTART 17099; GFX940-NEXT: ; use s8 17100; GFX940-NEXT: ;;#ASMEND 17101; GFX940-NEXT: s_setpc_b64 s[30:31] 
17102 %vec0 = call <8 x i16> asm "; def $0", "=s"() 17103 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 8, i32 2> 17104 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 17105 ret void 17106} 17107 17108define void @s_shuffle_v2i16_v8i16__9_2() { 17109; GFX900-LABEL: s_shuffle_v2i16_v8i16__9_2: 17110; GFX900: ; %bb.0: 17111; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 17112; GFX900-NEXT: ;;#ASMSTART 17113; GFX900-NEXT: ; def s[4:7] 17114; GFX900-NEXT: ;;#ASMEND 17115; GFX900-NEXT: ;;#ASMSTART 17116; GFX900-NEXT: ; def s[8:11] 17117; GFX900-NEXT: ;;#ASMEND 17118; GFX900-NEXT: s_lshr_b32 s4, s8, 16 17119; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s5 17120; GFX900-NEXT: ;;#ASMSTART 17121; GFX900-NEXT: ; use s8 17122; GFX900-NEXT: ;;#ASMEND 17123; GFX900-NEXT: s_setpc_b64 s[30:31] 17124; 17125; GFX90A-LABEL: s_shuffle_v2i16_v8i16__9_2: 17126; GFX90A: ; %bb.0: 17127; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 17128; GFX90A-NEXT: ;;#ASMSTART 17129; GFX90A-NEXT: ; def s[4:7] 17130; GFX90A-NEXT: ;;#ASMEND 17131; GFX90A-NEXT: ;;#ASMSTART 17132; GFX90A-NEXT: ; def s[8:11] 17133; GFX90A-NEXT: ;;#ASMEND 17134; GFX90A-NEXT: s_lshr_b32 s4, s8, 16 17135; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s5 17136; GFX90A-NEXT: ;;#ASMSTART 17137; GFX90A-NEXT: ; use s8 17138; GFX90A-NEXT: ;;#ASMEND 17139; GFX90A-NEXT: s_setpc_b64 s[30:31] 17140; 17141; GFX940-LABEL: s_shuffle_v2i16_v8i16__9_2: 17142; GFX940: ; %bb.0: 17143; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 17144; GFX940-NEXT: ;;#ASMSTART 17145; GFX940-NEXT: ; def s[0:3] 17146; GFX940-NEXT: ;;#ASMEND 17147; GFX940-NEXT: ;;#ASMSTART 17148; GFX940-NEXT: ; def s[4:7] 17149; GFX940-NEXT: ;;#ASMEND 17150; GFX940-NEXT: s_lshr_b32 s0, s4, 16 17151; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 17152; GFX940-NEXT: ;;#ASMSTART 17153; GFX940-NEXT: ; use s8 17154; GFX940-NEXT: ;;#ASMEND 17155; GFX940-NEXT: s_setpc_b64 s[30:31] 17156 %vec0 = call <8 x i16> asm "; def $0", "=s"() 17157 
%vec1 = call <8 x i16> asm "; def $0", "=s"() 17158 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 9, i32 2> 17159 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 17160 ret void 17161} 17162 17163define void @s_shuffle_v2i16_v8i16__10_2() { 17164; GFX900-LABEL: s_shuffle_v2i16_v8i16__10_2: 17165; GFX900: ; %bb.0: 17166; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 17167; GFX900-NEXT: ;;#ASMSTART 17168; GFX900-NEXT: ; def s[8:11] 17169; GFX900-NEXT: ;;#ASMEND 17170; GFX900-NEXT: ;;#ASMSTART 17171; GFX900-NEXT: ; def s[4:7] 17172; GFX900-NEXT: ;;#ASMEND 17173; GFX900-NEXT: s_pack_ll_b32_b16 s8, s9, s5 17174; GFX900-NEXT: ;;#ASMSTART 17175; GFX900-NEXT: ; use s8 17176; GFX900-NEXT: ;;#ASMEND 17177; GFX900-NEXT: s_setpc_b64 s[30:31] 17178; 17179; GFX90A-LABEL: s_shuffle_v2i16_v8i16__10_2: 17180; GFX90A: ; %bb.0: 17181; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 17182; GFX90A-NEXT: ;;#ASMSTART 17183; GFX90A-NEXT: ; def s[8:11] 17184; GFX90A-NEXT: ;;#ASMEND 17185; GFX90A-NEXT: ;;#ASMSTART 17186; GFX90A-NEXT: ; def s[4:7] 17187; GFX90A-NEXT: ;;#ASMEND 17188; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s9, s5 17189; GFX90A-NEXT: ;;#ASMSTART 17190; GFX90A-NEXT: ; use s8 17191; GFX90A-NEXT: ;;#ASMEND 17192; GFX90A-NEXT: s_setpc_b64 s[30:31] 17193; 17194; GFX940-LABEL: s_shuffle_v2i16_v8i16__10_2: 17195; GFX940: ; %bb.0: 17196; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 17197; GFX940-NEXT: ;;#ASMSTART 17198; GFX940-NEXT: ; def s[0:3] 17199; GFX940-NEXT: ;;#ASMEND 17200; GFX940-NEXT: ;;#ASMSTART 17201; GFX940-NEXT: ; def s[4:7] 17202; GFX940-NEXT: ;;#ASMEND 17203; GFX940-NEXT: s_pack_ll_b32_b16 s8, s5, s1 17204; GFX940-NEXT: ;;#ASMSTART 17205; GFX940-NEXT: ; use s8 17206; GFX940-NEXT: ;;#ASMEND 17207; GFX940-NEXT: s_setpc_b64 s[30:31] 17208 %vec0 = call <8 x i16> asm "; def $0", "=s"() 17209 %vec1 = call <8 x i16> asm "; def $0", "=s"() 17210 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 10, i32 
2> 17211 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 17212 ret void 17213} 17214 17215define void @s_shuffle_v2i16_v8i16__11_2() { 17216; GFX900-LABEL: s_shuffle_v2i16_v8i16__11_2: 17217; GFX900: ; %bb.0: 17218; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 17219; GFX900-NEXT: ;;#ASMSTART 17220; GFX900-NEXT: ; def s[4:7] 17221; GFX900-NEXT: ;;#ASMEND 17222; GFX900-NEXT: ;;#ASMSTART 17223; GFX900-NEXT: ; def s[8:11] 17224; GFX900-NEXT: ;;#ASMEND 17225; GFX900-NEXT: s_lshr_b32 s4, s9, 16 17226; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s5 17227; GFX900-NEXT: ;;#ASMSTART 17228; GFX900-NEXT: ; use s8 17229; GFX900-NEXT: ;;#ASMEND 17230; GFX900-NEXT: s_setpc_b64 s[30:31] 17231; 17232; GFX90A-LABEL: s_shuffle_v2i16_v8i16__11_2: 17233; GFX90A: ; %bb.0: 17234; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 17235; GFX90A-NEXT: ;;#ASMSTART 17236; GFX90A-NEXT: ; def s[4:7] 17237; GFX90A-NEXT: ;;#ASMEND 17238; GFX90A-NEXT: ;;#ASMSTART 17239; GFX90A-NEXT: ; def s[8:11] 17240; GFX90A-NEXT: ;;#ASMEND 17241; GFX90A-NEXT: s_lshr_b32 s4, s9, 16 17242; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s5 17243; GFX90A-NEXT: ;;#ASMSTART 17244; GFX90A-NEXT: ; use s8 17245; GFX90A-NEXT: ;;#ASMEND 17246; GFX90A-NEXT: s_setpc_b64 s[30:31] 17247; 17248; GFX940-LABEL: s_shuffle_v2i16_v8i16__11_2: 17249; GFX940: ; %bb.0: 17250; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 17251; GFX940-NEXT: ;;#ASMSTART 17252; GFX940-NEXT: ; def s[0:3] 17253; GFX940-NEXT: ;;#ASMEND 17254; GFX940-NEXT: ;;#ASMSTART 17255; GFX940-NEXT: ; def s[4:7] 17256; GFX940-NEXT: ;;#ASMEND 17257; GFX940-NEXT: s_lshr_b32 s0, s5, 16 17258; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 17259; GFX940-NEXT: ;;#ASMSTART 17260; GFX940-NEXT: ; use s8 17261; GFX940-NEXT: ;;#ASMEND 17262; GFX940-NEXT: s_setpc_b64 s[30:31] 17263 %vec0 = call <8 x i16> asm "; def $0", "=s"() 17264 %vec1 = call <8 x i16> asm "; def $0", "=s"() 17265 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 11, i32 2> 
17266 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 17267 ret void 17268} 17269 17270define void @s_shuffle_v2i16_v8i16__12_2() { 17271; GFX900-LABEL: s_shuffle_v2i16_v8i16__12_2: 17272; GFX900: ; %bb.0: 17273; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 17274; GFX900-NEXT: ;;#ASMSTART 17275; GFX900-NEXT: ; def s[8:11] 17276; GFX900-NEXT: ;;#ASMEND 17277; GFX900-NEXT: ;;#ASMSTART 17278; GFX900-NEXT: ; def s[4:7] 17279; GFX900-NEXT: ;;#ASMEND 17280; GFX900-NEXT: s_pack_ll_b32_b16 s8, s10, s5 17281; GFX900-NEXT: ;;#ASMSTART 17282; GFX900-NEXT: ; use s8 17283; GFX900-NEXT: ;;#ASMEND 17284; GFX900-NEXT: s_setpc_b64 s[30:31] 17285; 17286; GFX90A-LABEL: s_shuffle_v2i16_v8i16__12_2: 17287; GFX90A: ; %bb.0: 17288; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 17289; GFX90A-NEXT: ;;#ASMSTART 17290; GFX90A-NEXT: ; def s[8:11] 17291; GFX90A-NEXT: ;;#ASMEND 17292; GFX90A-NEXT: ;;#ASMSTART 17293; GFX90A-NEXT: ; def s[4:7] 17294; GFX90A-NEXT: ;;#ASMEND 17295; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s10, s5 17296; GFX90A-NEXT: ;;#ASMSTART 17297; GFX90A-NEXT: ; use s8 17298; GFX90A-NEXT: ;;#ASMEND 17299; GFX90A-NEXT: s_setpc_b64 s[30:31] 17300; 17301; GFX940-LABEL: s_shuffle_v2i16_v8i16__12_2: 17302; GFX940: ; %bb.0: 17303; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 17304; GFX940-NEXT: ;;#ASMSTART 17305; GFX940-NEXT: ; def s[0:3] 17306; GFX940-NEXT: ;;#ASMEND 17307; GFX940-NEXT: ;;#ASMSTART 17308; GFX940-NEXT: ; def s[4:7] 17309; GFX940-NEXT: ;;#ASMEND 17310; GFX940-NEXT: s_pack_ll_b32_b16 s8, s6, s1 17311; GFX940-NEXT: ;;#ASMSTART 17312; GFX940-NEXT: ; use s8 17313; GFX940-NEXT: ;;#ASMEND 17314; GFX940-NEXT: s_setpc_b64 s[30:31] 17315 %vec0 = call <8 x i16> asm "; def $0", "=s"() 17316 %vec1 = call <8 x i16> asm "; def $0", "=s"() 17317 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 12, i32 2> 17318 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 17319 ret void 17320} 17321 17322define void 
@s_shuffle_v2i16_v8i16__13_2() { 17323; GFX900-LABEL: s_shuffle_v2i16_v8i16__13_2: 17324; GFX900: ; %bb.0: 17325; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 17326; GFX900-NEXT: ;;#ASMSTART 17327; GFX900-NEXT: ; def s[4:7] 17328; GFX900-NEXT: ;;#ASMEND 17329; GFX900-NEXT: ;;#ASMSTART 17330; GFX900-NEXT: ; def s[8:11] 17331; GFX900-NEXT: ;;#ASMEND 17332; GFX900-NEXT: s_lshr_b32 s4, s10, 16 17333; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s5 17334; GFX900-NEXT: ;;#ASMSTART 17335; GFX900-NEXT: ; use s8 17336; GFX900-NEXT: ;;#ASMEND 17337; GFX900-NEXT: s_setpc_b64 s[30:31] 17338; 17339; GFX90A-LABEL: s_shuffle_v2i16_v8i16__13_2: 17340; GFX90A: ; %bb.0: 17341; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 17342; GFX90A-NEXT: ;;#ASMSTART 17343; GFX90A-NEXT: ; def s[4:7] 17344; GFX90A-NEXT: ;;#ASMEND 17345; GFX90A-NEXT: ;;#ASMSTART 17346; GFX90A-NEXT: ; def s[8:11] 17347; GFX90A-NEXT: ;;#ASMEND 17348; GFX90A-NEXT: s_lshr_b32 s4, s10, 16 17349; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s5 17350; GFX90A-NEXT: ;;#ASMSTART 17351; GFX90A-NEXT: ; use s8 17352; GFX90A-NEXT: ;;#ASMEND 17353; GFX90A-NEXT: s_setpc_b64 s[30:31] 17354; 17355; GFX940-LABEL: s_shuffle_v2i16_v8i16__13_2: 17356; GFX940: ; %bb.0: 17357; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 17358; GFX940-NEXT: ;;#ASMSTART 17359; GFX940-NEXT: ; def s[0:3] 17360; GFX940-NEXT: ;;#ASMEND 17361; GFX940-NEXT: ;;#ASMSTART 17362; GFX940-NEXT: ; def s[4:7] 17363; GFX940-NEXT: ;;#ASMEND 17364; GFX940-NEXT: s_lshr_b32 s0, s6, 16 17365; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 17366; GFX940-NEXT: ;;#ASMSTART 17367; GFX940-NEXT: ; use s8 17368; GFX940-NEXT: ;;#ASMEND 17369; GFX940-NEXT: s_setpc_b64 s[30:31] 17370 %vec0 = call <8 x i16> asm "; def $0", "=s"() 17371 %vec1 = call <8 x i16> asm "; def $0", "=s"() 17372 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 13, i32 2> 17373 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 17374 ret void 17375} 17376 17377define void 
@s_shuffle_v2i16_v8i16__14_2() { 17378; GFX900-LABEL: s_shuffle_v2i16_v8i16__14_2: 17379; GFX900: ; %bb.0: 17380; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 17381; GFX900-NEXT: ;;#ASMSTART 17382; GFX900-NEXT: ; def s[8:11] 17383; GFX900-NEXT: ;;#ASMEND 17384; GFX900-NEXT: ;;#ASMSTART 17385; GFX900-NEXT: ; def s[4:7] 17386; GFX900-NEXT: ;;#ASMEND 17387; GFX900-NEXT: s_pack_ll_b32_b16 s8, s11, s5 17388; GFX900-NEXT: ;;#ASMSTART 17389; GFX900-NEXT: ; use s8 17390; GFX900-NEXT: ;;#ASMEND 17391; GFX900-NEXT: s_setpc_b64 s[30:31] 17392; 17393; GFX90A-LABEL: s_shuffle_v2i16_v8i16__14_2: 17394; GFX90A: ; %bb.0: 17395; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 17396; GFX90A-NEXT: ;;#ASMSTART 17397; GFX90A-NEXT: ; def s[8:11] 17398; GFX90A-NEXT: ;;#ASMEND 17399; GFX90A-NEXT: ;;#ASMSTART 17400; GFX90A-NEXT: ; def s[4:7] 17401; GFX90A-NEXT: ;;#ASMEND 17402; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s11, s5 17403; GFX90A-NEXT: ;;#ASMSTART 17404; GFX90A-NEXT: ; use s8 17405; GFX90A-NEXT: ;;#ASMEND 17406; GFX90A-NEXT: s_setpc_b64 s[30:31] 17407; 17408; GFX940-LABEL: s_shuffle_v2i16_v8i16__14_2: 17409; GFX940: ; %bb.0: 17410; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 17411; GFX940-NEXT: ;;#ASMSTART 17412; GFX940-NEXT: ; def s[0:3] 17413; GFX940-NEXT: ;;#ASMEND 17414; GFX940-NEXT: ;;#ASMSTART 17415; GFX940-NEXT: ; def s[4:7] 17416; GFX940-NEXT: ;;#ASMEND 17417; GFX940-NEXT: s_pack_ll_b32_b16 s8, s7, s1 17418; GFX940-NEXT: ;;#ASMSTART 17419; GFX940-NEXT: ; use s8 17420; GFX940-NEXT: ;;#ASMEND 17421; GFX940-NEXT: s_setpc_b64 s[30:31] 17422 %vec0 = call <8 x i16> asm "; def $0", "=s"() 17423 %vec1 = call <8 x i16> asm "; def $0", "=s"() 17424 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 14, i32 2> 17425 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 17426 ret void 17427} 17428 17429define void @s_shuffle_v2i16_v8i16__u_3() { 17430; GFX900-LABEL: s_shuffle_v2i16_v8i16__u_3: 17431; GFX900: ; %bb.0: 17432; GFX900-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 17433; GFX900-NEXT: ;;#ASMSTART 17434; GFX900-NEXT: ; def s[4:7] 17435; GFX900-NEXT: ;;#ASMEND 17436; GFX900-NEXT: s_mov_b32 s8, s5 17437; GFX900-NEXT: ;;#ASMSTART 17438; GFX900-NEXT: ; use s8 17439; GFX900-NEXT: ;;#ASMEND 17440; GFX900-NEXT: s_setpc_b64 s[30:31] 17441; 17442; GFX90A-LABEL: s_shuffle_v2i16_v8i16__u_3: 17443; GFX90A: ; %bb.0: 17444; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 17445; GFX90A-NEXT: ;;#ASMSTART 17446; GFX90A-NEXT: ; def s[4:7] 17447; GFX90A-NEXT: ;;#ASMEND 17448; GFX90A-NEXT: s_mov_b32 s8, s5 17449; GFX90A-NEXT: ;;#ASMSTART 17450; GFX90A-NEXT: ; use s8 17451; GFX90A-NEXT: ;;#ASMEND 17452; GFX90A-NEXT: s_setpc_b64 s[30:31] 17453; 17454; GFX940-LABEL: s_shuffle_v2i16_v8i16__u_3: 17455; GFX940: ; %bb.0: 17456; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 17457; GFX940-NEXT: ;;#ASMSTART 17458; GFX940-NEXT: ; def s[0:3] 17459; GFX940-NEXT: ;;#ASMEND 17460; GFX940-NEXT: s_mov_b32 s8, s1 17461; GFX940-NEXT: ;;#ASMSTART 17462; GFX940-NEXT: ; use s8 17463; GFX940-NEXT: ;;#ASMEND 17464; GFX940-NEXT: s_setpc_b64 s[30:31] 17465 %vec0 = call <8 x i16> asm "; def $0", "=s"() 17466 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 poison, i32 3> 17467 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 17468 ret void 17469} 17470 17471define void @s_shuffle_v2i16_v8i16__0_3() { 17472; GFX900-LABEL: s_shuffle_v2i16_v8i16__0_3: 17473; GFX900: ; %bb.0: 17474; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 17475; GFX900-NEXT: ;;#ASMSTART 17476; GFX900-NEXT: ; def s[4:7] 17477; GFX900-NEXT: ;;#ASMEND 17478; GFX900-NEXT: s_pack_lh_b32_b16 s8, s4, s5 17479; GFX900-NEXT: ;;#ASMSTART 17480; GFX900-NEXT: ; use s8 17481; GFX900-NEXT: ;;#ASMEND 17482; GFX900-NEXT: s_setpc_b64 s[30:31] 17483; 17484; GFX90A-LABEL: s_shuffle_v2i16_v8i16__0_3: 17485; GFX90A: ; %bb.0: 17486; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 17487; GFX90A-NEXT: ;;#ASMSTART 17488; 
GFX90A-NEXT: ; def s[4:7] 17489; GFX90A-NEXT: ;;#ASMEND 17490; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s4, s5 17491; GFX90A-NEXT: ;;#ASMSTART 17492; GFX90A-NEXT: ; use s8 17493; GFX90A-NEXT: ;;#ASMEND 17494; GFX90A-NEXT: s_setpc_b64 s[30:31] 17495; 17496; GFX940-LABEL: s_shuffle_v2i16_v8i16__0_3: 17497; GFX940: ; %bb.0: 17498; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 17499; GFX940-NEXT: ;;#ASMSTART 17500; GFX940-NEXT: ; def s[0:3] 17501; GFX940-NEXT: ;;#ASMEND 17502; GFX940-NEXT: s_pack_lh_b32_b16 s8, s0, s1 17503; GFX940-NEXT: ;;#ASMSTART 17504; GFX940-NEXT: ; use s8 17505; GFX940-NEXT: ;;#ASMEND 17506; GFX940-NEXT: s_setpc_b64 s[30:31] 17507 %vec0 = call <8 x i16> asm "; def $0", "=s"() 17508 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 0, i32 3> 17509 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 17510 ret void 17511} 17512 17513define void @s_shuffle_v2i16_v8i16__1_3() { 17514; GFX900-LABEL: s_shuffle_v2i16_v8i16__1_3: 17515; GFX900: ; %bb.0: 17516; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 17517; GFX900-NEXT: ;;#ASMSTART 17518; GFX900-NEXT: ; def s[4:7] 17519; GFX900-NEXT: ;;#ASMEND 17520; GFX900-NEXT: s_pack_hh_b32_b16 s8, s4, s5 17521; GFX900-NEXT: ;;#ASMSTART 17522; GFX900-NEXT: ; use s8 17523; GFX900-NEXT: ;;#ASMEND 17524; GFX900-NEXT: s_setpc_b64 s[30:31] 17525; 17526; GFX90A-LABEL: s_shuffle_v2i16_v8i16__1_3: 17527; GFX90A: ; %bb.0: 17528; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 17529; GFX90A-NEXT: ;;#ASMSTART 17530; GFX90A-NEXT: ; def s[4:7] 17531; GFX90A-NEXT: ;;#ASMEND 17532; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s4, s5 17533; GFX90A-NEXT: ;;#ASMSTART 17534; GFX90A-NEXT: ; use s8 17535; GFX90A-NEXT: ;;#ASMEND 17536; GFX90A-NEXT: s_setpc_b64 s[30:31] 17537; 17538; GFX940-LABEL: s_shuffle_v2i16_v8i16__1_3: 17539; GFX940: ; %bb.0: 17540; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 17541; GFX940-NEXT: ;;#ASMSTART 17542; GFX940-NEXT: ; def s[0:3] 17543; GFX940-NEXT: ;;#ASMEND 
17544; GFX940-NEXT: s_pack_hh_b32_b16 s8, s0, s1 17545; GFX940-NEXT: ;;#ASMSTART 17546; GFX940-NEXT: ; use s8 17547; GFX940-NEXT: ;;#ASMEND 17548; GFX940-NEXT: s_setpc_b64 s[30:31] 17549 %vec0 = call <8 x i16> asm "; def $0", "=s"() 17550 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 1, i32 3> 17551 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 17552 ret void 17553} 17554 17555define void @s_shuffle_v2i16_v8i16__2_3() { 17556; GFX900-LABEL: s_shuffle_v2i16_v8i16__2_3: 17557; GFX900: ; %bb.0: 17558; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 17559; GFX900-NEXT: ;;#ASMSTART 17560; GFX900-NEXT: ; def s[4:7] 17561; GFX900-NEXT: ;;#ASMEND 17562; GFX900-NEXT: s_mov_b32 s8, s5 17563; GFX900-NEXT: ;;#ASMSTART 17564; GFX900-NEXT: ; use s8 17565; GFX900-NEXT: ;;#ASMEND 17566; GFX900-NEXT: s_setpc_b64 s[30:31] 17567; 17568; GFX90A-LABEL: s_shuffle_v2i16_v8i16__2_3: 17569; GFX90A: ; %bb.0: 17570; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 17571; GFX90A-NEXT: ;;#ASMSTART 17572; GFX90A-NEXT: ; def s[4:7] 17573; GFX90A-NEXT: ;;#ASMEND 17574; GFX90A-NEXT: s_mov_b32 s8, s5 17575; GFX90A-NEXT: ;;#ASMSTART 17576; GFX90A-NEXT: ; use s8 17577; GFX90A-NEXT: ;;#ASMEND 17578; GFX90A-NEXT: s_setpc_b64 s[30:31] 17579; 17580; GFX940-LABEL: s_shuffle_v2i16_v8i16__2_3: 17581; GFX940: ; %bb.0: 17582; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 17583; GFX940-NEXT: ;;#ASMSTART 17584; GFX940-NEXT: ; def s[0:3] 17585; GFX940-NEXT: ;;#ASMEND 17586; GFX940-NEXT: s_mov_b32 s8, s1 17587; GFX940-NEXT: ;;#ASMSTART 17588; GFX940-NEXT: ; use s8 17589; GFX940-NEXT: ;;#ASMEND 17590; GFX940-NEXT: s_setpc_b64 s[30:31] 17591 %vec0 = call <8 x i16> asm "; def $0", "=s"() 17592 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 2, i32 3> 17593 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 17594 ret void 17595} 17596 17597define void @s_shuffle_v2i16_v8i16__3_3() { 17598; GFX900-LABEL: 
s_shuffle_v2i16_v8i16__3_3: 17599; GFX900: ; %bb.0: 17600; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 17601; GFX900-NEXT: ;;#ASMSTART 17602; GFX900-NEXT: ; def s[4:7] 17603; GFX900-NEXT: ;;#ASMEND 17604; GFX900-NEXT: s_pack_hh_b32_b16 s8, s5, s5 17605; GFX900-NEXT: ;;#ASMSTART 17606; GFX900-NEXT: ; use s8 17607; GFX900-NEXT: ;;#ASMEND 17608; GFX900-NEXT: s_setpc_b64 s[30:31] 17609; 17610; GFX90A-LABEL: s_shuffle_v2i16_v8i16__3_3: 17611; GFX90A: ; %bb.0: 17612; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 17613; GFX90A-NEXT: ;;#ASMSTART 17614; GFX90A-NEXT: ; def s[4:7] 17615; GFX90A-NEXT: ;;#ASMEND 17616; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s5, s5 17617; GFX90A-NEXT: ;;#ASMSTART 17618; GFX90A-NEXT: ; use s8 17619; GFX90A-NEXT: ;;#ASMEND 17620; GFX90A-NEXT: s_setpc_b64 s[30:31] 17621; 17622; GFX940-LABEL: s_shuffle_v2i16_v8i16__3_3: 17623; GFX940: ; %bb.0: 17624; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 17625; GFX940-NEXT: ;;#ASMSTART 17626; GFX940-NEXT: ; def s[0:3] 17627; GFX940-NEXT: ;;#ASMEND 17628; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s1 17629; GFX940-NEXT: ;;#ASMSTART 17630; GFX940-NEXT: ; use s8 17631; GFX940-NEXT: ;;#ASMEND 17632; GFX940-NEXT: s_setpc_b64 s[30:31] 17633 %vec0 = call <8 x i16> asm "; def $0", "=s"() 17634 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 3, i32 3> 17635 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 17636 ret void 17637} 17638 17639define void @s_shuffle_v2i16_v8i16__4_3() { 17640; GFX900-LABEL: s_shuffle_v2i16_v8i16__4_3: 17641; GFX900: ; %bb.0: 17642; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 17643; GFX900-NEXT: ;;#ASMSTART 17644; GFX900-NEXT: ; def s[4:7] 17645; GFX900-NEXT: ;;#ASMEND 17646; GFX900-NEXT: s_pack_lh_b32_b16 s8, s6, s5 17647; GFX900-NEXT: ;;#ASMSTART 17648; GFX900-NEXT: ; use s8 17649; GFX900-NEXT: ;;#ASMEND 17650; GFX900-NEXT: s_setpc_b64 s[30:31] 17651; 17652; GFX90A-LABEL: s_shuffle_v2i16_v8i16__4_3: 17653; GFX90A: ; %bb.0: 17654; 
GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 17655; GFX90A-NEXT: ;;#ASMSTART 17656; GFX90A-NEXT: ; def s[4:7] 17657; GFX90A-NEXT: ;;#ASMEND 17658; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s6, s5 17659; GFX90A-NEXT: ;;#ASMSTART 17660; GFX90A-NEXT: ; use s8 17661; GFX90A-NEXT: ;;#ASMEND 17662; GFX90A-NEXT: s_setpc_b64 s[30:31] 17663; 17664; GFX940-LABEL: s_shuffle_v2i16_v8i16__4_3: 17665; GFX940: ; %bb.0: 17666; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 17667; GFX940-NEXT: ;;#ASMSTART 17668; GFX940-NEXT: ; def s[0:3] 17669; GFX940-NEXT: ;;#ASMEND 17670; GFX940-NEXT: s_pack_lh_b32_b16 s8, s2, s1 17671; GFX940-NEXT: ;;#ASMSTART 17672; GFX940-NEXT: ; use s8 17673; GFX940-NEXT: ;;#ASMEND 17674; GFX940-NEXT: s_setpc_b64 s[30:31] 17675 %vec0 = call <8 x i16> asm "; def $0", "=s"() 17676 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 4, i32 3> 17677 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 17678 ret void 17679} 17680 17681define void @s_shuffle_v2i16_v8i16__5_3() { 17682; GFX900-LABEL: s_shuffle_v2i16_v8i16__5_3: 17683; GFX900: ; %bb.0: 17684; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 17685; GFX900-NEXT: ;;#ASMSTART 17686; GFX900-NEXT: ; def s[4:7] 17687; GFX900-NEXT: ;;#ASMEND 17688; GFX900-NEXT: s_pack_hh_b32_b16 s8, s6, s5 17689; GFX900-NEXT: ;;#ASMSTART 17690; GFX900-NEXT: ; use s8 17691; GFX900-NEXT: ;;#ASMEND 17692; GFX900-NEXT: s_setpc_b64 s[30:31] 17693; 17694; GFX90A-LABEL: s_shuffle_v2i16_v8i16__5_3: 17695; GFX90A: ; %bb.0: 17696; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 17697; GFX90A-NEXT: ;;#ASMSTART 17698; GFX90A-NEXT: ; def s[4:7] 17699; GFX90A-NEXT: ;;#ASMEND 17700; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s6, s5 17701; GFX90A-NEXT: ;;#ASMSTART 17702; GFX90A-NEXT: ; use s8 17703; GFX90A-NEXT: ;;#ASMEND 17704; GFX90A-NEXT: s_setpc_b64 s[30:31] 17705; 17706; GFX940-LABEL: s_shuffle_v2i16_v8i16__5_3: 17707; GFX940: ; %bb.0: 17708; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 
17709; GFX940-NEXT: ;;#ASMSTART 17710; GFX940-NEXT: ; def s[0:3] 17711; GFX940-NEXT: ;;#ASMEND 17712; GFX940-NEXT: s_pack_hh_b32_b16 s8, s2, s1 17713; GFX940-NEXT: ;;#ASMSTART 17714; GFX940-NEXT: ; use s8 17715; GFX940-NEXT: ;;#ASMEND 17716; GFX940-NEXT: s_setpc_b64 s[30:31] 17717 %vec0 = call <8 x i16> asm "; def $0", "=s"() 17718 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 5, i32 3> 17719 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 17720 ret void 17721} 17722 17723define void @s_shuffle_v2i16_v8i16__6_3() { 17724; GFX900-LABEL: s_shuffle_v2i16_v8i16__6_3: 17725; GFX900: ; %bb.0: 17726; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 17727; GFX900-NEXT: ;;#ASMSTART 17728; GFX900-NEXT: ; def s[4:7] 17729; GFX900-NEXT: ;;#ASMEND 17730; GFX900-NEXT: s_pack_lh_b32_b16 s8, s7, s5 17731; GFX900-NEXT: ;;#ASMSTART 17732; GFX900-NEXT: ; use s8 17733; GFX900-NEXT: ;;#ASMEND 17734; GFX900-NEXT: s_setpc_b64 s[30:31] 17735; 17736; GFX90A-LABEL: s_shuffle_v2i16_v8i16__6_3: 17737; GFX90A: ; %bb.0: 17738; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 17739; GFX90A-NEXT: ;;#ASMSTART 17740; GFX90A-NEXT: ; def s[4:7] 17741; GFX90A-NEXT: ;;#ASMEND 17742; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s7, s5 17743; GFX90A-NEXT: ;;#ASMSTART 17744; GFX90A-NEXT: ; use s8 17745; GFX90A-NEXT: ;;#ASMEND 17746; GFX90A-NEXT: s_setpc_b64 s[30:31] 17747; 17748; GFX940-LABEL: s_shuffle_v2i16_v8i16__6_3: 17749; GFX940: ; %bb.0: 17750; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 17751; GFX940-NEXT: ;;#ASMSTART 17752; GFX940-NEXT: ; def s[0:3] 17753; GFX940-NEXT: ;;#ASMEND 17754; GFX940-NEXT: s_pack_lh_b32_b16 s8, s3, s1 17755; GFX940-NEXT: ;;#ASMSTART 17756; GFX940-NEXT: ; use s8 17757; GFX940-NEXT: ;;#ASMEND 17758; GFX940-NEXT: s_setpc_b64 s[30:31] 17759 %vec0 = call <8 x i16> asm "; def $0", "=s"() 17760 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 6, i32 3> 17761 call void asm sideeffect "; use $0", "{s8}"(<2 x 
i16> %shuf) 17762 ret void 17763} 17764 17765define void @s_shuffle_v2i16_v8i16__7_3() { 17766; GFX900-LABEL: s_shuffle_v2i16_v8i16__7_3: 17767; GFX900: ; %bb.0: 17768; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 17769; GFX900-NEXT: ;;#ASMSTART 17770; GFX900-NEXT: ; def s[4:7] 17771; GFX900-NEXT: ;;#ASMEND 17772; GFX900-NEXT: s_pack_hh_b32_b16 s8, s7, s5 17773; GFX900-NEXT: ;;#ASMSTART 17774; GFX900-NEXT: ; use s8 17775; GFX900-NEXT: ;;#ASMEND 17776; GFX900-NEXT: s_setpc_b64 s[30:31] 17777; 17778; GFX90A-LABEL: s_shuffle_v2i16_v8i16__7_3: 17779; GFX90A: ; %bb.0: 17780; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 17781; GFX90A-NEXT: ;;#ASMSTART 17782; GFX90A-NEXT: ; def s[4:7] 17783; GFX90A-NEXT: ;;#ASMEND 17784; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s7, s5 17785; GFX90A-NEXT: ;;#ASMSTART 17786; GFX90A-NEXT: ; use s8 17787; GFX90A-NEXT: ;;#ASMEND 17788; GFX90A-NEXT: s_setpc_b64 s[30:31] 17789; 17790; GFX940-LABEL: s_shuffle_v2i16_v8i16__7_3: 17791; GFX940: ; %bb.0: 17792; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 17793; GFX940-NEXT: ;;#ASMSTART 17794; GFX940-NEXT: ; def s[0:3] 17795; GFX940-NEXT: ;;#ASMEND 17796; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s1 17797; GFX940-NEXT: ;;#ASMSTART 17798; GFX940-NEXT: ; use s8 17799; GFX940-NEXT: ;;#ASMEND 17800; GFX940-NEXT: s_setpc_b64 s[30:31] 17801 %vec0 = call <8 x i16> asm "; def $0", "=s"() 17802 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 7, i32 3> 17803 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 17804 ret void 17805} 17806 17807define void @s_shuffle_v2i16_v8i16__8_3() { 17808; GFX900-LABEL: s_shuffle_v2i16_v8i16__8_3: 17809; GFX900: ; %bb.0: 17810; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 17811; GFX900-NEXT: ;;#ASMSTART 17812; GFX900-NEXT: ; def s[4:7] 17813; GFX900-NEXT: ;;#ASMEND 17814; GFX900-NEXT: s_mov_b32 s8, s5 17815; GFX900-NEXT: ;;#ASMSTART 17816; GFX900-NEXT: ; use s8 17817; GFX900-NEXT: ;;#ASMEND 17818; GFX900-NEXT: 
s_setpc_b64 s[30:31] 17819; 17820; GFX90A-LABEL: s_shuffle_v2i16_v8i16__8_3: 17821; GFX90A: ; %bb.0: 17822; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 17823; GFX90A-NEXT: ;;#ASMSTART 17824; GFX90A-NEXT: ; def s[4:7] 17825; GFX90A-NEXT: ;;#ASMEND 17826; GFX90A-NEXT: s_mov_b32 s8, s5 17827; GFX90A-NEXT: ;;#ASMSTART 17828; GFX90A-NEXT: ; use s8 17829; GFX90A-NEXT: ;;#ASMEND 17830; GFX90A-NEXT: s_setpc_b64 s[30:31] 17831; 17832; GFX940-LABEL: s_shuffle_v2i16_v8i16__8_3: 17833; GFX940: ; %bb.0: 17834; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 17835; GFX940-NEXT: ;;#ASMSTART 17836; GFX940-NEXT: ; def s[0:3] 17837; GFX940-NEXT: ;;#ASMEND 17838; GFX940-NEXT: s_mov_b32 s8, s1 17839; GFX940-NEXT: ;;#ASMSTART 17840; GFX940-NEXT: ; use s8 17841; GFX940-NEXT: ;;#ASMEND 17842; GFX940-NEXT: s_setpc_b64 s[30:31] 17843 %vec0 = call <8 x i16> asm "; def $0", "=s"() 17844 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 8, i32 3> 17845 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 17846 ret void 17847} 17848 17849define void @s_shuffle_v2i16_v8i16__9_3() { 17850; GFX900-LABEL: s_shuffle_v2i16_v8i16__9_3: 17851; GFX900: ; %bb.0: 17852; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 17853; GFX900-NEXT: ;;#ASMSTART 17854; GFX900-NEXT: ; def s[8:11] 17855; GFX900-NEXT: ;;#ASMEND 17856; GFX900-NEXT: ;;#ASMSTART 17857; GFX900-NEXT: ; def s[4:7] 17858; GFX900-NEXT: ;;#ASMEND 17859; GFX900-NEXT: s_pack_hh_b32_b16 s8, s8, s5 17860; GFX900-NEXT: ;;#ASMSTART 17861; GFX900-NEXT: ; use s8 17862; GFX900-NEXT: ;;#ASMEND 17863; GFX900-NEXT: s_setpc_b64 s[30:31] 17864; 17865; GFX90A-LABEL: s_shuffle_v2i16_v8i16__9_3: 17866; GFX90A: ; %bb.0: 17867; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 17868; GFX90A-NEXT: ;;#ASMSTART 17869; GFX90A-NEXT: ; def s[8:11] 17870; GFX90A-NEXT: ;;#ASMEND 17871; GFX90A-NEXT: ;;#ASMSTART 17872; GFX90A-NEXT: ; def s[4:7] 17873; GFX90A-NEXT: ;;#ASMEND 17874; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s8, s5 
17875; GFX90A-NEXT: ;;#ASMSTART 17876; GFX90A-NEXT: ; use s8 17877; GFX90A-NEXT: ;;#ASMEND 17878; GFX90A-NEXT: s_setpc_b64 s[30:31] 17879; 17880; GFX940-LABEL: s_shuffle_v2i16_v8i16__9_3: 17881; GFX940: ; %bb.0: 17882; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 17883; GFX940-NEXT: ;;#ASMSTART 17884; GFX940-NEXT: ; def s[0:3] 17885; GFX940-NEXT: ;;#ASMEND 17886; GFX940-NEXT: ;;#ASMSTART 17887; GFX940-NEXT: ; def s[4:7] 17888; GFX940-NEXT: ;;#ASMEND 17889; GFX940-NEXT: s_pack_hh_b32_b16 s8, s4, s1 17890; GFX940-NEXT: ;;#ASMSTART 17891; GFX940-NEXT: ; use s8 17892; GFX940-NEXT: ;;#ASMEND 17893; GFX940-NEXT: s_setpc_b64 s[30:31] 17894 %vec0 = call <8 x i16> asm "; def $0", "=s"() 17895 %vec1 = call <8 x i16> asm "; def $0", "=s"() 17896 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 9, i32 3> 17897 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 17898 ret void 17899} 17900 17901define void @s_shuffle_v2i16_v8i16__10_3() { 17902; GFX900-LABEL: s_shuffle_v2i16_v8i16__10_3: 17903; GFX900: ; %bb.0: 17904; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 17905; GFX900-NEXT: ;;#ASMSTART 17906; GFX900-NEXT: ; def s[8:11] 17907; GFX900-NEXT: ;;#ASMEND 17908; GFX900-NEXT: ;;#ASMSTART 17909; GFX900-NEXT: ; def s[4:7] 17910; GFX900-NEXT: ;;#ASMEND 17911; GFX900-NEXT: s_pack_lh_b32_b16 s8, s9, s5 17912; GFX900-NEXT: ;;#ASMSTART 17913; GFX900-NEXT: ; use s8 17914; GFX900-NEXT: ;;#ASMEND 17915; GFX900-NEXT: s_setpc_b64 s[30:31] 17916; 17917; GFX90A-LABEL: s_shuffle_v2i16_v8i16__10_3: 17918; GFX90A: ; %bb.0: 17919; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 17920; GFX90A-NEXT: ;;#ASMSTART 17921; GFX90A-NEXT: ; def s[8:11] 17922; GFX90A-NEXT: ;;#ASMEND 17923; GFX90A-NEXT: ;;#ASMSTART 17924; GFX90A-NEXT: ; def s[4:7] 17925; GFX90A-NEXT: ;;#ASMEND 17926; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s9, s5 17927; GFX90A-NEXT: ;;#ASMSTART 17928; GFX90A-NEXT: ; use s8 17929; GFX90A-NEXT: ;;#ASMEND 17930; GFX90A-NEXT: s_setpc_b64 s[30:31] 
17931; 17932; GFX940-LABEL: s_shuffle_v2i16_v8i16__10_3: 17933; GFX940: ; %bb.0: 17934; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 17935; GFX940-NEXT: ;;#ASMSTART 17936; GFX940-NEXT: ; def s[0:3] 17937; GFX940-NEXT: ;;#ASMEND 17938; GFX940-NEXT: ;;#ASMSTART 17939; GFX940-NEXT: ; def s[4:7] 17940; GFX940-NEXT: ;;#ASMEND 17941; GFX940-NEXT: s_pack_lh_b32_b16 s8, s5, s1 17942; GFX940-NEXT: ;;#ASMSTART 17943; GFX940-NEXT: ; use s8 17944; GFX940-NEXT: ;;#ASMEND 17945; GFX940-NEXT: s_setpc_b64 s[30:31] 17946 %vec0 = call <8 x i16> asm "; def $0", "=s"() 17947 %vec1 = call <8 x i16> asm "; def $0", "=s"() 17948 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 10, i32 3> 17949 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 17950 ret void 17951} 17952 17953define void @s_shuffle_v2i16_v8i16__11_3() { 17954; GFX900-LABEL: s_shuffle_v2i16_v8i16__11_3: 17955; GFX900: ; %bb.0: 17956; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 17957; GFX900-NEXT: ;;#ASMSTART 17958; GFX900-NEXT: ; def s[8:11] 17959; GFX900-NEXT: ;;#ASMEND 17960; GFX900-NEXT: ;;#ASMSTART 17961; GFX900-NEXT: ; def s[4:7] 17962; GFX900-NEXT: ;;#ASMEND 17963; GFX900-NEXT: s_pack_hh_b32_b16 s8, s9, s5 17964; GFX900-NEXT: ;;#ASMSTART 17965; GFX900-NEXT: ; use s8 17966; GFX900-NEXT: ;;#ASMEND 17967; GFX900-NEXT: s_setpc_b64 s[30:31] 17968; 17969; GFX90A-LABEL: s_shuffle_v2i16_v8i16__11_3: 17970; GFX90A: ; %bb.0: 17971; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 17972; GFX90A-NEXT: ;;#ASMSTART 17973; GFX90A-NEXT: ; def s[8:11] 17974; GFX90A-NEXT: ;;#ASMEND 17975; GFX90A-NEXT: ;;#ASMSTART 17976; GFX90A-NEXT: ; def s[4:7] 17977; GFX90A-NEXT: ;;#ASMEND 17978; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s9, s5 17979; GFX90A-NEXT: ;;#ASMSTART 17980; GFX90A-NEXT: ; use s8 17981; GFX90A-NEXT: ;;#ASMEND 17982; GFX90A-NEXT: s_setpc_b64 s[30:31] 17983; 17984; GFX940-LABEL: s_shuffle_v2i16_v8i16__11_3: 17985; GFX940: ; %bb.0: 17986; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) 17987; GFX940-NEXT: ;;#ASMSTART 17988; GFX940-NEXT: ; def s[0:3] 17989; GFX940-NEXT: ;;#ASMEND 17990; GFX940-NEXT: ;;#ASMSTART 17991; GFX940-NEXT: ; def s[4:7] 17992; GFX940-NEXT: ;;#ASMEND 17993; GFX940-NEXT: s_pack_hh_b32_b16 s8, s5, s1 17994; GFX940-NEXT: ;;#ASMSTART 17995; GFX940-NEXT: ; use s8 17996; GFX940-NEXT: ;;#ASMEND 17997; GFX940-NEXT: s_setpc_b64 s[30:31] 17998 %vec0 = call <8 x i16> asm "; def $0", "=s"() 17999 %vec1 = call <8 x i16> asm "; def $0", "=s"() 18000 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 11, i32 3> 18001 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 18002 ret void 18003} 18004 18005define void @s_shuffle_v2i16_v8i16__12_3() { 18006; GFX900-LABEL: s_shuffle_v2i16_v8i16__12_3: 18007; GFX900: ; %bb.0: 18008; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18009; GFX900-NEXT: ;;#ASMSTART 18010; GFX900-NEXT: ; def s[8:11] 18011; GFX900-NEXT: ;;#ASMEND 18012; GFX900-NEXT: ;;#ASMSTART 18013; GFX900-NEXT: ; def s[4:7] 18014; GFX900-NEXT: ;;#ASMEND 18015; GFX900-NEXT: s_pack_lh_b32_b16 s8, s10, s5 18016; GFX900-NEXT: ;;#ASMSTART 18017; GFX900-NEXT: ; use s8 18018; GFX900-NEXT: ;;#ASMEND 18019; GFX900-NEXT: s_setpc_b64 s[30:31] 18020; 18021; GFX90A-LABEL: s_shuffle_v2i16_v8i16__12_3: 18022; GFX90A: ; %bb.0: 18023; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18024; GFX90A-NEXT: ;;#ASMSTART 18025; GFX90A-NEXT: ; def s[8:11] 18026; GFX90A-NEXT: ;;#ASMEND 18027; GFX90A-NEXT: ;;#ASMSTART 18028; GFX90A-NEXT: ; def s[4:7] 18029; GFX90A-NEXT: ;;#ASMEND 18030; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s10, s5 18031; GFX90A-NEXT: ;;#ASMSTART 18032; GFX90A-NEXT: ; use s8 18033; GFX90A-NEXT: ;;#ASMEND 18034; GFX90A-NEXT: s_setpc_b64 s[30:31] 18035; 18036; GFX940-LABEL: s_shuffle_v2i16_v8i16__12_3: 18037; GFX940: ; %bb.0: 18038; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18039; GFX940-NEXT: ;;#ASMSTART 18040; GFX940-NEXT: ; def s[0:3] 18041; GFX940-NEXT: ;;#ASMEND 18042; GFX940-NEXT: 
;;#ASMSTART 18043; GFX940-NEXT: ; def s[4:7] 18044; GFX940-NEXT: ;;#ASMEND 18045; GFX940-NEXT: s_pack_lh_b32_b16 s8, s6, s1 18046; GFX940-NEXT: ;;#ASMSTART 18047; GFX940-NEXT: ; use s8 18048; GFX940-NEXT: ;;#ASMEND 18049; GFX940-NEXT: s_setpc_b64 s[30:31] 18050 %vec0 = call <8 x i16> asm "; def $0", "=s"() 18051 %vec1 = call <8 x i16> asm "; def $0", "=s"() 18052 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 12, i32 3> 18053 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 18054 ret void 18055} 18056 18057define void @s_shuffle_v2i16_v8i16__13_3() { 18058; GFX900-LABEL: s_shuffle_v2i16_v8i16__13_3: 18059; GFX900: ; %bb.0: 18060; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18061; GFX900-NEXT: ;;#ASMSTART 18062; GFX900-NEXT: ; def s[8:11] 18063; GFX900-NEXT: ;;#ASMEND 18064; GFX900-NEXT: ;;#ASMSTART 18065; GFX900-NEXT: ; def s[4:7] 18066; GFX900-NEXT: ;;#ASMEND 18067; GFX900-NEXT: s_pack_hh_b32_b16 s8, s10, s5 18068; GFX900-NEXT: ;;#ASMSTART 18069; GFX900-NEXT: ; use s8 18070; GFX900-NEXT: ;;#ASMEND 18071; GFX900-NEXT: s_setpc_b64 s[30:31] 18072; 18073; GFX90A-LABEL: s_shuffle_v2i16_v8i16__13_3: 18074; GFX90A: ; %bb.0: 18075; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18076; GFX90A-NEXT: ;;#ASMSTART 18077; GFX90A-NEXT: ; def s[8:11] 18078; GFX90A-NEXT: ;;#ASMEND 18079; GFX90A-NEXT: ;;#ASMSTART 18080; GFX90A-NEXT: ; def s[4:7] 18081; GFX90A-NEXT: ;;#ASMEND 18082; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s10, s5 18083; GFX90A-NEXT: ;;#ASMSTART 18084; GFX90A-NEXT: ; use s8 18085; GFX90A-NEXT: ;;#ASMEND 18086; GFX90A-NEXT: s_setpc_b64 s[30:31] 18087; 18088; GFX940-LABEL: s_shuffle_v2i16_v8i16__13_3: 18089; GFX940: ; %bb.0: 18090; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18091; GFX940-NEXT: ;;#ASMSTART 18092; GFX940-NEXT: ; def s[0:3] 18093; GFX940-NEXT: ;;#ASMEND 18094; GFX940-NEXT: ;;#ASMSTART 18095; GFX940-NEXT: ; def s[4:7] 18096; GFX940-NEXT: ;;#ASMEND 18097; GFX940-NEXT: s_pack_hh_b32_b16 s8, s6, s1 
18098; GFX940-NEXT: ;;#ASMSTART 18099; GFX940-NEXT: ; use s8 18100; GFX940-NEXT: ;;#ASMEND 18101; GFX940-NEXT: s_setpc_b64 s[30:31] 18102 %vec0 = call <8 x i16> asm "; def $0", "=s"() 18103 %vec1 = call <8 x i16> asm "; def $0", "=s"() 18104 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 13, i32 3> 18105 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 18106 ret void 18107} 18108 18109define void @s_shuffle_v2i16_v8i16__14_3() { 18110; GFX900-LABEL: s_shuffle_v2i16_v8i16__14_3: 18111; GFX900: ; %bb.0: 18112; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18113; GFX900-NEXT: ;;#ASMSTART 18114; GFX900-NEXT: ; def s[8:11] 18115; GFX900-NEXT: ;;#ASMEND 18116; GFX900-NEXT: ;;#ASMSTART 18117; GFX900-NEXT: ; def s[4:7] 18118; GFX900-NEXT: ;;#ASMEND 18119; GFX900-NEXT: s_pack_lh_b32_b16 s8, s11, s5 18120; GFX900-NEXT: ;;#ASMSTART 18121; GFX900-NEXT: ; use s8 18122; GFX900-NEXT: ;;#ASMEND 18123; GFX900-NEXT: s_setpc_b64 s[30:31] 18124; 18125; GFX90A-LABEL: s_shuffle_v2i16_v8i16__14_3: 18126; GFX90A: ; %bb.0: 18127; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18128; GFX90A-NEXT: ;;#ASMSTART 18129; GFX90A-NEXT: ; def s[8:11] 18130; GFX90A-NEXT: ;;#ASMEND 18131; GFX90A-NEXT: ;;#ASMSTART 18132; GFX90A-NEXT: ; def s[4:7] 18133; GFX90A-NEXT: ;;#ASMEND 18134; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s11, s5 18135; GFX90A-NEXT: ;;#ASMSTART 18136; GFX90A-NEXT: ; use s8 18137; GFX90A-NEXT: ;;#ASMEND 18138; GFX90A-NEXT: s_setpc_b64 s[30:31] 18139; 18140; GFX940-LABEL: s_shuffle_v2i16_v8i16__14_3: 18141; GFX940: ; %bb.0: 18142; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18143; GFX940-NEXT: ;;#ASMSTART 18144; GFX940-NEXT: ; def s[0:3] 18145; GFX940-NEXT: ;;#ASMEND 18146; GFX940-NEXT: ;;#ASMSTART 18147; GFX940-NEXT: ; def s[4:7] 18148; GFX940-NEXT: ;;#ASMEND 18149; GFX940-NEXT: s_pack_lh_b32_b16 s8, s7, s1 18150; GFX940-NEXT: ;;#ASMSTART 18151; GFX940-NEXT: ; use s8 18152; GFX940-NEXT: ;;#ASMEND 18153; GFX940-NEXT: s_setpc_b64 
s[30:31] 18154 %vec0 = call <8 x i16> asm "; def $0", "=s"() 18155 %vec1 = call <8 x i16> asm "; def $0", "=s"() 18156 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 14, i32 3> 18157 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 18158 ret void 18159} 18160 18161define void @s_shuffle_v2i16_v8i16__u_4() { 18162; GFX900-LABEL: s_shuffle_v2i16_v8i16__u_4: 18163; GFX900: ; %bb.0: 18164; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18165; GFX900-NEXT: ;;#ASMSTART 18166; GFX900-NEXT: ; def s[4:7] 18167; GFX900-NEXT: ;;#ASMEND 18168; GFX900-NEXT: s_lshl_b32 s8, s6, 16 18169; GFX900-NEXT: ;;#ASMSTART 18170; GFX900-NEXT: ; use s8 18171; GFX900-NEXT: ;;#ASMEND 18172; GFX900-NEXT: s_setpc_b64 s[30:31] 18173; 18174; GFX90A-LABEL: s_shuffle_v2i16_v8i16__u_4: 18175; GFX90A: ; %bb.0: 18176; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18177; GFX90A-NEXT: ;;#ASMSTART 18178; GFX90A-NEXT: ; def s[4:7] 18179; GFX90A-NEXT: ;;#ASMEND 18180; GFX90A-NEXT: s_lshl_b32 s8, s6, 16 18181; GFX90A-NEXT: ;;#ASMSTART 18182; GFX90A-NEXT: ; use s8 18183; GFX90A-NEXT: ;;#ASMEND 18184; GFX90A-NEXT: s_setpc_b64 s[30:31] 18185; 18186; GFX940-LABEL: s_shuffle_v2i16_v8i16__u_4: 18187; GFX940: ; %bb.0: 18188; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18189; GFX940-NEXT: ;;#ASMSTART 18190; GFX940-NEXT: ; def s[0:3] 18191; GFX940-NEXT: ;;#ASMEND 18192; GFX940-NEXT: s_lshl_b32 s8, s2, 16 18193; GFX940-NEXT: ;;#ASMSTART 18194; GFX940-NEXT: ; use s8 18195; GFX940-NEXT: ;;#ASMEND 18196; GFX940-NEXT: s_setpc_b64 s[30:31] 18197 %vec0 = call <8 x i16> asm "; def $0", "=s"() 18198 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 poison, i32 4> 18199 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 18200 ret void 18201} 18202 18203define void @s_shuffle_v2i16_v8i16__0_4() { 18204; GFX900-LABEL: s_shuffle_v2i16_v8i16__0_4: 18205; GFX900: ; %bb.0: 18206; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18207; 
GFX900-NEXT: ;;#ASMSTART 18208; GFX900-NEXT: ; def s[4:7] 18209; GFX900-NEXT: ;;#ASMEND 18210; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s6 18211; GFX900-NEXT: ;;#ASMSTART 18212; GFX900-NEXT: ; use s8 18213; GFX900-NEXT: ;;#ASMEND 18214; GFX900-NEXT: s_setpc_b64 s[30:31] 18215; 18216; GFX90A-LABEL: s_shuffle_v2i16_v8i16__0_4: 18217; GFX90A: ; %bb.0: 18218; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18219; GFX90A-NEXT: ;;#ASMSTART 18220; GFX90A-NEXT: ; def s[4:7] 18221; GFX90A-NEXT: ;;#ASMEND 18222; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s6 18223; GFX90A-NEXT: ;;#ASMSTART 18224; GFX90A-NEXT: ; use s8 18225; GFX90A-NEXT: ;;#ASMEND 18226; GFX90A-NEXT: s_setpc_b64 s[30:31] 18227; 18228; GFX940-LABEL: s_shuffle_v2i16_v8i16__0_4: 18229; GFX940: ; %bb.0: 18230; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18231; GFX940-NEXT: ;;#ASMSTART 18232; GFX940-NEXT: ; def s[0:3] 18233; GFX940-NEXT: ;;#ASMEND 18234; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s2 18235; GFX940-NEXT: ;;#ASMSTART 18236; GFX940-NEXT: ; use s8 18237; GFX940-NEXT: ;;#ASMEND 18238; GFX940-NEXT: s_setpc_b64 s[30:31] 18239 %vec0 = call <8 x i16> asm "; def $0", "=s"() 18240 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 0, i32 4> 18241 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 18242 ret void 18243} 18244 18245define void @s_shuffle_v2i16_v8i16__1_4() { 18246; GFX900-LABEL: s_shuffle_v2i16_v8i16__1_4: 18247; GFX900: ; %bb.0: 18248; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18249; GFX900-NEXT: ;;#ASMSTART 18250; GFX900-NEXT: ; def s[4:7] 18251; GFX900-NEXT: ;;#ASMEND 18252; GFX900-NEXT: s_lshr_b32 s4, s4, 16 18253; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s6 18254; GFX900-NEXT: ;;#ASMSTART 18255; GFX900-NEXT: ; use s8 18256; GFX900-NEXT: ;;#ASMEND 18257; GFX900-NEXT: s_setpc_b64 s[30:31] 18258; 18259; GFX90A-LABEL: s_shuffle_v2i16_v8i16__1_4: 18260; GFX90A: ; %bb.0: 18261; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18262; GFX90A-NEXT: 
;;#ASMSTART 18263; GFX90A-NEXT: ; def s[4:7] 18264; GFX90A-NEXT: ;;#ASMEND 18265; GFX90A-NEXT: s_lshr_b32 s4, s4, 16 18266; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s6 18267; GFX90A-NEXT: ;;#ASMSTART 18268; GFX90A-NEXT: ; use s8 18269; GFX90A-NEXT: ;;#ASMEND 18270; GFX90A-NEXT: s_setpc_b64 s[30:31] 18271; 18272; GFX940-LABEL: s_shuffle_v2i16_v8i16__1_4: 18273; GFX940: ; %bb.0: 18274; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18275; GFX940-NEXT: ;;#ASMSTART 18276; GFX940-NEXT: ; def s[0:3] 18277; GFX940-NEXT: ;;#ASMEND 18278; GFX940-NEXT: s_lshr_b32 s0, s0, 16 18279; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s2 18280; GFX940-NEXT: ;;#ASMSTART 18281; GFX940-NEXT: ; use s8 18282; GFX940-NEXT: ;;#ASMEND 18283; GFX940-NEXT: s_setpc_b64 s[30:31] 18284 %vec0 = call <8 x i16> asm "; def $0", "=s"() 18285 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 1, i32 4> 18286 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 18287 ret void 18288} 18289 18290define void @s_shuffle_v2i16_v8i16__2_4() { 18291; GFX900-LABEL: s_shuffle_v2i16_v8i16__2_4: 18292; GFX900: ; %bb.0: 18293; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18294; GFX900-NEXT: ;;#ASMSTART 18295; GFX900-NEXT: ; def s[4:7] 18296; GFX900-NEXT: ;;#ASMEND 18297; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s6 18298; GFX900-NEXT: ;;#ASMSTART 18299; GFX900-NEXT: ; use s8 18300; GFX900-NEXT: ;;#ASMEND 18301; GFX900-NEXT: s_setpc_b64 s[30:31] 18302; 18303; GFX90A-LABEL: s_shuffle_v2i16_v8i16__2_4: 18304; GFX90A: ; %bb.0: 18305; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18306; GFX90A-NEXT: ;;#ASMSTART 18307; GFX90A-NEXT: ; def s[4:7] 18308; GFX90A-NEXT: ;;#ASMEND 18309; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s6 18310; GFX90A-NEXT: ;;#ASMSTART 18311; GFX90A-NEXT: ; use s8 18312; GFX90A-NEXT: ;;#ASMEND 18313; GFX90A-NEXT: s_setpc_b64 s[30:31] 18314; 18315; GFX940-LABEL: s_shuffle_v2i16_v8i16__2_4: 18316; GFX940: ; %bb.0: 18317; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) 18318; GFX940-NEXT: ;;#ASMSTART 18319; GFX940-NEXT: ; def s[0:3] 18320; GFX940-NEXT: ;;#ASMEND 18321; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s2 18322; GFX940-NEXT: ;;#ASMSTART 18323; GFX940-NEXT: ; use s8 18324; GFX940-NEXT: ;;#ASMEND 18325; GFX940-NEXT: s_setpc_b64 s[30:31] 18326 %vec0 = call <8 x i16> asm "; def $0", "=s"() 18327 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 2, i32 4> 18328 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 18329 ret void 18330} 18331 18332define void @s_shuffle_v2i16_v8i16__3_4() { 18333; GFX900-LABEL: s_shuffle_v2i16_v8i16__3_4: 18334; GFX900: ; %bb.0: 18335; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18336; GFX900-NEXT: ;;#ASMSTART 18337; GFX900-NEXT: ; def s[4:7] 18338; GFX900-NEXT: ;;#ASMEND 18339; GFX900-NEXT: s_lshr_b32 s4, s5, 16 18340; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s6 18341; GFX900-NEXT: ;;#ASMSTART 18342; GFX900-NEXT: ; use s8 18343; GFX900-NEXT: ;;#ASMEND 18344; GFX900-NEXT: s_setpc_b64 s[30:31] 18345; 18346; GFX90A-LABEL: s_shuffle_v2i16_v8i16__3_4: 18347; GFX90A: ; %bb.0: 18348; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18349; GFX90A-NEXT: ;;#ASMSTART 18350; GFX90A-NEXT: ; def s[4:7] 18351; GFX90A-NEXT: ;;#ASMEND 18352; GFX90A-NEXT: s_lshr_b32 s4, s5, 16 18353; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s6 18354; GFX90A-NEXT: ;;#ASMSTART 18355; GFX90A-NEXT: ; use s8 18356; GFX90A-NEXT: ;;#ASMEND 18357; GFX90A-NEXT: s_setpc_b64 s[30:31] 18358; 18359; GFX940-LABEL: s_shuffle_v2i16_v8i16__3_4: 18360; GFX940: ; %bb.0: 18361; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18362; GFX940-NEXT: ;;#ASMSTART 18363; GFX940-NEXT: ; def s[0:3] 18364; GFX940-NEXT: ;;#ASMEND 18365; GFX940-NEXT: s_lshr_b32 s0, s1, 16 18366; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s2 18367; GFX940-NEXT: ;;#ASMSTART 18368; GFX940-NEXT: ; use s8 18369; GFX940-NEXT: ;;#ASMEND 18370; GFX940-NEXT: s_setpc_b64 s[30:31] 18371 %vec0 = call <8 x i16> asm "; def $0", "=s"() 18372 
%shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 3, i32 4> 18373 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 18374 ret void 18375} 18376 18377define void @s_shuffle_v2i16_v8i16__4_4() { 18378; GFX900-LABEL: s_shuffle_v2i16_v8i16__4_4: 18379; GFX900: ; %bb.0: 18380; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18381; GFX900-NEXT: ;;#ASMSTART 18382; GFX900-NEXT: ; def s[4:7] 18383; GFX900-NEXT: ;;#ASMEND 18384; GFX900-NEXT: s_pack_ll_b32_b16 s8, s6, s6 18385; GFX900-NEXT: ;;#ASMSTART 18386; GFX900-NEXT: ; use s8 18387; GFX900-NEXT: ;;#ASMEND 18388; GFX900-NEXT: s_setpc_b64 s[30:31] 18389; 18390; GFX90A-LABEL: s_shuffle_v2i16_v8i16__4_4: 18391; GFX90A: ; %bb.0: 18392; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18393; GFX90A-NEXT: ;;#ASMSTART 18394; GFX90A-NEXT: ; def s[4:7] 18395; GFX90A-NEXT: ;;#ASMEND 18396; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s6, s6 18397; GFX90A-NEXT: ;;#ASMSTART 18398; GFX90A-NEXT: ; use s8 18399; GFX90A-NEXT: ;;#ASMEND 18400; GFX90A-NEXT: s_setpc_b64 s[30:31] 18401; 18402; GFX940-LABEL: s_shuffle_v2i16_v8i16__4_4: 18403; GFX940: ; %bb.0: 18404; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18405; GFX940-NEXT: ;;#ASMSTART 18406; GFX940-NEXT: ; def s[0:3] 18407; GFX940-NEXT: ;;#ASMEND 18408; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s2 18409; GFX940-NEXT: ;;#ASMSTART 18410; GFX940-NEXT: ; use s8 18411; GFX940-NEXT: ;;#ASMEND 18412; GFX940-NEXT: s_setpc_b64 s[30:31] 18413 %vec0 = call <8 x i16> asm "; def $0", "=s"() 18414 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 4, i32 4> 18415 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 18416 ret void 18417} 18418 18419define void @s_shuffle_v2i16_v8i16__5_4() { 18420; GFX900-LABEL: s_shuffle_v2i16_v8i16__5_4: 18421; GFX900: ; %bb.0: 18422; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18423; GFX900-NEXT: ;;#ASMSTART 18424; GFX900-NEXT: ; def s[4:7] 18425; GFX900-NEXT: ;;#ASMEND 18426; GFX900-NEXT: 
s_lshr_b32 s4, s6, 16 18427; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s6 18428; GFX900-NEXT: ;;#ASMSTART 18429; GFX900-NEXT: ; use s8 18430; GFX900-NEXT: ;;#ASMEND 18431; GFX900-NEXT: s_setpc_b64 s[30:31] 18432; 18433; GFX90A-LABEL: s_shuffle_v2i16_v8i16__5_4: 18434; GFX90A: ; %bb.0: 18435; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18436; GFX90A-NEXT: ;;#ASMSTART 18437; GFX90A-NEXT: ; def s[4:7] 18438; GFX90A-NEXT: ;;#ASMEND 18439; GFX90A-NEXT: s_lshr_b32 s4, s6, 16 18440; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s6 18441; GFX90A-NEXT: ;;#ASMSTART 18442; GFX90A-NEXT: ; use s8 18443; GFX90A-NEXT: ;;#ASMEND 18444; GFX90A-NEXT: s_setpc_b64 s[30:31] 18445; 18446; GFX940-LABEL: s_shuffle_v2i16_v8i16__5_4: 18447; GFX940: ; %bb.0: 18448; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18449; GFX940-NEXT: ;;#ASMSTART 18450; GFX940-NEXT: ; def s[0:3] 18451; GFX940-NEXT: ;;#ASMEND 18452; GFX940-NEXT: s_lshr_b32 s0, s2, 16 18453; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s2 18454; GFX940-NEXT: ;;#ASMSTART 18455; GFX940-NEXT: ; use s8 18456; GFX940-NEXT: ;;#ASMEND 18457; GFX940-NEXT: s_setpc_b64 s[30:31] 18458 %vec0 = call <8 x i16> asm "; def $0", "=s"() 18459 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 5, i32 4> 18460 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 18461 ret void 18462} 18463 18464define void @s_shuffle_v2i16_v8i16__6_4() { 18465; GFX900-LABEL: s_shuffle_v2i16_v8i16__6_4: 18466; GFX900: ; %bb.0: 18467; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18468; GFX900-NEXT: ;;#ASMSTART 18469; GFX900-NEXT: ; def s[4:7] 18470; GFX900-NEXT: ;;#ASMEND 18471; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s6 18472; GFX900-NEXT: ;;#ASMSTART 18473; GFX900-NEXT: ; use s8 18474; GFX900-NEXT: ;;#ASMEND 18475; GFX900-NEXT: s_setpc_b64 s[30:31] 18476; 18477; GFX90A-LABEL: s_shuffle_v2i16_v8i16__6_4: 18478; GFX90A: ; %bb.0: 18479; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18480; GFX90A-NEXT: ;;#ASMSTART 18481; 
GFX90A-NEXT: ; def s[4:7] 18482; GFX90A-NEXT: ;;#ASMEND 18483; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s6 18484; GFX90A-NEXT: ;;#ASMSTART 18485; GFX90A-NEXT: ; use s8 18486; GFX90A-NEXT: ;;#ASMEND 18487; GFX90A-NEXT: s_setpc_b64 s[30:31] 18488; 18489; GFX940-LABEL: s_shuffle_v2i16_v8i16__6_4: 18490; GFX940: ; %bb.0: 18491; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18492; GFX940-NEXT: ;;#ASMSTART 18493; GFX940-NEXT: ; def s[0:3] 18494; GFX940-NEXT: ;;#ASMEND 18495; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s2 18496; GFX940-NEXT: ;;#ASMSTART 18497; GFX940-NEXT: ; use s8 18498; GFX940-NEXT: ;;#ASMEND 18499; GFX940-NEXT: s_setpc_b64 s[30:31] 18500 %vec0 = call <8 x i16> asm "; def $0", "=s"() 18501 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 6, i32 4> 18502 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 18503 ret void 18504} 18505 18506define void @s_shuffle_v2i16_v8i16__7_4() { 18507; GFX900-LABEL: s_shuffle_v2i16_v8i16__7_4: 18508; GFX900: ; %bb.0: 18509; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18510; GFX900-NEXT: ;;#ASMSTART 18511; GFX900-NEXT: ; def s[4:7] 18512; GFX900-NEXT: ;;#ASMEND 18513; GFX900-NEXT: s_lshr_b32 s4, s7, 16 18514; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s6 18515; GFX900-NEXT: ;;#ASMSTART 18516; GFX900-NEXT: ; use s8 18517; GFX900-NEXT: ;;#ASMEND 18518; GFX900-NEXT: s_setpc_b64 s[30:31] 18519; 18520; GFX90A-LABEL: s_shuffle_v2i16_v8i16__7_4: 18521; GFX90A: ; %bb.0: 18522; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18523; GFX90A-NEXT: ;;#ASMSTART 18524; GFX90A-NEXT: ; def s[4:7] 18525; GFX90A-NEXT: ;;#ASMEND 18526; GFX90A-NEXT: s_lshr_b32 s4, s7, 16 18527; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s6 18528; GFX90A-NEXT: ;;#ASMSTART 18529; GFX90A-NEXT: ; use s8 18530; GFX90A-NEXT: ;;#ASMEND 18531; GFX90A-NEXT: s_setpc_b64 s[30:31] 18532; 18533; GFX940-LABEL: s_shuffle_v2i16_v8i16__7_4: 18534; GFX940: ; %bb.0: 18535; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18536; 
GFX940-NEXT: ;;#ASMSTART 18537; GFX940-NEXT: ; def s[0:3] 18538; GFX940-NEXT: ;;#ASMEND 18539; GFX940-NEXT: s_lshr_b32 s0, s3, 16 18540; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s2 18541; GFX940-NEXT: ;;#ASMSTART 18542; GFX940-NEXT: ; use s8 18543; GFX940-NEXT: ;;#ASMEND 18544; GFX940-NEXT: s_setpc_b64 s[30:31] 18545 %vec0 = call <8 x i16> asm "; def $0", "=s"() 18546 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 7, i32 4> 18547 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 18548 ret void 18549} 18550 18551define void @s_shuffle_v2i16_v8i16__8_4() { 18552; GFX900-LABEL: s_shuffle_v2i16_v8i16__8_4: 18553; GFX900: ; %bb.0: 18554; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18555; GFX900-NEXT: ;;#ASMSTART 18556; GFX900-NEXT: ; def s[4:7] 18557; GFX900-NEXT: ;;#ASMEND 18558; GFX900-NEXT: s_lshl_b32 s8, s6, 16 18559; GFX900-NEXT: ;;#ASMSTART 18560; GFX900-NEXT: ; use s8 18561; GFX900-NEXT: ;;#ASMEND 18562; GFX900-NEXT: s_setpc_b64 s[30:31] 18563; 18564; GFX90A-LABEL: s_shuffle_v2i16_v8i16__8_4: 18565; GFX90A: ; %bb.0: 18566; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18567; GFX90A-NEXT: ;;#ASMSTART 18568; GFX90A-NEXT: ; def s[4:7] 18569; GFX90A-NEXT: ;;#ASMEND 18570; GFX90A-NEXT: s_lshl_b32 s8, s6, 16 18571; GFX90A-NEXT: ;;#ASMSTART 18572; GFX90A-NEXT: ; use s8 18573; GFX90A-NEXT: ;;#ASMEND 18574; GFX90A-NEXT: s_setpc_b64 s[30:31] 18575; 18576; GFX940-LABEL: s_shuffle_v2i16_v8i16__8_4: 18577; GFX940: ; %bb.0: 18578; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18579; GFX940-NEXT: ;;#ASMSTART 18580; GFX940-NEXT: ; def s[0:3] 18581; GFX940-NEXT: ;;#ASMEND 18582; GFX940-NEXT: s_lshl_b32 s8, s2, 16 18583; GFX940-NEXT: ;;#ASMSTART 18584; GFX940-NEXT: ; use s8 18585; GFX940-NEXT: ;;#ASMEND 18586; GFX940-NEXT: s_setpc_b64 s[30:31] 18587 %vec0 = call <8 x i16> asm "; def $0", "=s"() 18588 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 8, i32 4> 18589 call void asm sideeffect "; use $0", 
"{s8}"(<2 x i16> %shuf) 18590 ret void 18591} 18592 18593define void @s_shuffle_v2i16_v8i16__9_4() { 18594; GFX900-LABEL: s_shuffle_v2i16_v8i16__9_4: 18595; GFX900: ; %bb.0: 18596; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18597; GFX900-NEXT: ;;#ASMSTART 18598; GFX900-NEXT: ; def s[4:7] 18599; GFX900-NEXT: ;;#ASMEND 18600; GFX900-NEXT: ;;#ASMSTART 18601; GFX900-NEXT: ; def s[8:11] 18602; GFX900-NEXT: ;;#ASMEND 18603; GFX900-NEXT: s_lshr_b32 s4, s8, 16 18604; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s6 18605; GFX900-NEXT: ;;#ASMSTART 18606; GFX900-NEXT: ; use s8 18607; GFX900-NEXT: ;;#ASMEND 18608; GFX900-NEXT: s_setpc_b64 s[30:31] 18609; 18610; GFX90A-LABEL: s_shuffle_v2i16_v8i16__9_4: 18611; GFX90A: ; %bb.0: 18612; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18613; GFX90A-NEXT: ;;#ASMSTART 18614; GFX90A-NEXT: ; def s[4:7] 18615; GFX90A-NEXT: ;;#ASMEND 18616; GFX90A-NEXT: ;;#ASMSTART 18617; GFX90A-NEXT: ; def s[8:11] 18618; GFX90A-NEXT: ;;#ASMEND 18619; GFX90A-NEXT: s_lshr_b32 s4, s8, 16 18620; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s6 18621; GFX90A-NEXT: ;;#ASMSTART 18622; GFX90A-NEXT: ; use s8 18623; GFX90A-NEXT: ;;#ASMEND 18624; GFX90A-NEXT: s_setpc_b64 s[30:31] 18625; 18626; GFX940-LABEL: s_shuffle_v2i16_v8i16__9_4: 18627; GFX940: ; %bb.0: 18628; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18629; GFX940-NEXT: ;;#ASMSTART 18630; GFX940-NEXT: ; def s[0:3] 18631; GFX940-NEXT: ;;#ASMEND 18632; GFX940-NEXT: ;;#ASMSTART 18633; GFX940-NEXT: ; def s[4:7] 18634; GFX940-NEXT: ;;#ASMEND 18635; GFX940-NEXT: s_lshr_b32 s0, s4, 16 18636; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s2 18637; GFX940-NEXT: ;;#ASMSTART 18638; GFX940-NEXT: ; use s8 18639; GFX940-NEXT: ;;#ASMEND 18640; GFX940-NEXT: s_setpc_b64 s[30:31] 18641 %vec0 = call <8 x i16> asm "; def $0", "=s"() 18642 %vec1 = call <8 x i16> asm "; def $0", "=s"() 18643 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 9, i32 4> 18644 call void asm sideeffect "; use $0", "{s8}"(<2 
x i16> %shuf) 18645 ret void 18646} 18647 18648define void @s_shuffle_v2i16_v8i16__10_4() { 18649; GFX900-LABEL: s_shuffle_v2i16_v8i16__10_4: 18650; GFX900: ; %bb.0: 18651; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18652; GFX900-NEXT: ;;#ASMSTART 18653; GFX900-NEXT: ; def s[8:11] 18654; GFX900-NEXT: ;;#ASMEND 18655; GFX900-NEXT: ;;#ASMSTART 18656; GFX900-NEXT: ; def s[4:7] 18657; GFX900-NEXT: ;;#ASMEND 18658; GFX900-NEXT: s_pack_ll_b32_b16 s8, s9, s6 18659; GFX900-NEXT: ;;#ASMSTART 18660; GFX900-NEXT: ; use s8 18661; GFX900-NEXT: ;;#ASMEND 18662; GFX900-NEXT: s_setpc_b64 s[30:31] 18663; 18664; GFX90A-LABEL: s_shuffle_v2i16_v8i16__10_4: 18665; GFX90A: ; %bb.0: 18666; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18667; GFX90A-NEXT: ;;#ASMSTART 18668; GFX90A-NEXT: ; def s[8:11] 18669; GFX90A-NEXT: ;;#ASMEND 18670; GFX90A-NEXT: ;;#ASMSTART 18671; GFX90A-NEXT: ; def s[4:7] 18672; GFX90A-NEXT: ;;#ASMEND 18673; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s9, s6 18674; GFX90A-NEXT: ;;#ASMSTART 18675; GFX90A-NEXT: ; use s8 18676; GFX90A-NEXT: ;;#ASMEND 18677; GFX90A-NEXT: s_setpc_b64 s[30:31] 18678; 18679; GFX940-LABEL: s_shuffle_v2i16_v8i16__10_4: 18680; GFX940: ; %bb.0: 18681; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18682; GFX940-NEXT: ;;#ASMSTART 18683; GFX940-NEXT: ; def s[0:3] 18684; GFX940-NEXT: ;;#ASMEND 18685; GFX940-NEXT: ;;#ASMSTART 18686; GFX940-NEXT: ; def s[4:7] 18687; GFX940-NEXT: ;;#ASMEND 18688; GFX940-NEXT: s_pack_ll_b32_b16 s8, s5, s2 18689; GFX940-NEXT: ;;#ASMSTART 18690; GFX940-NEXT: ; use s8 18691; GFX940-NEXT: ;;#ASMEND 18692; GFX940-NEXT: s_setpc_b64 s[30:31] 18693 %vec0 = call <8 x i16> asm "; def $0", "=s"() 18694 %vec1 = call <8 x i16> asm "; def $0", "=s"() 18695 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 10, i32 4> 18696 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 18697 ret void 18698} 18699 18700define void @s_shuffle_v2i16_v8i16__11_4() { 18701; GFX900-LABEL: 
s_shuffle_v2i16_v8i16__11_4: 18702; GFX900: ; %bb.0: 18703; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18704; GFX900-NEXT: ;;#ASMSTART 18705; GFX900-NEXT: ; def s[4:7] 18706; GFX900-NEXT: ;;#ASMEND 18707; GFX900-NEXT: ;;#ASMSTART 18708; GFX900-NEXT: ; def s[8:11] 18709; GFX900-NEXT: ;;#ASMEND 18710; GFX900-NEXT: s_lshr_b32 s4, s9, 16 18711; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s6 18712; GFX900-NEXT: ;;#ASMSTART 18713; GFX900-NEXT: ; use s8 18714; GFX900-NEXT: ;;#ASMEND 18715; GFX900-NEXT: s_setpc_b64 s[30:31] 18716; 18717; GFX90A-LABEL: s_shuffle_v2i16_v8i16__11_4: 18718; GFX90A: ; %bb.0: 18719; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18720; GFX90A-NEXT: ;;#ASMSTART 18721; GFX90A-NEXT: ; def s[4:7] 18722; GFX90A-NEXT: ;;#ASMEND 18723; GFX90A-NEXT: ;;#ASMSTART 18724; GFX90A-NEXT: ; def s[8:11] 18725; GFX90A-NEXT: ;;#ASMEND 18726; GFX90A-NEXT: s_lshr_b32 s4, s9, 16 18727; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s6 18728; GFX90A-NEXT: ;;#ASMSTART 18729; GFX90A-NEXT: ; use s8 18730; GFX90A-NEXT: ;;#ASMEND 18731; GFX90A-NEXT: s_setpc_b64 s[30:31] 18732; 18733; GFX940-LABEL: s_shuffle_v2i16_v8i16__11_4: 18734; GFX940: ; %bb.0: 18735; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18736; GFX940-NEXT: ;;#ASMSTART 18737; GFX940-NEXT: ; def s[0:3] 18738; GFX940-NEXT: ;;#ASMEND 18739; GFX940-NEXT: ;;#ASMSTART 18740; GFX940-NEXT: ; def s[4:7] 18741; GFX940-NEXT: ;;#ASMEND 18742; GFX940-NEXT: s_lshr_b32 s0, s5, 16 18743; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s2 18744; GFX940-NEXT: ;;#ASMSTART 18745; GFX940-NEXT: ; use s8 18746; GFX940-NEXT: ;;#ASMEND 18747; GFX940-NEXT: s_setpc_b64 s[30:31] 18748 %vec0 = call <8 x i16> asm "; def $0", "=s"() 18749 %vec1 = call <8 x i16> asm "; def $0", "=s"() 18750 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 11, i32 4> 18751 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 18752 ret void 18753} 18754 18755define void @s_shuffle_v2i16_v8i16__12_4() { 18756; GFX900-LABEL: 
s_shuffle_v2i16_v8i16__12_4: 18757; GFX900: ; %bb.0: 18758; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18759; GFX900-NEXT: ;;#ASMSTART 18760; GFX900-NEXT: ; def s[8:11] 18761; GFX900-NEXT: ;;#ASMEND 18762; GFX900-NEXT: ;;#ASMSTART 18763; GFX900-NEXT: ; def s[4:7] 18764; GFX900-NEXT: ;;#ASMEND 18765; GFX900-NEXT: s_pack_ll_b32_b16 s8, s10, s6 18766; GFX900-NEXT: ;;#ASMSTART 18767; GFX900-NEXT: ; use s8 18768; GFX900-NEXT: ;;#ASMEND 18769; GFX900-NEXT: s_setpc_b64 s[30:31] 18770; 18771; GFX90A-LABEL: s_shuffle_v2i16_v8i16__12_4: 18772; GFX90A: ; %bb.0: 18773; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18774; GFX90A-NEXT: ;;#ASMSTART 18775; GFX90A-NEXT: ; def s[8:11] 18776; GFX90A-NEXT: ;;#ASMEND 18777; GFX90A-NEXT: ;;#ASMSTART 18778; GFX90A-NEXT: ; def s[4:7] 18779; GFX90A-NEXT: ;;#ASMEND 18780; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s10, s6 18781; GFX90A-NEXT: ;;#ASMSTART 18782; GFX90A-NEXT: ; use s8 18783; GFX90A-NEXT: ;;#ASMEND 18784; GFX90A-NEXT: s_setpc_b64 s[30:31] 18785; 18786; GFX940-LABEL: s_shuffle_v2i16_v8i16__12_4: 18787; GFX940: ; %bb.0: 18788; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18789; GFX940-NEXT: ;;#ASMSTART 18790; GFX940-NEXT: ; def s[0:3] 18791; GFX940-NEXT: ;;#ASMEND 18792; GFX940-NEXT: ;;#ASMSTART 18793; GFX940-NEXT: ; def s[4:7] 18794; GFX940-NEXT: ;;#ASMEND 18795; GFX940-NEXT: s_pack_ll_b32_b16 s8, s6, s2 18796; GFX940-NEXT: ;;#ASMSTART 18797; GFX940-NEXT: ; use s8 18798; GFX940-NEXT: ;;#ASMEND 18799; GFX940-NEXT: s_setpc_b64 s[30:31] 18800 %vec0 = call <8 x i16> asm "; def $0", "=s"() 18801 %vec1 = call <8 x i16> asm "; def $0", "=s"() 18802 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 12, i32 4> 18803 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 18804 ret void 18805} 18806 18807define void @s_shuffle_v2i16_v8i16__13_4() { 18808; GFX900-LABEL: s_shuffle_v2i16_v8i16__13_4: 18809; GFX900: ; %bb.0: 18810; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18811; 
GFX900-NEXT: ;;#ASMSTART 18812; GFX900-NEXT: ; def s[4:7] 18813; GFX900-NEXT: ;;#ASMEND 18814; GFX900-NEXT: ;;#ASMSTART 18815; GFX900-NEXT: ; def s[8:11] 18816; GFX900-NEXT: ;;#ASMEND 18817; GFX900-NEXT: s_lshr_b32 s4, s10, 16 18818; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s6 18819; GFX900-NEXT: ;;#ASMSTART 18820; GFX900-NEXT: ; use s8 18821; GFX900-NEXT: ;;#ASMEND 18822; GFX900-NEXT: s_setpc_b64 s[30:31] 18823; 18824; GFX90A-LABEL: s_shuffle_v2i16_v8i16__13_4: 18825; GFX90A: ; %bb.0: 18826; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18827; GFX90A-NEXT: ;;#ASMSTART 18828; GFX90A-NEXT: ; def s[4:7] 18829; GFX90A-NEXT: ;;#ASMEND 18830; GFX90A-NEXT: ;;#ASMSTART 18831; GFX90A-NEXT: ; def s[8:11] 18832; GFX90A-NEXT: ;;#ASMEND 18833; GFX90A-NEXT: s_lshr_b32 s4, s10, 16 18834; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s6 18835; GFX90A-NEXT: ;;#ASMSTART 18836; GFX90A-NEXT: ; use s8 18837; GFX90A-NEXT: ;;#ASMEND 18838; GFX90A-NEXT: s_setpc_b64 s[30:31] 18839; 18840; GFX940-LABEL: s_shuffle_v2i16_v8i16__13_4: 18841; GFX940: ; %bb.0: 18842; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18843; GFX940-NEXT: ;;#ASMSTART 18844; GFX940-NEXT: ; def s[0:3] 18845; GFX940-NEXT: ;;#ASMEND 18846; GFX940-NEXT: ;;#ASMSTART 18847; GFX940-NEXT: ; def s[4:7] 18848; GFX940-NEXT: ;;#ASMEND 18849; GFX940-NEXT: s_lshr_b32 s0, s6, 16 18850; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s2 18851; GFX940-NEXT: ;;#ASMSTART 18852; GFX940-NEXT: ; use s8 18853; GFX940-NEXT: ;;#ASMEND 18854; GFX940-NEXT: s_setpc_b64 s[30:31] 18855 %vec0 = call <8 x i16> asm "; def $0", "=s"() 18856 %vec1 = call <8 x i16> asm "; def $0", "=s"() 18857 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 13, i32 4> 18858 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 18859 ret void 18860} 18861 18862define void @s_shuffle_v2i16_v8i16__14_4() { 18863; GFX900-LABEL: s_shuffle_v2i16_v8i16__14_4: 18864; GFX900: ; %bb.0: 18865; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18866; 
GFX900-NEXT: ;;#ASMSTART 18867; GFX900-NEXT: ; def s[8:11] 18868; GFX900-NEXT: ;;#ASMEND 18869; GFX900-NEXT: ;;#ASMSTART 18870; GFX900-NEXT: ; def s[4:7] 18871; GFX900-NEXT: ;;#ASMEND 18872; GFX900-NEXT: s_pack_ll_b32_b16 s8, s11, s6 18873; GFX900-NEXT: ;;#ASMSTART 18874; GFX900-NEXT: ; use s8 18875; GFX900-NEXT: ;;#ASMEND 18876; GFX900-NEXT: s_setpc_b64 s[30:31] 18877; 18878; GFX90A-LABEL: s_shuffle_v2i16_v8i16__14_4: 18879; GFX90A: ; %bb.0: 18880; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18881; GFX90A-NEXT: ;;#ASMSTART 18882; GFX90A-NEXT: ; def s[8:11] 18883; GFX90A-NEXT: ;;#ASMEND 18884; GFX90A-NEXT: ;;#ASMSTART 18885; GFX90A-NEXT: ; def s[4:7] 18886; GFX90A-NEXT: ;;#ASMEND 18887; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s11, s6 18888; GFX90A-NEXT: ;;#ASMSTART 18889; GFX90A-NEXT: ; use s8 18890; GFX90A-NEXT: ;;#ASMEND 18891; GFX90A-NEXT: s_setpc_b64 s[30:31] 18892; 18893; GFX940-LABEL: s_shuffle_v2i16_v8i16__14_4: 18894; GFX940: ; %bb.0: 18895; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18896; GFX940-NEXT: ;;#ASMSTART 18897; GFX940-NEXT: ; def s[0:3] 18898; GFX940-NEXT: ;;#ASMEND 18899; GFX940-NEXT: ;;#ASMSTART 18900; GFX940-NEXT: ; def s[4:7] 18901; GFX940-NEXT: ;;#ASMEND 18902; GFX940-NEXT: s_pack_ll_b32_b16 s8, s7, s2 18903; GFX940-NEXT: ;;#ASMSTART 18904; GFX940-NEXT: ; use s8 18905; GFX940-NEXT: ;;#ASMEND 18906; GFX940-NEXT: s_setpc_b64 s[30:31] 18907 %vec0 = call <8 x i16> asm "; def $0", "=s"() 18908 %vec1 = call <8 x i16> asm "; def $0", "=s"() 18909 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 14, i32 4> 18910 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 18911 ret void 18912} 18913 18914define void @s_shuffle_v2i16_v8i16__u_5() { 18915; GFX900-LABEL: s_shuffle_v2i16_v8i16__u_5: 18916; GFX900: ; %bb.0: 18917; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18918; GFX900-NEXT: ;;#ASMSTART 18919; GFX900-NEXT: ; def s[4:7] 18920; GFX900-NEXT: ;;#ASMEND 18921; GFX900-NEXT: s_mov_b32 s8, s6 18922; 
GFX900-NEXT: ;;#ASMSTART 18923; GFX900-NEXT: ; use s8 18924; GFX900-NEXT: ;;#ASMEND 18925; GFX900-NEXT: s_setpc_b64 s[30:31] 18926; 18927; GFX90A-LABEL: s_shuffle_v2i16_v8i16__u_5: 18928; GFX90A: ; %bb.0: 18929; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18930; GFX90A-NEXT: ;;#ASMSTART 18931; GFX90A-NEXT: ; def s[4:7] 18932; GFX90A-NEXT: ;;#ASMEND 18933; GFX90A-NEXT: s_mov_b32 s8, s6 18934; GFX90A-NEXT: ;;#ASMSTART 18935; GFX90A-NEXT: ; use s8 18936; GFX90A-NEXT: ;;#ASMEND 18937; GFX90A-NEXT: s_setpc_b64 s[30:31] 18938; 18939; GFX940-LABEL: s_shuffle_v2i16_v8i16__u_5: 18940; GFX940: ; %bb.0: 18941; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18942; GFX940-NEXT: ;;#ASMSTART 18943; GFX940-NEXT: ; def s[0:3] 18944; GFX940-NEXT: ;;#ASMEND 18945; GFX940-NEXT: s_mov_b32 s8, s2 18946; GFX940-NEXT: ;;#ASMSTART 18947; GFX940-NEXT: ; use s8 18948; GFX940-NEXT: ;;#ASMEND 18949; GFX940-NEXT: s_setpc_b64 s[30:31] 18950 %vec0 = call <8 x i16> asm "; def $0", "=s"() 18951 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 poison, i32 5> 18952 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 18953 ret void 18954} 18955 18956define void @s_shuffle_v2i16_v8i16__0_5() { 18957; GFX900-LABEL: s_shuffle_v2i16_v8i16__0_5: 18958; GFX900: ; %bb.0: 18959; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18960; GFX900-NEXT: ;;#ASMSTART 18961; GFX900-NEXT: ; def s[4:7] 18962; GFX900-NEXT: ;;#ASMEND 18963; GFX900-NEXT: s_pack_lh_b32_b16 s8, s4, s6 18964; GFX900-NEXT: ;;#ASMSTART 18965; GFX900-NEXT: ; use s8 18966; GFX900-NEXT: ;;#ASMEND 18967; GFX900-NEXT: s_setpc_b64 s[30:31] 18968; 18969; GFX90A-LABEL: s_shuffle_v2i16_v8i16__0_5: 18970; GFX90A: ; %bb.0: 18971; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18972; GFX90A-NEXT: ;;#ASMSTART 18973; GFX90A-NEXT: ; def s[4:7] 18974; GFX90A-NEXT: ;;#ASMEND 18975; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s4, s6 18976; GFX90A-NEXT: ;;#ASMSTART 18977; GFX90A-NEXT: ; use s8 18978; GFX90A-NEXT: 
;;#ASMEND 18979; GFX90A-NEXT: s_setpc_b64 s[30:31] 18980; 18981; GFX940-LABEL: s_shuffle_v2i16_v8i16__0_5: 18982; GFX940: ; %bb.0: 18983; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18984; GFX940-NEXT: ;;#ASMSTART 18985; GFX940-NEXT: ; def s[0:3] 18986; GFX940-NEXT: ;;#ASMEND 18987; GFX940-NEXT: s_pack_lh_b32_b16 s8, s0, s2 18988; GFX940-NEXT: ;;#ASMSTART 18989; GFX940-NEXT: ; use s8 18990; GFX940-NEXT: ;;#ASMEND 18991; GFX940-NEXT: s_setpc_b64 s[30:31] 18992 %vec0 = call <8 x i16> asm "; def $0", "=s"() 18993 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 0, i32 5> 18994 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 18995 ret void 18996} 18997 18998define void @s_shuffle_v2i16_v8i16__1_5() { 18999; GFX900-LABEL: s_shuffle_v2i16_v8i16__1_5: 19000; GFX900: ; %bb.0: 19001; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19002; GFX900-NEXT: ;;#ASMSTART 19003; GFX900-NEXT: ; def s[4:7] 19004; GFX900-NEXT: ;;#ASMEND 19005; GFX900-NEXT: s_pack_hh_b32_b16 s8, s4, s6 19006; GFX900-NEXT: ;;#ASMSTART 19007; GFX900-NEXT: ; use s8 19008; GFX900-NEXT: ;;#ASMEND 19009; GFX900-NEXT: s_setpc_b64 s[30:31] 19010; 19011; GFX90A-LABEL: s_shuffle_v2i16_v8i16__1_5: 19012; GFX90A: ; %bb.0: 19013; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19014; GFX90A-NEXT: ;;#ASMSTART 19015; GFX90A-NEXT: ; def s[4:7] 19016; GFX90A-NEXT: ;;#ASMEND 19017; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s4, s6 19018; GFX90A-NEXT: ;;#ASMSTART 19019; GFX90A-NEXT: ; use s8 19020; GFX90A-NEXT: ;;#ASMEND 19021; GFX90A-NEXT: s_setpc_b64 s[30:31] 19022; 19023; GFX940-LABEL: s_shuffle_v2i16_v8i16__1_5: 19024; GFX940: ; %bb.0: 19025; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19026; GFX940-NEXT: ;;#ASMSTART 19027; GFX940-NEXT: ; def s[0:3] 19028; GFX940-NEXT: ;;#ASMEND 19029; GFX940-NEXT: s_pack_hh_b32_b16 s8, s0, s2 19030; GFX940-NEXT: ;;#ASMSTART 19031; GFX940-NEXT: ; use s8 19032; GFX940-NEXT: ;;#ASMEND 19033; GFX940-NEXT: s_setpc_b64 s[30:31] 19034 
%vec0 = call <8 x i16> asm "; def $0", "=s"() 19035 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 1, i32 5> 19036 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 19037 ret void 19038} 19039 19040define void @s_shuffle_v2i16_v8i16__2_5() { 19041; GFX900-LABEL: s_shuffle_v2i16_v8i16__2_5: 19042; GFX900: ; %bb.0: 19043; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19044; GFX900-NEXT: ;;#ASMSTART 19045; GFX900-NEXT: ; def s[4:7] 19046; GFX900-NEXT: ;;#ASMEND 19047; GFX900-NEXT: s_pack_lh_b32_b16 s8, s5, s6 19048; GFX900-NEXT: ;;#ASMSTART 19049; GFX900-NEXT: ; use s8 19050; GFX900-NEXT: ;;#ASMEND 19051; GFX900-NEXT: s_setpc_b64 s[30:31] 19052; 19053; GFX90A-LABEL: s_shuffle_v2i16_v8i16__2_5: 19054; GFX90A: ; %bb.0: 19055; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19056; GFX90A-NEXT: ;;#ASMSTART 19057; GFX90A-NEXT: ; def s[4:7] 19058; GFX90A-NEXT: ;;#ASMEND 19059; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s5, s6 19060; GFX90A-NEXT: ;;#ASMSTART 19061; GFX90A-NEXT: ; use s8 19062; GFX90A-NEXT: ;;#ASMEND 19063; GFX90A-NEXT: s_setpc_b64 s[30:31] 19064; 19065; GFX940-LABEL: s_shuffle_v2i16_v8i16__2_5: 19066; GFX940: ; %bb.0: 19067; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19068; GFX940-NEXT: ;;#ASMSTART 19069; GFX940-NEXT: ; def s[0:3] 19070; GFX940-NEXT: ;;#ASMEND 19071; GFX940-NEXT: s_pack_lh_b32_b16 s8, s1, s2 19072; GFX940-NEXT: ;;#ASMSTART 19073; GFX940-NEXT: ; use s8 19074; GFX940-NEXT: ;;#ASMEND 19075; GFX940-NEXT: s_setpc_b64 s[30:31] 19076 %vec0 = call <8 x i16> asm "; def $0", "=s"() 19077 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 2, i32 5> 19078 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 19079 ret void 19080} 19081 19082define void @s_shuffle_v2i16_v8i16__3_5() { 19083; GFX900-LABEL: s_shuffle_v2i16_v8i16__3_5: 19084; GFX900: ; %bb.0: 19085; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19086; GFX900-NEXT: ;;#ASMSTART 19087; GFX900-NEXT: ; def 
s[4:7] 19088; GFX900-NEXT: ;;#ASMEND 19089; GFX900-NEXT: s_pack_hh_b32_b16 s8, s5, s6 19090; GFX900-NEXT: ;;#ASMSTART 19091; GFX900-NEXT: ; use s8 19092; GFX900-NEXT: ;;#ASMEND 19093; GFX900-NEXT: s_setpc_b64 s[30:31] 19094; 19095; GFX90A-LABEL: s_shuffle_v2i16_v8i16__3_5: 19096; GFX90A: ; %bb.0: 19097; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19098; GFX90A-NEXT: ;;#ASMSTART 19099; GFX90A-NEXT: ; def s[4:7] 19100; GFX90A-NEXT: ;;#ASMEND 19101; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s5, s6 19102; GFX90A-NEXT: ;;#ASMSTART 19103; GFX90A-NEXT: ; use s8 19104; GFX90A-NEXT: ;;#ASMEND 19105; GFX90A-NEXT: s_setpc_b64 s[30:31] 19106; 19107; GFX940-LABEL: s_shuffle_v2i16_v8i16__3_5: 19108; GFX940: ; %bb.0: 19109; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19110; GFX940-NEXT: ;;#ASMSTART 19111; GFX940-NEXT: ; def s[0:3] 19112; GFX940-NEXT: ;;#ASMEND 19113; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s2 19114; GFX940-NEXT: ;;#ASMSTART 19115; GFX940-NEXT: ; use s8 19116; GFX940-NEXT: ;;#ASMEND 19117; GFX940-NEXT: s_setpc_b64 s[30:31] 19118 %vec0 = call <8 x i16> asm "; def $0", "=s"() 19119 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 3, i32 5> 19120 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 19121 ret void 19122} 19123 19124define void @s_shuffle_v2i16_v8i16__4_5() { 19125; GFX900-LABEL: s_shuffle_v2i16_v8i16__4_5: 19126; GFX900: ; %bb.0: 19127; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19128; GFX900-NEXT: ;;#ASMSTART 19129; GFX900-NEXT: ; def s[4:7] 19130; GFX900-NEXT: ;;#ASMEND 19131; GFX900-NEXT: s_mov_b32 s8, s6 19132; GFX900-NEXT: ;;#ASMSTART 19133; GFX900-NEXT: ; use s8 19134; GFX900-NEXT: ;;#ASMEND 19135; GFX900-NEXT: s_setpc_b64 s[30:31] 19136; 19137; GFX90A-LABEL: s_shuffle_v2i16_v8i16__4_5: 19138; GFX90A: ; %bb.0: 19139; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19140; GFX90A-NEXT: ;;#ASMSTART 19141; GFX90A-NEXT: ; def s[4:7] 19142; GFX90A-NEXT: ;;#ASMEND 19143; GFX90A-NEXT: s_mov_b32 s8, 
s6 19144; GFX90A-NEXT: ;;#ASMSTART 19145; GFX90A-NEXT: ; use s8 19146; GFX90A-NEXT: ;;#ASMEND 19147; GFX90A-NEXT: s_setpc_b64 s[30:31] 19148; 19149; GFX940-LABEL: s_shuffle_v2i16_v8i16__4_5: 19150; GFX940: ; %bb.0: 19151; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19152; GFX940-NEXT: ;;#ASMSTART 19153; GFX940-NEXT: ; def s[0:3] 19154; GFX940-NEXT: ;;#ASMEND 19155; GFX940-NEXT: s_mov_b32 s8, s2 19156; GFX940-NEXT: ;;#ASMSTART 19157; GFX940-NEXT: ; use s8 19158; GFX940-NEXT: ;;#ASMEND 19159; GFX940-NEXT: s_setpc_b64 s[30:31] 19160 %vec0 = call <8 x i16> asm "; def $0", "=s"() 19161 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 4, i32 5> 19162 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 19163 ret void 19164} 19165 19166define void @s_shuffle_v2i16_v8i16__5_5() { 19167; GFX900-LABEL: s_shuffle_v2i16_v8i16__5_5: 19168; GFX900: ; %bb.0: 19169; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19170; GFX900-NEXT: ;;#ASMSTART 19171; GFX900-NEXT: ; def s[4:7] 19172; GFX900-NEXT: ;;#ASMEND 19173; GFX900-NEXT: s_pack_hh_b32_b16 s8, s6, s6 19174; GFX900-NEXT: ;;#ASMSTART 19175; GFX900-NEXT: ; use s8 19176; GFX900-NEXT: ;;#ASMEND 19177; GFX900-NEXT: s_setpc_b64 s[30:31] 19178; 19179; GFX90A-LABEL: s_shuffle_v2i16_v8i16__5_5: 19180; GFX90A: ; %bb.0: 19181; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19182; GFX90A-NEXT: ;;#ASMSTART 19183; GFX90A-NEXT: ; def s[4:7] 19184; GFX90A-NEXT: ;;#ASMEND 19185; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s6, s6 19186; GFX90A-NEXT: ;;#ASMSTART 19187; GFX90A-NEXT: ; use s8 19188; GFX90A-NEXT: ;;#ASMEND 19189; GFX90A-NEXT: s_setpc_b64 s[30:31] 19190; 19191; GFX940-LABEL: s_shuffle_v2i16_v8i16__5_5: 19192; GFX940: ; %bb.0: 19193; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19194; GFX940-NEXT: ;;#ASMSTART 19195; GFX940-NEXT: ; def s[0:3] 19196; GFX940-NEXT: ;;#ASMEND 19197; GFX940-NEXT: s_pack_hh_b32_b16 s8, s2, s2 19198; GFX940-NEXT: ;;#ASMSTART 19199; GFX940-NEXT: ; use s8 19200; 
GFX940-NEXT: ;;#ASMEND 19201; GFX940-NEXT: s_setpc_b64 s[30:31] 19202 %vec0 = call <8 x i16> asm "; def $0", "=s"() 19203 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 5, i32 5> 19204 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 19205 ret void 19206} 19207 19208define void @s_shuffle_v2i16_v8i16__6_5() { 19209; GFX900-LABEL: s_shuffle_v2i16_v8i16__6_5: 19210; GFX900: ; %bb.0: 19211; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19212; GFX900-NEXT: ;;#ASMSTART 19213; GFX900-NEXT: ; def s[4:7] 19214; GFX900-NEXT: ;;#ASMEND 19215; GFX900-NEXT: s_pack_lh_b32_b16 s8, s7, s6 19216; GFX900-NEXT: ;;#ASMSTART 19217; GFX900-NEXT: ; use s8 19218; GFX900-NEXT: ;;#ASMEND 19219; GFX900-NEXT: s_setpc_b64 s[30:31] 19220; 19221; GFX90A-LABEL: s_shuffle_v2i16_v8i16__6_5: 19222; GFX90A: ; %bb.0: 19223; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19224; GFX90A-NEXT: ;;#ASMSTART 19225; GFX90A-NEXT: ; def s[4:7] 19226; GFX90A-NEXT: ;;#ASMEND 19227; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s7, s6 19228; GFX90A-NEXT: ;;#ASMSTART 19229; GFX90A-NEXT: ; use s8 19230; GFX90A-NEXT: ;;#ASMEND 19231; GFX90A-NEXT: s_setpc_b64 s[30:31] 19232; 19233; GFX940-LABEL: s_shuffle_v2i16_v8i16__6_5: 19234; GFX940: ; %bb.0: 19235; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19236; GFX940-NEXT: ;;#ASMSTART 19237; GFX940-NEXT: ; def s[0:3] 19238; GFX940-NEXT: ;;#ASMEND 19239; GFX940-NEXT: s_pack_lh_b32_b16 s8, s3, s2 19240; GFX940-NEXT: ;;#ASMSTART 19241; GFX940-NEXT: ; use s8 19242; GFX940-NEXT: ;;#ASMEND 19243; GFX940-NEXT: s_setpc_b64 s[30:31] 19244 %vec0 = call <8 x i16> asm "; def $0", "=s"() 19245 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 6, i32 5> 19246 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 19247 ret void 19248} 19249 19250define void @s_shuffle_v2i16_v8i16__7_5() { 19251; GFX900-LABEL: s_shuffle_v2i16_v8i16__7_5: 19252; GFX900: ; %bb.0: 19253; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) 19254; GFX900-NEXT: ;;#ASMSTART 19255; GFX900-NEXT: ; def s[4:7] 19256; GFX900-NEXT: ;;#ASMEND 19257; GFX900-NEXT: s_pack_hh_b32_b16 s8, s7, s6 19258; GFX900-NEXT: ;;#ASMSTART 19259; GFX900-NEXT: ; use s8 19260; GFX900-NEXT: ;;#ASMEND 19261; GFX900-NEXT: s_setpc_b64 s[30:31] 19262; 19263; GFX90A-LABEL: s_shuffle_v2i16_v8i16__7_5: 19264; GFX90A: ; %bb.0: 19265; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19266; GFX90A-NEXT: ;;#ASMSTART 19267; GFX90A-NEXT: ; def s[4:7] 19268; GFX90A-NEXT: ;;#ASMEND 19269; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s7, s6 19270; GFX90A-NEXT: ;;#ASMSTART 19271; GFX90A-NEXT: ; use s8 19272; GFX90A-NEXT: ;;#ASMEND 19273; GFX90A-NEXT: s_setpc_b64 s[30:31] 19274; 19275; GFX940-LABEL: s_shuffle_v2i16_v8i16__7_5: 19276; GFX940: ; %bb.0: 19277; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19278; GFX940-NEXT: ;;#ASMSTART 19279; GFX940-NEXT: ; def s[0:3] 19280; GFX940-NEXT: ;;#ASMEND 19281; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s2 19282; GFX940-NEXT: ;;#ASMSTART 19283; GFX940-NEXT: ; use s8 19284; GFX940-NEXT: ;;#ASMEND 19285; GFX940-NEXT: s_setpc_b64 s[30:31] 19286 %vec0 = call <8 x i16> asm "; def $0", "=s"() 19287 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 7, i32 5> 19288 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 19289 ret void 19290} 19291 19292define void @s_shuffle_v2i16_v8i16__8_5() { 19293; GFX900-LABEL: s_shuffle_v2i16_v8i16__8_5: 19294; GFX900: ; %bb.0: 19295; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19296; GFX900-NEXT: ;;#ASMSTART 19297; GFX900-NEXT: ; def s[4:7] 19298; GFX900-NEXT: ;;#ASMEND 19299; GFX900-NEXT: s_mov_b32 s8, s6 19300; GFX900-NEXT: ;;#ASMSTART 19301; GFX900-NEXT: ; use s8 19302; GFX900-NEXT: ;;#ASMEND 19303; GFX900-NEXT: s_setpc_b64 s[30:31] 19304; 19305; GFX90A-LABEL: s_shuffle_v2i16_v8i16__8_5: 19306; GFX90A: ; %bb.0: 19307; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19308; GFX90A-NEXT: ;;#ASMSTART 19309; GFX90A-NEXT: ; def 
s[4:7] 19310; GFX90A-NEXT: ;;#ASMEND 19311; GFX90A-NEXT: s_mov_b32 s8, s6 19312; GFX90A-NEXT: ;;#ASMSTART 19313; GFX90A-NEXT: ; use s8 19314; GFX90A-NEXT: ;;#ASMEND 19315; GFX90A-NEXT: s_setpc_b64 s[30:31] 19316; 19317; GFX940-LABEL: s_shuffle_v2i16_v8i16__8_5: 19318; GFX940: ; %bb.0: 19319; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19320; GFX940-NEXT: ;;#ASMSTART 19321; GFX940-NEXT: ; def s[0:3] 19322; GFX940-NEXT: ;;#ASMEND 19323; GFX940-NEXT: s_mov_b32 s8, s2 19324; GFX940-NEXT: ;;#ASMSTART 19325; GFX940-NEXT: ; use s8 19326; GFX940-NEXT: ;;#ASMEND 19327; GFX940-NEXT: s_setpc_b64 s[30:31] 19328 %vec0 = call <8 x i16> asm "; def $0", "=s"() 19329 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 8, i32 5> 19330 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 19331 ret void 19332} 19333 19334define void @s_shuffle_v2i16_v8i16__9_5() { 19335; GFX900-LABEL: s_shuffle_v2i16_v8i16__9_5: 19336; GFX900: ; %bb.0: 19337; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19338; GFX900-NEXT: ;;#ASMSTART 19339; GFX900-NEXT: ; def s[8:11] 19340; GFX900-NEXT: ;;#ASMEND 19341; GFX900-NEXT: ;;#ASMSTART 19342; GFX900-NEXT: ; def s[4:7] 19343; GFX900-NEXT: ;;#ASMEND 19344; GFX900-NEXT: s_pack_hh_b32_b16 s8, s8, s6 19345; GFX900-NEXT: ;;#ASMSTART 19346; GFX900-NEXT: ; use s8 19347; GFX900-NEXT: ;;#ASMEND 19348; GFX900-NEXT: s_setpc_b64 s[30:31] 19349; 19350; GFX90A-LABEL: s_shuffle_v2i16_v8i16__9_5: 19351; GFX90A: ; %bb.0: 19352; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19353; GFX90A-NEXT: ;;#ASMSTART 19354; GFX90A-NEXT: ; def s[8:11] 19355; GFX90A-NEXT: ;;#ASMEND 19356; GFX90A-NEXT: ;;#ASMSTART 19357; GFX90A-NEXT: ; def s[4:7] 19358; GFX90A-NEXT: ;;#ASMEND 19359; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s8, s6 19360; GFX90A-NEXT: ;;#ASMSTART 19361; GFX90A-NEXT: ; use s8 19362; GFX90A-NEXT: ;;#ASMEND 19363; GFX90A-NEXT: s_setpc_b64 s[30:31] 19364; 19365; GFX940-LABEL: s_shuffle_v2i16_v8i16__9_5: 19366; GFX940: ; %bb.0: 19367; 
GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19368; GFX940-NEXT: ;;#ASMSTART 19369; GFX940-NEXT: ; def s[0:3] 19370; GFX940-NEXT: ;;#ASMEND 19371; GFX940-NEXT: ;;#ASMSTART 19372; GFX940-NEXT: ; def s[4:7] 19373; GFX940-NEXT: ;;#ASMEND 19374; GFX940-NEXT: s_pack_hh_b32_b16 s8, s4, s2 19375; GFX940-NEXT: ;;#ASMSTART 19376; GFX940-NEXT: ; use s8 19377; GFX940-NEXT: ;;#ASMEND 19378; GFX940-NEXT: s_setpc_b64 s[30:31] 19379 %vec0 = call <8 x i16> asm "; def $0", "=s"() 19380 %vec1 = call <8 x i16> asm "; def $0", "=s"() 19381 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 9, i32 5> 19382 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 19383 ret void 19384} 19385 19386define void @s_shuffle_v2i16_v8i16__10_5() { 19387; GFX900-LABEL: s_shuffle_v2i16_v8i16__10_5: 19388; GFX900: ; %bb.0: 19389; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19390; GFX900-NEXT: ;;#ASMSTART 19391; GFX900-NEXT: ; def s[8:11] 19392; GFX900-NEXT: ;;#ASMEND 19393; GFX900-NEXT: ;;#ASMSTART 19394; GFX900-NEXT: ; def s[4:7] 19395; GFX900-NEXT: ;;#ASMEND 19396; GFX900-NEXT: s_pack_lh_b32_b16 s8, s9, s6 19397; GFX900-NEXT: ;;#ASMSTART 19398; GFX900-NEXT: ; use s8 19399; GFX900-NEXT: ;;#ASMEND 19400; GFX900-NEXT: s_setpc_b64 s[30:31] 19401; 19402; GFX90A-LABEL: s_shuffle_v2i16_v8i16__10_5: 19403; GFX90A: ; %bb.0: 19404; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19405; GFX90A-NEXT: ;;#ASMSTART 19406; GFX90A-NEXT: ; def s[8:11] 19407; GFX90A-NEXT: ;;#ASMEND 19408; GFX90A-NEXT: ;;#ASMSTART 19409; GFX90A-NEXT: ; def s[4:7] 19410; GFX90A-NEXT: ;;#ASMEND 19411; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s9, s6 19412; GFX90A-NEXT: ;;#ASMSTART 19413; GFX90A-NEXT: ; use s8 19414; GFX90A-NEXT: ;;#ASMEND 19415; GFX90A-NEXT: s_setpc_b64 s[30:31] 19416; 19417; GFX940-LABEL: s_shuffle_v2i16_v8i16__10_5: 19418; GFX940: ; %bb.0: 19419; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19420; GFX940-NEXT: ;;#ASMSTART 19421; GFX940-NEXT: ; def s[0:3] 19422; 
GFX940-NEXT: ;;#ASMEND 19423; GFX940-NEXT: ;;#ASMSTART 19424; GFX940-NEXT: ; def s[4:7] 19425; GFX940-NEXT: ;;#ASMEND 19426; GFX940-NEXT: s_pack_lh_b32_b16 s8, s5, s2 19427; GFX940-NEXT: ;;#ASMSTART 19428; GFX940-NEXT: ; use s8 19429; GFX940-NEXT: ;;#ASMEND 19430; GFX940-NEXT: s_setpc_b64 s[30:31] 19431 %vec0 = call <8 x i16> asm "; def $0", "=s"() 19432 %vec1 = call <8 x i16> asm "; def $0", "=s"() 19433 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 10, i32 5> 19434 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 19435 ret void 19436} 19437 19438define void @s_shuffle_v2i16_v8i16__11_5() { 19439; GFX900-LABEL: s_shuffle_v2i16_v8i16__11_5: 19440; GFX900: ; %bb.0: 19441; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19442; GFX900-NEXT: ;;#ASMSTART 19443; GFX900-NEXT: ; def s[8:11] 19444; GFX900-NEXT: ;;#ASMEND 19445; GFX900-NEXT: ;;#ASMSTART 19446; GFX900-NEXT: ; def s[4:7] 19447; GFX900-NEXT: ;;#ASMEND 19448; GFX900-NEXT: s_pack_hh_b32_b16 s8, s9, s6 19449; GFX900-NEXT: ;;#ASMSTART 19450; GFX900-NEXT: ; use s8 19451; GFX900-NEXT: ;;#ASMEND 19452; GFX900-NEXT: s_setpc_b64 s[30:31] 19453; 19454; GFX90A-LABEL: s_shuffle_v2i16_v8i16__11_5: 19455; GFX90A: ; %bb.0: 19456; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19457; GFX90A-NEXT: ;;#ASMSTART 19458; GFX90A-NEXT: ; def s[8:11] 19459; GFX90A-NEXT: ;;#ASMEND 19460; GFX90A-NEXT: ;;#ASMSTART 19461; GFX90A-NEXT: ; def s[4:7] 19462; GFX90A-NEXT: ;;#ASMEND 19463; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s9, s6 19464; GFX90A-NEXT: ;;#ASMSTART 19465; GFX90A-NEXT: ; use s8 19466; GFX90A-NEXT: ;;#ASMEND 19467; GFX90A-NEXT: s_setpc_b64 s[30:31] 19468; 19469; GFX940-LABEL: s_shuffle_v2i16_v8i16__11_5: 19470; GFX940: ; %bb.0: 19471; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19472; GFX940-NEXT: ;;#ASMSTART 19473; GFX940-NEXT: ; def s[0:3] 19474; GFX940-NEXT: ;;#ASMEND 19475; GFX940-NEXT: ;;#ASMSTART 19476; GFX940-NEXT: ; def s[4:7] 19477; GFX940-NEXT: ;;#ASMEND 19478; 
GFX940-NEXT: s_pack_hh_b32_b16 s8, s5, s2 19479; GFX940-NEXT: ;;#ASMSTART 19480; GFX940-NEXT: ; use s8 19481; GFX940-NEXT: ;;#ASMEND 19482; GFX940-NEXT: s_setpc_b64 s[30:31] 19483 %vec0 = call <8 x i16> asm "; def $0", "=s"() 19484 %vec1 = call <8 x i16> asm "; def $0", "=s"() 19485 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 11, i32 5> 19486 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 19487 ret void 19488} 19489 19490define void @s_shuffle_v2i16_v8i16__12_5() { 19491; GFX900-LABEL: s_shuffle_v2i16_v8i16__12_5: 19492; GFX900: ; %bb.0: 19493; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19494; GFX900-NEXT: ;;#ASMSTART 19495; GFX900-NEXT: ; def s[8:11] 19496; GFX900-NEXT: ;;#ASMEND 19497; GFX900-NEXT: ;;#ASMSTART 19498; GFX900-NEXT: ; def s[4:7] 19499; GFX900-NEXT: ;;#ASMEND 19500; GFX900-NEXT: s_pack_lh_b32_b16 s8, s10, s6 19501; GFX900-NEXT: ;;#ASMSTART 19502; GFX900-NEXT: ; use s8 19503; GFX900-NEXT: ;;#ASMEND 19504; GFX900-NEXT: s_setpc_b64 s[30:31] 19505; 19506; GFX90A-LABEL: s_shuffle_v2i16_v8i16__12_5: 19507; GFX90A: ; %bb.0: 19508; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19509; GFX90A-NEXT: ;;#ASMSTART 19510; GFX90A-NEXT: ; def s[8:11] 19511; GFX90A-NEXT: ;;#ASMEND 19512; GFX90A-NEXT: ;;#ASMSTART 19513; GFX90A-NEXT: ; def s[4:7] 19514; GFX90A-NEXT: ;;#ASMEND 19515; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s10, s6 19516; GFX90A-NEXT: ;;#ASMSTART 19517; GFX90A-NEXT: ; use s8 19518; GFX90A-NEXT: ;;#ASMEND 19519; GFX90A-NEXT: s_setpc_b64 s[30:31] 19520; 19521; GFX940-LABEL: s_shuffle_v2i16_v8i16__12_5: 19522; GFX940: ; %bb.0: 19523; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19524; GFX940-NEXT: ;;#ASMSTART 19525; GFX940-NEXT: ; def s[0:3] 19526; GFX940-NEXT: ;;#ASMEND 19527; GFX940-NEXT: ;;#ASMSTART 19528; GFX940-NEXT: ; def s[4:7] 19529; GFX940-NEXT: ;;#ASMEND 19530; GFX940-NEXT: s_pack_lh_b32_b16 s8, s6, s2 19531; GFX940-NEXT: ;;#ASMSTART 19532; GFX940-NEXT: ; use s8 19533; GFX940-NEXT: 
;;#ASMEND 19534; GFX940-NEXT: s_setpc_b64 s[30:31] 19535 %vec0 = call <8 x i16> asm "; def $0", "=s"() 19536 %vec1 = call <8 x i16> asm "; def $0", "=s"() 19537 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 12, i32 5> 19538 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 19539 ret void 19540} 19541 19542define void @s_shuffle_v2i16_v8i16__13_5() { 19543; GFX900-LABEL: s_shuffle_v2i16_v8i16__13_5: 19544; GFX900: ; %bb.0: 19545; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19546; GFX900-NEXT: ;;#ASMSTART 19547; GFX900-NEXT: ; def s[8:11] 19548; GFX900-NEXT: ;;#ASMEND 19549; GFX900-NEXT: ;;#ASMSTART 19550; GFX900-NEXT: ; def s[4:7] 19551; GFX900-NEXT: ;;#ASMEND 19552; GFX900-NEXT: s_pack_hh_b32_b16 s8, s10, s6 19553; GFX900-NEXT: ;;#ASMSTART 19554; GFX900-NEXT: ; use s8 19555; GFX900-NEXT: ;;#ASMEND 19556; GFX900-NEXT: s_setpc_b64 s[30:31] 19557; 19558; GFX90A-LABEL: s_shuffle_v2i16_v8i16__13_5: 19559; GFX90A: ; %bb.0: 19560; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19561; GFX90A-NEXT: ;;#ASMSTART 19562; GFX90A-NEXT: ; def s[8:11] 19563; GFX90A-NEXT: ;;#ASMEND 19564; GFX90A-NEXT: ;;#ASMSTART 19565; GFX90A-NEXT: ; def s[4:7] 19566; GFX90A-NEXT: ;;#ASMEND 19567; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s10, s6 19568; GFX90A-NEXT: ;;#ASMSTART 19569; GFX90A-NEXT: ; use s8 19570; GFX90A-NEXT: ;;#ASMEND 19571; GFX90A-NEXT: s_setpc_b64 s[30:31] 19572; 19573; GFX940-LABEL: s_shuffle_v2i16_v8i16__13_5: 19574; GFX940: ; %bb.0: 19575; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19576; GFX940-NEXT: ;;#ASMSTART 19577; GFX940-NEXT: ; def s[0:3] 19578; GFX940-NEXT: ;;#ASMEND 19579; GFX940-NEXT: ;;#ASMSTART 19580; GFX940-NEXT: ; def s[4:7] 19581; GFX940-NEXT: ;;#ASMEND 19582; GFX940-NEXT: s_pack_hh_b32_b16 s8, s6, s2 19583; GFX940-NEXT: ;;#ASMSTART 19584; GFX940-NEXT: ; use s8 19585; GFX940-NEXT: ;;#ASMEND 19586; GFX940-NEXT: s_setpc_b64 s[30:31] 19587 %vec0 = call <8 x i16> asm "; def $0", "=s"() 19588 %vec1 = call <8 x 
i16> asm "; def $0", "=s"() 19589 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 13, i32 5> 19590 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 19591 ret void 19592} 19593 19594define void @s_shuffle_v2i16_v8i16__14_5() { 19595; GFX900-LABEL: s_shuffle_v2i16_v8i16__14_5: 19596; GFX900: ; %bb.0: 19597; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19598; GFX900-NEXT: ;;#ASMSTART 19599; GFX900-NEXT: ; def s[8:11] 19600; GFX900-NEXT: ;;#ASMEND 19601; GFX900-NEXT: ;;#ASMSTART 19602; GFX900-NEXT: ; def s[4:7] 19603; GFX900-NEXT: ;;#ASMEND 19604; GFX900-NEXT: s_pack_lh_b32_b16 s8, s11, s6 19605; GFX900-NEXT: ;;#ASMSTART 19606; GFX900-NEXT: ; use s8 19607; GFX900-NEXT: ;;#ASMEND 19608; GFX900-NEXT: s_setpc_b64 s[30:31] 19609; 19610; GFX90A-LABEL: s_shuffle_v2i16_v8i16__14_5: 19611; GFX90A: ; %bb.0: 19612; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19613; GFX90A-NEXT: ;;#ASMSTART 19614; GFX90A-NEXT: ; def s[8:11] 19615; GFX90A-NEXT: ;;#ASMEND 19616; GFX90A-NEXT: ;;#ASMSTART 19617; GFX90A-NEXT: ; def s[4:7] 19618; GFX90A-NEXT: ;;#ASMEND 19619; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s11, s6 19620; GFX90A-NEXT: ;;#ASMSTART 19621; GFX90A-NEXT: ; use s8 19622; GFX90A-NEXT: ;;#ASMEND 19623; GFX90A-NEXT: s_setpc_b64 s[30:31] 19624; 19625; GFX940-LABEL: s_shuffle_v2i16_v8i16__14_5: 19626; GFX940: ; %bb.0: 19627; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19628; GFX940-NEXT: ;;#ASMSTART 19629; GFX940-NEXT: ; def s[0:3] 19630; GFX940-NEXT: ;;#ASMEND 19631; GFX940-NEXT: ;;#ASMSTART 19632; GFX940-NEXT: ; def s[4:7] 19633; GFX940-NEXT: ;;#ASMEND 19634; GFX940-NEXT: s_pack_lh_b32_b16 s8, s7, s2 19635; GFX940-NEXT: ;;#ASMSTART 19636; GFX940-NEXT: ; use s8 19637; GFX940-NEXT: ;;#ASMEND 19638; GFX940-NEXT: s_setpc_b64 s[30:31] 19639 %vec0 = call <8 x i16> asm "; def $0", "=s"() 19640 %vec1 = call <8 x i16> asm "; def $0", "=s"() 19641 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 14, i32 5> 19642 call 
void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 19643 ret void 19644} 19645 19646define void @s_shuffle_v2i16_v8i16__u_6() { 19647; GFX900-LABEL: s_shuffle_v2i16_v8i16__u_6: 19648; GFX900: ; %bb.0: 19649; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19650; GFX900-NEXT: ;;#ASMSTART 19651; GFX900-NEXT: ; def s[4:7] 19652; GFX900-NEXT: ;;#ASMEND 19653; GFX900-NEXT: s_lshl_b32 s8, s7, 16 19654; GFX900-NEXT: ;;#ASMSTART 19655; GFX900-NEXT: ; use s8 19656; GFX900-NEXT: ;;#ASMEND 19657; GFX900-NEXT: s_setpc_b64 s[30:31] 19658; 19659; GFX90A-LABEL: s_shuffle_v2i16_v8i16__u_6: 19660; GFX90A: ; %bb.0: 19661; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19662; GFX90A-NEXT: ;;#ASMSTART 19663; GFX90A-NEXT: ; def s[4:7] 19664; GFX90A-NEXT: ;;#ASMEND 19665; GFX90A-NEXT: s_lshl_b32 s8, s7, 16 19666; GFX90A-NEXT: ;;#ASMSTART 19667; GFX90A-NEXT: ; use s8 19668; GFX90A-NEXT: ;;#ASMEND 19669; GFX90A-NEXT: s_setpc_b64 s[30:31] 19670; 19671; GFX940-LABEL: s_shuffle_v2i16_v8i16__u_6: 19672; GFX940: ; %bb.0: 19673; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19674; GFX940-NEXT: ;;#ASMSTART 19675; GFX940-NEXT: ; def s[0:3] 19676; GFX940-NEXT: ;;#ASMEND 19677; GFX940-NEXT: s_lshl_b32 s8, s3, 16 19678; GFX940-NEXT: ;;#ASMSTART 19679; GFX940-NEXT: ; use s8 19680; GFX940-NEXT: ;;#ASMEND 19681; GFX940-NEXT: s_setpc_b64 s[30:31] 19682 %vec0 = call <8 x i16> asm "; def $0", "=s"() 19683 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 poison, i32 6> 19684 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 19685 ret void 19686} 19687 19688define void @s_shuffle_v2i16_v8i16__0_6() { 19689; GFX900-LABEL: s_shuffle_v2i16_v8i16__0_6: 19690; GFX900: ; %bb.0: 19691; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19692; GFX900-NEXT: ;;#ASMSTART 19693; GFX900-NEXT: ; def s[4:7] 19694; GFX900-NEXT: ;;#ASMEND 19695; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s7 19696; GFX900-NEXT: ;;#ASMSTART 19697; GFX900-NEXT: ; use s8 19698; GFX900-NEXT: 
;;#ASMEND 19699; GFX900-NEXT: s_setpc_b64 s[30:31] 19700; 19701; GFX90A-LABEL: s_shuffle_v2i16_v8i16__0_6: 19702; GFX90A: ; %bb.0: 19703; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19704; GFX90A-NEXT: ;;#ASMSTART 19705; GFX90A-NEXT: ; def s[4:7] 19706; GFX90A-NEXT: ;;#ASMEND 19707; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s7 19708; GFX90A-NEXT: ;;#ASMSTART 19709; GFX90A-NEXT: ; use s8 19710; GFX90A-NEXT: ;;#ASMEND 19711; GFX90A-NEXT: s_setpc_b64 s[30:31] 19712; 19713; GFX940-LABEL: s_shuffle_v2i16_v8i16__0_6: 19714; GFX940: ; %bb.0: 19715; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19716; GFX940-NEXT: ;;#ASMSTART 19717; GFX940-NEXT: ; def s[0:3] 19718; GFX940-NEXT: ;;#ASMEND 19719; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3 19720; GFX940-NEXT: ;;#ASMSTART 19721; GFX940-NEXT: ; use s8 19722; GFX940-NEXT: ;;#ASMEND 19723; GFX940-NEXT: s_setpc_b64 s[30:31] 19724 %vec0 = call <8 x i16> asm "; def $0", "=s"() 19725 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 0, i32 6> 19726 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 19727 ret void 19728} 19729 19730define void @s_shuffle_v2i16_v8i16__1_6() { 19731; GFX900-LABEL: s_shuffle_v2i16_v8i16__1_6: 19732; GFX900: ; %bb.0: 19733; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19734; GFX900-NEXT: ;;#ASMSTART 19735; GFX900-NEXT: ; def s[4:7] 19736; GFX900-NEXT: ;;#ASMEND 19737; GFX900-NEXT: s_lshr_b32 s4, s4, 16 19738; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s7 19739; GFX900-NEXT: ;;#ASMSTART 19740; GFX900-NEXT: ; use s8 19741; GFX900-NEXT: ;;#ASMEND 19742; GFX900-NEXT: s_setpc_b64 s[30:31] 19743; 19744; GFX90A-LABEL: s_shuffle_v2i16_v8i16__1_6: 19745; GFX90A: ; %bb.0: 19746; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19747; GFX90A-NEXT: ;;#ASMSTART 19748; GFX90A-NEXT: ; def s[4:7] 19749; GFX90A-NEXT: ;;#ASMEND 19750; GFX90A-NEXT: s_lshr_b32 s4, s4, 16 19751; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s7 19752; GFX90A-NEXT: ;;#ASMSTART 19753; GFX90A-NEXT: ; 
use s8 19754; GFX90A-NEXT: ;;#ASMEND 19755; GFX90A-NEXT: s_setpc_b64 s[30:31] 19756; 19757; GFX940-LABEL: s_shuffle_v2i16_v8i16__1_6: 19758; GFX940: ; %bb.0: 19759; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19760; GFX940-NEXT: ;;#ASMSTART 19761; GFX940-NEXT: ; def s[0:3] 19762; GFX940-NEXT: ;;#ASMEND 19763; GFX940-NEXT: s_lshr_b32 s0, s0, 16 19764; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3 19765; GFX940-NEXT: ;;#ASMSTART 19766; GFX940-NEXT: ; use s8 19767; GFX940-NEXT: ;;#ASMEND 19768; GFX940-NEXT: s_setpc_b64 s[30:31] 19769 %vec0 = call <8 x i16> asm "; def $0", "=s"() 19770 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 1, i32 6> 19771 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 19772 ret void 19773} 19774 19775define void @s_shuffle_v2i16_v8i16__2_6() { 19776; GFX900-LABEL: s_shuffle_v2i16_v8i16__2_6: 19777; GFX900: ; %bb.0: 19778; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19779; GFX900-NEXT: ;;#ASMSTART 19780; GFX900-NEXT: ; def s[4:7] 19781; GFX900-NEXT: ;;#ASMEND 19782; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s7 19783; GFX900-NEXT: ;;#ASMSTART 19784; GFX900-NEXT: ; use s8 19785; GFX900-NEXT: ;;#ASMEND 19786; GFX900-NEXT: s_setpc_b64 s[30:31] 19787; 19788; GFX90A-LABEL: s_shuffle_v2i16_v8i16__2_6: 19789; GFX90A: ; %bb.0: 19790; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19791; GFX90A-NEXT: ;;#ASMSTART 19792; GFX90A-NEXT: ; def s[4:7] 19793; GFX90A-NEXT: ;;#ASMEND 19794; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s7 19795; GFX90A-NEXT: ;;#ASMSTART 19796; GFX90A-NEXT: ; use s8 19797; GFX90A-NEXT: ;;#ASMEND 19798; GFX90A-NEXT: s_setpc_b64 s[30:31] 19799; 19800; GFX940-LABEL: s_shuffle_v2i16_v8i16__2_6: 19801; GFX940: ; %bb.0: 19802; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19803; GFX940-NEXT: ;;#ASMSTART 19804; GFX940-NEXT: ; def s[0:3] 19805; GFX940-NEXT: ;;#ASMEND 19806; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s3 19807; GFX940-NEXT: ;;#ASMSTART 19808; GFX940-NEXT: ; use s8 19809; 
GFX940-NEXT: ;;#ASMEND 19810; GFX940-NEXT: s_setpc_b64 s[30:31] 19811 %vec0 = call <8 x i16> asm "; def $0", "=s"() 19812 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 2, i32 6> 19813 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 19814 ret void 19815} 19816 19817define void @s_shuffle_v2i16_v8i16__3_6() { 19818; GFX900-LABEL: s_shuffle_v2i16_v8i16__3_6: 19819; GFX900: ; %bb.0: 19820; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19821; GFX900-NEXT: ;;#ASMSTART 19822; GFX900-NEXT: ; def s[4:7] 19823; GFX900-NEXT: ;;#ASMEND 19824; GFX900-NEXT: s_lshr_b32 s4, s5, 16 19825; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s7 19826; GFX900-NEXT: ;;#ASMSTART 19827; GFX900-NEXT: ; use s8 19828; GFX900-NEXT: ;;#ASMEND 19829; GFX900-NEXT: s_setpc_b64 s[30:31] 19830; 19831; GFX90A-LABEL: s_shuffle_v2i16_v8i16__3_6: 19832; GFX90A: ; %bb.0: 19833; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19834; GFX90A-NEXT: ;;#ASMSTART 19835; GFX90A-NEXT: ; def s[4:7] 19836; GFX90A-NEXT: ;;#ASMEND 19837; GFX90A-NEXT: s_lshr_b32 s4, s5, 16 19838; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s7 19839; GFX90A-NEXT: ;;#ASMSTART 19840; GFX90A-NEXT: ; use s8 19841; GFX90A-NEXT: ;;#ASMEND 19842; GFX90A-NEXT: s_setpc_b64 s[30:31] 19843; 19844; GFX940-LABEL: s_shuffle_v2i16_v8i16__3_6: 19845; GFX940: ; %bb.0: 19846; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19847; GFX940-NEXT: ;;#ASMSTART 19848; GFX940-NEXT: ; def s[0:3] 19849; GFX940-NEXT: ;;#ASMEND 19850; GFX940-NEXT: s_lshr_b32 s0, s1, 16 19851; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3 19852; GFX940-NEXT: ;;#ASMSTART 19853; GFX940-NEXT: ; use s8 19854; GFX940-NEXT: ;;#ASMEND 19855; GFX940-NEXT: s_setpc_b64 s[30:31] 19856 %vec0 = call <8 x i16> asm "; def $0", "=s"() 19857 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 3, i32 6> 19858 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 19859 ret void 19860} 19861 19862define void @s_shuffle_v2i16_v8i16__4_6() { 
19863; GFX900-LABEL: s_shuffle_v2i16_v8i16__4_6: 19864; GFX900: ; %bb.0: 19865; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19866; GFX900-NEXT: ;;#ASMSTART 19867; GFX900-NEXT: ; def s[4:7] 19868; GFX900-NEXT: ;;#ASMEND 19869; GFX900-NEXT: s_pack_ll_b32_b16 s8, s6, s7 19870; GFX900-NEXT: ;;#ASMSTART 19871; GFX900-NEXT: ; use s8 19872; GFX900-NEXT: ;;#ASMEND 19873; GFX900-NEXT: s_setpc_b64 s[30:31] 19874; 19875; GFX90A-LABEL: s_shuffle_v2i16_v8i16__4_6: 19876; GFX90A: ; %bb.0: 19877; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19878; GFX90A-NEXT: ;;#ASMSTART 19879; GFX90A-NEXT: ; def s[4:7] 19880; GFX90A-NEXT: ;;#ASMEND 19881; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s6, s7 19882; GFX90A-NEXT: ;;#ASMSTART 19883; GFX90A-NEXT: ; use s8 19884; GFX90A-NEXT: ;;#ASMEND 19885; GFX90A-NEXT: s_setpc_b64 s[30:31] 19886; 19887; GFX940-LABEL: s_shuffle_v2i16_v8i16__4_6: 19888; GFX940: ; %bb.0: 19889; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19890; GFX940-NEXT: ;;#ASMSTART 19891; GFX940-NEXT: ; def s[0:3] 19892; GFX940-NEXT: ;;#ASMEND 19893; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s3 19894; GFX940-NEXT: ;;#ASMSTART 19895; GFX940-NEXT: ; use s8 19896; GFX940-NEXT: ;;#ASMEND 19897; GFX940-NEXT: s_setpc_b64 s[30:31] 19898 %vec0 = call <8 x i16> asm "; def $0", "=s"() 19899 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 4, i32 6> 19900 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 19901 ret void 19902} 19903 19904define void @s_shuffle_v2i16_v8i16__5_6() { 19905; GFX900-LABEL: s_shuffle_v2i16_v8i16__5_6: 19906; GFX900: ; %bb.0: 19907; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19908; GFX900-NEXT: ;;#ASMSTART 19909; GFX900-NEXT: ; def s[4:7] 19910; GFX900-NEXT: ;;#ASMEND 19911; GFX900-NEXT: s_lshr_b32 s4, s6, 16 19912; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s7 19913; GFX900-NEXT: ;;#ASMSTART 19914; GFX900-NEXT: ; use s8 19915; GFX900-NEXT: ;;#ASMEND 19916; GFX900-NEXT: s_setpc_b64 s[30:31] 19917; 19918; 
GFX90A-LABEL: s_shuffle_v2i16_v8i16__5_6: 19919; GFX90A: ; %bb.0: 19920; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19921; GFX90A-NEXT: ;;#ASMSTART 19922; GFX90A-NEXT: ; def s[4:7] 19923; GFX90A-NEXT: ;;#ASMEND 19924; GFX90A-NEXT: s_lshr_b32 s4, s6, 16 19925; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s7 19926; GFX90A-NEXT: ;;#ASMSTART 19927; GFX90A-NEXT: ; use s8 19928; GFX90A-NEXT: ;;#ASMEND 19929; GFX90A-NEXT: s_setpc_b64 s[30:31] 19930; 19931; GFX940-LABEL: s_shuffle_v2i16_v8i16__5_6: 19932; GFX940: ; %bb.0: 19933; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19934; GFX940-NEXT: ;;#ASMSTART 19935; GFX940-NEXT: ; def s[0:3] 19936; GFX940-NEXT: ;;#ASMEND 19937; GFX940-NEXT: s_lshr_b32 s0, s2, 16 19938; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3 19939; GFX940-NEXT: ;;#ASMSTART 19940; GFX940-NEXT: ; use s8 19941; GFX940-NEXT: ;;#ASMEND 19942; GFX940-NEXT: s_setpc_b64 s[30:31] 19943 %vec0 = call <8 x i16> asm "; def $0", "=s"() 19944 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 5, i32 6> 19945 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 19946 ret void 19947} 19948 19949define void @s_shuffle_v2i16_v8i16__6_6() { 19950; GFX900-LABEL: s_shuffle_v2i16_v8i16__6_6: 19951; GFX900: ; %bb.0: 19952; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19953; GFX900-NEXT: ;;#ASMSTART 19954; GFX900-NEXT: ; def s[4:7] 19955; GFX900-NEXT: ;;#ASMEND 19956; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s7 19957; GFX900-NEXT: ;;#ASMSTART 19958; GFX900-NEXT: ; use s8 19959; GFX900-NEXT: ;;#ASMEND 19960; GFX900-NEXT: s_setpc_b64 s[30:31] 19961; 19962; GFX90A-LABEL: s_shuffle_v2i16_v8i16__6_6: 19963; GFX90A: ; %bb.0: 19964; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19965; GFX90A-NEXT: ;;#ASMSTART 19966; GFX90A-NEXT: ; def s[4:7] 19967; GFX90A-NEXT: ;;#ASMEND 19968; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s7 19969; GFX90A-NEXT: ;;#ASMSTART 19970; GFX90A-NEXT: ; use s8 19971; GFX90A-NEXT: ;;#ASMEND 19972; GFX90A-NEXT: 
s_setpc_b64 s[30:31] 19973; 19974; GFX940-LABEL: s_shuffle_v2i16_v8i16__6_6: 19975; GFX940: ; %bb.0: 19976; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19977; GFX940-NEXT: ;;#ASMSTART 19978; GFX940-NEXT: ; def s[0:3] 19979; GFX940-NEXT: ;;#ASMEND 19980; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 19981; GFX940-NEXT: ;;#ASMSTART 19982; GFX940-NEXT: ; use s8 19983; GFX940-NEXT: ;;#ASMEND 19984; GFX940-NEXT: s_setpc_b64 s[30:31] 19985 %vec0 = call <8 x i16> asm "; def $0", "=s"() 19986 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 6, i32 6> 19987 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 19988 ret void 19989} 19990 19991define void @s_shuffle_v2i16_v8i16__7_6() { 19992; GFX900-LABEL: s_shuffle_v2i16_v8i16__7_6: 19993; GFX900: ; %bb.0: 19994; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19995; GFX900-NEXT: ;;#ASMSTART 19996; GFX900-NEXT: ; def s[4:7] 19997; GFX900-NEXT: ;;#ASMEND 19998; GFX900-NEXT: s_lshr_b32 s4, s7, 16 19999; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s7 20000; GFX900-NEXT: ;;#ASMSTART 20001; GFX900-NEXT: ; use s8 20002; GFX900-NEXT: ;;#ASMEND 20003; GFX900-NEXT: s_setpc_b64 s[30:31] 20004; 20005; GFX90A-LABEL: s_shuffle_v2i16_v8i16__7_6: 20006; GFX90A: ; %bb.0: 20007; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20008; GFX90A-NEXT: ;;#ASMSTART 20009; GFX90A-NEXT: ; def s[4:7] 20010; GFX90A-NEXT: ;;#ASMEND 20011; GFX90A-NEXT: s_lshr_b32 s4, s7, 16 20012; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s7 20013; GFX90A-NEXT: ;;#ASMSTART 20014; GFX90A-NEXT: ; use s8 20015; GFX90A-NEXT: ;;#ASMEND 20016; GFX90A-NEXT: s_setpc_b64 s[30:31] 20017; 20018; GFX940-LABEL: s_shuffle_v2i16_v8i16__7_6: 20019; GFX940: ; %bb.0: 20020; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20021; GFX940-NEXT: ;;#ASMSTART 20022; GFX940-NEXT: ; def s[0:3] 20023; GFX940-NEXT: ;;#ASMEND 20024; GFX940-NEXT: s_lshr_b32 s0, s3, 16 20025; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3 20026; GFX940-NEXT: ;;#ASMSTART 20027; 
GFX940-NEXT: ; use s8 20028; GFX940-NEXT: ;;#ASMEND 20029; GFX940-NEXT: s_setpc_b64 s[30:31] 20030 %vec0 = call <8 x i16> asm "; def $0", "=s"() 20031 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 7, i32 6> 20032 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 20033 ret void 20034} 20035 20036define void @s_shuffle_v2i16_v8i16__8_6() { 20037; GFX900-LABEL: s_shuffle_v2i16_v8i16__8_6: 20038; GFX900: ; %bb.0: 20039; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20040; GFX900-NEXT: ;;#ASMSTART 20041; GFX900-NEXT: ; def s[4:7] 20042; GFX900-NEXT: ;;#ASMEND 20043; GFX900-NEXT: s_lshl_b32 s8, s7, 16 20044; GFX900-NEXT: ;;#ASMSTART 20045; GFX900-NEXT: ; use s8 20046; GFX900-NEXT: ;;#ASMEND 20047; GFX900-NEXT: s_setpc_b64 s[30:31] 20048; 20049; GFX90A-LABEL: s_shuffle_v2i16_v8i16__8_6: 20050; GFX90A: ; %bb.0: 20051; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20052; GFX90A-NEXT: ;;#ASMSTART 20053; GFX90A-NEXT: ; def s[4:7] 20054; GFX90A-NEXT: ;;#ASMEND 20055; GFX90A-NEXT: s_lshl_b32 s8, s7, 16 20056; GFX90A-NEXT: ;;#ASMSTART 20057; GFX90A-NEXT: ; use s8 20058; GFX90A-NEXT: ;;#ASMEND 20059; GFX90A-NEXT: s_setpc_b64 s[30:31] 20060; 20061; GFX940-LABEL: s_shuffle_v2i16_v8i16__8_6: 20062; GFX940: ; %bb.0: 20063; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20064; GFX940-NEXT: ;;#ASMSTART 20065; GFX940-NEXT: ; def s[0:3] 20066; GFX940-NEXT: ;;#ASMEND 20067; GFX940-NEXT: s_lshl_b32 s8, s3, 16 20068; GFX940-NEXT: ;;#ASMSTART 20069; GFX940-NEXT: ; use s8 20070; GFX940-NEXT: ;;#ASMEND 20071; GFX940-NEXT: s_setpc_b64 s[30:31] 20072 %vec0 = call <8 x i16> asm "; def $0", "=s"() 20073 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 8, i32 6> 20074 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 20075 ret void 20076} 20077 20078define void @s_shuffle_v2i16_v8i16__9_6() { 20079; GFX900-LABEL: s_shuffle_v2i16_v8i16__9_6: 20080; GFX900: ; %bb.0: 20081; GFX900-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) 20082; GFX900-NEXT: ;;#ASMSTART 20083; GFX900-NEXT: ; def s[4:7] 20084; GFX900-NEXT: ;;#ASMEND 20085; GFX900-NEXT: ;;#ASMSTART 20086; GFX900-NEXT: ; def s[8:11] 20087; GFX900-NEXT: ;;#ASMEND 20088; GFX900-NEXT: s_lshr_b32 s4, s8, 16 20089; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s7 20090; GFX900-NEXT: ;;#ASMSTART 20091; GFX900-NEXT: ; use s8 20092; GFX900-NEXT: ;;#ASMEND 20093; GFX900-NEXT: s_setpc_b64 s[30:31] 20094; 20095; GFX90A-LABEL: s_shuffle_v2i16_v8i16__9_6: 20096; GFX90A: ; %bb.0: 20097; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20098; GFX90A-NEXT: ;;#ASMSTART 20099; GFX90A-NEXT: ; def s[4:7] 20100; GFX90A-NEXT: ;;#ASMEND 20101; GFX90A-NEXT: ;;#ASMSTART 20102; GFX90A-NEXT: ; def s[8:11] 20103; GFX90A-NEXT: ;;#ASMEND 20104; GFX90A-NEXT: s_lshr_b32 s4, s8, 16 20105; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s7 20106; GFX90A-NEXT: ;;#ASMSTART 20107; GFX90A-NEXT: ; use s8 20108; GFX90A-NEXT: ;;#ASMEND 20109; GFX90A-NEXT: s_setpc_b64 s[30:31] 20110; 20111; GFX940-LABEL: s_shuffle_v2i16_v8i16__9_6: 20112; GFX940: ; %bb.0: 20113; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20114; GFX940-NEXT: ;;#ASMSTART 20115; GFX940-NEXT: ; def s[0:3] 20116; GFX940-NEXT: ;;#ASMEND 20117; GFX940-NEXT: ;;#ASMSTART 20118; GFX940-NEXT: ; def s[4:7] 20119; GFX940-NEXT: ;;#ASMEND 20120; GFX940-NEXT: s_lshr_b32 s0, s4, 16 20121; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3 20122; GFX940-NEXT: ;;#ASMSTART 20123; GFX940-NEXT: ; use s8 20124; GFX940-NEXT: ;;#ASMEND 20125; GFX940-NEXT: s_setpc_b64 s[30:31] 20126 %vec0 = call <8 x i16> asm "; def $0", "=s"() 20127 %vec1 = call <8 x i16> asm "; def $0", "=s"() 20128 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 9, i32 6> 20129 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 20130 ret void 20131} 20132 20133define void @s_shuffle_v2i16_v8i16__10_6() { 20134; GFX900-LABEL: s_shuffle_v2i16_v8i16__10_6: 20135; GFX900: ; %bb.0: 20136; GFX900-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) 20137; GFX900-NEXT: ;;#ASMSTART 20138; GFX900-NEXT: ; def s[8:11] 20139; GFX900-NEXT: ;;#ASMEND 20140; GFX900-NEXT: ;;#ASMSTART 20141; GFX900-NEXT: ; def s[4:7] 20142; GFX900-NEXT: ;;#ASMEND 20143; GFX900-NEXT: s_pack_ll_b32_b16 s8, s9, s7 20144; GFX900-NEXT: ;;#ASMSTART 20145; GFX900-NEXT: ; use s8 20146; GFX900-NEXT: ;;#ASMEND 20147; GFX900-NEXT: s_setpc_b64 s[30:31] 20148; 20149; GFX90A-LABEL: s_shuffle_v2i16_v8i16__10_6: 20150; GFX90A: ; %bb.0: 20151; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20152; GFX90A-NEXT: ;;#ASMSTART 20153; GFX90A-NEXT: ; def s[8:11] 20154; GFX90A-NEXT: ;;#ASMEND 20155; GFX90A-NEXT: ;;#ASMSTART 20156; GFX90A-NEXT: ; def s[4:7] 20157; GFX90A-NEXT: ;;#ASMEND 20158; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s9, s7 20159; GFX90A-NEXT: ;;#ASMSTART 20160; GFX90A-NEXT: ; use s8 20161; GFX90A-NEXT: ;;#ASMEND 20162; GFX90A-NEXT: s_setpc_b64 s[30:31] 20163; 20164; GFX940-LABEL: s_shuffle_v2i16_v8i16__10_6: 20165; GFX940: ; %bb.0: 20166; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20167; GFX940-NEXT: ;;#ASMSTART 20168; GFX940-NEXT: ; def s[0:3] 20169; GFX940-NEXT: ;;#ASMEND 20170; GFX940-NEXT: ;;#ASMSTART 20171; GFX940-NEXT: ; def s[4:7] 20172; GFX940-NEXT: ;;#ASMEND 20173; GFX940-NEXT: s_pack_ll_b32_b16 s8, s5, s3 20174; GFX940-NEXT: ;;#ASMSTART 20175; GFX940-NEXT: ; use s8 20176; GFX940-NEXT: ;;#ASMEND 20177; GFX940-NEXT: s_setpc_b64 s[30:31] 20178 %vec0 = call <8 x i16> asm "; def $0", "=s"() 20179 %vec1 = call <8 x i16> asm "; def $0", "=s"() 20180 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 10, i32 6> 20181 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 20182 ret void 20183} 20184 20185define void @s_shuffle_v2i16_v8i16__11_6() { 20186; GFX900-LABEL: s_shuffle_v2i16_v8i16__11_6: 20187; GFX900: ; %bb.0: 20188; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20189; GFX900-NEXT: ;;#ASMSTART 20190; GFX900-NEXT: ; def s[4:7] 20191; GFX900-NEXT: ;;#ASMEND 20192; 
GFX900-NEXT: ;;#ASMSTART 20193; GFX900-NEXT: ; def s[8:11] 20194; GFX900-NEXT: ;;#ASMEND 20195; GFX900-NEXT: s_lshr_b32 s4, s9, 16 20196; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s7 20197; GFX900-NEXT: ;;#ASMSTART 20198; GFX900-NEXT: ; use s8 20199; GFX900-NEXT: ;;#ASMEND 20200; GFX900-NEXT: s_setpc_b64 s[30:31] 20201; 20202; GFX90A-LABEL: s_shuffle_v2i16_v8i16__11_6: 20203; GFX90A: ; %bb.0: 20204; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20205; GFX90A-NEXT: ;;#ASMSTART 20206; GFX90A-NEXT: ; def s[4:7] 20207; GFX90A-NEXT: ;;#ASMEND 20208; GFX90A-NEXT: ;;#ASMSTART 20209; GFX90A-NEXT: ; def s[8:11] 20210; GFX90A-NEXT: ;;#ASMEND 20211; GFX90A-NEXT: s_lshr_b32 s4, s9, 16 20212; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s7 20213; GFX90A-NEXT: ;;#ASMSTART 20214; GFX90A-NEXT: ; use s8 20215; GFX90A-NEXT: ;;#ASMEND 20216; GFX90A-NEXT: s_setpc_b64 s[30:31] 20217; 20218; GFX940-LABEL: s_shuffle_v2i16_v8i16__11_6: 20219; GFX940: ; %bb.0: 20220; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20221; GFX940-NEXT: ;;#ASMSTART 20222; GFX940-NEXT: ; def s[0:3] 20223; GFX940-NEXT: ;;#ASMEND 20224; GFX940-NEXT: ;;#ASMSTART 20225; GFX940-NEXT: ; def s[4:7] 20226; GFX940-NEXT: ;;#ASMEND 20227; GFX940-NEXT: s_lshr_b32 s0, s5, 16 20228; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3 20229; GFX940-NEXT: ;;#ASMSTART 20230; GFX940-NEXT: ; use s8 20231; GFX940-NEXT: ;;#ASMEND 20232; GFX940-NEXT: s_setpc_b64 s[30:31] 20233 %vec0 = call <8 x i16> asm "; def $0", "=s"() 20234 %vec1 = call <8 x i16> asm "; def $0", "=s"() 20235 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 11, i32 6> 20236 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 20237 ret void 20238} 20239 20240define void @s_shuffle_v2i16_v8i16__12_6() { 20241; GFX900-LABEL: s_shuffle_v2i16_v8i16__12_6: 20242; GFX900: ; %bb.0: 20243; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20244; GFX900-NEXT: ;;#ASMSTART 20245; GFX900-NEXT: ; def s[8:11] 20246; GFX900-NEXT: ;;#ASMEND 20247; 
GFX900-NEXT: ;;#ASMSTART 20248; GFX900-NEXT: ; def s[4:7] 20249; GFX900-NEXT: ;;#ASMEND 20250; GFX900-NEXT: s_pack_ll_b32_b16 s8, s10, s7 20251; GFX900-NEXT: ;;#ASMSTART 20252; GFX900-NEXT: ; use s8 20253; GFX900-NEXT: ;;#ASMEND 20254; GFX900-NEXT: s_setpc_b64 s[30:31] 20255; 20256; GFX90A-LABEL: s_shuffle_v2i16_v8i16__12_6: 20257; GFX90A: ; %bb.0: 20258; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20259; GFX90A-NEXT: ;;#ASMSTART 20260; GFX90A-NEXT: ; def s[8:11] 20261; GFX90A-NEXT: ;;#ASMEND 20262; GFX90A-NEXT: ;;#ASMSTART 20263; GFX90A-NEXT: ; def s[4:7] 20264; GFX90A-NEXT: ;;#ASMEND 20265; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s10, s7 20266; GFX90A-NEXT: ;;#ASMSTART 20267; GFX90A-NEXT: ; use s8 20268; GFX90A-NEXT: ;;#ASMEND 20269; GFX90A-NEXT: s_setpc_b64 s[30:31] 20270; 20271; GFX940-LABEL: s_shuffle_v2i16_v8i16__12_6: 20272; GFX940: ; %bb.0: 20273; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20274; GFX940-NEXT: ;;#ASMSTART 20275; GFX940-NEXT: ; def s[0:3] 20276; GFX940-NEXT: ;;#ASMEND 20277; GFX940-NEXT: ;;#ASMSTART 20278; GFX940-NEXT: ; def s[4:7] 20279; GFX940-NEXT: ;;#ASMEND 20280; GFX940-NEXT: s_pack_ll_b32_b16 s8, s6, s3 20281; GFX940-NEXT: ;;#ASMSTART 20282; GFX940-NEXT: ; use s8 20283; GFX940-NEXT: ;;#ASMEND 20284; GFX940-NEXT: s_setpc_b64 s[30:31] 20285 %vec0 = call <8 x i16> asm "; def $0", "=s"() 20286 %vec1 = call <8 x i16> asm "; def $0", "=s"() 20287 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 12, i32 6> 20288 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 20289 ret void 20290} 20291 20292define void @s_shuffle_v2i16_v8i16__13_6() { 20293; GFX900-LABEL: s_shuffle_v2i16_v8i16__13_6: 20294; GFX900: ; %bb.0: 20295; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20296; GFX900-NEXT: ;;#ASMSTART 20297; GFX900-NEXT: ; def s[4:7] 20298; GFX900-NEXT: ;;#ASMEND 20299; GFX900-NEXT: ;;#ASMSTART 20300; GFX900-NEXT: ; def s[8:11] 20301; GFX900-NEXT: ;;#ASMEND 20302; GFX900-NEXT: s_lshr_b32 s4, s10, 
16 20303; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s7 20304; GFX900-NEXT: ;;#ASMSTART 20305; GFX900-NEXT: ; use s8 20306; GFX900-NEXT: ;;#ASMEND 20307; GFX900-NEXT: s_setpc_b64 s[30:31] 20308; 20309; GFX90A-LABEL: s_shuffle_v2i16_v8i16__13_6: 20310; GFX90A: ; %bb.0: 20311; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20312; GFX90A-NEXT: ;;#ASMSTART 20313; GFX90A-NEXT: ; def s[4:7] 20314; GFX90A-NEXT: ;;#ASMEND 20315; GFX90A-NEXT: ;;#ASMSTART 20316; GFX90A-NEXT: ; def s[8:11] 20317; GFX90A-NEXT: ;;#ASMEND 20318; GFX90A-NEXT: s_lshr_b32 s4, s10, 16 20319; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s7 20320; GFX90A-NEXT: ;;#ASMSTART 20321; GFX90A-NEXT: ; use s8 20322; GFX90A-NEXT: ;;#ASMEND 20323; GFX90A-NEXT: s_setpc_b64 s[30:31] 20324; 20325; GFX940-LABEL: s_shuffle_v2i16_v8i16__13_6: 20326; GFX940: ; %bb.0: 20327; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20328; GFX940-NEXT: ;;#ASMSTART 20329; GFX940-NEXT: ; def s[0:3] 20330; GFX940-NEXT: ;;#ASMEND 20331; GFX940-NEXT: ;;#ASMSTART 20332; GFX940-NEXT: ; def s[4:7] 20333; GFX940-NEXT: ;;#ASMEND 20334; GFX940-NEXT: s_lshr_b32 s0, s6, 16 20335; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3 20336; GFX940-NEXT: ;;#ASMSTART 20337; GFX940-NEXT: ; use s8 20338; GFX940-NEXT: ;;#ASMEND 20339; GFX940-NEXT: s_setpc_b64 s[30:31] 20340 %vec0 = call <8 x i16> asm "; def $0", "=s"() 20341 %vec1 = call <8 x i16> asm "; def $0", "=s"() 20342 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 13, i32 6> 20343 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 20344 ret void 20345} 20346 20347define void @s_shuffle_v2i16_v8i16__14_6() { 20348; GFX900-LABEL: s_shuffle_v2i16_v8i16__14_6: 20349; GFX900: ; %bb.0: 20350; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20351; GFX900-NEXT: ;;#ASMSTART 20352; GFX900-NEXT: ; def s[8:11] 20353; GFX900-NEXT: ;;#ASMEND 20354; GFX900-NEXT: ;;#ASMSTART 20355; GFX900-NEXT: ; def s[4:7] 20356; GFX900-NEXT: ;;#ASMEND 20357; GFX900-NEXT: s_pack_ll_b32_b16 s8, 
s11, s7 20358; GFX900-NEXT: ;;#ASMSTART 20359; GFX900-NEXT: ; use s8 20360; GFX900-NEXT: ;;#ASMEND 20361; GFX900-NEXT: s_setpc_b64 s[30:31] 20362; 20363; GFX90A-LABEL: s_shuffle_v2i16_v8i16__14_6: 20364; GFX90A: ; %bb.0: 20365; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20366; GFX90A-NEXT: ;;#ASMSTART 20367; GFX90A-NEXT: ; def s[8:11] 20368; GFX90A-NEXT: ;;#ASMEND 20369; GFX90A-NEXT: ;;#ASMSTART 20370; GFX90A-NEXT: ; def s[4:7] 20371; GFX90A-NEXT: ;;#ASMEND 20372; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s11, s7 20373; GFX90A-NEXT: ;;#ASMSTART 20374; GFX90A-NEXT: ; use s8 20375; GFX90A-NEXT: ;;#ASMEND 20376; GFX90A-NEXT: s_setpc_b64 s[30:31] 20377; 20378; GFX940-LABEL: s_shuffle_v2i16_v8i16__14_6: 20379; GFX940: ; %bb.0: 20380; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20381; GFX940-NEXT: ;;#ASMSTART 20382; GFX940-NEXT: ; def s[0:3] 20383; GFX940-NEXT: ;;#ASMEND 20384; GFX940-NEXT: ;;#ASMSTART 20385; GFX940-NEXT: ; def s[4:7] 20386; GFX940-NEXT: ;;#ASMEND 20387; GFX940-NEXT: s_pack_ll_b32_b16 s8, s7, s3 20388; GFX940-NEXT: ;;#ASMSTART 20389; GFX940-NEXT: ; use s8 20390; GFX940-NEXT: ;;#ASMEND 20391; GFX940-NEXT: s_setpc_b64 s[30:31] 20392 %vec0 = call <8 x i16> asm "; def $0", "=s"() 20393 %vec1 = call <8 x i16> asm "; def $0", "=s"() 20394 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 14, i32 6> 20395 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 20396 ret void 20397} 20398 20399define void @s_shuffle_v2i16_v8i16__u_7() { 20400; GFX900-LABEL: s_shuffle_v2i16_v8i16__u_7: 20401; GFX900: ; %bb.0: 20402; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20403; GFX900-NEXT: ;;#ASMSTART 20404; GFX900-NEXT: ; def s[4:7] 20405; GFX900-NEXT: ;;#ASMEND 20406; GFX900-NEXT: s_mov_b32 s8, s7 20407; GFX900-NEXT: ;;#ASMSTART 20408; GFX900-NEXT: ; use s8 20409; GFX900-NEXT: ;;#ASMEND 20410; GFX900-NEXT: s_setpc_b64 s[30:31] 20411; 20412; GFX90A-LABEL: s_shuffle_v2i16_v8i16__u_7: 20413; GFX90A: ; %bb.0: 20414; GFX90A-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20415; GFX90A-NEXT: ;;#ASMSTART 20416; GFX90A-NEXT: ; def s[4:7] 20417; GFX90A-NEXT: ;;#ASMEND 20418; GFX90A-NEXT: s_mov_b32 s8, s7 20419; GFX90A-NEXT: ;;#ASMSTART 20420; GFX90A-NEXT: ; use s8 20421; GFX90A-NEXT: ;;#ASMEND 20422; GFX90A-NEXT: s_setpc_b64 s[30:31] 20423; 20424; GFX940-LABEL: s_shuffle_v2i16_v8i16__u_7: 20425; GFX940: ; %bb.0: 20426; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20427; GFX940-NEXT: ;;#ASMSTART 20428; GFX940-NEXT: ; def s[0:3] 20429; GFX940-NEXT: ;;#ASMEND 20430; GFX940-NEXT: s_mov_b32 s8, s3 20431; GFX940-NEXT: ;;#ASMSTART 20432; GFX940-NEXT: ; use s8 20433; GFX940-NEXT: ;;#ASMEND 20434; GFX940-NEXT: s_setpc_b64 s[30:31] 20435 %vec0 = call <8 x i16> asm "; def $0", "=s"() 20436 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 poison, i32 7> 20437 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 20438 ret void 20439} 20440 20441define void @s_shuffle_v2i16_v8i16__0_7() { 20442; GFX900-LABEL: s_shuffle_v2i16_v8i16__0_7: 20443; GFX900: ; %bb.0: 20444; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20445; GFX900-NEXT: ;;#ASMSTART 20446; GFX900-NEXT: ; def s[4:7] 20447; GFX900-NEXT: ;;#ASMEND 20448; GFX900-NEXT: s_pack_lh_b32_b16 s8, s4, s7 20449; GFX900-NEXT: ;;#ASMSTART 20450; GFX900-NEXT: ; use s8 20451; GFX900-NEXT: ;;#ASMEND 20452; GFX900-NEXT: s_setpc_b64 s[30:31] 20453; 20454; GFX90A-LABEL: s_shuffle_v2i16_v8i16__0_7: 20455; GFX90A: ; %bb.0: 20456; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20457; GFX90A-NEXT: ;;#ASMSTART 20458; GFX90A-NEXT: ; def s[4:7] 20459; GFX90A-NEXT: ;;#ASMEND 20460; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s4, s7 20461; GFX90A-NEXT: ;;#ASMSTART 20462; GFX90A-NEXT: ; use s8 20463; GFX90A-NEXT: ;;#ASMEND 20464; GFX90A-NEXT: s_setpc_b64 s[30:31] 20465; 20466; GFX940-LABEL: s_shuffle_v2i16_v8i16__0_7: 20467; GFX940: ; %bb.0: 20468; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20469; GFX940-NEXT: ;;#ASMSTART 
20470; GFX940-NEXT: ; def s[0:3] 20471; GFX940-NEXT: ;;#ASMEND 20472; GFX940-NEXT: s_pack_lh_b32_b16 s8, s0, s3 20473; GFX940-NEXT: ;;#ASMSTART 20474; GFX940-NEXT: ; use s8 20475; GFX940-NEXT: ;;#ASMEND 20476; GFX940-NEXT: s_setpc_b64 s[30:31] 20477 %vec0 = call <8 x i16> asm "; def $0", "=s"() 20478 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 0, i32 7> 20479 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 20480 ret void 20481} 20482 20483define void @s_shuffle_v2i16_v8i16__1_7() { 20484; GFX900-LABEL: s_shuffle_v2i16_v8i16__1_7: 20485; GFX900: ; %bb.0: 20486; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20487; GFX900-NEXT: ;;#ASMSTART 20488; GFX900-NEXT: ; def s[4:7] 20489; GFX900-NEXT: ;;#ASMEND 20490; GFX900-NEXT: s_pack_hh_b32_b16 s8, s4, s7 20491; GFX900-NEXT: ;;#ASMSTART 20492; GFX900-NEXT: ; use s8 20493; GFX900-NEXT: ;;#ASMEND 20494; GFX900-NEXT: s_setpc_b64 s[30:31] 20495; 20496; GFX90A-LABEL: s_shuffle_v2i16_v8i16__1_7: 20497; GFX90A: ; %bb.0: 20498; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20499; GFX90A-NEXT: ;;#ASMSTART 20500; GFX90A-NEXT: ; def s[4:7] 20501; GFX90A-NEXT: ;;#ASMEND 20502; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s4, s7 20503; GFX90A-NEXT: ;;#ASMSTART 20504; GFX90A-NEXT: ; use s8 20505; GFX90A-NEXT: ;;#ASMEND 20506; GFX90A-NEXT: s_setpc_b64 s[30:31] 20507; 20508; GFX940-LABEL: s_shuffle_v2i16_v8i16__1_7: 20509; GFX940: ; %bb.0: 20510; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20511; GFX940-NEXT: ;;#ASMSTART 20512; GFX940-NEXT: ; def s[0:3] 20513; GFX940-NEXT: ;;#ASMEND 20514; GFX940-NEXT: s_pack_hh_b32_b16 s8, s0, s3 20515; GFX940-NEXT: ;;#ASMSTART 20516; GFX940-NEXT: ; use s8 20517; GFX940-NEXT: ;;#ASMEND 20518; GFX940-NEXT: s_setpc_b64 s[30:31] 20519 %vec0 = call <8 x i16> asm "; def $0", "=s"() 20520 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 1, i32 7> 20521 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 20522 ret void 20523} 
20524 20525define void @s_shuffle_v2i16_v8i16__2_7() { 20526; GFX900-LABEL: s_shuffle_v2i16_v8i16__2_7: 20527; GFX900: ; %bb.0: 20528; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20529; GFX900-NEXT: ;;#ASMSTART 20530; GFX900-NEXT: ; def s[4:7] 20531; GFX900-NEXT: ;;#ASMEND 20532; GFX900-NEXT: s_pack_lh_b32_b16 s8, s5, s7 20533; GFX900-NEXT: ;;#ASMSTART 20534; GFX900-NEXT: ; use s8 20535; GFX900-NEXT: ;;#ASMEND 20536; GFX900-NEXT: s_setpc_b64 s[30:31] 20537; 20538; GFX90A-LABEL: s_shuffle_v2i16_v8i16__2_7: 20539; GFX90A: ; %bb.0: 20540; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20541; GFX90A-NEXT: ;;#ASMSTART 20542; GFX90A-NEXT: ; def s[4:7] 20543; GFX90A-NEXT: ;;#ASMEND 20544; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s5, s7 20545; GFX90A-NEXT: ;;#ASMSTART 20546; GFX90A-NEXT: ; use s8 20547; GFX90A-NEXT: ;;#ASMEND 20548; GFX90A-NEXT: s_setpc_b64 s[30:31] 20549; 20550; GFX940-LABEL: s_shuffle_v2i16_v8i16__2_7: 20551; GFX940: ; %bb.0: 20552; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20553; GFX940-NEXT: ;;#ASMSTART 20554; GFX940-NEXT: ; def s[0:3] 20555; GFX940-NEXT: ;;#ASMEND 20556; GFX940-NEXT: s_pack_lh_b32_b16 s8, s1, s3 20557; GFX940-NEXT: ;;#ASMSTART 20558; GFX940-NEXT: ; use s8 20559; GFX940-NEXT: ;;#ASMEND 20560; GFX940-NEXT: s_setpc_b64 s[30:31] 20561 %vec0 = call <8 x i16> asm "; def $0", "=s"() 20562 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 2, i32 7> 20563 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 20564 ret void 20565} 20566 20567define void @s_shuffle_v2i16_v8i16__3_7() { 20568; GFX900-LABEL: s_shuffle_v2i16_v8i16__3_7: 20569; GFX900: ; %bb.0: 20570; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20571; GFX900-NEXT: ;;#ASMSTART 20572; GFX900-NEXT: ; def s[4:7] 20573; GFX900-NEXT: ;;#ASMEND 20574; GFX900-NEXT: s_pack_hh_b32_b16 s8, s5, s7 20575; GFX900-NEXT: ;;#ASMSTART 20576; GFX900-NEXT: ; use s8 20577; GFX900-NEXT: ;;#ASMEND 20578; GFX900-NEXT: s_setpc_b64 s[30:31] 20579; 
20580; GFX90A-LABEL: s_shuffle_v2i16_v8i16__3_7: 20581; GFX90A: ; %bb.0: 20582; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20583; GFX90A-NEXT: ;;#ASMSTART 20584; GFX90A-NEXT: ; def s[4:7] 20585; GFX90A-NEXT: ;;#ASMEND 20586; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s5, s7 20587; GFX90A-NEXT: ;;#ASMSTART 20588; GFX90A-NEXT: ; use s8 20589; GFX90A-NEXT: ;;#ASMEND 20590; GFX90A-NEXT: s_setpc_b64 s[30:31] 20591; 20592; GFX940-LABEL: s_shuffle_v2i16_v8i16__3_7: 20593; GFX940: ; %bb.0: 20594; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20595; GFX940-NEXT: ;;#ASMSTART 20596; GFX940-NEXT: ; def s[0:3] 20597; GFX940-NEXT: ;;#ASMEND 20598; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s3 20599; GFX940-NEXT: ;;#ASMSTART 20600; GFX940-NEXT: ; use s8 20601; GFX940-NEXT: ;;#ASMEND 20602; GFX940-NEXT: s_setpc_b64 s[30:31] 20603 %vec0 = call <8 x i16> asm "; def $0", "=s"() 20604 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 3, i32 7> 20605 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 20606 ret void 20607} 20608 20609define void @s_shuffle_v2i16_v8i16__4_7() { 20610; GFX900-LABEL: s_shuffle_v2i16_v8i16__4_7: 20611; GFX900: ; %bb.0: 20612; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20613; GFX900-NEXT: ;;#ASMSTART 20614; GFX900-NEXT: ; def s[4:7] 20615; GFX900-NEXT: ;;#ASMEND 20616; GFX900-NEXT: s_pack_lh_b32_b16 s8, s6, s7 20617; GFX900-NEXT: ;;#ASMSTART 20618; GFX900-NEXT: ; use s8 20619; GFX900-NEXT: ;;#ASMEND 20620; GFX900-NEXT: s_setpc_b64 s[30:31] 20621; 20622; GFX90A-LABEL: s_shuffle_v2i16_v8i16__4_7: 20623; GFX90A: ; %bb.0: 20624; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20625; GFX90A-NEXT: ;;#ASMSTART 20626; GFX90A-NEXT: ; def s[4:7] 20627; GFX90A-NEXT: ;;#ASMEND 20628; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s6, s7 20629; GFX90A-NEXT: ;;#ASMSTART 20630; GFX90A-NEXT: ; use s8 20631; GFX90A-NEXT: ;;#ASMEND 20632; GFX90A-NEXT: s_setpc_b64 s[30:31] 20633; 20634; GFX940-LABEL: s_shuffle_v2i16_v8i16__4_7: 20635; 
GFX940: ; %bb.0: 20636; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20637; GFX940-NEXT: ;;#ASMSTART 20638; GFX940-NEXT: ; def s[0:3] 20639; GFX940-NEXT: ;;#ASMEND 20640; GFX940-NEXT: s_pack_lh_b32_b16 s8, s2, s3 20641; GFX940-NEXT: ;;#ASMSTART 20642; GFX940-NEXT: ; use s8 20643; GFX940-NEXT: ;;#ASMEND 20644; GFX940-NEXT: s_setpc_b64 s[30:31] 20645 %vec0 = call <8 x i16> asm "; def $0", "=s"() 20646 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 4, i32 7> 20647 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 20648 ret void 20649} 20650 20651define void @s_shuffle_v2i16_v8i16__5_7() { 20652; GFX900-LABEL: s_shuffle_v2i16_v8i16__5_7: 20653; GFX900: ; %bb.0: 20654; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20655; GFX900-NEXT: ;;#ASMSTART 20656; GFX900-NEXT: ; def s[4:7] 20657; GFX900-NEXT: ;;#ASMEND 20658; GFX900-NEXT: s_pack_hh_b32_b16 s8, s6, s7 20659; GFX900-NEXT: ;;#ASMSTART 20660; GFX900-NEXT: ; use s8 20661; GFX900-NEXT: ;;#ASMEND 20662; GFX900-NEXT: s_setpc_b64 s[30:31] 20663; 20664; GFX90A-LABEL: s_shuffle_v2i16_v8i16__5_7: 20665; GFX90A: ; %bb.0: 20666; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20667; GFX90A-NEXT: ;;#ASMSTART 20668; GFX90A-NEXT: ; def s[4:7] 20669; GFX90A-NEXT: ;;#ASMEND 20670; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s6, s7 20671; GFX90A-NEXT: ;;#ASMSTART 20672; GFX90A-NEXT: ; use s8 20673; GFX90A-NEXT: ;;#ASMEND 20674; GFX90A-NEXT: s_setpc_b64 s[30:31] 20675; 20676; GFX940-LABEL: s_shuffle_v2i16_v8i16__5_7: 20677; GFX940: ; %bb.0: 20678; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20679; GFX940-NEXT: ;;#ASMSTART 20680; GFX940-NEXT: ; def s[0:3] 20681; GFX940-NEXT: ;;#ASMEND 20682; GFX940-NEXT: s_pack_hh_b32_b16 s8, s2, s3 20683; GFX940-NEXT: ;;#ASMSTART 20684; GFX940-NEXT: ; use s8 20685; GFX940-NEXT: ;;#ASMEND 20686; GFX940-NEXT: s_setpc_b64 s[30:31] 20687 %vec0 = call <8 x i16> asm "; def $0", "=s"() 20688 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x 
i32> <i32 5, i32 7> 20689 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 20690 ret void 20691} 20692 20693define void @s_shuffle_v2i16_v8i16__6_7() { 20694; GFX900-LABEL: s_shuffle_v2i16_v8i16__6_7: 20695; GFX900: ; %bb.0: 20696; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20697; GFX900-NEXT: ;;#ASMSTART 20698; GFX900-NEXT: ; def s[4:7] 20699; GFX900-NEXT: ;;#ASMEND 20700; GFX900-NEXT: s_mov_b32 s8, s7 20701; GFX900-NEXT: ;;#ASMSTART 20702; GFX900-NEXT: ; use s8 20703; GFX900-NEXT: ;;#ASMEND 20704; GFX900-NEXT: s_setpc_b64 s[30:31] 20705; 20706; GFX90A-LABEL: s_shuffle_v2i16_v8i16__6_7: 20707; GFX90A: ; %bb.0: 20708; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20709; GFX90A-NEXT: ;;#ASMSTART 20710; GFX90A-NEXT: ; def s[4:7] 20711; GFX90A-NEXT: ;;#ASMEND 20712; GFX90A-NEXT: s_mov_b32 s8, s7 20713; GFX90A-NEXT: ;;#ASMSTART 20714; GFX90A-NEXT: ; use s8 20715; GFX90A-NEXT: ;;#ASMEND 20716; GFX90A-NEXT: s_setpc_b64 s[30:31] 20717; 20718; GFX940-LABEL: s_shuffle_v2i16_v8i16__6_7: 20719; GFX940: ; %bb.0: 20720; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20721; GFX940-NEXT: ;;#ASMSTART 20722; GFX940-NEXT: ; def s[0:3] 20723; GFX940-NEXT: ;;#ASMEND 20724; GFX940-NEXT: s_mov_b32 s8, s3 20725; GFX940-NEXT: ;;#ASMSTART 20726; GFX940-NEXT: ; use s8 20727; GFX940-NEXT: ;;#ASMEND 20728; GFX940-NEXT: s_setpc_b64 s[30:31] 20729 %vec0 = call <8 x i16> asm "; def $0", "=s"() 20730 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 6, i32 7> 20731 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 20732 ret void 20733} 20734 20735define void @s_shuffle_v2i16_v8i16__7_7() { 20736; GFX900-LABEL: s_shuffle_v2i16_v8i16__7_7: 20737; GFX900: ; %bb.0: 20738; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20739; GFX900-NEXT: ;;#ASMSTART 20740; GFX900-NEXT: ; def s[4:7] 20741; GFX900-NEXT: ;;#ASMEND 20742; GFX900-NEXT: s_pack_hh_b32_b16 s8, s7, s7 20743; GFX900-NEXT: ;;#ASMSTART 20744; GFX900-NEXT: ; use s8 20745; 
GFX900-NEXT: ;;#ASMEND 20746; GFX900-NEXT: s_setpc_b64 s[30:31] 20747; 20748; GFX90A-LABEL: s_shuffle_v2i16_v8i16__7_7: 20749; GFX90A: ; %bb.0: 20750; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20751; GFX90A-NEXT: ;;#ASMSTART 20752; GFX90A-NEXT: ; def s[4:7] 20753; GFX90A-NEXT: ;;#ASMEND 20754; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s7, s7 20755; GFX90A-NEXT: ;;#ASMSTART 20756; GFX90A-NEXT: ; use s8 20757; GFX90A-NEXT: ;;#ASMEND 20758; GFX90A-NEXT: s_setpc_b64 s[30:31] 20759; 20760; GFX940-LABEL: s_shuffle_v2i16_v8i16__7_7: 20761; GFX940: ; %bb.0: 20762; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20763; GFX940-NEXT: ;;#ASMSTART 20764; GFX940-NEXT: ; def s[0:3] 20765; GFX940-NEXT: ;;#ASMEND 20766; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s3 20767; GFX940-NEXT: ;;#ASMSTART 20768; GFX940-NEXT: ; use s8 20769; GFX940-NEXT: ;;#ASMEND 20770; GFX940-NEXT: s_setpc_b64 s[30:31] 20771 %vec0 = call <8 x i16> asm "; def $0", "=s"() 20772 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 7, i32 7> 20773 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 20774 ret void 20775} 20776 20777define void @s_shuffle_v2i16_v8i16__8_7() { 20778; GFX900-LABEL: s_shuffle_v2i16_v8i16__8_7: 20779; GFX900: ; %bb.0: 20780; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20781; GFX900-NEXT: ;;#ASMSTART 20782; GFX900-NEXT: ; def s[4:7] 20783; GFX900-NEXT: ;;#ASMEND 20784; GFX900-NEXT: s_mov_b32 s8, s7 20785; GFX900-NEXT: ;;#ASMSTART 20786; GFX900-NEXT: ; use s8 20787; GFX900-NEXT: ;;#ASMEND 20788; GFX900-NEXT: s_setpc_b64 s[30:31] 20789; 20790; GFX90A-LABEL: s_shuffle_v2i16_v8i16__8_7: 20791; GFX90A: ; %bb.0: 20792; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20793; GFX90A-NEXT: ;;#ASMSTART 20794; GFX90A-NEXT: ; def s[4:7] 20795; GFX90A-NEXT: ;;#ASMEND 20796; GFX90A-NEXT: s_mov_b32 s8, s7 20797; GFX90A-NEXT: ;;#ASMSTART 20798; GFX90A-NEXT: ; use s8 20799; GFX90A-NEXT: ;;#ASMEND 20800; GFX90A-NEXT: s_setpc_b64 s[30:31] 20801; 20802; 
GFX940-LABEL: s_shuffle_v2i16_v8i16__8_7: 20803; GFX940: ; %bb.0: 20804; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20805; GFX940-NEXT: ;;#ASMSTART 20806; GFX940-NEXT: ; def s[0:3] 20807; GFX940-NEXT: ;;#ASMEND 20808; GFX940-NEXT: s_mov_b32 s8, s3 20809; GFX940-NEXT: ;;#ASMSTART 20810; GFX940-NEXT: ; use s8 20811; GFX940-NEXT: ;;#ASMEND 20812; GFX940-NEXT: s_setpc_b64 s[30:31] 20813 %vec0 = call <8 x i16> asm "; def $0", "=s"() 20814 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 8, i32 7> 20815 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 20816 ret void 20817} 20818 20819define void @s_shuffle_v2i16_v8i16__9_7() { 20820; GFX900-LABEL: s_shuffle_v2i16_v8i16__9_7: 20821; GFX900: ; %bb.0: 20822; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20823; GFX900-NEXT: ;;#ASMSTART 20824; GFX900-NEXT: ; def s[8:11] 20825; GFX900-NEXT: ;;#ASMEND 20826; GFX900-NEXT: ;;#ASMSTART 20827; GFX900-NEXT: ; def s[4:7] 20828; GFX900-NEXT: ;;#ASMEND 20829; GFX900-NEXT: s_pack_hh_b32_b16 s8, s8, s7 20830; GFX900-NEXT: ;;#ASMSTART 20831; GFX900-NEXT: ; use s8 20832; GFX900-NEXT: ;;#ASMEND 20833; GFX900-NEXT: s_setpc_b64 s[30:31] 20834; 20835; GFX90A-LABEL: s_shuffle_v2i16_v8i16__9_7: 20836; GFX90A: ; %bb.0: 20837; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20838; GFX90A-NEXT: ;;#ASMSTART 20839; GFX90A-NEXT: ; def s[8:11] 20840; GFX90A-NEXT: ;;#ASMEND 20841; GFX90A-NEXT: ;;#ASMSTART 20842; GFX90A-NEXT: ; def s[4:7] 20843; GFX90A-NEXT: ;;#ASMEND 20844; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s8, s7 20845; GFX90A-NEXT: ;;#ASMSTART 20846; GFX90A-NEXT: ; use s8 20847; GFX90A-NEXT: ;;#ASMEND 20848; GFX90A-NEXT: s_setpc_b64 s[30:31] 20849; 20850; GFX940-LABEL: s_shuffle_v2i16_v8i16__9_7: 20851; GFX940: ; %bb.0: 20852; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20853; GFX940-NEXT: ;;#ASMSTART 20854; GFX940-NEXT: ; def s[0:3] 20855; GFX940-NEXT: ;;#ASMEND 20856; GFX940-NEXT: ;;#ASMSTART 20857; GFX940-NEXT: ; def s[4:7] 20858; 
GFX940-NEXT: ;;#ASMEND 20859; GFX940-NEXT: s_pack_hh_b32_b16 s8, s4, s3 20860; GFX940-NEXT: ;;#ASMSTART 20861; GFX940-NEXT: ; use s8 20862; GFX940-NEXT: ;;#ASMEND 20863; GFX940-NEXT: s_setpc_b64 s[30:31] 20864 %vec0 = call <8 x i16> asm "; def $0", "=s"() 20865 %vec1 = call <8 x i16> asm "; def $0", "=s"() 20866 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 9, i32 7> 20867 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 20868 ret void 20869} 20870 20871define void @s_shuffle_v2i16_v8i16__10_7() { 20872; GFX900-LABEL: s_shuffle_v2i16_v8i16__10_7: 20873; GFX900: ; %bb.0: 20874; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20875; GFX900-NEXT: ;;#ASMSTART 20876; GFX900-NEXT: ; def s[8:11] 20877; GFX900-NEXT: ;;#ASMEND 20878; GFX900-NEXT: ;;#ASMSTART 20879; GFX900-NEXT: ; def s[4:7] 20880; GFX900-NEXT: ;;#ASMEND 20881; GFX900-NEXT: s_pack_lh_b32_b16 s8, s9, s7 20882; GFX900-NEXT: ;;#ASMSTART 20883; GFX900-NEXT: ; use s8 20884; GFX900-NEXT: ;;#ASMEND 20885; GFX900-NEXT: s_setpc_b64 s[30:31] 20886; 20887; GFX90A-LABEL: s_shuffle_v2i16_v8i16__10_7: 20888; GFX90A: ; %bb.0: 20889; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20890; GFX90A-NEXT: ;;#ASMSTART 20891; GFX90A-NEXT: ; def s[8:11] 20892; GFX90A-NEXT: ;;#ASMEND 20893; GFX90A-NEXT: ;;#ASMSTART 20894; GFX90A-NEXT: ; def s[4:7] 20895; GFX90A-NEXT: ;;#ASMEND 20896; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s9, s7 20897; GFX90A-NEXT: ;;#ASMSTART 20898; GFX90A-NEXT: ; use s8 20899; GFX90A-NEXT: ;;#ASMEND 20900; GFX90A-NEXT: s_setpc_b64 s[30:31] 20901; 20902; GFX940-LABEL: s_shuffle_v2i16_v8i16__10_7: 20903; GFX940: ; %bb.0: 20904; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20905; GFX940-NEXT: ;;#ASMSTART 20906; GFX940-NEXT: ; def s[0:3] 20907; GFX940-NEXT: ;;#ASMEND 20908; GFX940-NEXT: ;;#ASMSTART 20909; GFX940-NEXT: ; def s[4:7] 20910; GFX940-NEXT: ;;#ASMEND 20911; GFX940-NEXT: s_pack_lh_b32_b16 s8, s5, s3 20912; GFX940-NEXT: ;;#ASMSTART 20913; GFX940-NEXT: ; use s8 
20914; GFX940-NEXT: ;;#ASMEND 20915; GFX940-NEXT: s_setpc_b64 s[30:31] 20916 %vec0 = call <8 x i16> asm "; def $0", "=s"() 20917 %vec1 = call <8 x i16> asm "; def $0", "=s"() 20918 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 10, i32 7> 20919 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 20920 ret void 20921} 20922 20923define void @s_shuffle_v2i16_v8i16__11_7() { 20924; GFX900-LABEL: s_shuffle_v2i16_v8i16__11_7: 20925; GFX900: ; %bb.0: 20926; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20927; GFX900-NEXT: ;;#ASMSTART 20928; GFX900-NEXT: ; def s[8:11] 20929; GFX900-NEXT: ;;#ASMEND 20930; GFX900-NEXT: ;;#ASMSTART 20931; GFX900-NEXT: ; def s[4:7] 20932; GFX900-NEXT: ;;#ASMEND 20933; GFX900-NEXT: s_pack_hh_b32_b16 s8, s9, s7 20934; GFX900-NEXT: ;;#ASMSTART 20935; GFX900-NEXT: ; use s8 20936; GFX900-NEXT: ;;#ASMEND 20937; GFX900-NEXT: s_setpc_b64 s[30:31] 20938; 20939; GFX90A-LABEL: s_shuffle_v2i16_v8i16__11_7: 20940; GFX90A: ; %bb.0: 20941; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20942; GFX90A-NEXT: ;;#ASMSTART 20943; GFX90A-NEXT: ; def s[8:11] 20944; GFX90A-NEXT: ;;#ASMEND 20945; GFX90A-NEXT: ;;#ASMSTART 20946; GFX90A-NEXT: ; def s[4:7] 20947; GFX90A-NEXT: ;;#ASMEND 20948; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s9, s7 20949; GFX90A-NEXT: ;;#ASMSTART 20950; GFX90A-NEXT: ; use s8 20951; GFX90A-NEXT: ;;#ASMEND 20952; GFX90A-NEXT: s_setpc_b64 s[30:31] 20953; 20954; GFX940-LABEL: s_shuffle_v2i16_v8i16__11_7: 20955; GFX940: ; %bb.0: 20956; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20957; GFX940-NEXT: ;;#ASMSTART 20958; GFX940-NEXT: ; def s[0:3] 20959; GFX940-NEXT: ;;#ASMEND 20960; GFX940-NEXT: ;;#ASMSTART 20961; GFX940-NEXT: ; def s[4:7] 20962; GFX940-NEXT: ;;#ASMEND 20963; GFX940-NEXT: s_pack_hh_b32_b16 s8, s5, s3 20964; GFX940-NEXT: ;;#ASMSTART 20965; GFX940-NEXT: ; use s8 20966; GFX940-NEXT: ;;#ASMEND 20967; GFX940-NEXT: s_setpc_b64 s[30:31] 20968 %vec0 = call <8 x i16> asm "; def $0", "=s"() 20969 
%vec1 = call <8 x i16> asm "; def $0", "=s"() 20970 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 11, i32 7> 20971 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 20972 ret void 20973} 20974 20975define void @s_shuffle_v2i16_v8i16__12_7() { 20976; GFX900-LABEL: s_shuffle_v2i16_v8i16__12_7: 20977; GFX900: ; %bb.0: 20978; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20979; GFX900-NEXT: ;;#ASMSTART 20980; GFX900-NEXT: ; def s[8:11] 20981; GFX900-NEXT: ;;#ASMEND 20982; GFX900-NEXT: ;;#ASMSTART 20983; GFX900-NEXT: ; def s[4:7] 20984; GFX900-NEXT: ;;#ASMEND 20985; GFX900-NEXT: s_pack_lh_b32_b16 s8, s10, s7 20986; GFX900-NEXT: ;;#ASMSTART 20987; GFX900-NEXT: ; use s8 20988; GFX900-NEXT: ;;#ASMEND 20989; GFX900-NEXT: s_setpc_b64 s[30:31] 20990; 20991; GFX90A-LABEL: s_shuffle_v2i16_v8i16__12_7: 20992; GFX90A: ; %bb.0: 20993; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20994; GFX90A-NEXT: ;;#ASMSTART 20995; GFX90A-NEXT: ; def s[8:11] 20996; GFX90A-NEXT: ;;#ASMEND 20997; GFX90A-NEXT: ;;#ASMSTART 20998; GFX90A-NEXT: ; def s[4:7] 20999; GFX90A-NEXT: ;;#ASMEND 21000; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s10, s7 21001; GFX90A-NEXT: ;;#ASMSTART 21002; GFX90A-NEXT: ; use s8 21003; GFX90A-NEXT: ;;#ASMEND 21004; GFX90A-NEXT: s_setpc_b64 s[30:31] 21005; 21006; GFX940-LABEL: s_shuffle_v2i16_v8i16__12_7: 21007; GFX940: ; %bb.0: 21008; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21009; GFX940-NEXT: ;;#ASMSTART 21010; GFX940-NEXT: ; def s[0:3] 21011; GFX940-NEXT: ;;#ASMEND 21012; GFX940-NEXT: ;;#ASMSTART 21013; GFX940-NEXT: ; def s[4:7] 21014; GFX940-NEXT: ;;#ASMEND 21015; GFX940-NEXT: s_pack_lh_b32_b16 s8, s6, s3 21016; GFX940-NEXT: ;;#ASMSTART 21017; GFX940-NEXT: ; use s8 21018; GFX940-NEXT: ;;#ASMEND 21019; GFX940-NEXT: s_setpc_b64 s[30:31] 21020 %vec0 = call <8 x i16> asm "; def $0", "=s"() 21021 %vec1 = call <8 x i16> asm "; def $0", "=s"() 21022 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 12, 
i32 7> 21023 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 21024 ret void 21025} 21026 21027define void @s_shuffle_v2i16_v8i16__13_7() { 21028; GFX900-LABEL: s_shuffle_v2i16_v8i16__13_7: 21029; GFX900: ; %bb.0: 21030; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21031; GFX900-NEXT: ;;#ASMSTART 21032; GFX900-NEXT: ; def s[8:11] 21033; GFX900-NEXT: ;;#ASMEND 21034; GFX900-NEXT: ;;#ASMSTART 21035; GFX900-NEXT: ; def s[4:7] 21036; GFX900-NEXT: ;;#ASMEND 21037; GFX900-NEXT: s_pack_hh_b32_b16 s8, s10, s7 21038; GFX900-NEXT: ;;#ASMSTART 21039; GFX900-NEXT: ; use s8 21040; GFX900-NEXT: ;;#ASMEND 21041; GFX900-NEXT: s_setpc_b64 s[30:31] 21042; 21043; GFX90A-LABEL: s_shuffle_v2i16_v8i16__13_7: 21044; GFX90A: ; %bb.0: 21045; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21046; GFX90A-NEXT: ;;#ASMSTART 21047; GFX90A-NEXT: ; def s[8:11] 21048; GFX90A-NEXT: ;;#ASMEND 21049; GFX90A-NEXT: ;;#ASMSTART 21050; GFX90A-NEXT: ; def s[4:7] 21051; GFX90A-NEXT: ;;#ASMEND 21052; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s10, s7 21053; GFX90A-NEXT: ;;#ASMSTART 21054; GFX90A-NEXT: ; use s8 21055; GFX90A-NEXT: ;;#ASMEND 21056; GFX90A-NEXT: s_setpc_b64 s[30:31] 21057; 21058; GFX940-LABEL: s_shuffle_v2i16_v8i16__13_7: 21059; GFX940: ; %bb.0: 21060; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21061; GFX940-NEXT: ;;#ASMSTART 21062; GFX940-NEXT: ; def s[0:3] 21063; GFX940-NEXT: ;;#ASMEND 21064; GFX940-NEXT: ;;#ASMSTART 21065; GFX940-NEXT: ; def s[4:7] 21066; GFX940-NEXT: ;;#ASMEND 21067; GFX940-NEXT: s_pack_hh_b32_b16 s8, s6, s3 21068; GFX940-NEXT: ;;#ASMSTART 21069; GFX940-NEXT: ; use s8 21070; GFX940-NEXT: ;;#ASMEND 21071; GFX940-NEXT: s_setpc_b64 s[30:31] 21072 %vec0 = call <8 x i16> asm "; def $0", "=s"() 21073 %vec1 = call <8 x i16> asm "; def $0", "=s"() 21074 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 13, i32 7> 21075 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 21076 ret void 21077} 21078 21079define void 
@s_shuffle_v2i16_v8i16__14_7() { 21080; GFX900-LABEL: s_shuffle_v2i16_v8i16__14_7: 21081; GFX900: ; %bb.0: 21082; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21083; GFX900-NEXT: ;;#ASMSTART 21084; GFX900-NEXT: ; def s[8:11] 21085; GFX900-NEXT: ;;#ASMEND 21086; GFX900-NEXT: ;;#ASMSTART 21087; GFX900-NEXT: ; def s[4:7] 21088; GFX900-NEXT: ;;#ASMEND 21089; GFX900-NEXT: s_pack_lh_b32_b16 s8, s11, s7 21090; GFX900-NEXT: ;;#ASMSTART 21091; GFX900-NEXT: ; use s8 21092; GFX900-NEXT: ;;#ASMEND 21093; GFX900-NEXT: s_setpc_b64 s[30:31] 21094; 21095; GFX90A-LABEL: s_shuffle_v2i16_v8i16__14_7: 21096; GFX90A: ; %bb.0: 21097; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21098; GFX90A-NEXT: ;;#ASMSTART 21099; GFX90A-NEXT: ; def s[8:11] 21100; GFX90A-NEXT: ;;#ASMEND 21101; GFX90A-NEXT: ;;#ASMSTART 21102; GFX90A-NEXT: ; def s[4:7] 21103; GFX90A-NEXT: ;;#ASMEND 21104; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s11, s7 21105; GFX90A-NEXT: ;;#ASMSTART 21106; GFX90A-NEXT: ; use s8 21107; GFX90A-NEXT: ;;#ASMEND 21108; GFX90A-NEXT: s_setpc_b64 s[30:31] 21109; 21110; GFX940-LABEL: s_shuffle_v2i16_v8i16__14_7: 21111; GFX940: ; %bb.0: 21112; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21113; GFX940-NEXT: ;;#ASMSTART 21114; GFX940-NEXT: ; def s[0:3] 21115; GFX940-NEXT: ;;#ASMEND 21116; GFX940-NEXT: ;;#ASMSTART 21117; GFX940-NEXT: ; def s[4:7] 21118; GFX940-NEXT: ;;#ASMEND 21119; GFX940-NEXT: s_pack_lh_b32_b16 s8, s7, s3 21120; GFX940-NEXT: ;;#ASMSTART 21121; GFX940-NEXT: ; use s8 21122; GFX940-NEXT: ;;#ASMEND 21123; GFX940-NEXT: s_setpc_b64 s[30:31] 21124 %vec0 = call <8 x i16> asm "; def $0", "=s"() 21125 %vec1 = call <8 x i16> asm "; def $0", "=s"() 21126 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 14, i32 7> 21127 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 21128 ret void 21129} 21130 21131define void @s_shuffle_v2i16_v8i16__u_8() { 21132; GFX9-LABEL: s_shuffle_v2i16_v8i16__u_8: 21133; GFX9: ; %bb.0: 21134; GFX9-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) 21135; GFX9-NEXT: ;;#ASMSTART 21136; GFX9-NEXT: ; use s8 21137; GFX9-NEXT: ;;#ASMEND 21138; GFX9-NEXT: s_setpc_b64 s[30:31] 21139 %vec0 = call <8 x i16> asm "; def $0", "=s"() 21140 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 poison, i32 8> 21141 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 21142 ret void 21143} 21144 21145define void @s_shuffle_v2i16_v8i16__0_8() { 21146; GFX900-LABEL: s_shuffle_v2i16_v8i16__0_8: 21147; GFX900: ; %bb.0: 21148; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21149; GFX900-NEXT: ;;#ASMSTART 21150; GFX900-NEXT: ; def s[8:11] 21151; GFX900-NEXT: ;;#ASMEND 21152; GFX900-NEXT: ;;#ASMSTART 21153; GFX900-NEXT: ; use s8 21154; GFX900-NEXT: ;;#ASMEND 21155; GFX900-NEXT: s_setpc_b64 s[30:31] 21156; 21157; GFX90A-LABEL: s_shuffle_v2i16_v8i16__0_8: 21158; GFX90A: ; %bb.0: 21159; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21160; GFX90A-NEXT: ;;#ASMSTART 21161; GFX90A-NEXT: ; def s[8:11] 21162; GFX90A-NEXT: ;;#ASMEND 21163; GFX90A-NEXT: ;;#ASMSTART 21164; GFX90A-NEXT: ; use s8 21165; GFX90A-NEXT: ;;#ASMEND 21166; GFX90A-NEXT: s_setpc_b64 s[30:31] 21167; 21168; GFX940-LABEL: s_shuffle_v2i16_v8i16__0_8: 21169; GFX940: ; %bb.0: 21170; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21171; GFX940-NEXT: ;;#ASMSTART 21172; GFX940-NEXT: ; def s[8:11] 21173; GFX940-NEXT: ;;#ASMEND 21174; GFX940-NEXT: s_nop 0 21175; GFX940-NEXT: ;;#ASMSTART 21176; GFX940-NEXT: ; use s8 21177; GFX940-NEXT: ;;#ASMEND 21178; GFX940-NEXT: s_setpc_b64 s[30:31] 21179 %vec0 = call <8 x i16> asm "; def $0", "=s"() 21180 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 0, i32 8> 21181 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 21182 ret void 21183} 21184 21185define void @s_shuffle_v2i16_v8i16__1_8() { 21186; GFX900-LABEL: s_shuffle_v2i16_v8i16__1_8: 21187; GFX900: ; %bb.0: 21188; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21189; 
GFX900-NEXT: ;;#ASMSTART 21190; GFX900-NEXT: ; def s[4:7] 21191; GFX900-NEXT: ;;#ASMEND 21192; GFX900-NEXT: s_lshr_b32 s8, s4, 16 21193; GFX900-NEXT: ;;#ASMSTART 21194; GFX900-NEXT: ; use s8 21195; GFX900-NEXT: ;;#ASMEND 21196; GFX900-NEXT: s_setpc_b64 s[30:31] 21197; 21198; GFX90A-LABEL: s_shuffle_v2i16_v8i16__1_8: 21199; GFX90A: ; %bb.0: 21200; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21201; GFX90A-NEXT: ;;#ASMSTART 21202; GFX90A-NEXT: ; def s[4:7] 21203; GFX90A-NEXT: ;;#ASMEND 21204; GFX90A-NEXT: s_lshr_b32 s8, s4, 16 21205; GFX90A-NEXT: ;;#ASMSTART 21206; GFX90A-NEXT: ; use s8 21207; GFX90A-NEXT: ;;#ASMEND 21208; GFX90A-NEXT: s_setpc_b64 s[30:31] 21209; 21210; GFX940-LABEL: s_shuffle_v2i16_v8i16__1_8: 21211; GFX940: ; %bb.0: 21212; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21213; GFX940-NEXT: ;;#ASMSTART 21214; GFX940-NEXT: ; def s[0:3] 21215; GFX940-NEXT: ;;#ASMEND 21216; GFX940-NEXT: s_lshr_b32 s8, s0, 16 21217; GFX940-NEXT: ;;#ASMSTART 21218; GFX940-NEXT: ; use s8 21219; GFX940-NEXT: ;;#ASMEND 21220; GFX940-NEXT: s_setpc_b64 s[30:31] 21221 %vec0 = call <8 x i16> asm "; def $0", "=s"() 21222 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 1, i32 8> 21223 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 21224 ret void 21225} 21226 21227define void @s_shuffle_v2i16_v8i16__2_8() { 21228; GFX900-LABEL: s_shuffle_v2i16_v8i16__2_8: 21229; GFX900: ; %bb.0: 21230; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21231; GFX900-NEXT: ;;#ASMSTART 21232; GFX900-NEXT: ; def s[4:7] 21233; GFX900-NEXT: ;;#ASMEND 21234; GFX900-NEXT: s_mov_b32 s8, s5 21235; GFX900-NEXT: ;;#ASMSTART 21236; GFX900-NEXT: ; use s8 21237; GFX900-NEXT: ;;#ASMEND 21238; GFX900-NEXT: s_setpc_b64 s[30:31] 21239; 21240; GFX90A-LABEL: s_shuffle_v2i16_v8i16__2_8: 21241; GFX90A: ; %bb.0: 21242; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21243; GFX90A-NEXT: ;;#ASMSTART 21244; GFX90A-NEXT: ; def s[4:7] 21245; GFX90A-NEXT: ;;#ASMEND 
21246; GFX90A-NEXT: s_mov_b32 s8, s5 21247; GFX90A-NEXT: ;;#ASMSTART 21248; GFX90A-NEXT: ; use s8 21249; GFX90A-NEXT: ;;#ASMEND 21250; GFX90A-NEXT: s_setpc_b64 s[30:31] 21251; 21252; GFX940-LABEL: s_shuffle_v2i16_v8i16__2_8: 21253; GFX940: ; %bb.0: 21254; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21255; GFX940-NEXT: ;;#ASMSTART 21256; GFX940-NEXT: ; def s[0:3] 21257; GFX940-NEXT: ;;#ASMEND 21258; GFX940-NEXT: s_mov_b32 s8, s1 21259; GFX940-NEXT: ;;#ASMSTART 21260; GFX940-NEXT: ; use s8 21261; GFX940-NEXT: ;;#ASMEND 21262; GFX940-NEXT: s_setpc_b64 s[30:31] 21263 %vec0 = call <8 x i16> asm "; def $0", "=s"() 21264 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 2, i32 8> 21265 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 21266 ret void 21267} 21268 21269define void @s_shuffle_v2i16_v8i16__3_8() { 21270; GFX900-LABEL: s_shuffle_v2i16_v8i16__3_8: 21271; GFX900: ; %bb.0: 21272; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21273; GFX900-NEXT: ;;#ASMSTART 21274; GFX900-NEXT: ; def s[4:7] 21275; GFX900-NEXT: ;;#ASMEND 21276; GFX900-NEXT: s_lshr_b32 s8, s5, 16 21277; GFX900-NEXT: ;;#ASMSTART 21278; GFX900-NEXT: ; use s8 21279; GFX900-NEXT: ;;#ASMEND 21280; GFX900-NEXT: s_setpc_b64 s[30:31] 21281; 21282; GFX90A-LABEL: s_shuffle_v2i16_v8i16__3_8: 21283; GFX90A: ; %bb.0: 21284; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21285; GFX90A-NEXT: ;;#ASMSTART 21286; GFX90A-NEXT: ; def s[4:7] 21287; GFX90A-NEXT: ;;#ASMEND 21288; GFX90A-NEXT: s_lshr_b32 s8, s5, 16 21289; GFX90A-NEXT: ;;#ASMSTART 21290; GFX90A-NEXT: ; use s8 21291; GFX90A-NEXT: ;;#ASMEND 21292; GFX90A-NEXT: s_setpc_b64 s[30:31] 21293; 21294; GFX940-LABEL: s_shuffle_v2i16_v8i16__3_8: 21295; GFX940: ; %bb.0: 21296; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21297; GFX940-NEXT: ;;#ASMSTART 21298; GFX940-NEXT: ; def s[0:3] 21299; GFX940-NEXT: ;;#ASMEND 21300; GFX940-NEXT: s_lshr_b32 s8, s1, 16 21301; GFX940-NEXT: ;;#ASMSTART 21302; GFX940-NEXT: ; 
use s8 21303; GFX940-NEXT: ;;#ASMEND 21304; GFX940-NEXT: s_setpc_b64 s[30:31] 21305 %vec0 = call <8 x i16> asm "; def $0", "=s"() 21306 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 3, i32 8> 21307 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 21308 ret void 21309} 21310 21311define void @s_shuffle_v2i16_v8i16__4_8() { 21312; GFX900-LABEL: s_shuffle_v2i16_v8i16__4_8: 21313; GFX900: ; %bb.0: 21314; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21315; GFX900-NEXT: ;;#ASMSTART 21316; GFX900-NEXT: ; def s[4:7] 21317; GFX900-NEXT: ;;#ASMEND 21318; GFX900-NEXT: s_mov_b32 s8, s6 21319; GFX900-NEXT: ;;#ASMSTART 21320; GFX900-NEXT: ; use s8 21321; GFX900-NEXT: ;;#ASMEND 21322; GFX900-NEXT: s_setpc_b64 s[30:31] 21323; 21324; GFX90A-LABEL: s_shuffle_v2i16_v8i16__4_8: 21325; GFX90A: ; %bb.0: 21326; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21327; GFX90A-NEXT: ;;#ASMSTART 21328; GFX90A-NEXT: ; def s[4:7] 21329; GFX90A-NEXT: ;;#ASMEND 21330; GFX90A-NEXT: s_mov_b32 s8, s6 21331; GFX90A-NEXT: ;;#ASMSTART 21332; GFX90A-NEXT: ; use s8 21333; GFX90A-NEXT: ;;#ASMEND 21334; GFX90A-NEXT: s_setpc_b64 s[30:31] 21335; 21336; GFX940-LABEL: s_shuffle_v2i16_v8i16__4_8: 21337; GFX940: ; %bb.0: 21338; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21339; GFX940-NEXT: ;;#ASMSTART 21340; GFX940-NEXT: ; def s[0:3] 21341; GFX940-NEXT: ;;#ASMEND 21342; GFX940-NEXT: s_mov_b32 s8, s2 21343; GFX940-NEXT: ;;#ASMSTART 21344; GFX940-NEXT: ; use s8 21345; GFX940-NEXT: ;;#ASMEND 21346; GFX940-NEXT: s_setpc_b64 s[30:31] 21347 %vec0 = call <8 x i16> asm "; def $0", "=s"() 21348 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 4, i32 8> 21349 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 21350 ret void 21351} 21352 21353define void @s_shuffle_v2i16_v8i16__5_8() { 21354; GFX900-LABEL: s_shuffle_v2i16_v8i16__5_8: 21355; GFX900: ; %bb.0: 21356; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21357; 
GFX900-NEXT: ;;#ASMSTART 21358; GFX900-NEXT: ; def s[4:7] 21359; GFX900-NEXT: ;;#ASMEND 21360; GFX900-NEXT: s_lshr_b32 s8, s6, 16 21361; GFX900-NEXT: ;;#ASMSTART 21362; GFX900-NEXT: ; use s8 21363; GFX900-NEXT: ;;#ASMEND 21364; GFX900-NEXT: s_setpc_b64 s[30:31] 21365; 21366; GFX90A-LABEL: s_shuffle_v2i16_v8i16__5_8: 21367; GFX90A: ; %bb.0: 21368; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21369; GFX90A-NEXT: ;;#ASMSTART 21370; GFX90A-NEXT: ; def s[4:7] 21371; GFX90A-NEXT: ;;#ASMEND 21372; GFX90A-NEXT: s_lshr_b32 s8, s6, 16 21373; GFX90A-NEXT: ;;#ASMSTART 21374; GFX90A-NEXT: ; use s8 21375; GFX90A-NEXT: ;;#ASMEND 21376; GFX90A-NEXT: s_setpc_b64 s[30:31] 21377; 21378; GFX940-LABEL: s_shuffle_v2i16_v8i16__5_8: 21379; GFX940: ; %bb.0: 21380; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21381; GFX940-NEXT: ;;#ASMSTART 21382; GFX940-NEXT: ; def s[0:3] 21383; GFX940-NEXT: ;;#ASMEND 21384; GFX940-NEXT: s_lshr_b32 s8, s2, 16 21385; GFX940-NEXT: ;;#ASMSTART 21386; GFX940-NEXT: ; use s8 21387; GFX940-NEXT: ;;#ASMEND 21388; GFX940-NEXT: s_setpc_b64 s[30:31] 21389 %vec0 = call <8 x i16> asm "; def $0", "=s"() 21390 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 5, i32 8> 21391 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 21392 ret void 21393} 21394 21395define void @s_shuffle_v2i16_v8i16__6_8() { 21396; GFX900-LABEL: s_shuffle_v2i16_v8i16__6_8: 21397; GFX900: ; %bb.0: 21398; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21399; GFX900-NEXT: ;;#ASMSTART 21400; GFX900-NEXT: ; def s[4:7] 21401; GFX900-NEXT: ;;#ASMEND 21402; GFX900-NEXT: s_mov_b32 s8, s7 21403; GFX900-NEXT: ;;#ASMSTART 21404; GFX900-NEXT: ; use s8 21405; GFX900-NEXT: ;;#ASMEND 21406; GFX900-NEXT: s_setpc_b64 s[30:31] 21407; 21408; GFX90A-LABEL: s_shuffle_v2i16_v8i16__6_8: 21409; GFX90A: ; %bb.0: 21410; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21411; GFX90A-NEXT: ;;#ASMSTART 21412; GFX90A-NEXT: ; def s[4:7] 21413; GFX90A-NEXT: ;;#ASMEND 
21414; GFX90A-NEXT: s_mov_b32 s8, s7 21415; GFX90A-NEXT: ;;#ASMSTART 21416; GFX90A-NEXT: ; use s8 21417; GFX90A-NEXT: ;;#ASMEND 21418; GFX90A-NEXT: s_setpc_b64 s[30:31] 21419; 21420; GFX940-LABEL: s_shuffle_v2i16_v8i16__6_8: 21421; GFX940: ; %bb.0: 21422; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21423; GFX940-NEXT: ;;#ASMSTART 21424; GFX940-NEXT: ; def s[0:3] 21425; GFX940-NEXT: ;;#ASMEND 21426; GFX940-NEXT: s_mov_b32 s8, s3 21427; GFX940-NEXT: ;;#ASMSTART 21428; GFX940-NEXT: ; use s8 21429; GFX940-NEXT: ;;#ASMEND 21430; GFX940-NEXT: s_setpc_b64 s[30:31] 21431 %vec0 = call <8 x i16> asm "; def $0", "=s"() 21432 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 6, i32 8> 21433 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 21434 ret void 21435} 21436 21437define void @s_shuffle_v2i16_v8i16__7_8() { 21438; GFX900-LABEL: s_shuffle_v2i16_v8i16__7_8: 21439; GFX900: ; %bb.0: 21440; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21441; GFX900-NEXT: ;;#ASMSTART 21442; GFX900-NEXT: ; def s[4:7] 21443; GFX900-NEXT: ;;#ASMEND 21444; GFX900-NEXT: s_lshr_b32 s8, s7, 16 21445; GFX900-NEXT: ;;#ASMSTART 21446; GFX900-NEXT: ; use s8 21447; GFX900-NEXT: ;;#ASMEND 21448; GFX900-NEXT: s_setpc_b64 s[30:31] 21449; 21450; GFX90A-LABEL: s_shuffle_v2i16_v8i16__7_8: 21451; GFX90A: ; %bb.0: 21452; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21453; GFX90A-NEXT: ;;#ASMSTART 21454; GFX90A-NEXT: ; def s[4:7] 21455; GFX90A-NEXT: ;;#ASMEND 21456; GFX90A-NEXT: s_lshr_b32 s8, s7, 16 21457; GFX90A-NEXT: ;;#ASMSTART 21458; GFX90A-NEXT: ; use s8 21459; GFX90A-NEXT: ;;#ASMEND 21460; GFX90A-NEXT: s_setpc_b64 s[30:31] 21461; 21462; GFX940-LABEL: s_shuffle_v2i16_v8i16__7_8: 21463; GFX940: ; %bb.0: 21464; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21465; GFX940-NEXT: ;;#ASMSTART 21466; GFX940-NEXT: ; def s[0:3] 21467; GFX940-NEXT: ;;#ASMEND 21468; GFX940-NEXT: s_lshr_b32 s8, s3, 16 21469; GFX940-NEXT: ;;#ASMSTART 21470; GFX940-NEXT: ; 
use s8 21471; GFX940-NEXT: ;;#ASMEND 21472; GFX940-NEXT: s_setpc_b64 s[30:31] 21473 %vec0 = call <8 x i16> asm "; def $0", "=s"() 21474 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 7, i32 8> 21475 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 21476 ret void 21477} 21478 21479define void @s_shuffle_v2i16_v8i16__8_8() { 21480; GFX9-LABEL: s_shuffle_v2i16_v8i16__8_8: 21481; GFX9: ; %bb.0: 21482; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21483; GFX9-NEXT: ;;#ASMSTART 21484; GFX9-NEXT: ; use s8 21485; GFX9-NEXT: ;;#ASMEND 21486; GFX9-NEXT: s_setpc_b64 s[30:31] 21487 %vec0 = call <8 x i16> asm "; def $0", "=s"() 21488 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> poison, <2 x i32> <i32 8, i32 8> 21489 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 21490 ret void 21491} 21492 21493define void @s_shuffle_v2i16_v8i16__9_8() { 21494; GFX900-LABEL: s_shuffle_v2i16_v8i16__9_8: 21495; GFX900: ; %bb.0: 21496; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21497; GFX900-NEXT: ;;#ASMSTART 21498; GFX900-NEXT: ; def s[4:7] 21499; GFX900-NEXT: ;;#ASMEND 21500; GFX900-NEXT: s_lshr_b32 s5, s4, 16 21501; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4 21502; GFX900-NEXT: ;;#ASMSTART 21503; GFX900-NEXT: ; use s8 21504; GFX900-NEXT: ;;#ASMEND 21505; GFX900-NEXT: s_setpc_b64 s[30:31] 21506; 21507; GFX90A-LABEL: s_shuffle_v2i16_v8i16__9_8: 21508; GFX90A: ; %bb.0: 21509; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21510; GFX90A-NEXT: ;;#ASMSTART 21511; GFX90A-NEXT: ; def s[4:7] 21512; GFX90A-NEXT: ;;#ASMEND 21513; GFX90A-NEXT: s_lshr_b32 s5, s4, 16 21514; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4 21515; GFX90A-NEXT: ;;#ASMSTART 21516; GFX90A-NEXT: ; use s8 21517; GFX90A-NEXT: ;;#ASMEND 21518; GFX90A-NEXT: s_setpc_b64 s[30:31] 21519; 21520; GFX940-LABEL: s_shuffle_v2i16_v8i16__9_8: 21521; GFX940: ; %bb.0: 21522; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21523; GFX940-NEXT: ;;#ASMSTART 21524; GFX940-NEXT: ; 
def s[0:3] 21525; GFX940-NEXT: ;;#ASMEND 21526; GFX940-NEXT: s_lshr_b32 s1, s0, 16 21527; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 21528; GFX940-NEXT: ;;#ASMSTART 21529; GFX940-NEXT: ; use s8 21530; GFX940-NEXT: ;;#ASMEND 21531; GFX940-NEXT: s_setpc_b64 s[30:31] 21532 %vec0 = call <8 x i16> asm "; def $0", "=s"() 21533 %vec1 = call <8 x i16> asm "; def $0", "=s"() 21534 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 9, i32 8> 21535 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 21536 ret void 21537} 21538 21539define void @s_shuffle_v2i16_v8i16__10_8() { 21540; GFX900-LABEL: s_shuffle_v2i16_v8i16__10_8: 21541; GFX900: ; %bb.0: 21542; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21543; GFX900-NEXT: ;;#ASMSTART 21544; GFX900-NEXT: ; def s[4:7] 21545; GFX900-NEXT: ;;#ASMEND 21546; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4 21547; GFX900-NEXT: ;;#ASMSTART 21548; GFX900-NEXT: ; use s8 21549; GFX900-NEXT: ;;#ASMEND 21550; GFX900-NEXT: s_setpc_b64 s[30:31] 21551; 21552; GFX90A-LABEL: s_shuffle_v2i16_v8i16__10_8: 21553; GFX90A: ; %bb.0: 21554; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21555; GFX90A-NEXT: ;;#ASMSTART 21556; GFX90A-NEXT: ; def s[4:7] 21557; GFX90A-NEXT: ;;#ASMEND 21558; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4 21559; GFX90A-NEXT: ;;#ASMSTART 21560; GFX90A-NEXT: ; use s8 21561; GFX90A-NEXT: ;;#ASMEND 21562; GFX90A-NEXT: s_setpc_b64 s[30:31] 21563; 21564; GFX940-LABEL: s_shuffle_v2i16_v8i16__10_8: 21565; GFX940: ; %bb.0: 21566; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21567; GFX940-NEXT: ;;#ASMSTART 21568; GFX940-NEXT: ; def s[0:3] 21569; GFX940-NEXT: ;;#ASMEND 21570; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 21571; GFX940-NEXT: ;;#ASMSTART 21572; GFX940-NEXT: ; use s8 21573; GFX940-NEXT: ;;#ASMEND 21574; GFX940-NEXT: s_setpc_b64 s[30:31] 21575 %vec0 = call <8 x i16> asm "; def $0", "=s"() 21576 %vec1 = call <8 x i16> asm "; def $0", "=s"() 21577 %shuf = shufflevector <8 x i16> %vec0, <8 x 
i16> %vec1, <2 x i32> <i32 10, i32 8> 21578 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 21579 ret void 21580} 21581 21582define void @s_shuffle_v2i16_v8i16__11_8() { 21583; GFX900-LABEL: s_shuffle_v2i16_v8i16__11_8: 21584; GFX900: ; %bb.0: 21585; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21586; GFX900-NEXT: ;;#ASMSTART 21587; GFX900-NEXT: ; def s[4:7] 21588; GFX900-NEXT: ;;#ASMEND 21589; GFX900-NEXT: s_lshr_b32 s5, s5, 16 21590; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4 21591; GFX900-NEXT: ;;#ASMSTART 21592; GFX900-NEXT: ; use s8 21593; GFX900-NEXT: ;;#ASMEND 21594; GFX900-NEXT: s_setpc_b64 s[30:31] 21595; 21596; GFX90A-LABEL: s_shuffle_v2i16_v8i16__11_8: 21597; GFX90A: ; %bb.0: 21598; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21599; GFX90A-NEXT: ;;#ASMSTART 21600; GFX90A-NEXT: ; def s[4:7] 21601; GFX90A-NEXT: ;;#ASMEND 21602; GFX90A-NEXT: s_lshr_b32 s5, s5, 16 21603; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4 21604; GFX90A-NEXT: ;;#ASMSTART 21605; GFX90A-NEXT: ; use s8 21606; GFX90A-NEXT: ;;#ASMEND 21607; GFX90A-NEXT: s_setpc_b64 s[30:31] 21608; 21609; GFX940-LABEL: s_shuffle_v2i16_v8i16__11_8: 21610; GFX940: ; %bb.0: 21611; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21612; GFX940-NEXT: ;;#ASMSTART 21613; GFX940-NEXT: ; def s[0:3] 21614; GFX940-NEXT: ;;#ASMEND 21615; GFX940-NEXT: s_lshr_b32 s1, s1, 16 21616; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 21617; GFX940-NEXT: ;;#ASMSTART 21618; GFX940-NEXT: ; use s8 21619; GFX940-NEXT: ;;#ASMEND 21620; GFX940-NEXT: s_setpc_b64 s[30:31] 21621 %vec0 = call <8 x i16> asm "; def $0", "=s"() 21622 %vec1 = call <8 x i16> asm "; def $0", "=s"() 21623 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 11, i32 8> 21624 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 21625 ret void 21626} 21627 21628define void @s_shuffle_v2i16_v8i16__12_8() { 21629; GFX900-LABEL: s_shuffle_v2i16_v8i16__12_8: 21630; GFX900: ; %bb.0: 21631; GFX900-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) 21632; GFX900-NEXT: ;;#ASMSTART 21633; GFX900-NEXT: ; def s[4:7] 21634; GFX900-NEXT: ;;#ASMEND 21635; GFX900-NEXT: s_pack_ll_b32_b16 s8, s6, s4 21636; GFX900-NEXT: ;;#ASMSTART 21637; GFX900-NEXT: ; use s8 21638; GFX900-NEXT: ;;#ASMEND 21639; GFX900-NEXT: s_setpc_b64 s[30:31] 21640; 21641; GFX90A-LABEL: s_shuffle_v2i16_v8i16__12_8: 21642; GFX90A: ; %bb.0: 21643; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21644; GFX90A-NEXT: ;;#ASMSTART 21645; GFX90A-NEXT: ; def s[4:7] 21646; GFX90A-NEXT: ;;#ASMEND 21647; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s6, s4 21648; GFX90A-NEXT: ;;#ASMSTART 21649; GFX90A-NEXT: ; use s8 21650; GFX90A-NEXT: ;;#ASMEND 21651; GFX90A-NEXT: s_setpc_b64 s[30:31] 21652; 21653; GFX940-LABEL: s_shuffle_v2i16_v8i16__12_8: 21654; GFX940: ; %bb.0: 21655; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21656; GFX940-NEXT: ;;#ASMSTART 21657; GFX940-NEXT: ; def s[0:3] 21658; GFX940-NEXT: ;;#ASMEND 21659; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s0 21660; GFX940-NEXT: ;;#ASMSTART 21661; GFX940-NEXT: ; use s8 21662; GFX940-NEXT: ;;#ASMEND 21663; GFX940-NEXT: s_setpc_b64 s[30:31] 21664 %vec0 = call <8 x i16> asm "; def $0", "=s"() 21665 %vec1 = call <8 x i16> asm "; def $0", "=s"() 21666 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 12, i32 8> 21667 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 21668 ret void 21669} 21670 21671define void @s_shuffle_v2i16_v8i16__13_8() { 21672; GFX900-LABEL: s_shuffle_v2i16_v8i16__13_8: 21673; GFX900: ; %bb.0: 21674; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21675; GFX900-NEXT: ;;#ASMSTART 21676; GFX900-NEXT: ; def s[4:7] 21677; GFX900-NEXT: ;;#ASMEND 21678; GFX900-NEXT: s_lshr_b32 s5, s6, 16 21679; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4 21680; GFX900-NEXT: ;;#ASMSTART 21681; GFX900-NEXT: ; use s8 21682; GFX900-NEXT: ;;#ASMEND 21683; GFX900-NEXT: s_setpc_b64 s[30:31] 21684; 21685; GFX90A-LABEL: s_shuffle_v2i16_v8i16__13_8: 21686; 
GFX90A: ; %bb.0: 21687; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21688; GFX90A-NEXT: ;;#ASMSTART 21689; GFX90A-NEXT: ; def s[4:7] 21690; GFX90A-NEXT: ;;#ASMEND 21691; GFX90A-NEXT: s_lshr_b32 s5, s6, 16 21692; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4 21693; GFX90A-NEXT: ;;#ASMSTART 21694; GFX90A-NEXT: ; use s8 21695; GFX90A-NEXT: ;;#ASMEND 21696; GFX90A-NEXT: s_setpc_b64 s[30:31] 21697; 21698; GFX940-LABEL: s_shuffle_v2i16_v8i16__13_8: 21699; GFX940: ; %bb.0: 21700; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21701; GFX940-NEXT: ;;#ASMSTART 21702; GFX940-NEXT: ; def s[0:3] 21703; GFX940-NEXT: ;;#ASMEND 21704; GFX940-NEXT: s_lshr_b32 s1, s2, 16 21705; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 21706; GFX940-NEXT: ;;#ASMSTART 21707; GFX940-NEXT: ; use s8 21708; GFX940-NEXT: ;;#ASMEND 21709; GFX940-NEXT: s_setpc_b64 s[30:31] 21710 %vec0 = call <8 x i16> asm "; def $0", "=s"() 21711 %vec1 = call <8 x i16> asm "; def $0", "=s"() 21712 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 13, i32 8> 21713 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 21714 ret void 21715} 21716 21717define void @s_shuffle_v2i16_v8i16__14_8() { 21718; GFX900-LABEL: s_shuffle_v2i16_v8i16__14_8: 21719; GFX900: ; %bb.0: 21720; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21721; GFX900-NEXT: ;;#ASMSTART 21722; GFX900-NEXT: ; def s[4:7] 21723; GFX900-NEXT: ;;#ASMEND 21724; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s4 21725; GFX900-NEXT: ;;#ASMSTART 21726; GFX900-NEXT: ; use s8 21727; GFX900-NEXT: ;;#ASMEND 21728; GFX900-NEXT: s_setpc_b64 s[30:31] 21729; 21730; GFX90A-LABEL: s_shuffle_v2i16_v8i16__14_8: 21731; GFX90A: ; %bb.0: 21732; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21733; GFX90A-NEXT: ;;#ASMSTART 21734; GFX90A-NEXT: ; def s[4:7] 21735; GFX90A-NEXT: ;;#ASMEND 21736; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s4 21737; GFX90A-NEXT: ;;#ASMSTART 21738; GFX90A-NEXT: ; use s8 21739; GFX90A-NEXT: ;;#ASMEND 21740; GFX90A-NEXT: 
s_setpc_b64 s[30:31] 21741; 21742; GFX940-LABEL: s_shuffle_v2i16_v8i16__14_8: 21743; GFX940: ; %bb.0: 21744; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21745; GFX940-NEXT: ;;#ASMSTART 21746; GFX940-NEXT: ; def s[0:3] 21747; GFX940-NEXT: ;;#ASMEND 21748; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 21749; GFX940-NEXT: ;;#ASMSTART 21750; GFX940-NEXT: ; use s8 21751; GFX940-NEXT: ;;#ASMEND 21752; GFX940-NEXT: s_setpc_b64 s[30:31] 21753 %vec0 = call <8 x i16> asm "; def $0", "=s"() 21754 %vec1 = call <8 x i16> asm "; def $0", "=s"() 21755 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 14, i32 8> 21756 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 21757 ret void 21758} 21759 21760define void @s_shuffle_v2i16_v8i16__u_9() { 21761; GFX900-LABEL: s_shuffle_v2i16_v8i16__u_9: 21762; GFX900: ; %bb.0: 21763; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21764; GFX900-NEXT: ;;#ASMSTART 21765; GFX900-NEXT: ; def s[8:11] 21766; GFX900-NEXT: ;;#ASMEND 21767; GFX900-NEXT: ;;#ASMSTART 21768; GFX900-NEXT: ; use s8 21769; GFX900-NEXT: ;;#ASMEND 21770; GFX900-NEXT: s_setpc_b64 s[30:31] 21771; 21772; GFX90A-LABEL: s_shuffle_v2i16_v8i16__u_9: 21773; GFX90A: ; %bb.0: 21774; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21775; GFX90A-NEXT: ;;#ASMSTART 21776; GFX90A-NEXT: ; def s[8:11] 21777; GFX90A-NEXT: ;;#ASMEND 21778; GFX90A-NEXT: ;;#ASMSTART 21779; GFX90A-NEXT: ; use s8 21780; GFX90A-NEXT: ;;#ASMEND 21781; GFX90A-NEXT: s_setpc_b64 s[30:31] 21782; 21783; GFX940-LABEL: s_shuffle_v2i16_v8i16__u_9: 21784; GFX940: ; %bb.0: 21785; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21786; GFX940-NEXT: ;;#ASMSTART 21787; GFX940-NEXT: ; def s[8:11] 21788; GFX940-NEXT: ;;#ASMEND 21789; GFX940-NEXT: s_nop 0 21790; GFX940-NEXT: ;;#ASMSTART 21791; GFX940-NEXT: ; use s8 21792; GFX940-NEXT: ;;#ASMEND 21793; GFX940-NEXT: s_setpc_b64 s[30:31] 21794 %vec0 = call <8 x i16> asm "; def $0", "=s"() 21795 %vec1 = call <8 x i16> asm "; def $0", 
"=s"() 21796 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 poison, i32 9> 21797 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 21798 ret void 21799} 21800 21801define void @s_shuffle_v2i16_v8i16__0_9() { 21802; GFX900-LABEL: s_shuffle_v2i16_v8i16__0_9: 21803; GFX900: ; %bb.0: 21804; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21805; GFX900-NEXT: ;;#ASMSTART 21806; GFX900-NEXT: ; def s[8:11] 21807; GFX900-NEXT: ;;#ASMEND 21808; GFX900-NEXT: ;;#ASMSTART 21809; GFX900-NEXT: ; def s[4:7] 21810; GFX900-NEXT: ;;#ASMEND 21811; GFX900-NEXT: s_pack_lh_b32_b16 s8, s4, s8 21812; GFX900-NEXT: ;;#ASMSTART 21813; GFX900-NEXT: ; use s8 21814; GFX900-NEXT: ;;#ASMEND 21815; GFX900-NEXT: s_setpc_b64 s[30:31] 21816; 21817; GFX90A-LABEL: s_shuffle_v2i16_v8i16__0_9: 21818; GFX90A: ; %bb.0: 21819; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21820; GFX90A-NEXT: ;;#ASMSTART 21821; GFX90A-NEXT: ; def s[8:11] 21822; GFX90A-NEXT: ;;#ASMEND 21823; GFX90A-NEXT: ;;#ASMSTART 21824; GFX90A-NEXT: ; def s[4:7] 21825; GFX90A-NEXT: ;;#ASMEND 21826; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s4, s8 21827; GFX90A-NEXT: ;;#ASMSTART 21828; GFX90A-NEXT: ; use s8 21829; GFX90A-NEXT: ;;#ASMEND 21830; GFX90A-NEXT: s_setpc_b64 s[30:31] 21831; 21832; GFX940-LABEL: s_shuffle_v2i16_v8i16__0_9: 21833; GFX940: ; %bb.0: 21834; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21835; GFX940-NEXT: ;;#ASMSTART 21836; GFX940-NEXT: ; def s[0:3] 21837; GFX940-NEXT: ;;#ASMEND 21838; GFX940-NEXT: ;;#ASMSTART 21839; GFX940-NEXT: ; def s[4:7] 21840; GFX940-NEXT: ;;#ASMEND 21841; GFX940-NEXT: s_pack_lh_b32_b16 s8, s0, s4 21842; GFX940-NEXT: ;;#ASMSTART 21843; GFX940-NEXT: ; use s8 21844; GFX940-NEXT: ;;#ASMEND 21845; GFX940-NEXT: s_setpc_b64 s[30:31] 21846 %vec0 = call <8 x i16> asm "; def $0", "=s"() 21847 %vec1 = call <8 x i16> asm "; def $0", "=s"() 21848 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 0, i32 9> 21849 call void asm sideeffect "; use 
$0", "{s8}"(<2 x i16> %shuf) 21850 ret void 21851} 21852 21853define void @s_shuffle_v2i16_v8i16__1_9() { 21854; GFX900-LABEL: s_shuffle_v2i16_v8i16__1_9: 21855; GFX900: ; %bb.0: 21856; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21857; GFX900-NEXT: ;;#ASMSTART 21858; GFX900-NEXT: ; def s[8:11] 21859; GFX900-NEXT: ;;#ASMEND 21860; GFX900-NEXT: ;;#ASMSTART 21861; GFX900-NEXT: ; def s[4:7] 21862; GFX900-NEXT: ;;#ASMEND 21863; GFX900-NEXT: s_pack_hh_b32_b16 s8, s4, s8 21864; GFX900-NEXT: ;;#ASMSTART 21865; GFX900-NEXT: ; use s8 21866; GFX900-NEXT: ;;#ASMEND 21867; GFX900-NEXT: s_setpc_b64 s[30:31] 21868; 21869; GFX90A-LABEL: s_shuffle_v2i16_v8i16__1_9: 21870; GFX90A: ; %bb.0: 21871; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21872; GFX90A-NEXT: ;;#ASMSTART 21873; GFX90A-NEXT: ; def s[8:11] 21874; GFX90A-NEXT: ;;#ASMEND 21875; GFX90A-NEXT: ;;#ASMSTART 21876; GFX90A-NEXT: ; def s[4:7] 21877; GFX90A-NEXT: ;;#ASMEND 21878; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s4, s8 21879; GFX90A-NEXT: ;;#ASMSTART 21880; GFX90A-NEXT: ; use s8 21881; GFX90A-NEXT: ;;#ASMEND 21882; GFX90A-NEXT: s_setpc_b64 s[30:31] 21883; 21884; GFX940-LABEL: s_shuffle_v2i16_v8i16__1_9: 21885; GFX940: ; %bb.0: 21886; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21887; GFX940-NEXT: ;;#ASMSTART 21888; GFX940-NEXT: ; def s[0:3] 21889; GFX940-NEXT: ;;#ASMEND 21890; GFX940-NEXT: ;;#ASMSTART 21891; GFX940-NEXT: ; def s[4:7] 21892; GFX940-NEXT: ;;#ASMEND 21893; GFX940-NEXT: s_pack_hh_b32_b16 s8, s0, s4 21894; GFX940-NEXT: ;;#ASMSTART 21895; GFX940-NEXT: ; use s8 21896; GFX940-NEXT: ;;#ASMEND 21897; GFX940-NEXT: s_setpc_b64 s[30:31] 21898 %vec0 = call <8 x i16> asm "; def $0", "=s"() 21899 %vec1 = call <8 x i16> asm "; def $0", "=s"() 21900 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 1, i32 9> 21901 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 21902 ret void 21903} 21904 21905define void @s_shuffle_v2i16_v8i16__2_9() { 21906; GFX900-LABEL: 
s_shuffle_v2i16_v8i16__2_9: 21907; GFX900: ; %bb.0: 21908; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21909; GFX900-NEXT: ;;#ASMSTART 21910; GFX900-NEXT: ; def s[8:11] 21911; GFX900-NEXT: ;;#ASMEND 21912; GFX900-NEXT: ;;#ASMSTART 21913; GFX900-NEXT: ; def s[4:7] 21914; GFX900-NEXT: ;;#ASMEND 21915; GFX900-NEXT: s_pack_lh_b32_b16 s8, s5, s8 21916; GFX900-NEXT: ;;#ASMSTART 21917; GFX900-NEXT: ; use s8 21918; GFX900-NEXT: ;;#ASMEND 21919; GFX900-NEXT: s_setpc_b64 s[30:31] 21920; 21921; GFX90A-LABEL: s_shuffle_v2i16_v8i16__2_9: 21922; GFX90A: ; %bb.0: 21923; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21924; GFX90A-NEXT: ;;#ASMSTART 21925; GFX90A-NEXT: ; def s[8:11] 21926; GFX90A-NEXT: ;;#ASMEND 21927; GFX90A-NEXT: ;;#ASMSTART 21928; GFX90A-NEXT: ; def s[4:7] 21929; GFX90A-NEXT: ;;#ASMEND 21930; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s5, s8 21931; GFX90A-NEXT: ;;#ASMSTART 21932; GFX90A-NEXT: ; use s8 21933; GFX90A-NEXT: ;;#ASMEND 21934; GFX90A-NEXT: s_setpc_b64 s[30:31] 21935; 21936; GFX940-LABEL: s_shuffle_v2i16_v8i16__2_9: 21937; GFX940: ; %bb.0: 21938; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21939; GFX940-NEXT: ;;#ASMSTART 21940; GFX940-NEXT: ; def s[0:3] 21941; GFX940-NEXT: ;;#ASMEND 21942; GFX940-NEXT: ;;#ASMSTART 21943; GFX940-NEXT: ; def s[4:7] 21944; GFX940-NEXT: ;;#ASMEND 21945; GFX940-NEXT: s_pack_lh_b32_b16 s8, s1, s4 21946; GFX940-NEXT: ;;#ASMSTART 21947; GFX940-NEXT: ; use s8 21948; GFX940-NEXT: ;;#ASMEND 21949; GFX940-NEXT: s_setpc_b64 s[30:31] 21950 %vec0 = call <8 x i16> asm "; def $0", "=s"() 21951 %vec1 = call <8 x i16> asm "; def $0", "=s"() 21952 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 2, i32 9> 21953 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 21954 ret void 21955} 21956 21957define void @s_shuffle_v2i16_v8i16__3_9() { 21958; GFX900-LABEL: s_shuffle_v2i16_v8i16__3_9: 21959; GFX900: ; %bb.0: 21960; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21961; GFX900-NEXT: 
;;#ASMSTART 21962; GFX900-NEXT: ; def s[8:11] 21963; GFX900-NEXT: ;;#ASMEND 21964; GFX900-NEXT: ;;#ASMSTART 21965; GFX900-NEXT: ; def s[4:7] 21966; GFX900-NEXT: ;;#ASMEND 21967; GFX900-NEXT: s_pack_hh_b32_b16 s8, s5, s8 21968; GFX900-NEXT: ;;#ASMSTART 21969; GFX900-NEXT: ; use s8 21970; GFX900-NEXT: ;;#ASMEND 21971; GFX900-NEXT: s_setpc_b64 s[30:31] 21972; 21973; GFX90A-LABEL: s_shuffle_v2i16_v8i16__3_9: 21974; GFX90A: ; %bb.0: 21975; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21976; GFX90A-NEXT: ;;#ASMSTART 21977; GFX90A-NEXT: ; def s[8:11] 21978; GFX90A-NEXT: ;;#ASMEND 21979; GFX90A-NEXT: ;;#ASMSTART 21980; GFX90A-NEXT: ; def s[4:7] 21981; GFX90A-NEXT: ;;#ASMEND 21982; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s5, s8 21983; GFX90A-NEXT: ;;#ASMSTART 21984; GFX90A-NEXT: ; use s8 21985; GFX90A-NEXT: ;;#ASMEND 21986; GFX90A-NEXT: s_setpc_b64 s[30:31] 21987; 21988; GFX940-LABEL: s_shuffle_v2i16_v8i16__3_9: 21989; GFX940: ; %bb.0: 21990; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21991; GFX940-NEXT: ;;#ASMSTART 21992; GFX940-NEXT: ; def s[0:3] 21993; GFX940-NEXT: ;;#ASMEND 21994; GFX940-NEXT: ;;#ASMSTART 21995; GFX940-NEXT: ; def s[4:7] 21996; GFX940-NEXT: ;;#ASMEND 21997; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s4 21998; GFX940-NEXT: ;;#ASMSTART 21999; GFX940-NEXT: ; use s8 22000; GFX940-NEXT: ;;#ASMEND 22001; GFX940-NEXT: s_setpc_b64 s[30:31] 22002 %vec0 = call <8 x i16> asm "; def $0", "=s"() 22003 %vec1 = call <8 x i16> asm "; def $0", "=s"() 22004 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 3, i32 9> 22005 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 22006 ret void 22007} 22008 22009define void @s_shuffle_v2i16_v8i16__4_9() { 22010; GFX900-LABEL: s_shuffle_v2i16_v8i16__4_9: 22011; GFX900: ; %bb.0: 22012; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22013; GFX900-NEXT: ;;#ASMSTART 22014; GFX900-NEXT: ; def s[8:11] 22015; GFX900-NEXT: ;;#ASMEND 22016; GFX900-NEXT: ;;#ASMSTART 22017; GFX900-NEXT: ; def 
s[4:7] 22018; GFX900-NEXT: ;;#ASMEND 22019; GFX900-NEXT: s_pack_lh_b32_b16 s8, s6, s8 22020; GFX900-NEXT: ;;#ASMSTART 22021; GFX900-NEXT: ; use s8 22022; GFX900-NEXT: ;;#ASMEND 22023; GFX900-NEXT: s_setpc_b64 s[30:31] 22024; 22025; GFX90A-LABEL: s_shuffle_v2i16_v8i16__4_9: 22026; GFX90A: ; %bb.0: 22027; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22028; GFX90A-NEXT: ;;#ASMSTART 22029; GFX90A-NEXT: ; def s[8:11] 22030; GFX90A-NEXT: ;;#ASMEND 22031; GFX90A-NEXT: ;;#ASMSTART 22032; GFX90A-NEXT: ; def s[4:7] 22033; GFX90A-NEXT: ;;#ASMEND 22034; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s6, s8 22035; GFX90A-NEXT: ;;#ASMSTART 22036; GFX90A-NEXT: ; use s8 22037; GFX90A-NEXT: ;;#ASMEND 22038; GFX90A-NEXT: s_setpc_b64 s[30:31] 22039; 22040; GFX940-LABEL: s_shuffle_v2i16_v8i16__4_9: 22041; GFX940: ; %bb.0: 22042; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22043; GFX940-NEXT: ;;#ASMSTART 22044; GFX940-NEXT: ; def s[0:3] 22045; GFX940-NEXT: ;;#ASMEND 22046; GFX940-NEXT: ;;#ASMSTART 22047; GFX940-NEXT: ; def s[4:7] 22048; GFX940-NEXT: ;;#ASMEND 22049; GFX940-NEXT: s_pack_lh_b32_b16 s8, s2, s4 22050; GFX940-NEXT: ;;#ASMSTART 22051; GFX940-NEXT: ; use s8 22052; GFX940-NEXT: ;;#ASMEND 22053; GFX940-NEXT: s_setpc_b64 s[30:31] 22054 %vec0 = call <8 x i16> asm "; def $0", "=s"() 22055 %vec1 = call <8 x i16> asm "; def $0", "=s"() 22056 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 4, i32 9> 22057 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 22058 ret void 22059} 22060 22061define void @s_shuffle_v2i16_v8i16__5_9() { 22062; GFX900-LABEL: s_shuffle_v2i16_v8i16__5_9: 22063; GFX900: ; %bb.0: 22064; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22065; GFX900-NEXT: ;;#ASMSTART 22066; GFX900-NEXT: ; def s[8:11] 22067; GFX900-NEXT: ;;#ASMEND 22068; GFX900-NEXT: ;;#ASMSTART 22069; GFX900-NEXT: ; def s[4:7] 22070; GFX900-NEXT: ;;#ASMEND 22071; GFX900-NEXT: s_pack_hh_b32_b16 s8, s6, s8 22072; GFX900-NEXT: ;;#ASMSTART 22073; 
GFX900-NEXT: ; use s8 22074; GFX900-NEXT: ;;#ASMEND 22075; GFX900-NEXT: s_setpc_b64 s[30:31] 22076; 22077; GFX90A-LABEL: s_shuffle_v2i16_v8i16__5_9: 22078; GFX90A: ; %bb.0: 22079; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22080; GFX90A-NEXT: ;;#ASMSTART 22081; GFX90A-NEXT: ; def s[8:11] 22082; GFX90A-NEXT: ;;#ASMEND 22083; GFX90A-NEXT: ;;#ASMSTART 22084; GFX90A-NEXT: ; def s[4:7] 22085; GFX90A-NEXT: ;;#ASMEND 22086; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s6, s8 22087; GFX90A-NEXT: ;;#ASMSTART 22088; GFX90A-NEXT: ; use s8 22089; GFX90A-NEXT: ;;#ASMEND 22090; GFX90A-NEXT: s_setpc_b64 s[30:31] 22091; 22092; GFX940-LABEL: s_shuffle_v2i16_v8i16__5_9: 22093; GFX940: ; %bb.0: 22094; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22095; GFX940-NEXT: ;;#ASMSTART 22096; GFX940-NEXT: ; def s[0:3] 22097; GFX940-NEXT: ;;#ASMEND 22098; GFX940-NEXT: ;;#ASMSTART 22099; GFX940-NEXT: ; def s[4:7] 22100; GFX940-NEXT: ;;#ASMEND 22101; GFX940-NEXT: s_pack_hh_b32_b16 s8, s2, s4 22102; GFX940-NEXT: ;;#ASMSTART 22103; GFX940-NEXT: ; use s8 22104; GFX940-NEXT: ;;#ASMEND 22105; GFX940-NEXT: s_setpc_b64 s[30:31] 22106 %vec0 = call <8 x i16> asm "; def $0", "=s"() 22107 %vec1 = call <8 x i16> asm "; def $0", "=s"() 22108 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 5, i32 9> 22109 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 22110 ret void 22111} 22112 22113define void @s_shuffle_v2i16_v8i16__6_9() { 22114; GFX900-LABEL: s_shuffle_v2i16_v8i16__6_9: 22115; GFX900: ; %bb.0: 22116; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22117; GFX900-NEXT: ;;#ASMSTART 22118; GFX900-NEXT: ; def s[8:11] 22119; GFX900-NEXT: ;;#ASMEND 22120; GFX900-NEXT: ;;#ASMSTART 22121; GFX900-NEXT: ; def s[4:7] 22122; GFX900-NEXT: ;;#ASMEND 22123; GFX900-NEXT: s_pack_lh_b32_b16 s8, s7, s8 22124; GFX900-NEXT: ;;#ASMSTART 22125; GFX900-NEXT: ; use s8 22126; GFX900-NEXT: ;;#ASMEND 22127; GFX900-NEXT: s_setpc_b64 s[30:31] 22128; 22129; GFX90A-LABEL: 
s_shuffle_v2i16_v8i16__6_9: 22130; GFX90A: ; %bb.0: 22131; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22132; GFX90A-NEXT: ;;#ASMSTART 22133; GFX90A-NEXT: ; def s[8:11] 22134; GFX90A-NEXT: ;;#ASMEND 22135; GFX90A-NEXT: ;;#ASMSTART 22136; GFX90A-NEXT: ; def s[4:7] 22137; GFX90A-NEXT: ;;#ASMEND 22138; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s7, s8 22139; GFX90A-NEXT: ;;#ASMSTART 22140; GFX90A-NEXT: ; use s8 22141; GFX90A-NEXT: ;;#ASMEND 22142; GFX90A-NEXT: s_setpc_b64 s[30:31] 22143; 22144; GFX940-LABEL: s_shuffle_v2i16_v8i16__6_9: 22145; GFX940: ; %bb.0: 22146; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22147; GFX940-NEXT: ;;#ASMSTART 22148; GFX940-NEXT: ; def s[0:3] 22149; GFX940-NEXT: ;;#ASMEND 22150; GFX940-NEXT: ;;#ASMSTART 22151; GFX940-NEXT: ; def s[4:7] 22152; GFX940-NEXT: ;;#ASMEND 22153; GFX940-NEXT: s_pack_lh_b32_b16 s8, s3, s4 22154; GFX940-NEXT: ;;#ASMSTART 22155; GFX940-NEXT: ; use s8 22156; GFX940-NEXT: ;;#ASMEND 22157; GFX940-NEXT: s_setpc_b64 s[30:31] 22158 %vec0 = call <8 x i16> asm "; def $0", "=s"() 22159 %vec1 = call <8 x i16> asm "; def $0", "=s"() 22160 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 6, i32 9> 22161 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 22162 ret void 22163} 22164 22165define void @s_shuffle_v2i16_v8i16__7_9() { 22166; GFX900-LABEL: s_shuffle_v2i16_v8i16__7_9: 22167; GFX900: ; %bb.0: 22168; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22169; GFX900-NEXT: ;;#ASMSTART 22170; GFX900-NEXT: ; def s[8:11] 22171; GFX900-NEXT: ;;#ASMEND 22172; GFX900-NEXT: ;;#ASMSTART 22173; GFX900-NEXT: ; def s[4:7] 22174; GFX900-NEXT: ;;#ASMEND 22175; GFX900-NEXT: s_pack_hh_b32_b16 s8, s7, s8 22176; GFX900-NEXT: ;;#ASMSTART 22177; GFX900-NEXT: ; use s8 22178; GFX900-NEXT: ;;#ASMEND 22179; GFX900-NEXT: s_setpc_b64 s[30:31] 22180; 22181; GFX90A-LABEL: s_shuffle_v2i16_v8i16__7_9: 22182; GFX90A: ; %bb.0: 22183; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22184; GFX90A-NEXT: 
;;#ASMSTART 22185; GFX90A-NEXT: ; def s[8:11] 22186; GFX90A-NEXT: ;;#ASMEND 22187; GFX90A-NEXT: ;;#ASMSTART 22188; GFX90A-NEXT: ; def s[4:7] 22189; GFX90A-NEXT: ;;#ASMEND 22190; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s7, s8 22191; GFX90A-NEXT: ;;#ASMSTART 22192; GFX90A-NEXT: ; use s8 22193; GFX90A-NEXT: ;;#ASMEND 22194; GFX90A-NEXT: s_setpc_b64 s[30:31] 22195; 22196; GFX940-LABEL: s_shuffle_v2i16_v8i16__7_9: 22197; GFX940: ; %bb.0: 22198; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22199; GFX940-NEXT: ;;#ASMSTART 22200; GFX940-NEXT: ; def s[0:3] 22201; GFX940-NEXT: ;;#ASMEND 22202; GFX940-NEXT: ;;#ASMSTART 22203; GFX940-NEXT: ; def s[4:7] 22204; GFX940-NEXT: ;;#ASMEND 22205; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s4 22206; GFX940-NEXT: ;;#ASMSTART 22207; GFX940-NEXT: ; use s8 22208; GFX940-NEXT: ;;#ASMEND 22209; GFX940-NEXT: s_setpc_b64 s[30:31] 22210 %vec0 = call <8 x i16> asm "; def $0", "=s"() 22211 %vec1 = call <8 x i16> asm "; def $0", "=s"() 22212 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 7, i32 9> 22213 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 22214 ret void 22215} 22216 22217define void @s_shuffle_v2i16_v8i16__8_9() { 22218; GFX900-LABEL: s_shuffle_v2i16_v8i16__8_9: 22219; GFX900: ; %bb.0: 22220; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22221; GFX900-NEXT: ;;#ASMSTART 22222; GFX900-NEXT: ; def s[8:11] 22223; GFX900-NEXT: ;;#ASMEND 22224; GFX900-NEXT: ;;#ASMSTART 22225; GFX900-NEXT: ; use s8 22226; GFX900-NEXT: ;;#ASMEND 22227; GFX900-NEXT: s_setpc_b64 s[30:31] 22228; 22229; GFX90A-LABEL: s_shuffle_v2i16_v8i16__8_9: 22230; GFX90A: ; %bb.0: 22231; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22232; GFX90A-NEXT: ;;#ASMSTART 22233; GFX90A-NEXT: ; def s[8:11] 22234; GFX90A-NEXT: ;;#ASMEND 22235; GFX90A-NEXT: ;;#ASMSTART 22236; GFX90A-NEXT: ; use s8 22237; GFX90A-NEXT: ;;#ASMEND 22238; GFX90A-NEXT: s_setpc_b64 s[30:31] 22239; 22240; GFX940-LABEL: s_shuffle_v2i16_v8i16__8_9: 22241; GFX940: ; 
%bb.0: 22242; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22243; GFX940-NEXT: ;;#ASMSTART 22244; GFX940-NEXT: ; def s[8:11] 22245; GFX940-NEXT: ;;#ASMEND 22246; GFX940-NEXT: s_nop 0 22247; GFX940-NEXT: ;;#ASMSTART 22248; GFX940-NEXT: ; use s8 22249; GFX940-NEXT: ;;#ASMEND 22250; GFX940-NEXT: s_setpc_b64 s[30:31] 22251 %vec0 = call <8 x i16> asm "; def $0", "=s"() 22252 %vec1 = call <8 x i16> asm "; def $0", "=s"() 22253 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 8, i32 9> 22254 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 22255 ret void 22256} 22257 22258define void @s_shuffle_v2i16_v8i16__9_9() { 22259; GFX900-LABEL: s_shuffle_v2i16_v8i16__9_9: 22260; GFX900: ; %bb.0: 22261; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22262; GFX900-NEXT: ;;#ASMSTART 22263; GFX900-NEXT: ; def s[4:7] 22264; GFX900-NEXT: ;;#ASMEND 22265; GFX900-NEXT: s_pack_hh_b32_b16 s8, s4, s4 22266; GFX900-NEXT: ;;#ASMSTART 22267; GFX900-NEXT: ; use s8 22268; GFX900-NEXT: ;;#ASMEND 22269; GFX900-NEXT: s_setpc_b64 s[30:31] 22270; 22271; GFX90A-LABEL: s_shuffle_v2i16_v8i16__9_9: 22272; GFX90A: ; %bb.0: 22273; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22274; GFX90A-NEXT: ;;#ASMSTART 22275; GFX90A-NEXT: ; def s[4:7] 22276; GFX90A-NEXT: ;;#ASMEND 22277; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s4, s4 22278; GFX90A-NEXT: ;;#ASMSTART 22279; GFX90A-NEXT: ; use s8 22280; GFX90A-NEXT: ;;#ASMEND 22281; GFX90A-NEXT: s_setpc_b64 s[30:31] 22282; 22283; GFX940-LABEL: s_shuffle_v2i16_v8i16__9_9: 22284; GFX940: ; %bb.0: 22285; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22286; GFX940-NEXT: ;;#ASMSTART 22287; GFX940-NEXT: ; def s[0:3] 22288; GFX940-NEXT: ;;#ASMEND 22289; GFX940-NEXT: s_pack_hh_b32_b16 s8, s0, s0 22290; GFX940-NEXT: ;;#ASMSTART 22291; GFX940-NEXT: ; use s8 22292; GFX940-NEXT: ;;#ASMEND 22293; GFX940-NEXT: s_setpc_b64 s[30:31] 22294 %vec0 = call <8 x i16> asm "; def $0", "=s"() 22295 %vec1 = call <8 x i16> asm "; def $0", 
"=s"() 22296 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 9, i32 9> 22297 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 22298 ret void 22299} 22300 22301define void @s_shuffle_v2i16_v8i16__10_9() { 22302; GFX900-LABEL: s_shuffle_v2i16_v8i16__10_9: 22303; GFX900: ; %bb.0: 22304; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22305; GFX900-NEXT: ;;#ASMSTART 22306; GFX900-NEXT: ; def s[4:7] 22307; GFX900-NEXT: ;;#ASMEND 22308; GFX900-NEXT: s_pack_lh_b32_b16 s8, s5, s4 22309; GFX900-NEXT: ;;#ASMSTART 22310; GFX900-NEXT: ; use s8 22311; GFX900-NEXT: ;;#ASMEND 22312; GFX900-NEXT: s_setpc_b64 s[30:31] 22313; 22314; GFX90A-LABEL: s_shuffle_v2i16_v8i16__10_9: 22315; GFX90A: ; %bb.0: 22316; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22317; GFX90A-NEXT: ;;#ASMSTART 22318; GFX90A-NEXT: ; def s[4:7] 22319; GFX90A-NEXT: ;;#ASMEND 22320; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s5, s4 22321; GFX90A-NEXT: ;;#ASMSTART 22322; GFX90A-NEXT: ; use s8 22323; GFX90A-NEXT: ;;#ASMEND 22324; GFX90A-NEXT: s_setpc_b64 s[30:31] 22325; 22326; GFX940-LABEL: s_shuffle_v2i16_v8i16__10_9: 22327; GFX940: ; %bb.0: 22328; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22329; GFX940-NEXT: ;;#ASMSTART 22330; GFX940-NEXT: ; def s[0:3] 22331; GFX940-NEXT: ;;#ASMEND 22332; GFX940-NEXT: s_pack_lh_b32_b16 s8, s1, s0 22333; GFX940-NEXT: ;;#ASMSTART 22334; GFX940-NEXT: ; use s8 22335; GFX940-NEXT: ;;#ASMEND 22336; GFX940-NEXT: s_setpc_b64 s[30:31] 22337 %vec0 = call <8 x i16> asm "; def $0", "=s"() 22338 %vec1 = call <8 x i16> asm "; def $0", "=s"() 22339 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 10, i32 9> 22340 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 22341 ret void 22342} 22343 22344define void @s_shuffle_v2i16_v8i16__11_9() { 22345; GFX900-LABEL: s_shuffle_v2i16_v8i16__11_9: 22346; GFX900: ; %bb.0: 22347; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22348; GFX900-NEXT: ;;#ASMSTART 22349; 
GFX900-NEXT: ; def s[4:7] 22350; GFX900-NEXT: ;;#ASMEND 22351; GFX900-NEXT: s_pack_hh_b32_b16 s8, s5, s4 22352; GFX900-NEXT: ;;#ASMSTART 22353; GFX900-NEXT: ; use s8 22354; GFX900-NEXT: ;;#ASMEND 22355; GFX900-NEXT: s_setpc_b64 s[30:31] 22356; 22357; GFX90A-LABEL: s_shuffle_v2i16_v8i16__11_9: 22358; GFX90A: ; %bb.0: 22359; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22360; GFX90A-NEXT: ;;#ASMSTART 22361; GFX90A-NEXT: ; def s[4:7] 22362; GFX90A-NEXT: ;;#ASMEND 22363; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s5, s4 22364; GFX90A-NEXT: ;;#ASMSTART 22365; GFX90A-NEXT: ; use s8 22366; GFX90A-NEXT: ;;#ASMEND 22367; GFX90A-NEXT: s_setpc_b64 s[30:31] 22368; 22369; GFX940-LABEL: s_shuffle_v2i16_v8i16__11_9: 22370; GFX940: ; %bb.0: 22371; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22372; GFX940-NEXT: ;;#ASMSTART 22373; GFX940-NEXT: ; def s[0:3] 22374; GFX940-NEXT: ;;#ASMEND 22375; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s0 22376; GFX940-NEXT: ;;#ASMSTART 22377; GFX940-NEXT: ; use s8 22378; GFX940-NEXT: ;;#ASMEND 22379; GFX940-NEXT: s_setpc_b64 s[30:31] 22380 %vec0 = call <8 x i16> asm "; def $0", "=s"() 22381 %vec1 = call <8 x i16> asm "; def $0", "=s"() 22382 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 11, i32 9> 22383 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 22384 ret void 22385} 22386 22387define void @s_shuffle_v2i16_v8i16__12_9() { 22388; GFX900-LABEL: s_shuffle_v2i16_v8i16__12_9: 22389; GFX900: ; %bb.0: 22390; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22391; GFX900-NEXT: ;;#ASMSTART 22392; GFX900-NEXT: ; def s[4:7] 22393; GFX900-NEXT: ;;#ASMEND 22394; GFX900-NEXT: s_pack_lh_b32_b16 s8, s6, s4 22395; GFX900-NEXT: ;;#ASMSTART 22396; GFX900-NEXT: ; use s8 22397; GFX900-NEXT: ;;#ASMEND 22398; GFX900-NEXT: s_setpc_b64 s[30:31] 22399; 22400; GFX90A-LABEL: s_shuffle_v2i16_v8i16__12_9: 22401; GFX90A: ; %bb.0: 22402; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22403; GFX90A-NEXT: ;;#ASMSTART 22404; 
GFX90A-NEXT: ; def s[4:7] 22405; GFX90A-NEXT: ;;#ASMEND 22406; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s6, s4 22407; GFX90A-NEXT: ;;#ASMSTART 22408; GFX90A-NEXT: ; use s8 22409; GFX90A-NEXT: ;;#ASMEND 22410; GFX90A-NEXT: s_setpc_b64 s[30:31] 22411; 22412; GFX940-LABEL: s_shuffle_v2i16_v8i16__12_9: 22413; GFX940: ; %bb.0: 22414; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22415; GFX940-NEXT: ;;#ASMSTART 22416; GFX940-NEXT: ; def s[0:3] 22417; GFX940-NEXT: ;;#ASMEND 22418; GFX940-NEXT: s_pack_lh_b32_b16 s8, s2, s0 22419; GFX940-NEXT: ;;#ASMSTART 22420; GFX940-NEXT: ; use s8 22421; GFX940-NEXT: ;;#ASMEND 22422; GFX940-NEXT: s_setpc_b64 s[30:31] 22423 %vec0 = call <8 x i16> asm "; def $0", "=s"() 22424 %vec1 = call <8 x i16> asm "; def $0", "=s"() 22425 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 12, i32 9> 22426 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 22427 ret void 22428} 22429 22430define void @s_shuffle_v2i16_v8i16__13_9() { 22431; GFX900-LABEL: s_shuffle_v2i16_v8i16__13_9: 22432; GFX900: ; %bb.0: 22433; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22434; GFX900-NEXT: ;;#ASMSTART 22435; GFX900-NEXT: ; def s[4:7] 22436; GFX900-NEXT: ;;#ASMEND 22437; GFX900-NEXT: s_pack_hh_b32_b16 s8, s6, s4 22438; GFX900-NEXT: ;;#ASMSTART 22439; GFX900-NEXT: ; use s8 22440; GFX900-NEXT: ;;#ASMEND 22441; GFX900-NEXT: s_setpc_b64 s[30:31] 22442; 22443; GFX90A-LABEL: s_shuffle_v2i16_v8i16__13_9: 22444; GFX90A: ; %bb.0: 22445; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22446; GFX90A-NEXT: ;;#ASMSTART 22447; GFX90A-NEXT: ; def s[4:7] 22448; GFX90A-NEXT: ;;#ASMEND 22449; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s6, s4 22450; GFX90A-NEXT: ;;#ASMSTART 22451; GFX90A-NEXT: ; use s8 22452; GFX90A-NEXT: ;;#ASMEND 22453; GFX90A-NEXT: s_setpc_b64 s[30:31] 22454; 22455; GFX940-LABEL: s_shuffle_v2i16_v8i16__13_9: 22456; GFX940: ; %bb.0: 22457; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22458; GFX940-NEXT: ;;#ASMSTART 22459; 
GFX940-NEXT: ; def s[0:3] 22460; GFX940-NEXT: ;;#ASMEND 22461; GFX940-NEXT: s_pack_hh_b32_b16 s8, s2, s0 22462; GFX940-NEXT: ;;#ASMSTART 22463; GFX940-NEXT: ; use s8 22464; GFX940-NEXT: ;;#ASMEND 22465; GFX940-NEXT: s_setpc_b64 s[30:31] 22466 %vec0 = call <8 x i16> asm "; def $0", "=s"() 22467 %vec1 = call <8 x i16> asm "; def $0", "=s"() 22468 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 13, i32 9> 22469 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 22470 ret void 22471} 22472 22473define void @s_shuffle_v2i16_v8i16__14_9() { 22474; GFX900-LABEL: s_shuffle_v2i16_v8i16__14_9: 22475; GFX900: ; %bb.0: 22476; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22477; GFX900-NEXT: ;;#ASMSTART 22478; GFX900-NEXT: ; def s[4:7] 22479; GFX900-NEXT: ;;#ASMEND 22480; GFX900-NEXT: s_pack_lh_b32_b16 s8, s7, s4 22481; GFX900-NEXT: ;;#ASMSTART 22482; GFX900-NEXT: ; use s8 22483; GFX900-NEXT: ;;#ASMEND 22484; GFX900-NEXT: s_setpc_b64 s[30:31] 22485; 22486; GFX90A-LABEL: s_shuffle_v2i16_v8i16__14_9: 22487; GFX90A: ; %bb.0: 22488; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22489; GFX90A-NEXT: ;;#ASMSTART 22490; GFX90A-NEXT: ; def s[4:7] 22491; GFX90A-NEXT: ;;#ASMEND 22492; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s7, s4 22493; GFX90A-NEXT: ;;#ASMSTART 22494; GFX90A-NEXT: ; use s8 22495; GFX90A-NEXT: ;;#ASMEND 22496; GFX90A-NEXT: s_setpc_b64 s[30:31] 22497; 22498; GFX940-LABEL: s_shuffle_v2i16_v8i16__14_9: 22499; GFX940: ; %bb.0: 22500; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22501; GFX940-NEXT: ;;#ASMSTART 22502; GFX940-NEXT: ; def s[0:3] 22503; GFX940-NEXT: ;;#ASMEND 22504; GFX940-NEXT: s_pack_lh_b32_b16 s8, s3, s0 22505; GFX940-NEXT: ;;#ASMSTART 22506; GFX940-NEXT: ; use s8 22507; GFX940-NEXT: ;;#ASMEND 22508; GFX940-NEXT: s_setpc_b64 s[30:31] 22509 %vec0 = call <8 x i16> asm "; def $0", "=s"() 22510 %vec1 = call <8 x i16> asm "; def $0", "=s"() 22511 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 
14, i32 9> 22512 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 22513 ret void 22514} 22515 22516define void @s_shuffle_v2i16_v8i16__u_10() { 22517; GFX900-LABEL: s_shuffle_v2i16_v8i16__u_10: 22518; GFX900: ; %bb.0: 22519; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22520; GFX900-NEXT: ;;#ASMSTART 22521; GFX900-NEXT: ; def s[4:7] 22522; GFX900-NEXT: ;;#ASMEND 22523; GFX900-NEXT: s_lshl_b32 s8, s5, 16 22524; GFX900-NEXT: ;;#ASMSTART 22525; GFX900-NEXT: ; use s8 22526; GFX900-NEXT: ;;#ASMEND 22527; GFX900-NEXT: s_setpc_b64 s[30:31] 22528; 22529; GFX90A-LABEL: s_shuffle_v2i16_v8i16__u_10: 22530; GFX90A: ; %bb.0: 22531; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22532; GFX90A-NEXT: ;;#ASMSTART 22533; GFX90A-NEXT: ; def s[4:7] 22534; GFX90A-NEXT: ;;#ASMEND 22535; GFX90A-NEXT: s_lshl_b32 s8, s5, 16 22536; GFX90A-NEXT: ;;#ASMSTART 22537; GFX90A-NEXT: ; use s8 22538; GFX90A-NEXT: ;;#ASMEND 22539; GFX90A-NEXT: s_setpc_b64 s[30:31] 22540; 22541; GFX940-LABEL: s_shuffle_v2i16_v8i16__u_10: 22542; GFX940: ; %bb.0: 22543; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22544; GFX940-NEXT: ;;#ASMSTART 22545; GFX940-NEXT: ; def s[0:3] 22546; GFX940-NEXT: ;;#ASMEND 22547; GFX940-NEXT: s_lshl_b32 s8, s1, 16 22548; GFX940-NEXT: ;;#ASMSTART 22549; GFX940-NEXT: ; use s8 22550; GFX940-NEXT: ;;#ASMEND 22551; GFX940-NEXT: s_setpc_b64 s[30:31] 22552 %vec0 = call <8 x i16> asm "; def $0", "=s"() 22553 %vec1 = call <8 x i16> asm "; def $0", "=s"() 22554 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 poison, i32 10> 22555 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 22556 ret void 22557} 22558 22559define void @s_shuffle_v2i16_v8i16__0_10() { 22560; GFX900-LABEL: s_shuffle_v2i16_v8i16__0_10: 22561; GFX900: ; %bb.0: 22562; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22563; GFX900-NEXT: ;;#ASMSTART 22564; GFX900-NEXT: ; def s[8:11] 22565; GFX900-NEXT: ;;#ASMEND 22566; GFX900-NEXT: ;;#ASMSTART 22567; 
GFX900-NEXT: ; def s[4:7] 22568; GFX900-NEXT: ;;#ASMEND 22569; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s9 22570; GFX900-NEXT: ;;#ASMSTART 22571; GFX900-NEXT: ; use s8 22572; GFX900-NEXT: ;;#ASMEND 22573; GFX900-NEXT: s_setpc_b64 s[30:31] 22574; 22575; GFX90A-LABEL: s_shuffle_v2i16_v8i16__0_10: 22576; GFX90A: ; %bb.0: 22577; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22578; GFX90A-NEXT: ;;#ASMSTART 22579; GFX90A-NEXT: ; def s[8:11] 22580; GFX90A-NEXT: ;;#ASMEND 22581; GFX90A-NEXT: ;;#ASMSTART 22582; GFX90A-NEXT: ; def s[4:7] 22583; GFX90A-NEXT: ;;#ASMEND 22584; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s9 22585; GFX90A-NEXT: ;;#ASMSTART 22586; GFX90A-NEXT: ; use s8 22587; GFX90A-NEXT: ;;#ASMEND 22588; GFX90A-NEXT: s_setpc_b64 s[30:31] 22589; 22590; GFX940-LABEL: s_shuffle_v2i16_v8i16__0_10: 22591; GFX940: ; %bb.0: 22592; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22593; GFX940-NEXT: ;;#ASMSTART 22594; GFX940-NEXT: ; def s[0:3] 22595; GFX940-NEXT: ;;#ASMEND 22596; GFX940-NEXT: ;;#ASMSTART 22597; GFX940-NEXT: ; def s[4:7] 22598; GFX940-NEXT: ;;#ASMEND 22599; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s5 22600; GFX940-NEXT: ;;#ASMSTART 22601; GFX940-NEXT: ; use s8 22602; GFX940-NEXT: ;;#ASMEND 22603; GFX940-NEXT: s_setpc_b64 s[30:31] 22604 %vec0 = call <8 x i16> asm "; def $0", "=s"() 22605 %vec1 = call <8 x i16> asm "; def $0", "=s"() 22606 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 0, i32 10> 22607 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 22608 ret void 22609} 22610 22611define void @s_shuffle_v2i16_v8i16__1_10() { 22612; GFX900-LABEL: s_shuffle_v2i16_v8i16__1_10: 22613; GFX900: ; %bb.0: 22614; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22615; GFX900-NEXT: ;;#ASMSTART 22616; GFX900-NEXT: ; def s[4:7] 22617; GFX900-NEXT: ;;#ASMEND 22618; GFX900-NEXT: ;;#ASMSTART 22619; GFX900-NEXT: ; def s[8:11] 22620; GFX900-NEXT: ;;#ASMEND 22621; GFX900-NEXT: s_lshr_b32 s4, s4, 16 22622; GFX900-NEXT: 
s_pack_ll_b32_b16 s8, s4, s9 22623; GFX900-NEXT: ;;#ASMSTART 22624; GFX900-NEXT: ; use s8 22625; GFX900-NEXT: ;;#ASMEND 22626; GFX900-NEXT: s_setpc_b64 s[30:31] 22627; 22628; GFX90A-LABEL: s_shuffle_v2i16_v8i16__1_10: 22629; GFX90A: ; %bb.0: 22630; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22631; GFX90A-NEXT: ;;#ASMSTART 22632; GFX90A-NEXT: ; def s[4:7] 22633; GFX90A-NEXT: ;;#ASMEND 22634; GFX90A-NEXT: ;;#ASMSTART 22635; GFX90A-NEXT: ; def s[8:11] 22636; GFX90A-NEXT: ;;#ASMEND 22637; GFX90A-NEXT: s_lshr_b32 s4, s4, 16 22638; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s9 22639; GFX90A-NEXT: ;;#ASMSTART 22640; GFX90A-NEXT: ; use s8 22641; GFX90A-NEXT: ;;#ASMEND 22642; GFX90A-NEXT: s_setpc_b64 s[30:31] 22643; 22644; GFX940-LABEL: s_shuffle_v2i16_v8i16__1_10: 22645; GFX940: ; %bb.0: 22646; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22647; GFX940-NEXT: ;;#ASMSTART 22648; GFX940-NEXT: ; def s[0:3] 22649; GFX940-NEXT: ;;#ASMEND 22650; GFX940-NEXT: s_lshr_b32 s0, s0, 16 22651; GFX940-NEXT: ;;#ASMSTART 22652; GFX940-NEXT: ; def s[4:7] 22653; GFX940-NEXT: ;;#ASMEND 22654; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s5 22655; GFX940-NEXT: ;;#ASMSTART 22656; GFX940-NEXT: ; use s8 22657; GFX940-NEXT: ;;#ASMEND 22658; GFX940-NEXT: s_setpc_b64 s[30:31] 22659 %vec0 = call <8 x i16> asm "; def $0", "=s"() 22660 %vec1 = call <8 x i16> asm "; def $0", "=s"() 22661 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 1, i32 10> 22662 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 22663 ret void 22664} 22665 22666define void @s_shuffle_v2i16_v8i16__2_10() { 22667; GFX900-LABEL: s_shuffle_v2i16_v8i16__2_10: 22668; GFX900: ; %bb.0: 22669; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22670; GFX900-NEXT: ;;#ASMSTART 22671; GFX900-NEXT: ; def s[8:11] 22672; GFX900-NEXT: ;;#ASMEND 22673; GFX900-NEXT: ;;#ASMSTART 22674; GFX900-NEXT: ; def s[4:7] 22675; GFX900-NEXT: ;;#ASMEND 22676; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s9 22677; 
GFX900-NEXT: ;;#ASMSTART 22678; GFX900-NEXT: ; use s8 22679; GFX900-NEXT: ;;#ASMEND 22680; GFX900-NEXT: s_setpc_b64 s[30:31] 22681; 22682; GFX90A-LABEL: s_shuffle_v2i16_v8i16__2_10: 22683; GFX90A: ; %bb.0: 22684; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22685; GFX90A-NEXT: ;;#ASMSTART 22686; GFX90A-NEXT: ; def s[8:11] 22687; GFX90A-NEXT: ;;#ASMEND 22688; GFX90A-NEXT: ;;#ASMSTART 22689; GFX90A-NEXT: ; def s[4:7] 22690; GFX90A-NEXT: ;;#ASMEND 22691; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s9 22692; GFX90A-NEXT: ;;#ASMSTART 22693; GFX90A-NEXT: ; use s8 22694; GFX90A-NEXT: ;;#ASMEND 22695; GFX90A-NEXT: s_setpc_b64 s[30:31] 22696; 22697; GFX940-LABEL: s_shuffle_v2i16_v8i16__2_10: 22698; GFX940: ; %bb.0: 22699; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22700; GFX940-NEXT: ;;#ASMSTART 22701; GFX940-NEXT: ; def s[0:3] 22702; GFX940-NEXT: ;;#ASMEND 22703; GFX940-NEXT: ;;#ASMSTART 22704; GFX940-NEXT: ; def s[4:7] 22705; GFX940-NEXT: ;;#ASMEND 22706; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s5 22707; GFX940-NEXT: ;;#ASMSTART 22708; GFX940-NEXT: ; use s8 22709; GFX940-NEXT: ;;#ASMEND 22710; GFX940-NEXT: s_setpc_b64 s[30:31] 22711 %vec0 = call <8 x i16> asm "; def $0", "=s"() 22712 %vec1 = call <8 x i16> asm "; def $0", "=s"() 22713 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 2, i32 10> 22714 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 22715 ret void 22716} 22717 22718define void @s_shuffle_v2i16_v8i16__3_10() { 22719; GFX900-LABEL: s_shuffle_v2i16_v8i16__3_10: 22720; GFX900: ; %bb.0: 22721; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22722; GFX900-NEXT: ;;#ASMSTART 22723; GFX900-NEXT: ; def s[4:7] 22724; GFX900-NEXT: ;;#ASMEND 22725; GFX900-NEXT: ;;#ASMSTART 22726; GFX900-NEXT: ; def s[8:11] 22727; GFX900-NEXT: ;;#ASMEND 22728; GFX900-NEXT: s_lshr_b32 s4, s5, 16 22729; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s9 22730; GFX900-NEXT: ;;#ASMSTART 22731; GFX900-NEXT: ; use s8 22732; GFX900-NEXT: ;;#ASMEND 
22733; GFX900-NEXT: s_setpc_b64 s[30:31] 22734; 22735; GFX90A-LABEL: s_shuffle_v2i16_v8i16__3_10: 22736; GFX90A: ; %bb.0: 22737; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22738; GFX90A-NEXT: ;;#ASMSTART 22739; GFX90A-NEXT: ; def s[4:7] 22740; GFX90A-NEXT: ;;#ASMEND 22741; GFX90A-NEXT: ;;#ASMSTART 22742; GFX90A-NEXT: ; def s[8:11] 22743; GFX90A-NEXT: ;;#ASMEND 22744; GFX90A-NEXT: s_lshr_b32 s4, s5, 16 22745; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s9 22746; GFX90A-NEXT: ;;#ASMSTART 22747; GFX90A-NEXT: ; use s8 22748; GFX90A-NEXT: ;;#ASMEND 22749; GFX90A-NEXT: s_setpc_b64 s[30:31] 22750; 22751; GFX940-LABEL: s_shuffle_v2i16_v8i16__3_10: 22752; GFX940: ; %bb.0: 22753; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22754; GFX940-NEXT: ;;#ASMSTART 22755; GFX940-NEXT: ; def s[0:3] 22756; GFX940-NEXT: ;;#ASMEND 22757; GFX940-NEXT: s_lshr_b32 s0, s1, 16 22758; GFX940-NEXT: ;;#ASMSTART 22759; GFX940-NEXT: ; def s[4:7] 22760; GFX940-NEXT: ;;#ASMEND 22761; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s5 22762; GFX940-NEXT: ;;#ASMSTART 22763; GFX940-NEXT: ; use s8 22764; GFX940-NEXT: ;;#ASMEND 22765; GFX940-NEXT: s_setpc_b64 s[30:31] 22766 %vec0 = call <8 x i16> asm "; def $0", "=s"() 22767 %vec1 = call <8 x i16> asm "; def $0", "=s"() 22768 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 3, i32 10> 22769 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 22770 ret void 22771} 22772 22773define void @s_shuffle_v2i16_v8i16__4_10() { 22774; GFX900-LABEL: s_shuffle_v2i16_v8i16__4_10: 22775; GFX900: ; %bb.0: 22776; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22777; GFX900-NEXT: ;;#ASMSTART 22778; GFX900-NEXT: ; def s[8:11] 22779; GFX900-NEXT: ;;#ASMEND 22780; GFX900-NEXT: ;;#ASMSTART 22781; GFX900-NEXT: ; def s[4:7] 22782; GFX900-NEXT: ;;#ASMEND 22783; GFX900-NEXT: s_pack_ll_b32_b16 s8, s6, s9 22784; GFX900-NEXT: ;;#ASMSTART 22785; GFX900-NEXT: ; use s8 22786; GFX900-NEXT: ;;#ASMEND 22787; GFX900-NEXT: s_setpc_b64 s[30:31] 22788; 
22789; GFX90A-LABEL: s_shuffle_v2i16_v8i16__4_10: 22790; GFX90A: ; %bb.0: 22791; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22792; GFX90A-NEXT: ;;#ASMSTART 22793; GFX90A-NEXT: ; def s[8:11] 22794; GFX90A-NEXT: ;;#ASMEND 22795; GFX90A-NEXT: ;;#ASMSTART 22796; GFX90A-NEXT: ; def s[4:7] 22797; GFX90A-NEXT: ;;#ASMEND 22798; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s6, s9 22799; GFX90A-NEXT: ;;#ASMSTART 22800; GFX90A-NEXT: ; use s8 22801; GFX90A-NEXT: ;;#ASMEND 22802; GFX90A-NEXT: s_setpc_b64 s[30:31] 22803; 22804; GFX940-LABEL: s_shuffle_v2i16_v8i16__4_10: 22805; GFX940: ; %bb.0: 22806; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22807; GFX940-NEXT: ;;#ASMSTART 22808; GFX940-NEXT: ; def s[0:3] 22809; GFX940-NEXT: ;;#ASMEND 22810; GFX940-NEXT: ;;#ASMSTART 22811; GFX940-NEXT: ; def s[4:7] 22812; GFX940-NEXT: ;;#ASMEND 22813; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s5 22814; GFX940-NEXT: ;;#ASMSTART 22815; GFX940-NEXT: ; use s8 22816; GFX940-NEXT: ;;#ASMEND 22817; GFX940-NEXT: s_setpc_b64 s[30:31] 22818 %vec0 = call <8 x i16> asm "; def $0", "=s"() 22819 %vec1 = call <8 x i16> asm "; def $0", "=s"() 22820 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 4, i32 10> 22821 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 22822 ret void 22823} 22824 22825define void @s_shuffle_v2i16_v8i16__5_10() { 22826; GFX900-LABEL: s_shuffle_v2i16_v8i16__5_10: 22827; GFX900: ; %bb.0: 22828; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22829; GFX900-NEXT: ;;#ASMSTART 22830; GFX900-NEXT: ; def s[4:7] 22831; GFX900-NEXT: ;;#ASMEND 22832; GFX900-NEXT: ;;#ASMSTART 22833; GFX900-NEXT: ; def s[8:11] 22834; GFX900-NEXT: ;;#ASMEND 22835; GFX900-NEXT: s_lshr_b32 s4, s6, 16 22836; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s9 22837; GFX900-NEXT: ;;#ASMSTART 22838; GFX900-NEXT: ; use s8 22839; GFX900-NEXT: ;;#ASMEND 22840; GFX900-NEXT: s_setpc_b64 s[30:31] 22841; 22842; GFX90A-LABEL: s_shuffle_v2i16_v8i16__5_10: 22843; GFX90A: ; %bb.0: 22844; 
GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22845; GFX90A-NEXT: ;;#ASMSTART 22846; GFX90A-NEXT: ; def s[4:7] 22847; GFX90A-NEXT: ;;#ASMEND 22848; GFX90A-NEXT: ;;#ASMSTART 22849; GFX90A-NEXT: ; def s[8:11] 22850; GFX90A-NEXT: ;;#ASMEND 22851; GFX90A-NEXT: s_lshr_b32 s4, s6, 16 22852; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s9 22853; GFX90A-NEXT: ;;#ASMSTART 22854; GFX90A-NEXT: ; use s8 22855; GFX90A-NEXT: ;;#ASMEND 22856; GFX90A-NEXT: s_setpc_b64 s[30:31] 22857; 22858; GFX940-LABEL: s_shuffle_v2i16_v8i16__5_10: 22859; GFX940: ; %bb.0: 22860; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22861; GFX940-NEXT: ;;#ASMSTART 22862; GFX940-NEXT: ; def s[0:3] 22863; GFX940-NEXT: ;;#ASMEND 22864; GFX940-NEXT: s_lshr_b32 s0, s2, 16 22865; GFX940-NEXT: ;;#ASMSTART 22866; GFX940-NEXT: ; def s[4:7] 22867; GFX940-NEXT: ;;#ASMEND 22868; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s5 22869; GFX940-NEXT: ;;#ASMSTART 22870; GFX940-NEXT: ; use s8 22871; GFX940-NEXT: ;;#ASMEND 22872; GFX940-NEXT: s_setpc_b64 s[30:31] 22873 %vec0 = call <8 x i16> asm "; def $0", "=s"() 22874 %vec1 = call <8 x i16> asm "; def $0", "=s"() 22875 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 5, i32 10> 22876 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 22877 ret void 22878} 22879 22880define void @s_shuffle_v2i16_v8i16__6_10() { 22881; GFX900-LABEL: s_shuffle_v2i16_v8i16__6_10: 22882; GFX900: ; %bb.0: 22883; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22884; GFX900-NEXT: ;;#ASMSTART 22885; GFX900-NEXT: ; def s[8:11] 22886; GFX900-NEXT: ;;#ASMEND 22887; GFX900-NEXT: ;;#ASMSTART 22888; GFX900-NEXT: ; def s[4:7] 22889; GFX900-NEXT: ;;#ASMEND 22890; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s9 22891; GFX900-NEXT: ;;#ASMSTART 22892; GFX900-NEXT: ; use s8 22893; GFX900-NEXT: ;;#ASMEND 22894; GFX900-NEXT: s_setpc_b64 s[30:31] 22895; 22896; GFX90A-LABEL: s_shuffle_v2i16_v8i16__6_10: 22897; GFX90A: ; %bb.0: 22898; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) 22899; GFX90A-NEXT: ;;#ASMSTART 22900; GFX90A-NEXT: ; def s[8:11] 22901; GFX90A-NEXT: ;;#ASMEND 22902; GFX90A-NEXT: ;;#ASMSTART 22903; GFX90A-NEXT: ; def s[4:7] 22904; GFX90A-NEXT: ;;#ASMEND 22905; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s9 22906; GFX90A-NEXT: ;;#ASMSTART 22907; GFX90A-NEXT: ; use s8 22908; GFX90A-NEXT: ;;#ASMEND 22909; GFX90A-NEXT: s_setpc_b64 s[30:31] 22910; 22911; GFX940-LABEL: s_shuffle_v2i16_v8i16__6_10: 22912; GFX940: ; %bb.0: 22913; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22914; GFX940-NEXT: ;;#ASMSTART 22915; GFX940-NEXT: ; def s[0:3] 22916; GFX940-NEXT: ;;#ASMEND 22917; GFX940-NEXT: ;;#ASMSTART 22918; GFX940-NEXT: ; def s[4:7] 22919; GFX940-NEXT: ;;#ASMEND 22920; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s5 22921; GFX940-NEXT: ;;#ASMSTART 22922; GFX940-NEXT: ; use s8 22923; GFX940-NEXT: ;;#ASMEND 22924; GFX940-NEXT: s_setpc_b64 s[30:31] 22925 %vec0 = call <8 x i16> asm "; def $0", "=s"() 22926 %vec1 = call <8 x i16> asm "; def $0", "=s"() 22927 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 6, i32 10> 22928 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 22929 ret void 22930} 22931 22932define void @s_shuffle_v2i16_v8i16__7_10() { 22933; GFX900-LABEL: s_shuffle_v2i16_v8i16__7_10: 22934; GFX900: ; %bb.0: 22935; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22936; GFX900-NEXT: ;;#ASMSTART 22937; GFX900-NEXT: ; def s[4:7] 22938; GFX900-NEXT: ;;#ASMEND 22939; GFX900-NEXT: ;;#ASMSTART 22940; GFX900-NEXT: ; def s[8:11] 22941; GFX900-NEXT: ;;#ASMEND 22942; GFX900-NEXT: s_lshr_b32 s4, s7, 16 22943; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s9 22944; GFX900-NEXT: ;;#ASMSTART 22945; GFX900-NEXT: ; use s8 22946; GFX900-NEXT: ;;#ASMEND 22947; GFX900-NEXT: s_setpc_b64 s[30:31] 22948; 22949; GFX90A-LABEL: s_shuffle_v2i16_v8i16__7_10: 22950; GFX90A: ; %bb.0: 22951; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22952; GFX90A-NEXT: ;;#ASMSTART 22953; GFX90A-NEXT: ; def s[4:7] 22954; 
GFX90A-NEXT: ;;#ASMEND 22955; GFX90A-NEXT: ;;#ASMSTART 22956; GFX90A-NEXT: ; def s[8:11] 22957; GFX90A-NEXT: ;;#ASMEND 22958; GFX90A-NEXT: s_lshr_b32 s4, s7, 16 22959; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s9 22960; GFX90A-NEXT: ;;#ASMSTART 22961; GFX90A-NEXT: ; use s8 22962; GFX90A-NEXT: ;;#ASMEND 22963; GFX90A-NEXT: s_setpc_b64 s[30:31] 22964; 22965; GFX940-LABEL: s_shuffle_v2i16_v8i16__7_10: 22966; GFX940: ; %bb.0: 22967; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22968; GFX940-NEXT: ;;#ASMSTART 22969; GFX940-NEXT: ; def s[0:3] 22970; GFX940-NEXT: ;;#ASMEND 22971; GFX940-NEXT: s_lshr_b32 s0, s3, 16 22972; GFX940-NEXT: ;;#ASMSTART 22973; GFX940-NEXT: ; def s[4:7] 22974; GFX940-NEXT: ;;#ASMEND 22975; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s5 22976; GFX940-NEXT: ;;#ASMSTART 22977; GFX940-NEXT: ; use s8 22978; GFX940-NEXT: ;;#ASMEND 22979; GFX940-NEXT: s_setpc_b64 s[30:31] 22980 %vec0 = call <8 x i16> asm "; def $0", "=s"() 22981 %vec1 = call <8 x i16> asm "; def $0", "=s"() 22982 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 7, i32 10> 22983 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 22984 ret void 22985} 22986 22987define void @s_shuffle_v2i16_v8i16__8_10() { 22988; GFX900-LABEL: s_shuffle_v2i16_v8i16__8_10: 22989; GFX900: ; %bb.0: 22990; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22991; GFX900-NEXT: ;;#ASMSTART 22992; GFX900-NEXT: ; def s[4:7] 22993; GFX900-NEXT: ;;#ASMEND 22994; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s5 22995; GFX900-NEXT: ;;#ASMSTART 22996; GFX900-NEXT: ; use s8 22997; GFX900-NEXT: ;;#ASMEND 22998; GFX900-NEXT: s_setpc_b64 s[30:31] 22999; 23000; GFX90A-LABEL: s_shuffle_v2i16_v8i16__8_10: 23001; GFX90A: ; %bb.0: 23002; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 23003; GFX90A-NEXT: ;;#ASMSTART 23004; GFX90A-NEXT: ; def s[4:7] 23005; GFX90A-NEXT: ;;#ASMEND 23006; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s5 23007; GFX90A-NEXT: ;;#ASMSTART 23008; GFX90A-NEXT: ; use s8 23009; 
GFX90A-NEXT: ;;#ASMEND 23010; GFX90A-NEXT: s_setpc_b64 s[30:31] 23011; 23012; GFX940-LABEL: s_shuffle_v2i16_v8i16__8_10: 23013; GFX940: ; %bb.0: 23014; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 23015; GFX940-NEXT: ;;#ASMSTART 23016; GFX940-NEXT: ; def s[0:3] 23017; GFX940-NEXT: ;;#ASMEND 23018; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 23019; GFX940-NEXT: ;;#ASMSTART 23020; GFX940-NEXT: ; use s8 23021; GFX940-NEXT: ;;#ASMEND 23022; GFX940-NEXT: s_setpc_b64 s[30:31] 23023 %vec0 = call <8 x i16> asm "; def $0", "=s"() 23024 %vec1 = call <8 x i16> asm "; def $0", "=s"() 23025 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 8, i32 10> 23026 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 23027 ret void 23028} 23029 23030define void @s_shuffle_v2i16_v8i16__9_10() { 23031; GFX900-LABEL: s_shuffle_v2i16_v8i16__9_10: 23032; GFX900: ; %bb.0: 23033; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 23034; GFX900-NEXT: ;;#ASMSTART 23035; GFX900-NEXT: ; def s[4:7] 23036; GFX900-NEXT: ;;#ASMEND 23037; GFX900-NEXT: s_lshr_b32 s4, s4, 16 23038; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s5 23039; GFX900-NEXT: ;;#ASMSTART 23040; GFX900-NEXT: ; use s8 23041; GFX900-NEXT: ;;#ASMEND 23042; GFX900-NEXT: s_setpc_b64 s[30:31] 23043; 23044; GFX90A-LABEL: s_shuffle_v2i16_v8i16__9_10: 23045; GFX90A: ; %bb.0: 23046; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 23047; GFX90A-NEXT: ;;#ASMSTART 23048; GFX90A-NEXT: ; def s[4:7] 23049; GFX90A-NEXT: ;;#ASMEND 23050; GFX90A-NEXT: s_lshr_b32 s4, s4, 16 23051; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s5 23052; GFX90A-NEXT: ;;#ASMSTART 23053; GFX90A-NEXT: ; use s8 23054; GFX90A-NEXT: ;;#ASMEND 23055; GFX90A-NEXT: s_setpc_b64 s[30:31] 23056; 23057; GFX940-LABEL: s_shuffle_v2i16_v8i16__9_10: 23058; GFX940: ; %bb.0: 23059; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 23060; GFX940-NEXT: ;;#ASMSTART 23061; GFX940-NEXT: ; def s[0:3] 23062; GFX940-NEXT: ;;#ASMEND 23063; GFX940-NEXT: s_lshr_b32 s0, 
s0, 16 23064; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 23065; GFX940-NEXT: ;;#ASMSTART 23066; GFX940-NEXT: ; use s8 23067; GFX940-NEXT: ;;#ASMEND 23068; GFX940-NEXT: s_setpc_b64 s[30:31] 23069 %vec0 = call <8 x i16> asm "; def $0", "=s"() 23070 %vec1 = call <8 x i16> asm "; def $0", "=s"() 23071 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 9, i32 10> 23072 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 23073 ret void 23074} 23075 23076define void @s_shuffle_v2i16_v8i16__10_10() { 23077; GFX900-LABEL: s_shuffle_v2i16_v8i16__10_10: 23078; GFX900: ; %bb.0: 23079; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 23080; GFX900-NEXT: ;;#ASMSTART 23081; GFX900-NEXT: ; def s[4:7] 23082; GFX900-NEXT: ;;#ASMEND 23083; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s5 23084; GFX900-NEXT: ;;#ASMSTART 23085; GFX900-NEXT: ; use s8 23086; GFX900-NEXT: ;;#ASMEND 23087; GFX900-NEXT: s_setpc_b64 s[30:31] 23088; 23089; GFX90A-LABEL: s_shuffle_v2i16_v8i16__10_10: 23090; GFX90A: ; %bb.0: 23091; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 23092; GFX90A-NEXT: ;;#ASMSTART 23093; GFX90A-NEXT: ; def s[4:7] 23094; GFX90A-NEXT: ;;#ASMEND 23095; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s5 23096; GFX90A-NEXT: ;;#ASMSTART 23097; GFX90A-NEXT: ; use s8 23098; GFX90A-NEXT: ;;#ASMEND 23099; GFX90A-NEXT: s_setpc_b64 s[30:31] 23100; 23101; GFX940-LABEL: s_shuffle_v2i16_v8i16__10_10: 23102; GFX940: ; %bb.0: 23103; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 23104; GFX940-NEXT: ;;#ASMSTART 23105; GFX940-NEXT: ; def s[0:3] 23106; GFX940-NEXT: ;;#ASMEND 23107; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 23108; GFX940-NEXT: ;;#ASMSTART 23109; GFX940-NEXT: ; use s8 23110; GFX940-NEXT: ;;#ASMEND 23111; GFX940-NEXT: s_setpc_b64 s[30:31] 23112 %vec0 = call <8 x i16> asm "; def $0", "=s"() 23113 %vec1 = call <8 x i16> asm "; def $0", "=s"() 23114 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 10, i32 10> 23115 call void asm sideeffect "; 
use $0", "{s8}"(<2 x i16> %shuf) 23116 ret void 23117} 23118 23119define void @s_shuffle_v2i16_v8i16__11_10() { 23120; GFX900-LABEL: s_shuffle_v2i16_v8i16__11_10: 23121; GFX900: ; %bb.0: 23122; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 23123; GFX900-NEXT: ;;#ASMSTART 23124; GFX900-NEXT: ; def s[4:7] 23125; GFX900-NEXT: ;;#ASMEND 23126; GFX900-NEXT: s_lshr_b32 s4, s5, 16 23127; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s5 23128; GFX900-NEXT: ;;#ASMSTART 23129; GFX900-NEXT: ; use s8 23130; GFX900-NEXT: ;;#ASMEND 23131; GFX900-NEXT: s_setpc_b64 s[30:31] 23132; 23133; GFX90A-LABEL: s_shuffle_v2i16_v8i16__11_10: 23134; GFX90A: ; %bb.0: 23135; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 23136; GFX90A-NEXT: ;;#ASMSTART 23137; GFX90A-NEXT: ; def s[4:7] 23138; GFX90A-NEXT: ;;#ASMEND 23139; GFX90A-NEXT: s_lshr_b32 s4, s5, 16 23140; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s5 23141; GFX90A-NEXT: ;;#ASMSTART 23142; GFX90A-NEXT: ; use s8 23143; GFX90A-NEXT: ;;#ASMEND 23144; GFX90A-NEXT: s_setpc_b64 s[30:31] 23145; 23146; GFX940-LABEL: s_shuffle_v2i16_v8i16__11_10: 23147; GFX940: ; %bb.0: 23148; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 23149; GFX940-NEXT: ;;#ASMSTART 23150; GFX940-NEXT: ; def s[0:3] 23151; GFX940-NEXT: ;;#ASMEND 23152; GFX940-NEXT: s_lshr_b32 s0, s1, 16 23153; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 23154; GFX940-NEXT: ;;#ASMSTART 23155; GFX940-NEXT: ; use s8 23156; GFX940-NEXT: ;;#ASMEND 23157; GFX940-NEXT: s_setpc_b64 s[30:31] 23158 %vec0 = call <8 x i16> asm "; def $0", "=s"() 23159 %vec1 = call <8 x i16> asm "; def $0", "=s"() 23160 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 11, i32 10> 23161 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 23162 ret void 23163} 23164 23165define void @s_shuffle_v2i16_v8i16__12_10() { 23166; GFX900-LABEL: s_shuffle_v2i16_v8i16__12_10: 23167; GFX900: ; %bb.0: 23168; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 23169; GFX900-NEXT: ;;#ASMSTART 23170; 
GFX900-NEXT: ; def s[4:7] 23171; GFX900-NEXT: ;;#ASMEND 23172; GFX900-NEXT: s_pack_ll_b32_b16 s8, s6, s5 23173; GFX900-NEXT: ;;#ASMSTART 23174; GFX900-NEXT: ; use s8 23175; GFX900-NEXT: ;;#ASMEND 23176; GFX900-NEXT: s_setpc_b64 s[30:31] 23177; 23178; GFX90A-LABEL: s_shuffle_v2i16_v8i16__12_10: 23179; GFX90A: ; %bb.0: 23180; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 23181; GFX90A-NEXT: ;;#ASMSTART 23182; GFX90A-NEXT: ; def s[4:7] 23183; GFX90A-NEXT: ;;#ASMEND 23184; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s6, s5 23185; GFX90A-NEXT: ;;#ASMSTART 23186; GFX90A-NEXT: ; use s8 23187; GFX90A-NEXT: ;;#ASMEND 23188; GFX90A-NEXT: s_setpc_b64 s[30:31] 23189; 23190; GFX940-LABEL: s_shuffle_v2i16_v8i16__12_10: 23191; GFX940: ; %bb.0: 23192; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 23193; GFX940-NEXT: ;;#ASMSTART 23194; GFX940-NEXT: ; def s[0:3] 23195; GFX940-NEXT: ;;#ASMEND 23196; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s1 23197; GFX940-NEXT: ;;#ASMSTART 23198; GFX940-NEXT: ; use s8 23199; GFX940-NEXT: ;;#ASMEND 23200; GFX940-NEXT: s_setpc_b64 s[30:31] 23201 %vec0 = call <8 x i16> asm "; def $0", "=s"() 23202 %vec1 = call <8 x i16> asm "; def $0", "=s"() 23203 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 12, i32 10> 23204 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 23205 ret void 23206} 23207 23208define void @s_shuffle_v2i16_v8i16__13_10() { 23209; GFX900-LABEL: s_shuffle_v2i16_v8i16__13_10: 23210; GFX900: ; %bb.0: 23211; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 23212; GFX900-NEXT: ;;#ASMSTART 23213; GFX900-NEXT: ; def s[4:7] 23214; GFX900-NEXT: ;;#ASMEND 23215; GFX900-NEXT: s_lshr_b32 s4, s6, 16 23216; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s5 23217; GFX900-NEXT: ;;#ASMSTART 23218; GFX900-NEXT: ; use s8 23219; GFX900-NEXT: ;;#ASMEND 23220; GFX900-NEXT: s_setpc_b64 s[30:31] 23221; 23222; GFX90A-LABEL: s_shuffle_v2i16_v8i16__13_10: 23223; GFX90A: ; %bb.0: 23224; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) 23225; GFX90A-NEXT: ;;#ASMSTART 23226; GFX90A-NEXT: ; def s[4:7] 23227; GFX90A-NEXT: ;;#ASMEND 23228; GFX90A-NEXT: s_lshr_b32 s4, s6, 16 23229; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s5 23230; GFX90A-NEXT: ;;#ASMSTART 23231; GFX90A-NEXT: ; use s8 23232; GFX90A-NEXT: ;;#ASMEND 23233; GFX90A-NEXT: s_setpc_b64 s[30:31] 23234; 23235; GFX940-LABEL: s_shuffle_v2i16_v8i16__13_10: 23236; GFX940: ; %bb.0: 23237; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 23238; GFX940-NEXT: ;;#ASMSTART 23239; GFX940-NEXT: ; def s[0:3] 23240; GFX940-NEXT: ;;#ASMEND 23241; GFX940-NEXT: s_lshr_b32 s0, s2, 16 23242; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 23243; GFX940-NEXT: ;;#ASMSTART 23244; GFX940-NEXT: ; use s8 23245; GFX940-NEXT: ;;#ASMEND 23246; GFX940-NEXT: s_setpc_b64 s[30:31] 23247 %vec0 = call <8 x i16> asm "; def $0", "=s"() 23248 %vec1 = call <8 x i16> asm "; def $0", "=s"() 23249 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 13, i32 10> 23250 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 23251 ret void 23252} 23253 23254define void @s_shuffle_v2i16_v8i16__14_10() { 23255; GFX900-LABEL: s_shuffle_v2i16_v8i16__14_10: 23256; GFX900: ; %bb.0: 23257; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 23258; GFX900-NEXT: ;;#ASMSTART 23259; GFX900-NEXT: ; def s[4:7] 23260; GFX900-NEXT: ;;#ASMEND 23261; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s5 23262; GFX900-NEXT: ;;#ASMSTART 23263; GFX900-NEXT: ; use s8 23264; GFX900-NEXT: ;;#ASMEND 23265; GFX900-NEXT: s_setpc_b64 s[30:31] 23266; 23267; GFX90A-LABEL: s_shuffle_v2i16_v8i16__14_10: 23268; GFX90A: ; %bb.0: 23269; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 23270; GFX90A-NEXT: ;;#ASMSTART 23271; GFX90A-NEXT: ; def s[4:7] 23272; GFX90A-NEXT: ;;#ASMEND 23273; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s5 23274; GFX90A-NEXT: ;;#ASMSTART 23275; GFX90A-NEXT: ; use s8 23276; GFX90A-NEXT: ;;#ASMEND 23277; GFX90A-NEXT: s_setpc_b64 s[30:31] 23278; 23279; GFX940-LABEL: 
s_shuffle_v2i16_v8i16__14_10: 23280; GFX940: ; %bb.0: 23281; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 23282; GFX940-NEXT: ;;#ASMSTART 23283; GFX940-NEXT: ; def s[0:3] 23284; GFX940-NEXT: ;;#ASMEND 23285; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s1 23286; GFX940-NEXT: ;;#ASMSTART 23287; GFX940-NEXT: ; use s8 23288; GFX940-NEXT: ;;#ASMEND 23289; GFX940-NEXT: s_setpc_b64 s[30:31] 23290 %vec0 = call <8 x i16> asm "; def $0", "=s"() 23291 %vec1 = call <8 x i16> asm "; def $0", "=s"() 23292 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 14, i32 10> 23293 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 23294 ret void 23295} 23296 23297define void @s_shuffle_v2i16_v8i16__u_11() { 23298; GFX900-LABEL: s_shuffle_v2i16_v8i16__u_11: 23299; GFX900: ; %bb.0: 23300; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 23301; GFX900-NEXT: ;;#ASMSTART 23302; GFX900-NEXT: ; def s[4:7] 23303; GFX900-NEXT: ;;#ASMEND 23304; GFX900-NEXT: s_mov_b32 s8, s5 23305; GFX900-NEXT: ;;#ASMSTART 23306; GFX900-NEXT: ; use s8 23307; GFX900-NEXT: ;;#ASMEND 23308; GFX900-NEXT: s_setpc_b64 s[30:31] 23309; 23310; GFX90A-LABEL: s_shuffle_v2i16_v8i16__u_11: 23311; GFX90A: ; %bb.0: 23312; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 23313; GFX90A-NEXT: ;;#ASMSTART 23314; GFX90A-NEXT: ; def s[4:7] 23315; GFX90A-NEXT: ;;#ASMEND 23316; GFX90A-NEXT: s_mov_b32 s8, s5 23317; GFX90A-NEXT: ;;#ASMSTART 23318; GFX90A-NEXT: ; use s8 23319; GFX90A-NEXT: ;;#ASMEND 23320; GFX90A-NEXT: s_setpc_b64 s[30:31] 23321; 23322; GFX940-LABEL: s_shuffle_v2i16_v8i16__u_11: 23323; GFX940: ; %bb.0: 23324; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 23325; GFX940-NEXT: ;;#ASMSTART 23326; GFX940-NEXT: ; def s[0:3] 23327; GFX940-NEXT: ;;#ASMEND 23328; GFX940-NEXT: s_mov_b32 s8, s1 23329; GFX940-NEXT: ;;#ASMSTART 23330; GFX940-NEXT: ; use s8 23331; GFX940-NEXT: ;;#ASMEND 23332; GFX940-NEXT: s_setpc_b64 s[30:31] 23333 %vec0 = call <8 x i16> asm "; def $0", "=s"() 23334 %vec1 
= call <8 x i16> asm "; def $0", "=s"() 23335 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 poison, i32 11> 23336 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 23337 ret void 23338} 23339 23340define void @s_shuffle_v2i16_v8i16__0_11() { 23341; GFX900-LABEL: s_shuffle_v2i16_v8i16__0_11: 23342; GFX900: ; %bb.0: 23343; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 23344; GFX900-NEXT: ;;#ASMSTART 23345; GFX900-NEXT: ; def s[8:11] 23346; GFX900-NEXT: ;;#ASMEND 23347; GFX900-NEXT: ;;#ASMSTART 23348; GFX900-NEXT: ; def s[4:7] 23349; GFX900-NEXT: ;;#ASMEND 23350; GFX900-NEXT: s_pack_lh_b32_b16 s8, s4, s9 23351; GFX900-NEXT: ;;#ASMSTART 23352; GFX900-NEXT: ; use s8 23353; GFX900-NEXT: ;;#ASMEND 23354; GFX900-NEXT: s_setpc_b64 s[30:31] 23355; 23356; GFX90A-LABEL: s_shuffle_v2i16_v8i16__0_11: 23357; GFX90A: ; %bb.0: 23358; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 23359; GFX90A-NEXT: ;;#ASMSTART 23360; GFX90A-NEXT: ; def s[8:11] 23361; GFX90A-NEXT: ;;#ASMEND 23362; GFX90A-NEXT: ;;#ASMSTART 23363; GFX90A-NEXT: ; def s[4:7] 23364; GFX90A-NEXT: ;;#ASMEND 23365; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s4, s9 23366; GFX90A-NEXT: ;;#ASMSTART 23367; GFX90A-NEXT: ; use s8 23368; GFX90A-NEXT: ;;#ASMEND 23369; GFX90A-NEXT: s_setpc_b64 s[30:31] 23370; 23371; GFX940-LABEL: s_shuffle_v2i16_v8i16__0_11: 23372; GFX940: ; %bb.0: 23373; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 23374; GFX940-NEXT: ;;#ASMSTART 23375; GFX940-NEXT: ; def s[0:3] 23376; GFX940-NEXT: ;;#ASMEND 23377; GFX940-NEXT: ;;#ASMSTART 23378; GFX940-NEXT: ; def s[4:7] 23379; GFX940-NEXT: ;;#ASMEND 23380; GFX940-NEXT: s_pack_lh_b32_b16 s8, s0, s5 23381; GFX940-NEXT: ;;#ASMSTART 23382; GFX940-NEXT: ; use s8 23383; GFX940-NEXT: ;;#ASMEND 23384; GFX940-NEXT: s_setpc_b64 s[30:31] 23385 %vec0 = call <8 x i16> asm "; def $0", "=s"() 23386 %vec1 = call <8 x i16> asm "; def $0", "=s"() 23387 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 0, i32 
11> 23388 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 23389 ret void 23390} 23391 23392define void @s_shuffle_v2i16_v8i16__1_11() { 23393; GFX900-LABEL: s_shuffle_v2i16_v8i16__1_11: 23394; GFX900: ; %bb.0: 23395; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 23396; GFX900-NEXT: ;;#ASMSTART 23397; GFX900-NEXT: ; def s[8:11] 23398; GFX900-NEXT: ;;#ASMEND 23399; GFX900-NEXT: ;;#ASMSTART 23400; GFX900-NEXT: ; def s[4:7] 23401; GFX900-NEXT: ;;#ASMEND 23402; GFX900-NEXT: s_pack_hh_b32_b16 s8, s4, s9 23403; GFX900-NEXT: ;;#ASMSTART 23404; GFX900-NEXT: ; use s8 23405; GFX900-NEXT: ;;#ASMEND 23406; GFX900-NEXT: s_setpc_b64 s[30:31] 23407; 23408; GFX90A-LABEL: s_shuffle_v2i16_v8i16__1_11: 23409; GFX90A: ; %bb.0: 23410; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 23411; GFX90A-NEXT: ;;#ASMSTART 23412; GFX90A-NEXT: ; def s[8:11] 23413; GFX90A-NEXT: ;;#ASMEND 23414; GFX90A-NEXT: ;;#ASMSTART 23415; GFX90A-NEXT: ; def s[4:7] 23416; GFX90A-NEXT: ;;#ASMEND 23417; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s4, s9 23418; GFX90A-NEXT: ;;#ASMSTART 23419; GFX90A-NEXT: ; use s8 23420; GFX90A-NEXT: ;;#ASMEND 23421; GFX90A-NEXT: s_setpc_b64 s[30:31] 23422; 23423; GFX940-LABEL: s_shuffle_v2i16_v8i16__1_11: 23424; GFX940: ; %bb.0: 23425; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 23426; GFX940-NEXT: ;;#ASMSTART 23427; GFX940-NEXT: ; def s[0:3] 23428; GFX940-NEXT: ;;#ASMEND 23429; GFX940-NEXT: ;;#ASMSTART 23430; GFX940-NEXT: ; def s[4:7] 23431; GFX940-NEXT: ;;#ASMEND 23432; GFX940-NEXT: s_pack_hh_b32_b16 s8, s0, s5 23433; GFX940-NEXT: ;;#ASMSTART 23434; GFX940-NEXT: ; use s8 23435; GFX940-NEXT: ;;#ASMEND 23436; GFX940-NEXT: s_setpc_b64 s[30:31] 23437 %vec0 = call <8 x i16> asm "; def $0", "=s"() 23438 %vec1 = call <8 x i16> asm "; def $0", "=s"() 23439 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 1, i32 11> 23440 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 23441 ret void 23442} 23443 23444define void 
@s_shuffle_v2i16_v8i16__2_11() { 23445; GFX900-LABEL: s_shuffle_v2i16_v8i16__2_11: 23446; GFX900: ; %bb.0: 23447; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 23448; GFX900-NEXT: ;;#ASMSTART 23449; GFX900-NEXT: ; def s[8:11] 23450; GFX900-NEXT: ;;#ASMEND 23451; GFX900-NEXT: ;;#ASMSTART 23452; GFX900-NEXT: ; def s[4:7] 23453; GFX900-NEXT: ;;#ASMEND 23454; GFX900-NEXT: s_pack_lh_b32_b16 s8, s5, s9 23455; GFX900-NEXT: ;;#ASMSTART 23456; GFX900-NEXT: ; use s8 23457; GFX900-NEXT: ;;#ASMEND 23458; GFX900-NEXT: s_setpc_b64 s[30:31] 23459; 23460; GFX90A-LABEL: s_shuffle_v2i16_v8i16__2_11: 23461; GFX90A: ; %bb.0: 23462; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 23463; GFX90A-NEXT: ;;#ASMSTART 23464; GFX90A-NEXT: ; def s[8:11] 23465; GFX90A-NEXT: ;;#ASMEND 23466; GFX90A-NEXT: ;;#ASMSTART 23467; GFX90A-NEXT: ; def s[4:7] 23468; GFX90A-NEXT: ;;#ASMEND 23469; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s5, s9 23470; GFX90A-NEXT: ;;#ASMSTART 23471; GFX90A-NEXT: ; use s8 23472; GFX90A-NEXT: ;;#ASMEND 23473; GFX90A-NEXT: s_setpc_b64 s[30:31] 23474; 23475; GFX940-LABEL: s_shuffle_v2i16_v8i16__2_11: 23476; GFX940: ; %bb.0: 23477; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 23478; GFX940-NEXT: ;;#ASMSTART 23479; GFX940-NEXT: ; def s[0:3] 23480; GFX940-NEXT: ;;#ASMEND 23481; GFX940-NEXT: ;;#ASMSTART 23482; GFX940-NEXT: ; def s[4:7] 23483; GFX940-NEXT: ;;#ASMEND 23484; GFX940-NEXT: s_pack_lh_b32_b16 s8, s1, s5 23485; GFX940-NEXT: ;;#ASMSTART 23486; GFX940-NEXT: ; use s8 23487; GFX940-NEXT: ;;#ASMEND 23488; GFX940-NEXT: s_setpc_b64 s[30:31] 23489 %vec0 = call <8 x i16> asm "; def $0", "=s"() 23490 %vec1 = call <8 x i16> asm "; def $0", "=s"() 23491 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 2, i32 11> 23492 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 23493 ret void 23494} 23495 23496define void @s_shuffle_v2i16_v8i16__3_11() { 23497; GFX900-LABEL: s_shuffle_v2i16_v8i16__3_11: 23498; GFX900: ; %bb.0: 23499; GFX900-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 23500; GFX900-NEXT: ;;#ASMSTART 23501; GFX900-NEXT: ; def s[8:11] 23502; GFX900-NEXT: ;;#ASMEND 23503; GFX900-NEXT: ;;#ASMSTART 23504; GFX900-NEXT: ; def s[4:7] 23505; GFX900-NEXT: ;;#ASMEND 23506; GFX900-NEXT: s_pack_hh_b32_b16 s8, s5, s9 23507; GFX900-NEXT: ;;#ASMSTART 23508; GFX900-NEXT: ; use s8 23509; GFX900-NEXT: ;;#ASMEND 23510; GFX900-NEXT: s_setpc_b64 s[30:31] 23511; 23512; GFX90A-LABEL: s_shuffle_v2i16_v8i16__3_11: 23513; GFX90A: ; %bb.0: 23514; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 23515; GFX90A-NEXT: ;;#ASMSTART 23516; GFX90A-NEXT: ; def s[8:11] 23517; GFX90A-NEXT: ;;#ASMEND 23518; GFX90A-NEXT: ;;#ASMSTART 23519; GFX90A-NEXT: ; def s[4:7] 23520; GFX90A-NEXT: ;;#ASMEND 23521; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s5, s9 23522; GFX90A-NEXT: ;;#ASMSTART 23523; GFX90A-NEXT: ; use s8 23524; GFX90A-NEXT: ;;#ASMEND 23525; GFX90A-NEXT: s_setpc_b64 s[30:31] 23526; 23527; GFX940-LABEL: s_shuffle_v2i16_v8i16__3_11: 23528; GFX940: ; %bb.0: 23529; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 23530; GFX940-NEXT: ;;#ASMSTART 23531; GFX940-NEXT: ; def s[0:3] 23532; GFX940-NEXT: ;;#ASMEND 23533; GFX940-NEXT: ;;#ASMSTART 23534; GFX940-NEXT: ; def s[4:7] 23535; GFX940-NEXT: ;;#ASMEND 23536; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s5 23537; GFX940-NEXT: ;;#ASMSTART 23538; GFX940-NEXT: ; use s8 23539; GFX940-NEXT: ;;#ASMEND 23540; GFX940-NEXT: s_setpc_b64 s[30:31] 23541 %vec0 = call <8 x i16> asm "; def $0", "=s"() 23542 %vec1 = call <8 x i16> asm "; def $0", "=s"() 23543 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 3, i32 11> 23544 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 23545 ret void 23546} 23547 23548define void @s_shuffle_v2i16_v8i16__4_11() { 23549; GFX900-LABEL: s_shuffle_v2i16_v8i16__4_11: 23550; GFX900: ; %bb.0: 23551; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 23552; GFX900-NEXT: ;;#ASMSTART 23553; GFX900-NEXT: ; def s[8:11] 23554; GFX900-NEXT: 
;;#ASMEND 23555; GFX900-NEXT: ;;#ASMSTART 23556; GFX900-NEXT: ; def s[4:7] 23557; GFX900-NEXT: ;;#ASMEND 23558; GFX900-NEXT: s_pack_lh_b32_b16 s8, s6, s9 23559; GFX900-NEXT: ;;#ASMSTART 23560; GFX900-NEXT: ; use s8 23561; GFX900-NEXT: ;;#ASMEND 23562; GFX900-NEXT: s_setpc_b64 s[30:31] 23563; 23564; GFX90A-LABEL: s_shuffle_v2i16_v8i16__4_11: 23565; GFX90A: ; %bb.0: 23566; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 23567; GFX90A-NEXT: ;;#ASMSTART 23568; GFX90A-NEXT: ; def s[8:11] 23569; GFX90A-NEXT: ;;#ASMEND 23570; GFX90A-NEXT: ;;#ASMSTART 23571; GFX90A-NEXT: ; def s[4:7] 23572; GFX90A-NEXT: ;;#ASMEND 23573; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s6, s9 23574; GFX90A-NEXT: ;;#ASMSTART 23575; GFX90A-NEXT: ; use s8 23576; GFX90A-NEXT: ;;#ASMEND 23577; GFX90A-NEXT: s_setpc_b64 s[30:31] 23578; 23579; GFX940-LABEL: s_shuffle_v2i16_v8i16__4_11: 23580; GFX940: ; %bb.0: 23581; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 23582; GFX940-NEXT: ;;#ASMSTART 23583; GFX940-NEXT: ; def s[0:3] 23584; GFX940-NEXT: ;;#ASMEND 23585; GFX940-NEXT: ;;#ASMSTART 23586; GFX940-NEXT: ; def s[4:7] 23587; GFX940-NEXT: ;;#ASMEND 23588; GFX940-NEXT: s_pack_lh_b32_b16 s8, s2, s5 23589; GFX940-NEXT: ;;#ASMSTART 23590; GFX940-NEXT: ; use s8 23591; GFX940-NEXT: ;;#ASMEND 23592; GFX940-NEXT: s_setpc_b64 s[30:31] 23593 %vec0 = call <8 x i16> asm "; def $0", "=s"() 23594 %vec1 = call <8 x i16> asm "; def $0", "=s"() 23595 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 4, i32 11> 23596 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 23597 ret void 23598} 23599 23600define void @s_shuffle_v2i16_v8i16__5_11() { 23601; GFX900-LABEL: s_shuffle_v2i16_v8i16__5_11: 23602; GFX900: ; %bb.0: 23603; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 23604; GFX900-NEXT: ;;#ASMSTART 23605; GFX900-NEXT: ; def s[8:11] 23606; GFX900-NEXT: ;;#ASMEND 23607; GFX900-NEXT: ;;#ASMSTART 23608; GFX900-NEXT: ; def s[4:7] 23609; GFX900-NEXT: ;;#ASMEND 23610; GFX900-NEXT: 
s_pack_hh_b32_b16 s8, s6, s9 23611; GFX900-NEXT: ;;#ASMSTART 23612; GFX900-NEXT: ; use s8 23613; GFX900-NEXT: ;;#ASMEND 23614; GFX900-NEXT: s_setpc_b64 s[30:31] 23615; 23616; GFX90A-LABEL: s_shuffle_v2i16_v8i16__5_11: 23617; GFX90A: ; %bb.0: 23618; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 23619; GFX90A-NEXT: ;;#ASMSTART 23620; GFX90A-NEXT: ; def s[8:11] 23621; GFX90A-NEXT: ;;#ASMEND 23622; GFX90A-NEXT: ;;#ASMSTART 23623; GFX90A-NEXT: ; def s[4:7] 23624; GFX90A-NEXT: ;;#ASMEND 23625; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s6, s9 23626; GFX90A-NEXT: ;;#ASMSTART 23627; GFX90A-NEXT: ; use s8 23628; GFX90A-NEXT: ;;#ASMEND 23629; GFX90A-NEXT: s_setpc_b64 s[30:31] 23630; 23631; GFX940-LABEL: s_shuffle_v2i16_v8i16__5_11: 23632; GFX940: ; %bb.0: 23633; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 23634; GFX940-NEXT: ;;#ASMSTART 23635; GFX940-NEXT: ; def s[0:3] 23636; GFX940-NEXT: ;;#ASMEND 23637; GFX940-NEXT: ;;#ASMSTART 23638; GFX940-NEXT: ; def s[4:7] 23639; GFX940-NEXT: ;;#ASMEND 23640; GFX940-NEXT: s_pack_hh_b32_b16 s8, s2, s5 23641; GFX940-NEXT: ;;#ASMSTART 23642; GFX940-NEXT: ; use s8 23643; GFX940-NEXT: ;;#ASMEND 23644; GFX940-NEXT: s_setpc_b64 s[30:31] 23645 %vec0 = call <8 x i16> asm "; def $0", "=s"() 23646 %vec1 = call <8 x i16> asm "; def $0", "=s"() 23647 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 5, i32 11> 23648 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 23649 ret void 23650} 23651 23652define void @s_shuffle_v2i16_v8i16__6_11() { 23653; GFX900-LABEL: s_shuffle_v2i16_v8i16__6_11: 23654; GFX900: ; %bb.0: 23655; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 23656; GFX900-NEXT: ;;#ASMSTART 23657; GFX900-NEXT: ; def s[8:11] 23658; GFX900-NEXT: ;;#ASMEND 23659; GFX900-NEXT: ;;#ASMSTART 23660; GFX900-NEXT: ; def s[4:7] 23661; GFX900-NEXT: ;;#ASMEND 23662; GFX900-NEXT: s_pack_lh_b32_b16 s8, s7, s9 23663; GFX900-NEXT: ;;#ASMSTART 23664; GFX900-NEXT: ; use s8 23665; GFX900-NEXT: ;;#ASMEND 23666; 
GFX900-NEXT: s_setpc_b64 s[30:31] 23667; 23668; GFX90A-LABEL: s_shuffle_v2i16_v8i16__6_11: 23669; GFX90A: ; %bb.0: 23670; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 23671; GFX90A-NEXT: ;;#ASMSTART 23672; GFX90A-NEXT: ; def s[8:11] 23673; GFX90A-NEXT: ;;#ASMEND 23674; GFX90A-NEXT: ;;#ASMSTART 23675; GFX90A-NEXT: ; def s[4:7] 23676; GFX90A-NEXT: ;;#ASMEND 23677; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s7, s9 23678; GFX90A-NEXT: ;;#ASMSTART 23679; GFX90A-NEXT: ; use s8 23680; GFX90A-NEXT: ;;#ASMEND 23681; GFX90A-NEXT: s_setpc_b64 s[30:31] 23682; 23683; GFX940-LABEL: s_shuffle_v2i16_v8i16__6_11: 23684; GFX940: ; %bb.0: 23685; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 23686; GFX940-NEXT: ;;#ASMSTART 23687; GFX940-NEXT: ; def s[0:3] 23688; GFX940-NEXT: ;;#ASMEND 23689; GFX940-NEXT: ;;#ASMSTART 23690; GFX940-NEXT: ; def s[4:7] 23691; GFX940-NEXT: ;;#ASMEND 23692; GFX940-NEXT: s_pack_lh_b32_b16 s8, s3, s5 23693; GFX940-NEXT: ;;#ASMSTART 23694; GFX940-NEXT: ; use s8 23695; GFX940-NEXT: ;;#ASMEND 23696; GFX940-NEXT: s_setpc_b64 s[30:31] 23697 %vec0 = call <8 x i16> asm "; def $0", "=s"() 23698 %vec1 = call <8 x i16> asm "; def $0", "=s"() 23699 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 6, i32 11> 23700 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 23701 ret void 23702} 23703 23704define void @s_shuffle_v2i16_v8i16__7_11() { 23705; GFX900-LABEL: s_shuffle_v2i16_v8i16__7_11: 23706; GFX900: ; %bb.0: 23707; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 23708; GFX900-NEXT: ;;#ASMSTART 23709; GFX900-NEXT: ; def s[8:11] 23710; GFX900-NEXT: ;;#ASMEND 23711; GFX900-NEXT: ;;#ASMSTART 23712; GFX900-NEXT: ; def s[4:7] 23713; GFX900-NEXT: ;;#ASMEND 23714; GFX900-NEXT: s_pack_hh_b32_b16 s8, s7, s9 23715; GFX900-NEXT: ;;#ASMSTART 23716; GFX900-NEXT: ; use s8 23717; GFX900-NEXT: ;;#ASMEND 23718; GFX900-NEXT: s_setpc_b64 s[30:31] 23719; 23720; GFX90A-LABEL: s_shuffle_v2i16_v8i16__7_11: 23721; GFX90A: ; %bb.0: 23722; 
GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 23723; GFX90A-NEXT: ;;#ASMSTART 23724; GFX90A-NEXT: ; def s[8:11] 23725; GFX90A-NEXT: ;;#ASMEND 23726; GFX90A-NEXT: ;;#ASMSTART 23727; GFX90A-NEXT: ; def s[4:7] 23728; GFX90A-NEXT: ;;#ASMEND 23729; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s7, s9 23730; GFX90A-NEXT: ;;#ASMSTART 23731; GFX90A-NEXT: ; use s8 23732; GFX90A-NEXT: ;;#ASMEND 23733; GFX90A-NEXT: s_setpc_b64 s[30:31] 23734; 23735; GFX940-LABEL: s_shuffle_v2i16_v8i16__7_11: 23736; GFX940: ; %bb.0: 23737; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 23738; GFX940-NEXT: ;;#ASMSTART 23739; GFX940-NEXT: ; def s[0:3] 23740; GFX940-NEXT: ;;#ASMEND 23741; GFX940-NEXT: ;;#ASMSTART 23742; GFX940-NEXT: ; def s[4:7] 23743; GFX940-NEXT: ;;#ASMEND 23744; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s5 23745; GFX940-NEXT: ;;#ASMSTART 23746; GFX940-NEXT: ; use s8 23747; GFX940-NEXT: ;;#ASMEND 23748; GFX940-NEXT: s_setpc_b64 s[30:31] 23749 %vec0 = call <8 x i16> asm "; def $0", "=s"() 23750 %vec1 = call <8 x i16> asm "; def $0", "=s"() 23751 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 7, i32 11> 23752 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 23753 ret void 23754} 23755 23756define void @s_shuffle_v2i16_v8i16__8_11() { 23757; GFX900-LABEL: s_shuffle_v2i16_v8i16__8_11: 23758; GFX900: ; %bb.0: 23759; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 23760; GFX900-NEXT: ;;#ASMSTART 23761; GFX900-NEXT: ; def s[4:7] 23762; GFX900-NEXT: ;;#ASMEND 23763; GFX900-NEXT: s_pack_lh_b32_b16 s8, s4, s5 23764; GFX900-NEXT: ;;#ASMSTART 23765; GFX900-NEXT: ; use s8 23766; GFX900-NEXT: ;;#ASMEND 23767; GFX900-NEXT: s_setpc_b64 s[30:31] 23768; 23769; GFX90A-LABEL: s_shuffle_v2i16_v8i16__8_11: 23770; GFX90A: ; %bb.0: 23771; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 23772; GFX90A-NEXT: ;;#ASMSTART 23773; GFX90A-NEXT: ; def s[4:7] 23774; GFX90A-NEXT: ;;#ASMEND 23775; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s4, s5 23776; GFX90A-NEXT: 
;;#ASMSTART 23777; GFX90A-NEXT: ; use s8 23778; GFX90A-NEXT: ;;#ASMEND 23779; GFX90A-NEXT: s_setpc_b64 s[30:31] 23780; 23781; GFX940-LABEL: s_shuffle_v2i16_v8i16__8_11: 23782; GFX940: ; %bb.0: 23783; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 23784; GFX940-NEXT: ;;#ASMSTART 23785; GFX940-NEXT: ; def s[0:3] 23786; GFX940-NEXT: ;;#ASMEND 23787; GFX940-NEXT: s_pack_lh_b32_b16 s8, s0, s1 23788; GFX940-NEXT: ;;#ASMSTART 23789; GFX940-NEXT: ; use s8 23790; GFX940-NEXT: ;;#ASMEND 23791; GFX940-NEXT: s_setpc_b64 s[30:31] 23792 %vec0 = call <8 x i16> asm "; def $0", "=s"() 23793 %vec1 = call <8 x i16> asm "; def $0", "=s"() 23794 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 8, i32 11> 23795 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 23796 ret void 23797} 23798 23799define void @s_shuffle_v2i16_v8i16__9_11() { 23800; GFX900-LABEL: s_shuffle_v2i16_v8i16__9_11: 23801; GFX900: ; %bb.0: 23802; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 23803; GFX900-NEXT: ;;#ASMSTART 23804; GFX900-NEXT: ; def s[4:7] 23805; GFX900-NEXT: ;;#ASMEND 23806; GFX900-NEXT: s_pack_hh_b32_b16 s8, s4, s5 23807; GFX900-NEXT: ;;#ASMSTART 23808; GFX900-NEXT: ; use s8 23809; GFX900-NEXT: ;;#ASMEND 23810; GFX900-NEXT: s_setpc_b64 s[30:31] 23811; 23812; GFX90A-LABEL: s_shuffle_v2i16_v8i16__9_11: 23813; GFX90A: ; %bb.0: 23814; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 23815; GFX90A-NEXT: ;;#ASMSTART 23816; GFX90A-NEXT: ; def s[4:7] 23817; GFX90A-NEXT: ;;#ASMEND 23818; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s4, s5 23819; GFX90A-NEXT: ;;#ASMSTART 23820; GFX90A-NEXT: ; use s8 23821; GFX90A-NEXT: ;;#ASMEND 23822; GFX90A-NEXT: s_setpc_b64 s[30:31] 23823; 23824; GFX940-LABEL: s_shuffle_v2i16_v8i16__9_11: 23825; GFX940: ; %bb.0: 23826; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 23827; GFX940-NEXT: ;;#ASMSTART 23828; GFX940-NEXT: ; def s[0:3] 23829; GFX940-NEXT: ;;#ASMEND 23830; GFX940-NEXT: s_pack_hh_b32_b16 s8, s0, s1 23831; GFX940-NEXT: 
;;#ASMSTART 23832; GFX940-NEXT: ; use s8 23833; GFX940-NEXT: ;;#ASMEND 23834; GFX940-NEXT: s_setpc_b64 s[30:31] 23835 %vec0 = call <8 x i16> asm "; def $0", "=s"() 23836 %vec1 = call <8 x i16> asm "; def $0", "=s"() 23837 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 9, i32 11> 23838 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 23839 ret void 23840} 23841 23842define void @s_shuffle_v2i16_v8i16__10_11() { 23843; GFX900-LABEL: s_shuffle_v2i16_v8i16__10_11: 23844; GFX900: ; %bb.0: 23845; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 23846; GFX900-NEXT: ;;#ASMSTART 23847; GFX900-NEXT: ; def s[4:7] 23848; GFX900-NEXT: ;;#ASMEND 23849; GFX900-NEXT: s_mov_b32 s8, s5 23850; GFX900-NEXT: ;;#ASMSTART 23851; GFX900-NEXT: ; use s8 23852; GFX900-NEXT: ;;#ASMEND 23853; GFX900-NEXT: s_setpc_b64 s[30:31] 23854; 23855; GFX90A-LABEL: s_shuffle_v2i16_v8i16__10_11: 23856; GFX90A: ; %bb.0: 23857; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 23858; GFX90A-NEXT: ;;#ASMSTART 23859; GFX90A-NEXT: ; def s[4:7] 23860; GFX90A-NEXT: ;;#ASMEND 23861; GFX90A-NEXT: s_mov_b32 s8, s5 23862; GFX90A-NEXT: ;;#ASMSTART 23863; GFX90A-NEXT: ; use s8 23864; GFX90A-NEXT: ;;#ASMEND 23865; GFX90A-NEXT: s_setpc_b64 s[30:31] 23866; 23867; GFX940-LABEL: s_shuffle_v2i16_v8i16__10_11: 23868; GFX940: ; %bb.0: 23869; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 23870; GFX940-NEXT: ;;#ASMSTART 23871; GFX940-NEXT: ; def s[0:3] 23872; GFX940-NEXT: ;;#ASMEND 23873; GFX940-NEXT: s_mov_b32 s8, s1 23874; GFX940-NEXT: ;;#ASMSTART 23875; GFX940-NEXT: ; use s8 23876; GFX940-NEXT: ;;#ASMEND 23877; GFX940-NEXT: s_setpc_b64 s[30:31] 23878 %vec0 = call <8 x i16> asm "; def $0", "=s"() 23879 %vec1 = call <8 x i16> asm "; def $0", "=s"() 23880 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 10, i32 11> 23881 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 23882 ret void 23883} 23884 23885define void @s_shuffle_v2i16_v8i16__11_11() { 
23886; GFX900-LABEL: s_shuffle_v2i16_v8i16__11_11: 23887; GFX900: ; %bb.0: 23888; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 23889; GFX900-NEXT: ;;#ASMSTART 23890; GFX900-NEXT: ; def s[4:7] 23891; GFX900-NEXT: ;;#ASMEND 23892; GFX900-NEXT: s_pack_hh_b32_b16 s8, s5, s5 23893; GFX900-NEXT: ;;#ASMSTART 23894; GFX900-NEXT: ; use s8 23895; GFX900-NEXT: ;;#ASMEND 23896; GFX900-NEXT: s_setpc_b64 s[30:31] 23897; 23898; GFX90A-LABEL: s_shuffle_v2i16_v8i16__11_11: 23899; GFX90A: ; %bb.0: 23900; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 23901; GFX90A-NEXT: ;;#ASMSTART 23902; GFX90A-NEXT: ; def s[4:7] 23903; GFX90A-NEXT: ;;#ASMEND 23904; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s5, s5 23905; GFX90A-NEXT: ;;#ASMSTART 23906; GFX90A-NEXT: ; use s8 23907; GFX90A-NEXT: ;;#ASMEND 23908; GFX90A-NEXT: s_setpc_b64 s[30:31] 23909; 23910; GFX940-LABEL: s_shuffle_v2i16_v8i16__11_11: 23911; GFX940: ; %bb.0: 23912; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 23913; GFX940-NEXT: ;;#ASMSTART 23914; GFX940-NEXT: ; def s[0:3] 23915; GFX940-NEXT: ;;#ASMEND 23916; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s1 23917; GFX940-NEXT: ;;#ASMSTART 23918; GFX940-NEXT: ; use s8 23919; GFX940-NEXT: ;;#ASMEND 23920; GFX940-NEXT: s_setpc_b64 s[30:31] 23921 %vec0 = call <8 x i16> asm "; def $0", "=s"() 23922 %vec1 = call <8 x i16> asm "; def $0", "=s"() 23923 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 11, i32 11> 23924 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 23925 ret void 23926} 23927 23928define void @s_shuffle_v2i16_v8i16__12_11() { 23929; GFX900-LABEL: s_shuffle_v2i16_v8i16__12_11: 23930; GFX900: ; %bb.0: 23931; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 23932; GFX900-NEXT: ;;#ASMSTART 23933; GFX900-NEXT: ; def s[4:7] 23934; GFX900-NEXT: ;;#ASMEND 23935; GFX900-NEXT: s_pack_lh_b32_b16 s8, s6, s5 23936; GFX900-NEXT: ;;#ASMSTART 23937; GFX900-NEXT: ; use s8 23938; GFX900-NEXT: ;;#ASMEND 23939; GFX900-NEXT: s_setpc_b64 s[30:31] 
23940; 23941; GFX90A-LABEL: s_shuffle_v2i16_v8i16__12_11: 23942; GFX90A: ; %bb.0: 23943; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 23944; GFX90A-NEXT: ;;#ASMSTART 23945; GFX90A-NEXT: ; def s[4:7] 23946; GFX90A-NEXT: ;;#ASMEND 23947; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s6, s5 23948; GFX90A-NEXT: ;;#ASMSTART 23949; GFX90A-NEXT: ; use s8 23950; GFX90A-NEXT: ;;#ASMEND 23951; GFX90A-NEXT: s_setpc_b64 s[30:31] 23952; 23953; GFX940-LABEL: s_shuffle_v2i16_v8i16__12_11: 23954; GFX940: ; %bb.0: 23955; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 23956; GFX940-NEXT: ;;#ASMSTART 23957; GFX940-NEXT: ; def s[0:3] 23958; GFX940-NEXT: ;;#ASMEND 23959; GFX940-NEXT: s_pack_lh_b32_b16 s8, s2, s1 23960; GFX940-NEXT: ;;#ASMSTART 23961; GFX940-NEXT: ; use s8 23962; GFX940-NEXT: ;;#ASMEND 23963; GFX940-NEXT: s_setpc_b64 s[30:31] 23964 %vec0 = call <8 x i16> asm "; def $0", "=s"() 23965 %vec1 = call <8 x i16> asm "; def $0", "=s"() 23966 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 12, i32 11> 23967 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 23968 ret void 23969} 23970 23971define void @s_shuffle_v2i16_v8i16__13_11() { 23972; GFX900-LABEL: s_shuffle_v2i16_v8i16__13_11: 23973; GFX900: ; %bb.0: 23974; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 23975; GFX900-NEXT: ;;#ASMSTART 23976; GFX900-NEXT: ; def s[4:7] 23977; GFX900-NEXT: ;;#ASMEND 23978; GFX900-NEXT: s_pack_hh_b32_b16 s8, s6, s5 23979; GFX900-NEXT: ;;#ASMSTART 23980; GFX900-NEXT: ; use s8 23981; GFX900-NEXT: ;;#ASMEND 23982; GFX900-NEXT: s_setpc_b64 s[30:31] 23983; 23984; GFX90A-LABEL: s_shuffle_v2i16_v8i16__13_11: 23985; GFX90A: ; %bb.0: 23986; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 23987; GFX90A-NEXT: ;;#ASMSTART 23988; GFX90A-NEXT: ; def s[4:7] 23989; GFX90A-NEXT: ;;#ASMEND 23990; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s6, s5 23991; GFX90A-NEXT: ;;#ASMSTART 23992; GFX90A-NEXT: ; use s8 23993; GFX90A-NEXT: ;;#ASMEND 23994; GFX90A-NEXT: s_setpc_b64 
s[30:31] 23995; 23996; GFX940-LABEL: s_shuffle_v2i16_v8i16__13_11: 23997; GFX940: ; %bb.0: 23998; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 23999; GFX940-NEXT: ;;#ASMSTART 24000; GFX940-NEXT: ; def s[0:3] 24001; GFX940-NEXT: ;;#ASMEND 24002; GFX940-NEXT: s_pack_hh_b32_b16 s8, s2, s1 24003; GFX940-NEXT: ;;#ASMSTART 24004; GFX940-NEXT: ; use s8 24005; GFX940-NEXT: ;;#ASMEND 24006; GFX940-NEXT: s_setpc_b64 s[30:31] 24007 %vec0 = call <8 x i16> asm "; def $0", "=s"() 24008 %vec1 = call <8 x i16> asm "; def $0", "=s"() 24009 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 13, i32 11> 24010 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 24011 ret void 24012} 24013 24014define void @s_shuffle_v2i16_v8i16__14_11() { 24015; GFX900-LABEL: s_shuffle_v2i16_v8i16__14_11: 24016; GFX900: ; %bb.0: 24017; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 24018; GFX900-NEXT: ;;#ASMSTART 24019; GFX900-NEXT: ; def s[4:7] 24020; GFX900-NEXT: ;;#ASMEND 24021; GFX900-NEXT: s_pack_lh_b32_b16 s8, s7, s5 24022; GFX900-NEXT: ;;#ASMSTART 24023; GFX900-NEXT: ; use s8 24024; GFX900-NEXT: ;;#ASMEND 24025; GFX900-NEXT: s_setpc_b64 s[30:31] 24026; 24027; GFX90A-LABEL: s_shuffle_v2i16_v8i16__14_11: 24028; GFX90A: ; %bb.0: 24029; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 24030; GFX90A-NEXT: ;;#ASMSTART 24031; GFX90A-NEXT: ; def s[4:7] 24032; GFX90A-NEXT: ;;#ASMEND 24033; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s7, s5 24034; GFX90A-NEXT: ;;#ASMSTART 24035; GFX90A-NEXT: ; use s8 24036; GFX90A-NEXT: ;;#ASMEND 24037; GFX90A-NEXT: s_setpc_b64 s[30:31] 24038; 24039; GFX940-LABEL: s_shuffle_v2i16_v8i16__14_11: 24040; GFX940: ; %bb.0: 24041; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 24042; GFX940-NEXT: ;;#ASMSTART 24043; GFX940-NEXT: ; def s[0:3] 24044; GFX940-NEXT: ;;#ASMEND 24045; GFX940-NEXT: s_pack_lh_b32_b16 s8, s3, s1 24046; GFX940-NEXT: ;;#ASMSTART 24047; GFX940-NEXT: ; use s8 24048; GFX940-NEXT: ;;#ASMEND 24049; GFX940-NEXT: 
s_setpc_b64 s[30:31] 24050 %vec0 = call <8 x i16> asm "; def $0", "=s"() 24051 %vec1 = call <8 x i16> asm "; def $0", "=s"() 24052 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 14, i32 11> 24053 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 24054 ret void 24055} 24056 24057define void @s_shuffle_v2i16_v8i16__u_12() { 24058; GFX900-LABEL: s_shuffle_v2i16_v8i16__u_12: 24059; GFX900: ; %bb.0: 24060; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 24061; GFX900-NEXT: ;;#ASMSTART 24062; GFX900-NEXT: ; def s[4:7] 24063; GFX900-NEXT: ;;#ASMEND 24064; GFX900-NEXT: s_lshl_b32 s8, s6, 16 24065; GFX900-NEXT: ;;#ASMSTART 24066; GFX900-NEXT: ; use s8 24067; GFX900-NEXT: ;;#ASMEND 24068; GFX900-NEXT: s_setpc_b64 s[30:31] 24069; 24070; GFX90A-LABEL: s_shuffle_v2i16_v8i16__u_12: 24071; GFX90A: ; %bb.0: 24072; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 24073; GFX90A-NEXT: ;;#ASMSTART 24074; GFX90A-NEXT: ; def s[4:7] 24075; GFX90A-NEXT: ;;#ASMEND 24076; GFX90A-NEXT: s_lshl_b32 s8, s6, 16 24077; GFX90A-NEXT: ;;#ASMSTART 24078; GFX90A-NEXT: ; use s8 24079; GFX90A-NEXT: ;;#ASMEND 24080; GFX90A-NEXT: s_setpc_b64 s[30:31] 24081; 24082; GFX940-LABEL: s_shuffle_v2i16_v8i16__u_12: 24083; GFX940: ; %bb.0: 24084; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 24085; GFX940-NEXT: ;;#ASMSTART 24086; GFX940-NEXT: ; def s[0:3] 24087; GFX940-NEXT: ;;#ASMEND 24088; GFX940-NEXT: s_lshl_b32 s8, s2, 16 24089; GFX940-NEXT: ;;#ASMSTART 24090; GFX940-NEXT: ; use s8 24091; GFX940-NEXT: ;;#ASMEND 24092; GFX940-NEXT: s_setpc_b64 s[30:31] 24093 %vec0 = call <8 x i16> asm "; def $0", "=s"() 24094 %vec1 = call <8 x i16> asm "; def $0", "=s"() 24095 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 poison, i32 12> 24096 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 24097 ret void 24098} 24099 24100define void @s_shuffle_v2i16_v8i16__0_12() { 24101; GFX900-LABEL: s_shuffle_v2i16_v8i16__0_12: 24102; GFX900: ; %bb.0: 
24103; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 24104; GFX900-NEXT: ;;#ASMSTART 24105; GFX900-NEXT: ; def s[8:11] 24106; GFX900-NEXT: ;;#ASMEND 24107; GFX900-NEXT: ;;#ASMSTART 24108; GFX900-NEXT: ; def s[4:7] 24109; GFX900-NEXT: ;;#ASMEND 24110; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s10 24111; GFX900-NEXT: ;;#ASMSTART 24112; GFX900-NEXT: ; use s8 24113; GFX900-NEXT: ;;#ASMEND 24114; GFX900-NEXT: s_setpc_b64 s[30:31] 24115; 24116; GFX90A-LABEL: s_shuffle_v2i16_v8i16__0_12: 24117; GFX90A: ; %bb.0: 24118; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 24119; GFX90A-NEXT: ;;#ASMSTART 24120; GFX90A-NEXT: ; def s[8:11] 24121; GFX90A-NEXT: ;;#ASMEND 24122; GFX90A-NEXT: ;;#ASMSTART 24123; GFX90A-NEXT: ; def s[4:7] 24124; GFX90A-NEXT: ;;#ASMEND 24125; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s10 24126; GFX90A-NEXT: ;;#ASMSTART 24127; GFX90A-NEXT: ; use s8 24128; GFX90A-NEXT: ;;#ASMEND 24129; GFX90A-NEXT: s_setpc_b64 s[30:31] 24130; 24131; GFX940-LABEL: s_shuffle_v2i16_v8i16__0_12: 24132; GFX940: ; %bb.0: 24133; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 24134; GFX940-NEXT: ;;#ASMSTART 24135; GFX940-NEXT: ; def s[0:3] 24136; GFX940-NEXT: ;;#ASMEND 24137; GFX940-NEXT: ;;#ASMSTART 24138; GFX940-NEXT: ; def s[4:7] 24139; GFX940-NEXT: ;;#ASMEND 24140; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s6 24141; GFX940-NEXT: ;;#ASMSTART 24142; GFX940-NEXT: ; use s8 24143; GFX940-NEXT: ;;#ASMEND 24144; GFX940-NEXT: s_setpc_b64 s[30:31] 24145 %vec0 = call <8 x i16> asm "; def $0", "=s"() 24146 %vec1 = call <8 x i16> asm "; def $0", "=s"() 24147 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 0, i32 12> 24148 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 24149 ret void 24150} 24151 24152define void @s_shuffle_v2i16_v8i16__1_12() { 24153; GFX900-LABEL: s_shuffle_v2i16_v8i16__1_12: 24154; GFX900: ; %bb.0: 24155; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 24156; GFX900-NEXT: ;;#ASMSTART 24157; GFX900-NEXT: ; def s[4:7] 
24158; GFX900-NEXT: ;;#ASMEND 24159; GFX900-NEXT: ;;#ASMSTART 24160; GFX900-NEXT: ; def s[8:11] 24161; GFX900-NEXT: ;;#ASMEND 24162; GFX900-NEXT: s_lshr_b32 s4, s4, 16 24163; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s10 24164; GFX900-NEXT: ;;#ASMSTART 24165; GFX900-NEXT: ; use s8 24166; GFX900-NEXT: ;;#ASMEND 24167; GFX900-NEXT: s_setpc_b64 s[30:31] 24168; 24169; GFX90A-LABEL: s_shuffle_v2i16_v8i16__1_12: 24170; GFX90A: ; %bb.0: 24171; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 24172; GFX90A-NEXT: ;;#ASMSTART 24173; GFX90A-NEXT: ; def s[4:7] 24174; GFX90A-NEXT: ;;#ASMEND 24175; GFX90A-NEXT: ;;#ASMSTART 24176; GFX90A-NEXT: ; def s[8:11] 24177; GFX90A-NEXT: ;;#ASMEND 24178; GFX90A-NEXT: s_lshr_b32 s4, s4, 16 24179; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s10 24180; GFX90A-NEXT: ;;#ASMSTART 24181; GFX90A-NEXT: ; use s8 24182; GFX90A-NEXT: ;;#ASMEND 24183; GFX90A-NEXT: s_setpc_b64 s[30:31] 24184; 24185; GFX940-LABEL: s_shuffle_v2i16_v8i16__1_12: 24186; GFX940: ; %bb.0: 24187; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 24188; GFX940-NEXT: ;;#ASMSTART 24189; GFX940-NEXT: ; def s[0:3] 24190; GFX940-NEXT: ;;#ASMEND 24191; GFX940-NEXT: s_lshr_b32 s0, s0, 16 24192; GFX940-NEXT: ;;#ASMSTART 24193; GFX940-NEXT: ; def s[4:7] 24194; GFX940-NEXT: ;;#ASMEND 24195; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s6 24196; GFX940-NEXT: ;;#ASMSTART 24197; GFX940-NEXT: ; use s8 24198; GFX940-NEXT: ;;#ASMEND 24199; GFX940-NEXT: s_setpc_b64 s[30:31] 24200 %vec0 = call <8 x i16> asm "; def $0", "=s"() 24201 %vec1 = call <8 x i16> asm "; def $0", "=s"() 24202 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 1, i32 12> 24203 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 24204 ret void 24205} 24206 24207define void @s_shuffle_v2i16_v8i16__2_12() { 24208; GFX900-LABEL: s_shuffle_v2i16_v8i16__2_12: 24209; GFX900: ; %bb.0: 24210; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 24211; GFX900-NEXT: ;;#ASMSTART 24212; GFX900-NEXT: ; def s[8:11] 
24213; GFX900-NEXT: ;;#ASMEND 24214; GFX900-NEXT: ;;#ASMSTART 24215; GFX900-NEXT: ; def s[4:7] 24216; GFX900-NEXT: ;;#ASMEND 24217; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s10 24218; GFX900-NEXT: ;;#ASMSTART 24219; GFX900-NEXT: ; use s8 24220; GFX900-NEXT: ;;#ASMEND 24221; GFX900-NEXT: s_setpc_b64 s[30:31] 24222; 24223; GFX90A-LABEL: s_shuffle_v2i16_v8i16__2_12: 24224; GFX90A: ; %bb.0: 24225; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 24226; GFX90A-NEXT: ;;#ASMSTART 24227; GFX90A-NEXT: ; def s[8:11] 24228; GFX90A-NEXT: ;;#ASMEND 24229; GFX90A-NEXT: ;;#ASMSTART 24230; GFX90A-NEXT: ; def s[4:7] 24231; GFX90A-NEXT: ;;#ASMEND 24232; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s10 24233; GFX90A-NEXT: ;;#ASMSTART 24234; GFX90A-NEXT: ; use s8 24235; GFX90A-NEXT: ;;#ASMEND 24236; GFX90A-NEXT: s_setpc_b64 s[30:31] 24237; 24238; GFX940-LABEL: s_shuffle_v2i16_v8i16__2_12: 24239; GFX940: ; %bb.0: 24240; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 24241; GFX940-NEXT: ;;#ASMSTART 24242; GFX940-NEXT: ; def s[0:3] 24243; GFX940-NEXT: ;;#ASMEND 24244; GFX940-NEXT: ;;#ASMSTART 24245; GFX940-NEXT: ; def s[4:7] 24246; GFX940-NEXT: ;;#ASMEND 24247; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s6 24248; GFX940-NEXT: ;;#ASMSTART 24249; GFX940-NEXT: ; use s8 24250; GFX940-NEXT: ;;#ASMEND 24251; GFX940-NEXT: s_setpc_b64 s[30:31] 24252 %vec0 = call <8 x i16> asm "; def $0", "=s"() 24253 %vec1 = call <8 x i16> asm "; def $0", "=s"() 24254 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 2, i32 12> 24255 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 24256 ret void 24257} 24258 24259define void @s_shuffle_v2i16_v8i16__3_12() { 24260; GFX900-LABEL: s_shuffle_v2i16_v8i16__3_12: 24261; GFX900: ; %bb.0: 24262; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 24263; GFX900-NEXT: ;;#ASMSTART 24264; GFX900-NEXT: ; def s[4:7] 24265; GFX900-NEXT: ;;#ASMEND 24266; GFX900-NEXT: ;;#ASMSTART 24267; GFX900-NEXT: ; def s[8:11] 24268; GFX900-NEXT: ;;#ASMEND 
24269; GFX900-NEXT: s_lshr_b32 s4, s5, 16 24270; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s10 24271; GFX900-NEXT: ;;#ASMSTART 24272; GFX900-NEXT: ; use s8 24273; GFX900-NEXT: ;;#ASMEND 24274; GFX900-NEXT: s_setpc_b64 s[30:31] 24275; 24276; GFX90A-LABEL: s_shuffle_v2i16_v8i16__3_12: 24277; GFX90A: ; %bb.0: 24278; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 24279; GFX90A-NEXT: ;;#ASMSTART 24280; GFX90A-NEXT: ; def s[4:7] 24281; GFX90A-NEXT: ;;#ASMEND 24282; GFX90A-NEXT: ;;#ASMSTART 24283; GFX90A-NEXT: ; def s[8:11] 24284; GFX90A-NEXT: ;;#ASMEND 24285; GFX90A-NEXT: s_lshr_b32 s4, s5, 16 24286; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s10 24287; GFX90A-NEXT: ;;#ASMSTART 24288; GFX90A-NEXT: ; use s8 24289; GFX90A-NEXT: ;;#ASMEND 24290; GFX90A-NEXT: s_setpc_b64 s[30:31] 24291; 24292; GFX940-LABEL: s_shuffle_v2i16_v8i16__3_12: 24293; GFX940: ; %bb.0: 24294; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 24295; GFX940-NEXT: ;;#ASMSTART 24296; GFX940-NEXT: ; def s[0:3] 24297; GFX940-NEXT: ;;#ASMEND 24298; GFX940-NEXT: s_lshr_b32 s0, s1, 16 24299; GFX940-NEXT: ;;#ASMSTART 24300; GFX940-NEXT: ; def s[4:7] 24301; GFX940-NEXT: ;;#ASMEND 24302; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s6 24303; GFX940-NEXT: ;;#ASMSTART 24304; GFX940-NEXT: ; use s8 24305; GFX940-NEXT: ;;#ASMEND 24306; GFX940-NEXT: s_setpc_b64 s[30:31] 24307 %vec0 = call <8 x i16> asm "; def $0", "=s"() 24308 %vec1 = call <8 x i16> asm "; def $0", "=s"() 24309 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 3, i32 12> 24310 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 24311 ret void 24312} 24313 24314define void @s_shuffle_v2i16_v8i16__4_12() { 24315; GFX900-LABEL: s_shuffle_v2i16_v8i16__4_12: 24316; GFX900: ; %bb.0: 24317; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 24318; GFX900-NEXT: ;;#ASMSTART 24319; GFX900-NEXT: ; def s[8:11] 24320; GFX900-NEXT: ;;#ASMEND 24321; GFX900-NEXT: ;;#ASMSTART 24322; GFX900-NEXT: ; def s[4:7] 24323; GFX900-NEXT: ;;#ASMEND 
24324; GFX900-NEXT: s_pack_ll_b32_b16 s8, s6, s10 24325; GFX900-NEXT: ;;#ASMSTART 24326; GFX900-NEXT: ; use s8 24327; GFX900-NEXT: ;;#ASMEND 24328; GFX900-NEXT: s_setpc_b64 s[30:31] 24329; 24330; GFX90A-LABEL: s_shuffle_v2i16_v8i16__4_12: 24331; GFX90A: ; %bb.0: 24332; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 24333; GFX90A-NEXT: ;;#ASMSTART 24334; GFX90A-NEXT: ; def s[8:11] 24335; GFX90A-NEXT: ;;#ASMEND 24336; GFX90A-NEXT: ;;#ASMSTART 24337; GFX90A-NEXT: ; def s[4:7] 24338; GFX90A-NEXT: ;;#ASMEND 24339; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s6, s10 24340; GFX90A-NEXT: ;;#ASMSTART 24341; GFX90A-NEXT: ; use s8 24342; GFX90A-NEXT: ;;#ASMEND 24343; GFX90A-NEXT: s_setpc_b64 s[30:31] 24344; 24345; GFX940-LABEL: s_shuffle_v2i16_v8i16__4_12: 24346; GFX940: ; %bb.0: 24347; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 24348; GFX940-NEXT: ;;#ASMSTART 24349; GFX940-NEXT: ; def s[0:3] 24350; GFX940-NEXT: ;;#ASMEND 24351; GFX940-NEXT: ;;#ASMSTART 24352; GFX940-NEXT: ; def s[4:7] 24353; GFX940-NEXT: ;;#ASMEND 24354; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s6 24355; GFX940-NEXT: ;;#ASMSTART 24356; GFX940-NEXT: ; use s8 24357; GFX940-NEXT: ;;#ASMEND 24358; GFX940-NEXT: s_setpc_b64 s[30:31] 24359 %vec0 = call <8 x i16> asm "; def $0", "=s"() 24360 %vec1 = call <8 x i16> asm "; def $0", "=s"() 24361 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 4, i32 12> 24362 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 24363 ret void 24364} 24365 24366define void @s_shuffle_v2i16_v8i16__5_12() { 24367; GFX900-LABEL: s_shuffle_v2i16_v8i16__5_12: 24368; GFX900: ; %bb.0: 24369; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 24370; GFX900-NEXT: ;;#ASMSTART 24371; GFX900-NEXT: ; def s[4:7] 24372; GFX900-NEXT: ;;#ASMEND 24373; GFX900-NEXT: ;;#ASMSTART 24374; GFX900-NEXT: ; def s[8:11] 24375; GFX900-NEXT: ;;#ASMEND 24376; GFX900-NEXT: s_lshr_b32 s4, s6, 16 24377; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s10 24378; GFX900-NEXT: ;;#ASMSTART 
24379; GFX900-NEXT: ; use s8 24380; GFX900-NEXT: ;;#ASMEND 24381; GFX900-NEXT: s_setpc_b64 s[30:31] 24382; 24383; GFX90A-LABEL: s_shuffle_v2i16_v8i16__5_12: 24384; GFX90A: ; %bb.0: 24385; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 24386; GFX90A-NEXT: ;;#ASMSTART 24387; GFX90A-NEXT: ; def s[4:7] 24388; GFX90A-NEXT: ;;#ASMEND 24389; GFX90A-NEXT: ;;#ASMSTART 24390; GFX90A-NEXT: ; def s[8:11] 24391; GFX90A-NEXT: ;;#ASMEND 24392; GFX90A-NEXT: s_lshr_b32 s4, s6, 16 24393; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s10 24394; GFX90A-NEXT: ;;#ASMSTART 24395; GFX90A-NEXT: ; use s8 24396; GFX90A-NEXT: ;;#ASMEND 24397; GFX90A-NEXT: s_setpc_b64 s[30:31] 24398; 24399; GFX940-LABEL: s_shuffle_v2i16_v8i16__5_12: 24400; GFX940: ; %bb.0: 24401; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 24402; GFX940-NEXT: ;;#ASMSTART 24403; GFX940-NEXT: ; def s[0:3] 24404; GFX940-NEXT: ;;#ASMEND 24405; GFX940-NEXT: s_lshr_b32 s0, s2, 16 24406; GFX940-NEXT: ;;#ASMSTART 24407; GFX940-NEXT: ; def s[4:7] 24408; GFX940-NEXT: ;;#ASMEND 24409; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s6 24410; GFX940-NEXT: ;;#ASMSTART 24411; GFX940-NEXT: ; use s8 24412; GFX940-NEXT: ;;#ASMEND 24413; GFX940-NEXT: s_setpc_b64 s[30:31] 24414 %vec0 = call <8 x i16> asm "; def $0", "=s"() 24415 %vec1 = call <8 x i16> asm "; def $0", "=s"() 24416 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 5, i32 12> 24417 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 24418 ret void 24419} 24420 24421define void @s_shuffle_v2i16_v8i16__6_12() { 24422; GFX900-LABEL: s_shuffle_v2i16_v8i16__6_12: 24423; GFX900: ; %bb.0: 24424; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 24425; GFX900-NEXT: ;;#ASMSTART 24426; GFX900-NEXT: ; def s[8:11] 24427; GFX900-NEXT: ;;#ASMEND 24428; GFX900-NEXT: ;;#ASMSTART 24429; GFX900-NEXT: ; def s[4:7] 24430; GFX900-NEXT: ;;#ASMEND 24431; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s10 24432; GFX900-NEXT: ;;#ASMSTART 24433; GFX900-NEXT: ; use s8 24434; 
GFX900-NEXT: ;;#ASMEND 24435; GFX900-NEXT: s_setpc_b64 s[30:31] 24436; 24437; GFX90A-LABEL: s_shuffle_v2i16_v8i16__6_12: 24438; GFX90A: ; %bb.0: 24439; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 24440; GFX90A-NEXT: ;;#ASMSTART 24441; GFX90A-NEXT: ; def s[8:11] 24442; GFX90A-NEXT: ;;#ASMEND 24443; GFX90A-NEXT: ;;#ASMSTART 24444; GFX90A-NEXT: ; def s[4:7] 24445; GFX90A-NEXT: ;;#ASMEND 24446; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s10 24447; GFX90A-NEXT: ;;#ASMSTART 24448; GFX90A-NEXT: ; use s8 24449; GFX90A-NEXT: ;;#ASMEND 24450; GFX90A-NEXT: s_setpc_b64 s[30:31] 24451; 24452; GFX940-LABEL: s_shuffle_v2i16_v8i16__6_12: 24453; GFX940: ; %bb.0: 24454; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 24455; GFX940-NEXT: ;;#ASMSTART 24456; GFX940-NEXT: ; def s[0:3] 24457; GFX940-NEXT: ;;#ASMEND 24458; GFX940-NEXT: ;;#ASMSTART 24459; GFX940-NEXT: ; def s[4:7] 24460; GFX940-NEXT: ;;#ASMEND 24461; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s6 24462; GFX940-NEXT: ;;#ASMSTART 24463; GFX940-NEXT: ; use s8 24464; GFX940-NEXT: ;;#ASMEND 24465; GFX940-NEXT: s_setpc_b64 s[30:31] 24466 %vec0 = call <8 x i16> asm "; def $0", "=s"() 24467 %vec1 = call <8 x i16> asm "; def $0", "=s"() 24468 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 6, i32 12> 24469 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 24470 ret void 24471} 24472 24473define void @s_shuffle_v2i16_v8i16__7_12() { 24474; GFX900-LABEL: s_shuffle_v2i16_v8i16__7_12: 24475; GFX900: ; %bb.0: 24476; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 24477; GFX900-NEXT: ;;#ASMSTART 24478; GFX900-NEXT: ; def s[4:7] 24479; GFX900-NEXT: ;;#ASMEND 24480; GFX900-NEXT: ;;#ASMSTART 24481; GFX900-NEXT: ; def s[8:11] 24482; GFX900-NEXT: ;;#ASMEND 24483; GFX900-NEXT: s_lshr_b32 s4, s7, 16 24484; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s10 24485; GFX900-NEXT: ;;#ASMSTART 24486; GFX900-NEXT: ; use s8 24487; GFX900-NEXT: ;;#ASMEND 24488; GFX900-NEXT: s_setpc_b64 s[30:31] 24489; 24490; 
GFX90A-LABEL: s_shuffle_v2i16_v8i16__7_12: 24491; GFX90A: ; %bb.0: 24492; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 24493; GFX90A-NEXT: ;;#ASMSTART 24494; GFX90A-NEXT: ; def s[4:7] 24495; GFX90A-NEXT: ;;#ASMEND 24496; GFX90A-NEXT: ;;#ASMSTART 24497; GFX90A-NEXT: ; def s[8:11] 24498; GFX90A-NEXT: ;;#ASMEND 24499; GFX90A-NEXT: s_lshr_b32 s4, s7, 16 24500; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s10 24501; GFX90A-NEXT: ;;#ASMSTART 24502; GFX90A-NEXT: ; use s8 24503; GFX90A-NEXT: ;;#ASMEND 24504; GFX90A-NEXT: s_setpc_b64 s[30:31] 24505; 24506; GFX940-LABEL: s_shuffle_v2i16_v8i16__7_12: 24507; GFX940: ; %bb.0: 24508; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 24509; GFX940-NEXT: ;;#ASMSTART 24510; GFX940-NEXT: ; def s[0:3] 24511; GFX940-NEXT: ;;#ASMEND 24512; GFX940-NEXT: s_lshr_b32 s0, s3, 16 24513; GFX940-NEXT: ;;#ASMSTART 24514; GFX940-NEXT: ; def s[4:7] 24515; GFX940-NEXT: ;;#ASMEND 24516; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s6 24517; GFX940-NEXT: ;;#ASMSTART 24518; GFX940-NEXT: ; use s8 24519; GFX940-NEXT: ;;#ASMEND 24520; GFX940-NEXT: s_setpc_b64 s[30:31] 24521 %vec0 = call <8 x i16> asm "; def $0", "=s"() 24522 %vec1 = call <8 x i16> asm "; def $0", "=s"() 24523 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 7, i32 12> 24524 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 24525 ret void 24526} 24527 24528define void @s_shuffle_v2i16_v8i16__8_12() { 24529; GFX900-LABEL: s_shuffle_v2i16_v8i16__8_12: 24530; GFX900: ; %bb.0: 24531; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 24532; GFX900-NEXT: ;;#ASMSTART 24533; GFX900-NEXT: ; def s[4:7] 24534; GFX900-NEXT: ;;#ASMEND 24535; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s6 24536; GFX900-NEXT: ;;#ASMSTART 24537; GFX900-NEXT: ; use s8 24538; GFX900-NEXT: ;;#ASMEND 24539; GFX900-NEXT: s_setpc_b64 s[30:31] 24540; 24541; GFX90A-LABEL: s_shuffle_v2i16_v8i16__8_12: 24542; GFX90A: ; %bb.0: 24543; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 24544; 
GFX90A-NEXT: ;;#ASMSTART 24545; GFX90A-NEXT: ; def s[4:7] 24546; GFX90A-NEXT: ;;#ASMEND 24547; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s6 24548; GFX90A-NEXT: ;;#ASMSTART 24549; GFX90A-NEXT: ; use s8 24550; GFX90A-NEXT: ;;#ASMEND 24551; GFX90A-NEXT: s_setpc_b64 s[30:31] 24552; 24553; GFX940-LABEL: s_shuffle_v2i16_v8i16__8_12: 24554; GFX940: ; %bb.0: 24555; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 24556; GFX940-NEXT: ;;#ASMSTART 24557; GFX940-NEXT: ; def s[0:3] 24558; GFX940-NEXT: ;;#ASMEND 24559; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s2 24560; GFX940-NEXT: ;;#ASMSTART 24561; GFX940-NEXT: ; use s8 24562; GFX940-NEXT: ;;#ASMEND 24563; GFX940-NEXT: s_setpc_b64 s[30:31] 24564 %vec0 = call <8 x i16> asm "; def $0", "=s"() 24565 %vec1 = call <8 x i16> asm "; def $0", "=s"() 24566 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 8, i32 12> 24567 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 24568 ret void 24569} 24570 24571define void @s_shuffle_v2i16_v8i16__9_12() { 24572; GFX900-LABEL: s_shuffle_v2i16_v8i16__9_12: 24573; GFX900: ; %bb.0: 24574; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 24575; GFX900-NEXT: ;;#ASMSTART 24576; GFX900-NEXT: ; def s[4:7] 24577; GFX900-NEXT: ;;#ASMEND 24578; GFX900-NEXT: s_lshr_b32 s4, s4, 16 24579; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s6 24580; GFX900-NEXT: ;;#ASMSTART 24581; GFX900-NEXT: ; use s8 24582; GFX900-NEXT: ;;#ASMEND 24583; GFX900-NEXT: s_setpc_b64 s[30:31] 24584; 24585; GFX90A-LABEL: s_shuffle_v2i16_v8i16__9_12: 24586; GFX90A: ; %bb.0: 24587; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 24588; GFX90A-NEXT: ;;#ASMSTART 24589; GFX90A-NEXT: ; def s[4:7] 24590; GFX90A-NEXT: ;;#ASMEND 24591; GFX90A-NEXT: s_lshr_b32 s4, s4, 16 24592; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s6 24593; GFX90A-NEXT: ;;#ASMSTART 24594; GFX90A-NEXT: ; use s8 24595; GFX90A-NEXT: ;;#ASMEND 24596; GFX90A-NEXT: s_setpc_b64 s[30:31] 24597; 24598; GFX940-LABEL: s_shuffle_v2i16_v8i16__9_12: 24599; 
GFX940: ; %bb.0: 24600; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 24601; GFX940-NEXT: ;;#ASMSTART 24602; GFX940-NEXT: ; def s[0:3] 24603; GFX940-NEXT: ;;#ASMEND 24604; GFX940-NEXT: s_lshr_b32 s0, s0, 16 24605; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s2 24606; GFX940-NEXT: ;;#ASMSTART 24607; GFX940-NEXT: ; use s8 24608; GFX940-NEXT: ;;#ASMEND 24609; GFX940-NEXT: s_setpc_b64 s[30:31] 24610 %vec0 = call <8 x i16> asm "; def $0", "=s"() 24611 %vec1 = call <8 x i16> asm "; def $0", "=s"() 24612 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 9, i32 12> 24613 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 24614 ret void 24615} 24616 24617define void @s_shuffle_v2i16_v8i16__10_12() { 24618; GFX900-LABEL: s_shuffle_v2i16_v8i16__10_12: 24619; GFX900: ; %bb.0: 24620; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 24621; GFX900-NEXT: ;;#ASMSTART 24622; GFX900-NEXT: ; def s[4:7] 24623; GFX900-NEXT: ;;#ASMEND 24624; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s6 24625; GFX900-NEXT: ;;#ASMSTART 24626; GFX900-NEXT: ; use s8 24627; GFX900-NEXT: ;;#ASMEND 24628; GFX900-NEXT: s_setpc_b64 s[30:31] 24629; 24630; GFX90A-LABEL: s_shuffle_v2i16_v8i16__10_12: 24631; GFX90A: ; %bb.0: 24632; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 24633; GFX90A-NEXT: ;;#ASMSTART 24634; GFX90A-NEXT: ; def s[4:7] 24635; GFX90A-NEXT: ;;#ASMEND 24636; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s6 24637; GFX90A-NEXT: ;;#ASMSTART 24638; GFX90A-NEXT: ; use s8 24639; GFX90A-NEXT: ;;#ASMEND 24640; GFX90A-NEXT: s_setpc_b64 s[30:31] 24641; 24642; GFX940-LABEL: s_shuffle_v2i16_v8i16__10_12: 24643; GFX940: ; %bb.0: 24644; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 24645; GFX940-NEXT: ;;#ASMSTART 24646; GFX940-NEXT: ; def s[0:3] 24647; GFX940-NEXT: ;;#ASMEND 24648; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s2 24649; GFX940-NEXT: ;;#ASMSTART 24650; GFX940-NEXT: ; use s8 24651; GFX940-NEXT: ;;#ASMEND 24652; GFX940-NEXT: s_setpc_b64 s[30:31] 24653 %vec0 = call 
<8 x i16> asm "; def $0", "=s"() 24654 %vec1 = call <8 x i16> asm "; def $0", "=s"() 24655 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 10, i32 12> 24656 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 24657 ret void 24658} 24659 24660define void @s_shuffle_v2i16_v8i16__11_12() { 24661; GFX900-LABEL: s_shuffle_v2i16_v8i16__11_12: 24662; GFX900: ; %bb.0: 24663; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 24664; GFX900-NEXT: ;;#ASMSTART 24665; GFX900-NEXT: ; def s[4:7] 24666; GFX900-NEXT: ;;#ASMEND 24667; GFX900-NEXT: s_lshr_b32 s4, s5, 16 24668; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s6 24669; GFX900-NEXT: ;;#ASMSTART 24670; GFX900-NEXT: ; use s8 24671; GFX900-NEXT: ;;#ASMEND 24672; GFX900-NEXT: s_setpc_b64 s[30:31] 24673; 24674; GFX90A-LABEL: s_shuffle_v2i16_v8i16__11_12: 24675; GFX90A: ; %bb.0: 24676; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 24677; GFX90A-NEXT: ;;#ASMSTART 24678; GFX90A-NEXT: ; def s[4:7] 24679; GFX90A-NEXT: ;;#ASMEND 24680; GFX90A-NEXT: s_lshr_b32 s4, s5, 16 24681; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s6 24682; GFX90A-NEXT: ;;#ASMSTART 24683; GFX90A-NEXT: ; use s8 24684; GFX90A-NEXT: ;;#ASMEND 24685; GFX90A-NEXT: s_setpc_b64 s[30:31] 24686; 24687; GFX940-LABEL: s_shuffle_v2i16_v8i16__11_12: 24688; GFX940: ; %bb.0: 24689; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 24690; GFX940-NEXT: ;;#ASMSTART 24691; GFX940-NEXT: ; def s[0:3] 24692; GFX940-NEXT: ;;#ASMEND 24693; GFX940-NEXT: s_lshr_b32 s0, s1, 16 24694; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s2 24695; GFX940-NEXT: ;;#ASMSTART 24696; GFX940-NEXT: ; use s8 24697; GFX940-NEXT: ;;#ASMEND 24698; GFX940-NEXT: s_setpc_b64 s[30:31] 24699 %vec0 = call <8 x i16> asm "; def $0", "=s"() 24700 %vec1 = call <8 x i16> asm "; def $0", "=s"() 24701 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 11, i32 12> 24702 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 24703 ret void 24704} 24705 24706define void 
@s_shuffle_v2i16_v8i16__12_12() { 24707; GFX900-LABEL: s_shuffle_v2i16_v8i16__12_12: 24708; GFX900: ; %bb.0: 24709; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 24710; GFX900-NEXT: ;;#ASMSTART 24711; GFX900-NEXT: ; def s[4:7] 24712; GFX900-NEXT: ;;#ASMEND 24713; GFX900-NEXT: s_pack_ll_b32_b16 s8, s6, s6 24714; GFX900-NEXT: ;;#ASMSTART 24715; GFX900-NEXT: ; use s8 24716; GFX900-NEXT: ;;#ASMEND 24717; GFX900-NEXT: s_setpc_b64 s[30:31] 24718; 24719; GFX90A-LABEL: s_shuffle_v2i16_v8i16__12_12: 24720; GFX90A: ; %bb.0: 24721; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 24722; GFX90A-NEXT: ;;#ASMSTART 24723; GFX90A-NEXT: ; def s[4:7] 24724; GFX90A-NEXT: ;;#ASMEND 24725; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s6, s6 24726; GFX90A-NEXT: ;;#ASMSTART 24727; GFX90A-NEXT: ; use s8 24728; GFX90A-NEXT: ;;#ASMEND 24729; GFX90A-NEXT: s_setpc_b64 s[30:31] 24730; 24731; GFX940-LABEL: s_shuffle_v2i16_v8i16__12_12: 24732; GFX940: ; %bb.0: 24733; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 24734; GFX940-NEXT: ;;#ASMSTART 24735; GFX940-NEXT: ; def s[0:3] 24736; GFX940-NEXT: ;;#ASMEND 24737; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s2 24738; GFX940-NEXT: ;;#ASMSTART 24739; GFX940-NEXT: ; use s8 24740; GFX940-NEXT: ;;#ASMEND 24741; GFX940-NEXT: s_setpc_b64 s[30:31] 24742 %vec0 = call <8 x i16> asm "; def $0", "=s"() 24743 %vec1 = call <8 x i16> asm "; def $0", "=s"() 24744 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 12, i32 12> 24745 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 24746 ret void 24747} 24748 24749define void @s_shuffle_v2i16_v8i16__13_12() { 24750; GFX900-LABEL: s_shuffle_v2i16_v8i16__13_12: 24751; GFX900: ; %bb.0: 24752; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 24753; GFX900-NEXT: ;;#ASMSTART 24754; GFX900-NEXT: ; def s[4:7] 24755; GFX900-NEXT: ;;#ASMEND 24756; GFX900-NEXT: s_lshr_b32 s4, s6, 16 24757; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s6 24758; GFX900-NEXT: ;;#ASMSTART 24759; GFX900-NEXT: ; use 
s8 24760; GFX900-NEXT: ;;#ASMEND 24761; GFX900-NEXT: s_setpc_b64 s[30:31] 24762; 24763; GFX90A-LABEL: s_shuffle_v2i16_v8i16__13_12: 24764; GFX90A: ; %bb.0: 24765; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 24766; GFX90A-NEXT: ;;#ASMSTART 24767; GFX90A-NEXT: ; def s[4:7] 24768; GFX90A-NEXT: ;;#ASMEND 24769; GFX90A-NEXT: s_lshr_b32 s4, s6, 16 24770; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s6 24771; GFX90A-NEXT: ;;#ASMSTART 24772; GFX90A-NEXT: ; use s8 24773; GFX90A-NEXT: ;;#ASMEND 24774; GFX90A-NEXT: s_setpc_b64 s[30:31] 24775; 24776; GFX940-LABEL: s_shuffle_v2i16_v8i16__13_12: 24777; GFX940: ; %bb.0: 24778; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 24779; GFX940-NEXT: ;;#ASMSTART 24780; GFX940-NEXT: ; def s[0:3] 24781; GFX940-NEXT: ;;#ASMEND 24782; GFX940-NEXT: s_lshr_b32 s0, s2, 16 24783; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s2 24784; GFX940-NEXT: ;;#ASMSTART 24785; GFX940-NEXT: ; use s8 24786; GFX940-NEXT: ;;#ASMEND 24787; GFX940-NEXT: s_setpc_b64 s[30:31] 24788 %vec0 = call <8 x i16> asm "; def $0", "=s"() 24789 %vec1 = call <8 x i16> asm "; def $0", "=s"() 24790 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 13, i32 12> 24791 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 24792 ret void 24793} 24794 24795define void @s_shuffle_v2i16_v8i16__14_12() { 24796; GFX900-LABEL: s_shuffle_v2i16_v8i16__14_12: 24797; GFX900: ; %bb.0: 24798; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 24799; GFX900-NEXT: ;;#ASMSTART 24800; GFX900-NEXT: ; def s[4:7] 24801; GFX900-NEXT: ;;#ASMEND 24802; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s6 24803; GFX900-NEXT: ;;#ASMSTART 24804; GFX900-NEXT: ; use s8 24805; GFX900-NEXT: ;;#ASMEND 24806; GFX900-NEXT: s_setpc_b64 s[30:31] 24807; 24808; GFX90A-LABEL: s_shuffle_v2i16_v8i16__14_12: 24809; GFX90A: ; %bb.0: 24810; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 24811; GFX90A-NEXT: ;;#ASMSTART 24812; GFX90A-NEXT: ; def s[4:7] 24813; GFX90A-NEXT: ;;#ASMEND 24814; 
GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s6 24815; GFX90A-NEXT: ;;#ASMSTART 24816; GFX90A-NEXT: ; use s8 24817; GFX90A-NEXT: ;;#ASMEND 24818; GFX90A-NEXT: s_setpc_b64 s[30:31] 24819; 24820; GFX940-LABEL: s_shuffle_v2i16_v8i16__14_12: 24821; GFX940: ; %bb.0: 24822; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 24823; GFX940-NEXT: ;;#ASMSTART 24824; GFX940-NEXT: ; def s[0:3] 24825; GFX940-NEXT: ;;#ASMEND 24826; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s2 24827; GFX940-NEXT: ;;#ASMSTART 24828; GFX940-NEXT: ; use s8 24829; GFX940-NEXT: ;;#ASMEND 24830; GFX940-NEXT: s_setpc_b64 s[30:31] 24831 %vec0 = call <8 x i16> asm "; def $0", "=s"() 24832 %vec1 = call <8 x i16> asm "; def $0", "=s"() 24833 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 14, i32 12> 24834 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 24835 ret void 24836} 24837 24838define void @s_shuffle_v2i16_v8i16__u_13() { 24839; GFX900-LABEL: s_shuffle_v2i16_v8i16__u_13: 24840; GFX900: ; %bb.0: 24841; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 24842; GFX900-NEXT: ;;#ASMSTART 24843; GFX900-NEXT: ; def s[4:7] 24844; GFX900-NEXT: ;;#ASMEND 24845; GFX900-NEXT: s_mov_b32 s8, s6 24846; GFX900-NEXT: ;;#ASMSTART 24847; GFX900-NEXT: ; use s8 24848; GFX900-NEXT: ;;#ASMEND 24849; GFX900-NEXT: s_setpc_b64 s[30:31] 24850; 24851; GFX90A-LABEL: s_shuffle_v2i16_v8i16__u_13: 24852; GFX90A: ; %bb.0: 24853; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 24854; GFX90A-NEXT: ;;#ASMSTART 24855; GFX90A-NEXT: ; def s[4:7] 24856; GFX90A-NEXT: ;;#ASMEND 24857; GFX90A-NEXT: s_mov_b32 s8, s6 24858; GFX90A-NEXT: ;;#ASMSTART 24859; GFX90A-NEXT: ; use s8 24860; GFX90A-NEXT: ;;#ASMEND 24861; GFX90A-NEXT: s_setpc_b64 s[30:31] 24862; 24863; GFX940-LABEL: s_shuffle_v2i16_v8i16__u_13: 24864; GFX940: ; %bb.0: 24865; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 24866; GFX940-NEXT: ;;#ASMSTART 24867; GFX940-NEXT: ; def s[0:3] 24868; GFX940-NEXT: ;;#ASMEND 24869; GFX940-NEXT: s_mov_b32 
s8, s2 24870; GFX940-NEXT: ;;#ASMSTART 24871; GFX940-NEXT: ; use s8 24872; GFX940-NEXT: ;;#ASMEND 24873; GFX940-NEXT: s_setpc_b64 s[30:31] 24874 %vec0 = call <8 x i16> asm "; def $0", "=s"() 24875 %vec1 = call <8 x i16> asm "; def $0", "=s"() 24876 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 poison, i32 13> 24877 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 24878 ret void 24879} 24880 24881define void @s_shuffle_v2i16_v8i16__0_13() { 24882; GFX900-LABEL: s_shuffle_v2i16_v8i16__0_13: 24883; GFX900: ; %bb.0: 24884; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 24885; GFX900-NEXT: ;;#ASMSTART 24886; GFX900-NEXT: ; def s[8:11] 24887; GFX900-NEXT: ;;#ASMEND 24888; GFX900-NEXT: ;;#ASMSTART 24889; GFX900-NEXT: ; def s[4:7] 24890; GFX900-NEXT: ;;#ASMEND 24891; GFX900-NEXT: s_pack_lh_b32_b16 s8, s4, s10 24892; GFX900-NEXT: ;;#ASMSTART 24893; GFX900-NEXT: ; use s8 24894; GFX900-NEXT: ;;#ASMEND 24895; GFX900-NEXT: s_setpc_b64 s[30:31] 24896; 24897; GFX90A-LABEL: s_shuffle_v2i16_v8i16__0_13: 24898; GFX90A: ; %bb.0: 24899; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 24900; GFX90A-NEXT: ;;#ASMSTART 24901; GFX90A-NEXT: ; def s[8:11] 24902; GFX90A-NEXT: ;;#ASMEND 24903; GFX90A-NEXT: ;;#ASMSTART 24904; GFX90A-NEXT: ; def s[4:7] 24905; GFX90A-NEXT: ;;#ASMEND 24906; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s4, s10 24907; GFX90A-NEXT: ;;#ASMSTART 24908; GFX90A-NEXT: ; use s8 24909; GFX90A-NEXT: ;;#ASMEND 24910; GFX90A-NEXT: s_setpc_b64 s[30:31] 24911; 24912; GFX940-LABEL: s_shuffle_v2i16_v8i16__0_13: 24913; GFX940: ; %bb.0: 24914; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 24915; GFX940-NEXT: ;;#ASMSTART 24916; GFX940-NEXT: ; def s[0:3] 24917; GFX940-NEXT: ;;#ASMEND 24918; GFX940-NEXT: ;;#ASMSTART 24919; GFX940-NEXT: ; def s[4:7] 24920; GFX940-NEXT: ;;#ASMEND 24921; GFX940-NEXT: s_pack_lh_b32_b16 s8, s0, s6 24922; GFX940-NEXT: ;;#ASMSTART 24923; GFX940-NEXT: ; use s8 24924; GFX940-NEXT: ;;#ASMEND 24925; GFX940-NEXT: 
s_setpc_b64 s[30:31] 24926 %vec0 = call <8 x i16> asm "; def $0", "=s"() 24927 %vec1 = call <8 x i16> asm "; def $0", "=s"() 24928 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 0, i32 13> 24929 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 24930 ret void 24931} 24932 24933define void @s_shuffle_v2i16_v8i16__1_13() { 24934; GFX900-LABEL: s_shuffle_v2i16_v8i16__1_13: 24935; GFX900: ; %bb.0: 24936; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 24937; GFX900-NEXT: ;;#ASMSTART 24938; GFX900-NEXT: ; def s[8:11] 24939; GFX900-NEXT: ;;#ASMEND 24940; GFX900-NEXT: ;;#ASMSTART 24941; GFX900-NEXT: ; def s[4:7] 24942; GFX900-NEXT: ;;#ASMEND 24943; GFX900-NEXT: s_pack_hh_b32_b16 s8, s4, s10 24944; GFX900-NEXT: ;;#ASMSTART 24945; GFX900-NEXT: ; use s8 24946; GFX900-NEXT: ;;#ASMEND 24947; GFX900-NEXT: s_setpc_b64 s[30:31] 24948; 24949; GFX90A-LABEL: s_shuffle_v2i16_v8i16__1_13: 24950; GFX90A: ; %bb.0: 24951; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 24952; GFX90A-NEXT: ;;#ASMSTART 24953; GFX90A-NEXT: ; def s[8:11] 24954; GFX90A-NEXT: ;;#ASMEND 24955; GFX90A-NEXT: ;;#ASMSTART 24956; GFX90A-NEXT: ; def s[4:7] 24957; GFX90A-NEXT: ;;#ASMEND 24958; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s4, s10 24959; GFX90A-NEXT: ;;#ASMSTART 24960; GFX90A-NEXT: ; use s8 24961; GFX90A-NEXT: ;;#ASMEND 24962; GFX90A-NEXT: s_setpc_b64 s[30:31] 24963; 24964; GFX940-LABEL: s_shuffle_v2i16_v8i16__1_13: 24965; GFX940: ; %bb.0: 24966; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 24967; GFX940-NEXT: ;;#ASMSTART 24968; GFX940-NEXT: ; def s[0:3] 24969; GFX940-NEXT: ;;#ASMEND 24970; GFX940-NEXT: ;;#ASMSTART 24971; GFX940-NEXT: ; def s[4:7] 24972; GFX940-NEXT: ;;#ASMEND 24973; GFX940-NEXT: s_pack_hh_b32_b16 s8, s0, s6 24974; GFX940-NEXT: ;;#ASMSTART 24975; GFX940-NEXT: ; use s8 24976; GFX940-NEXT: ;;#ASMEND 24977; GFX940-NEXT: s_setpc_b64 s[30:31] 24978 %vec0 = call <8 x i16> asm "; def $0", "=s"() 24979 %vec1 = call <8 x i16> asm "; def $0", "=s"() 
24980 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 1, i32 13> 24981 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 24982 ret void 24983} 24984 24985define void @s_shuffle_v2i16_v8i16__2_13() { 24986; GFX900-LABEL: s_shuffle_v2i16_v8i16__2_13: 24987; GFX900: ; %bb.0: 24988; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 24989; GFX900-NEXT: ;;#ASMSTART 24990; GFX900-NEXT: ; def s[8:11] 24991; GFX900-NEXT: ;;#ASMEND 24992; GFX900-NEXT: ;;#ASMSTART 24993; GFX900-NEXT: ; def s[4:7] 24994; GFX900-NEXT: ;;#ASMEND 24995; GFX900-NEXT: s_pack_lh_b32_b16 s8, s5, s10 24996; GFX900-NEXT: ;;#ASMSTART 24997; GFX900-NEXT: ; use s8 24998; GFX900-NEXT: ;;#ASMEND 24999; GFX900-NEXT: s_setpc_b64 s[30:31] 25000; 25001; GFX90A-LABEL: s_shuffle_v2i16_v8i16__2_13: 25002; GFX90A: ; %bb.0: 25003; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 25004; GFX90A-NEXT: ;;#ASMSTART 25005; GFX90A-NEXT: ; def s[8:11] 25006; GFX90A-NEXT: ;;#ASMEND 25007; GFX90A-NEXT: ;;#ASMSTART 25008; GFX90A-NEXT: ; def s[4:7] 25009; GFX90A-NEXT: ;;#ASMEND 25010; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s5, s10 25011; GFX90A-NEXT: ;;#ASMSTART 25012; GFX90A-NEXT: ; use s8 25013; GFX90A-NEXT: ;;#ASMEND 25014; GFX90A-NEXT: s_setpc_b64 s[30:31] 25015; 25016; GFX940-LABEL: s_shuffle_v2i16_v8i16__2_13: 25017; GFX940: ; %bb.0: 25018; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 25019; GFX940-NEXT: ;;#ASMSTART 25020; GFX940-NEXT: ; def s[0:3] 25021; GFX940-NEXT: ;;#ASMEND 25022; GFX940-NEXT: ;;#ASMSTART 25023; GFX940-NEXT: ; def s[4:7] 25024; GFX940-NEXT: ;;#ASMEND 25025; GFX940-NEXT: s_pack_lh_b32_b16 s8, s1, s6 25026; GFX940-NEXT: ;;#ASMSTART 25027; GFX940-NEXT: ; use s8 25028; GFX940-NEXT: ;;#ASMEND 25029; GFX940-NEXT: s_setpc_b64 s[30:31] 25030 %vec0 = call <8 x i16> asm "; def $0", "=s"() 25031 %vec1 = call <8 x i16> asm "; def $0", "=s"() 25032 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 2, i32 13> 25033 call void asm sideeffect "; use 
$0", "{s8}"(<2 x i16> %shuf) 25034 ret void 25035} 25036 25037define void @s_shuffle_v2i16_v8i16__3_13() { 25038; GFX900-LABEL: s_shuffle_v2i16_v8i16__3_13: 25039; GFX900: ; %bb.0: 25040; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 25041; GFX900-NEXT: ;;#ASMSTART 25042; GFX900-NEXT: ; def s[8:11] 25043; GFX900-NEXT: ;;#ASMEND 25044; GFX900-NEXT: ;;#ASMSTART 25045; GFX900-NEXT: ; def s[4:7] 25046; GFX900-NEXT: ;;#ASMEND 25047; GFX900-NEXT: s_pack_hh_b32_b16 s8, s5, s10 25048; GFX900-NEXT: ;;#ASMSTART 25049; GFX900-NEXT: ; use s8 25050; GFX900-NEXT: ;;#ASMEND 25051; GFX900-NEXT: s_setpc_b64 s[30:31] 25052; 25053; GFX90A-LABEL: s_shuffle_v2i16_v8i16__3_13: 25054; GFX90A: ; %bb.0: 25055; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 25056; GFX90A-NEXT: ;;#ASMSTART 25057; GFX90A-NEXT: ; def s[8:11] 25058; GFX90A-NEXT: ;;#ASMEND 25059; GFX90A-NEXT: ;;#ASMSTART 25060; GFX90A-NEXT: ; def s[4:7] 25061; GFX90A-NEXT: ;;#ASMEND 25062; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s5, s10 25063; GFX90A-NEXT: ;;#ASMSTART 25064; GFX90A-NEXT: ; use s8 25065; GFX90A-NEXT: ;;#ASMEND 25066; GFX90A-NEXT: s_setpc_b64 s[30:31] 25067; 25068; GFX940-LABEL: s_shuffle_v2i16_v8i16__3_13: 25069; GFX940: ; %bb.0: 25070; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 25071; GFX940-NEXT: ;;#ASMSTART 25072; GFX940-NEXT: ; def s[0:3] 25073; GFX940-NEXT: ;;#ASMEND 25074; GFX940-NEXT: ;;#ASMSTART 25075; GFX940-NEXT: ; def s[4:7] 25076; GFX940-NEXT: ;;#ASMEND 25077; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s6 25078; GFX940-NEXT: ;;#ASMSTART 25079; GFX940-NEXT: ; use s8 25080; GFX940-NEXT: ;;#ASMEND 25081; GFX940-NEXT: s_setpc_b64 s[30:31] 25082 %vec0 = call <8 x i16> asm "; def $0", "=s"() 25083 %vec1 = call <8 x i16> asm "; def $0", "=s"() 25084 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 3, i32 13> 25085 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 25086 ret void 25087} 25088 25089define void @s_shuffle_v2i16_v8i16__4_13() { 25090; GFX900-LABEL: 
s_shuffle_v2i16_v8i16__4_13: 25091; GFX900: ; %bb.0: 25092; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 25093; GFX900-NEXT: ;;#ASMSTART 25094; GFX900-NEXT: ; def s[8:11] 25095; GFX900-NEXT: ;;#ASMEND 25096; GFX900-NEXT: ;;#ASMSTART 25097; GFX900-NEXT: ; def s[4:7] 25098; GFX900-NEXT: ;;#ASMEND 25099; GFX900-NEXT: s_pack_lh_b32_b16 s8, s6, s10 25100; GFX900-NEXT: ;;#ASMSTART 25101; GFX900-NEXT: ; use s8 25102; GFX900-NEXT: ;;#ASMEND 25103; GFX900-NEXT: s_setpc_b64 s[30:31] 25104; 25105; GFX90A-LABEL: s_shuffle_v2i16_v8i16__4_13: 25106; GFX90A: ; %bb.0: 25107; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 25108; GFX90A-NEXT: ;;#ASMSTART 25109; GFX90A-NEXT: ; def s[8:11] 25110; GFX90A-NEXT: ;;#ASMEND 25111; GFX90A-NEXT: ;;#ASMSTART 25112; GFX90A-NEXT: ; def s[4:7] 25113; GFX90A-NEXT: ;;#ASMEND 25114; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s6, s10 25115; GFX90A-NEXT: ;;#ASMSTART 25116; GFX90A-NEXT: ; use s8 25117; GFX90A-NEXT: ;;#ASMEND 25118; GFX90A-NEXT: s_setpc_b64 s[30:31] 25119; 25120; GFX940-LABEL: s_shuffle_v2i16_v8i16__4_13: 25121; GFX940: ; %bb.0: 25122; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 25123; GFX940-NEXT: ;;#ASMSTART 25124; GFX940-NEXT: ; def s[0:3] 25125; GFX940-NEXT: ;;#ASMEND 25126; GFX940-NEXT: ;;#ASMSTART 25127; GFX940-NEXT: ; def s[4:7] 25128; GFX940-NEXT: ;;#ASMEND 25129; GFX940-NEXT: s_pack_lh_b32_b16 s8, s2, s6 25130; GFX940-NEXT: ;;#ASMSTART 25131; GFX940-NEXT: ; use s8 25132; GFX940-NEXT: ;;#ASMEND 25133; GFX940-NEXT: s_setpc_b64 s[30:31] 25134 %vec0 = call <8 x i16> asm "; def $0", "=s"() 25135 %vec1 = call <8 x i16> asm "; def $0", "=s"() 25136 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 4, i32 13> 25137 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 25138 ret void 25139} 25140 25141define void @s_shuffle_v2i16_v8i16__5_13() { 25142; GFX900-LABEL: s_shuffle_v2i16_v8i16__5_13: 25143; GFX900: ; %bb.0: 25144; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 25145; 
GFX900-NEXT: ;;#ASMSTART 25146; GFX900-NEXT: ; def s[8:11] 25147; GFX900-NEXT: ;;#ASMEND 25148; GFX900-NEXT: ;;#ASMSTART 25149; GFX900-NEXT: ; def s[4:7] 25150; GFX900-NEXT: ;;#ASMEND 25151; GFX900-NEXT: s_pack_hh_b32_b16 s8, s6, s10 25152; GFX900-NEXT: ;;#ASMSTART 25153; GFX900-NEXT: ; use s8 25154; GFX900-NEXT: ;;#ASMEND 25155; GFX900-NEXT: s_setpc_b64 s[30:31] 25156; 25157; GFX90A-LABEL: s_shuffle_v2i16_v8i16__5_13: 25158; GFX90A: ; %bb.0: 25159; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 25160; GFX90A-NEXT: ;;#ASMSTART 25161; GFX90A-NEXT: ; def s[8:11] 25162; GFX90A-NEXT: ;;#ASMEND 25163; GFX90A-NEXT: ;;#ASMSTART 25164; GFX90A-NEXT: ; def s[4:7] 25165; GFX90A-NEXT: ;;#ASMEND 25166; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s6, s10 25167; GFX90A-NEXT: ;;#ASMSTART 25168; GFX90A-NEXT: ; use s8 25169; GFX90A-NEXT: ;;#ASMEND 25170; GFX90A-NEXT: s_setpc_b64 s[30:31] 25171; 25172; GFX940-LABEL: s_shuffle_v2i16_v8i16__5_13: 25173; GFX940: ; %bb.0: 25174; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 25175; GFX940-NEXT: ;;#ASMSTART 25176; GFX940-NEXT: ; def s[0:3] 25177; GFX940-NEXT: ;;#ASMEND 25178; GFX940-NEXT: ;;#ASMSTART 25179; GFX940-NEXT: ; def s[4:7] 25180; GFX940-NEXT: ;;#ASMEND 25181; GFX940-NEXT: s_pack_hh_b32_b16 s8, s2, s6 25182; GFX940-NEXT: ;;#ASMSTART 25183; GFX940-NEXT: ; use s8 25184; GFX940-NEXT: ;;#ASMEND 25185; GFX940-NEXT: s_setpc_b64 s[30:31] 25186 %vec0 = call <8 x i16> asm "; def $0", "=s"() 25187 %vec1 = call <8 x i16> asm "; def $0", "=s"() 25188 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 5, i32 13> 25189 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 25190 ret void 25191} 25192 25193define void @s_shuffle_v2i16_v8i16__6_13() { 25194; GFX900-LABEL: s_shuffle_v2i16_v8i16__6_13: 25195; GFX900: ; %bb.0: 25196; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 25197; GFX900-NEXT: ;;#ASMSTART 25198; GFX900-NEXT: ; def s[8:11] 25199; GFX900-NEXT: ;;#ASMEND 25200; GFX900-NEXT: ;;#ASMSTART 25201; 
GFX900-NEXT: ; def s[4:7] 25202; GFX900-NEXT: ;;#ASMEND 25203; GFX900-NEXT: s_pack_lh_b32_b16 s8, s7, s10 25204; GFX900-NEXT: ;;#ASMSTART 25205; GFX900-NEXT: ; use s8 25206; GFX900-NEXT: ;;#ASMEND 25207; GFX900-NEXT: s_setpc_b64 s[30:31] 25208; 25209; GFX90A-LABEL: s_shuffle_v2i16_v8i16__6_13: 25210; GFX90A: ; %bb.0: 25211; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 25212; GFX90A-NEXT: ;;#ASMSTART 25213; GFX90A-NEXT: ; def s[8:11] 25214; GFX90A-NEXT: ;;#ASMEND 25215; GFX90A-NEXT: ;;#ASMSTART 25216; GFX90A-NEXT: ; def s[4:7] 25217; GFX90A-NEXT: ;;#ASMEND 25218; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s7, s10 25219; GFX90A-NEXT: ;;#ASMSTART 25220; GFX90A-NEXT: ; use s8 25221; GFX90A-NEXT: ;;#ASMEND 25222; GFX90A-NEXT: s_setpc_b64 s[30:31] 25223; 25224; GFX940-LABEL: s_shuffle_v2i16_v8i16__6_13: 25225; GFX940: ; %bb.0: 25226; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 25227; GFX940-NEXT: ;;#ASMSTART 25228; GFX940-NEXT: ; def s[0:3] 25229; GFX940-NEXT: ;;#ASMEND 25230; GFX940-NEXT: ;;#ASMSTART 25231; GFX940-NEXT: ; def s[4:7] 25232; GFX940-NEXT: ;;#ASMEND 25233; GFX940-NEXT: s_pack_lh_b32_b16 s8, s3, s6 25234; GFX940-NEXT: ;;#ASMSTART 25235; GFX940-NEXT: ; use s8 25236; GFX940-NEXT: ;;#ASMEND 25237; GFX940-NEXT: s_setpc_b64 s[30:31] 25238 %vec0 = call <8 x i16> asm "; def $0", "=s"() 25239 %vec1 = call <8 x i16> asm "; def $0", "=s"() 25240 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 6, i32 13> 25241 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 25242 ret void 25243} 25244 25245define void @s_shuffle_v2i16_v8i16__7_13() { 25246; GFX900-LABEL: s_shuffle_v2i16_v8i16__7_13: 25247; GFX900: ; %bb.0: 25248; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 25249; GFX900-NEXT: ;;#ASMSTART 25250; GFX900-NEXT: ; def s[8:11] 25251; GFX900-NEXT: ;;#ASMEND 25252; GFX900-NEXT: ;;#ASMSTART 25253; GFX900-NEXT: ; def s[4:7] 25254; GFX900-NEXT: ;;#ASMEND 25255; GFX900-NEXT: s_pack_hh_b32_b16 s8, s7, s10 25256; GFX900-NEXT: 
;;#ASMSTART 25257; GFX900-NEXT: ; use s8 25258; GFX900-NEXT: ;;#ASMEND 25259; GFX900-NEXT: s_setpc_b64 s[30:31] 25260; 25261; GFX90A-LABEL: s_shuffle_v2i16_v8i16__7_13: 25262; GFX90A: ; %bb.0: 25263; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 25264; GFX90A-NEXT: ;;#ASMSTART 25265; GFX90A-NEXT: ; def s[8:11] 25266; GFX90A-NEXT: ;;#ASMEND 25267; GFX90A-NEXT: ;;#ASMSTART 25268; GFX90A-NEXT: ; def s[4:7] 25269; GFX90A-NEXT: ;;#ASMEND 25270; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s7, s10 25271; GFX90A-NEXT: ;;#ASMSTART 25272; GFX90A-NEXT: ; use s8 25273; GFX90A-NEXT: ;;#ASMEND 25274; GFX90A-NEXT: s_setpc_b64 s[30:31] 25275; 25276; GFX940-LABEL: s_shuffle_v2i16_v8i16__7_13: 25277; GFX940: ; %bb.0: 25278; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 25279; GFX940-NEXT: ;;#ASMSTART 25280; GFX940-NEXT: ; def s[0:3] 25281; GFX940-NEXT: ;;#ASMEND 25282; GFX940-NEXT: ;;#ASMSTART 25283; GFX940-NEXT: ; def s[4:7] 25284; GFX940-NEXT: ;;#ASMEND 25285; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s6 25286; GFX940-NEXT: ;;#ASMSTART 25287; GFX940-NEXT: ; use s8 25288; GFX940-NEXT: ;;#ASMEND 25289; GFX940-NEXT: s_setpc_b64 s[30:31] 25290 %vec0 = call <8 x i16> asm "; def $0", "=s"() 25291 %vec1 = call <8 x i16> asm "; def $0", "=s"() 25292 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 7, i32 13> 25293 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 25294 ret void 25295} 25296 25297define void @s_shuffle_v2i16_v8i16__8_13() { 25298; GFX900-LABEL: s_shuffle_v2i16_v8i16__8_13: 25299; GFX900: ; %bb.0: 25300; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 25301; GFX900-NEXT: ;;#ASMSTART 25302; GFX900-NEXT: ; def s[4:7] 25303; GFX900-NEXT: ;;#ASMEND 25304; GFX900-NEXT: s_pack_lh_b32_b16 s8, s4, s6 25305; GFX900-NEXT: ;;#ASMSTART 25306; GFX900-NEXT: ; use s8 25307; GFX900-NEXT: ;;#ASMEND 25308; GFX900-NEXT: s_setpc_b64 s[30:31] 25309; 25310; GFX90A-LABEL: s_shuffle_v2i16_v8i16__8_13: 25311; GFX90A: ; %bb.0: 25312; GFX90A-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) 25313; GFX90A-NEXT: ;;#ASMSTART 25314; GFX90A-NEXT: ; def s[4:7] 25315; GFX90A-NEXT: ;;#ASMEND 25316; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s4, s6 25317; GFX90A-NEXT: ;;#ASMSTART 25318; GFX90A-NEXT: ; use s8 25319; GFX90A-NEXT: ;;#ASMEND 25320; GFX90A-NEXT: s_setpc_b64 s[30:31] 25321; 25322; GFX940-LABEL: s_shuffle_v2i16_v8i16__8_13: 25323; GFX940: ; %bb.0: 25324; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 25325; GFX940-NEXT: ;;#ASMSTART 25326; GFX940-NEXT: ; def s[0:3] 25327; GFX940-NEXT: ;;#ASMEND 25328; GFX940-NEXT: s_pack_lh_b32_b16 s8, s0, s2 25329; GFX940-NEXT: ;;#ASMSTART 25330; GFX940-NEXT: ; use s8 25331; GFX940-NEXT: ;;#ASMEND 25332; GFX940-NEXT: s_setpc_b64 s[30:31] 25333 %vec0 = call <8 x i16> asm "; def $0", "=s"() 25334 %vec1 = call <8 x i16> asm "; def $0", "=s"() 25335 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 8, i32 13> 25336 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 25337 ret void 25338} 25339 25340define void @s_shuffle_v2i16_v8i16__9_13() { 25341; GFX900-LABEL: s_shuffle_v2i16_v8i16__9_13: 25342; GFX900: ; %bb.0: 25343; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 25344; GFX900-NEXT: ;;#ASMSTART 25345; GFX900-NEXT: ; def s[4:7] 25346; GFX900-NEXT: ;;#ASMEND 25347; GFX900-NEXT: s_pack_hh_b32_b16 s8, s4, s6 25348; GFX900-NEXT: ;;#ASMSTART 25349; GFX900-NEXT: ; use s8 25350; GFX900-NEXT: ;;#ASMEND 25351; GFX900-NEXT: s_setpc_b64 s[30:31] 25352; 25353; GFX90A-LABEL: s_shuffle_v2i16_v8i16__9_13: 25354; GFX90A: ; %bb.0: 25355; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 25356; GFX90A-NEXT: ;;#ASMSTART 25357; GFX90A-NEXT: ; def s[4:7] 25358; GFX90A-NEXT: ;;#ASMEND 25359; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s4, s6 25360; GFX90A-NEXT: ;;#ASMSTART 25361; GFX90A-NEXT: ; use s8 25362; GFX90A-NEXT: ;;#ASMEND 25363; GFX90A-NEXT: s_setpc_b64 s[30:31] 25364; 25365; GFX940-LABEL: s_shuffle_v2i16_v8i16__9_13: 25366; GFX940: ; %bb.0: 25367; GFX940-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) 25368; GFX940-NEXT: ;;#ASMSTART 25369; GFX940-NEXT: ; def s[0:3] 25370; GFX940-NEXT: ;;#ASMEND 25371; GFX940-NEXT: s_pack_hh_b32_b16 s8, s0, s2 25372; GFX940-NEXT: ;;#ASMSTART 25373; GFX940-NEXT: ; use s8 25374; GFX940-NEXT: ;;#ASMEND 25375; GFX940-NEXT: s_setpc_b64 s[30:31] 25376 %vec0 = call <8 x i16> asm "; def $0", "=s"() 25377 %vec1 = call <8 x i16> asm "; def $0", "=s"() 25378 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 9, i32 13> 25379 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 25380 ret void 25381} 25382 25383define void @s_shuffle_v2i16_v8i16__10_13() { 25384; GFX900-LABEL: s_shuffle_v2i16_v8i16__10_13: 25385; GFX900: ; %bb.0: 25386; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 25387; GFX900-NEXT: ;;#ASMSTART 25388; GFX900-NEXT: ; def s[4:7] 25389; GFX900-NEXT: ;;#ASMEND 25390; GFX900-NEXT: s_pack_lh_b32_b16 s8, s5, s6 25391; GFX900-NEXT: ;;#ASMSTART 25392; GFX900-NEXT: ; use s8 25393; GFX900-NEXT: ;;#ASMEND 25394; GFX900-NEXT: s_setpc_b64 s[30:31] 25395; 25396; GFX90A-LABEL: s_shuffle_v2i16_v8i16__10_13: 25397; GFX90A: ; %bb.0: 25398; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 25399; GFX90A-NEXT: ;;#ASMSTART 25400; GFX90A-NEXT: ; def s[4:7] 25401; GFX90A-NEXT: ;;#ASMEND 25402; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s5, s6 25403; GFX90A-NEXT: ;;#ASMSTART 25404; GFX90A-NEXT: ; use s8 25405; GFX90A-NEXT: ;;#ASMEND 25406; GFX90A-NEXT: s_setpc_b64 s[30:31] 25407; 25408; GFX940-LABEL: s_shuffle_v2i16_v8i16__10_13: 25409; GFX940: ; %bb.0: 25410; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 25411; GFX940-NEXT: ;;#ASMSTART 25412; GFX940-NEXT: ; def s[0:3] 25413; GFX940-NEXT: ;;#ASMEND 25414; GFX940-NEXT: s_pack_lh_b32_b16 s8, s1, s2 25415; GFX940-NEXT: ;;#ASMSTART 25416; GFX940-NEXT: ; use s8 25417; GFX940-NEXT: ;;#ASMEND 25418; GFX940-NEXT: s_setpc_b64 s[30:31] 25419 %vec0 = call <8 x i16> asm "; def $0", "=s"() 25420 %vec1 = call <8 x i16> asm "; def $0", "=s"() 
25421 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 10, i32 13> 25422 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 25423 ret void 25424} 25425 25426define void @s_shuffle_v2i16_v8i16__11_13() { 25427; GFX900-LABEL: s_shuffle_v2i16_v8i16__11_13: 25428; GFX900: ; %bb.0: 25429; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 25430; GFX900-NEXT: ;;#ASMSTART 25431; GFX900-NEXT: ; def s[4:7] 25432; GFX900-NEXT: ;;#ASMEND 25433; GFX900-NEXT: s_pack_hh_b32_b16 s8, s5, s6 25434; GFX900-NEXT: ;;#ASMSTART 25435; GFX900-NEXT: ; use s8 25436; GFX900-NEXT: ;;#ASMEND 25437; GFX900-NEXT: s_setpc_b64 s[30:31] 25438; 25439; GFX90A-LABEL: s_shuffle_v2i16_v8i16__11_13: 25440; GFX90A: ; %bb.0: 25441; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 25442; GFX90A-NEXT: ;;#ASMSTART 25443; GFX90A-NEXT: ; def s[4:7] 25444; GFX90A-NEXT: ;;#ASMEND 25445; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s5, s6 25446; GFX90A-NEXT: ;;#ASMSTART 25447; GFX90A-NEXT: ; use s8 25448; GFX90A-NEXT: ;;#ASMEND 25449; GFX90A-NEXT: s_setpc_b64 s[30:31] 25450; 25451; GFX940-LABEL: s_shuffle_v2i16_v8i16__11_13: 25452; GFX940: ; %bb.0: 25453; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 25454; GFX940-NEXT: ;;#ASMSTART 25455; GFX940-NEXT: ; def s[0:3] 25456; GFX940-NEXT: ;;#ASMEND 25457; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s2 25458; GFX940-NEXT: ;;#ASMSTART 25459; GFX940-NEXT: ; use s8 25460; GFX940-NEXT: ;;#ASMEND 25461; GFX940-NEXT: s_setpc_b64 s[30:31] 25462 %vec0 = call <8 x i16> asm "; def $0", "=s"() 25463 %vec1 = call <8 x i16> asm "; def $0", "=s"() 25464 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 11, i32 13> 25465 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 25466 ret void 25467} 25468 25469define void @s_shuffle_v2i16_v8i16__12_13() { 25470; GFX900-LABEL: s_shuffle_v2i16_v8i16__12_13: 25471; GFX900: ; %bb.0: 25472; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 25473; GFX900-NEXT: ;;#ASMSTART 25474; 
GFX900-NEXT: ; def s[4:7] 25475; GFX900-NEXT: ;;#ASMEND 25476; GFX900-NEXT: s_mov_b32 s8, s6 25477; GFX900-NEXT: ;;#ASMSTART 25478; GFX900-NEXT: ; use s8 25479; GFX900-NEXT: ;;#ASMEND 25480; GFX900-NEXT: s_setpc_b64 s[30:31] 25481; 25482; GFX90A-LABEL: s_shuffle_v2i16_v8i16__12_13: 25483; GFX90A: ; %bb.0: 25484; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 25485; GFX90A-NEXT: ;;#ASMSTART 25486; GFX90A-NEXT: ; def s[4:7] 25487; GFX90A-NEXT: ;;#ASMEND 25488; GFX90A-NEXT: s_mov_b32 s8, s6 25489; GFX90A-NEXT: ;;#ASMSTART 25490; GFX90A-NEXT: ; use s8 25491; GFX90A-NEXT: ;;#ASMEND 25492; GFX90A-NEXT: s_setpc_b64 s[30:31] 25493; 25494; GFX940-LABEL: s_shuffle_v2i16_v8i16__12_13: 25495; GFX940: ; %bb.0: 25496; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 25497; GFX940-NEXT: ;;#ASMSTART 25498; GFX940-NEXT: ; def s[0:3] 25499; GFX940-NEXT: ;;#ASMEND 25500; GFX940-NEXT: s_mov_b32 s8, s2 25501; GFX940-NEXT: ;;#ASMSTART 25502; GFX940-NEXT: ; use s8 25503; GFX940-NEXT: ;;#ASMEND 25504; GFX940-NEXT: s_setpc_b64 s[30:31] 25505 %vec0 = call <8 x i16> asm "; def $0", "=s"() 25506 %vec1 = call <8 x i16> asm "; def $0", "=s"() 25507 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 12, i32 13> 25508 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 25509 ret void 25510} 25511 25512define void @s_shuffle_v2i16_v8i16__13_13() { 25513; GFX900-LABEL: s_shuffle_v2i16_v8i16__13_13: 25514; GFX900: ; %bb.0: 25515; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 25516; GFX900-NEXT: ;;#ASMSTART 25517; GFX900-NEXT: ; def s[4:7] 25518; GFX900-NEXT: ;;#ASMEND 25519; GFX900-NEXT: s_pack_hh_b32_b16 s8, s6, s6 25520; GFX900-NEXT: ;;#ASMSTART 25521; GFX900-NEXT: ; use s8 25522; GFX900-NEXT: ;;#ASMEND 25523; GFX900-NEXT: s_setpc_b64 s[30:31] 25524; 25525; GFX90A-LABEL: s_shuffle_v2i16_v8i16__13_13: 25526; GFX90A: ; %bb.0: 25527; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 25528; GFX90A-NEXT: ;;#ASMSTART 25529; GFX90A-NEXT: ; def s[4:7] 
25530; GFX90A-NEXT: ;;#ASMEND 25531; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s6, s6 25532; GFX90A-NEXT: ;;#ASMSTART 25533; GFX90A-NEXT: ; use s8 25534; GFX90A-NEXT: ;;#ASMEND 25535; GFX90A-NEXT: s_setpc_b64 s[30:31] 25536; 25537; GFX940-LABEL: s_shuffle_v2i16_v8i16__13_13: 25538; GFX940: ; %bb.0: 25539; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 25540; GFX940-NEXT: ;;#ASMSTART 25541; GFX940-NEXT: ; def s[0:3] 25542; GFX940-NEXT: ;;#ASMEND 25543; GFX940-NEXT: s_pack_hh_b32_b16 s8, s2, s2 25544; GFX940-NEXT: ;;#ASMSTART 25545; GFX940-NEXT: ; use s8 25546; GFX940-NEXT: ;;#ASMEND 25547; GFX940-NEXT: s_setpc_b64 s[30:31] 25548 %vec0 = call <8 x i16> asm "; def $0", "=s"() 25549 %vec1 = call <8 x i16> asm "; def $0", "=s"() 25550 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 13, i32 13> 25551 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 25552 ret void 25553} 25554 25555define void @s_shuffle_v2i16_v8i16__14_13() { 25556; GFX900-LABEL: s_shuffle_v2i16_v8i16__14_13: 25557; GFX900: ; %bb.0: 25558; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 25559; GFX900-NEXT: ;;#ASMSTART 25560; GFX900-NEXT: ; def s[4:7] 25561; GFX900-NEXT: ;;#ASMEND 25562; GFX900-NEXT: s_pack_lh_b32_b16 s8, s7, s6 25563; GFX900-NEXT: ;;#ASMSTART 25564; GFX900-NEXT: ; use s8 25565; GFX900-NEXT: ;;#ASMEND 25566; GFX900-NEXT: s_setpc_b64 s[30:31] 25567; 25568; GFX90A-LABEL: s_shuffle_v2i16_v8i16__14_13: 25569; GFX90A: ; %bb.0: 25570; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 25571; GFX90A-NEXT: ;;#ASMSTART 25572; GFX90A-NEXT: ; def s[4:7] 25573; GFX90A-NEXT: ;;#ASMEND 25574; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s7, s6 25575; GFX90A-NEXT: ;;#ASMSTART 25576; GFX90A-NEXT: ; use s8 25577; GFX90A-NEXT: ;;#ASMEND 25578; GFX90A-NEXT: s_setpc_b64 s[30:31] 25579; 25580; GFX940-LABEL: s_shuffle_v2i16_v8i16__14_13: 25581; GFX940: ; %bb.0: 25582; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 25583; GFX940-NEXT: ;;#ASMSTART 25584; GFX940-NEXT: ; def 
s[0:3] 25585; GFX940-NEXT: ;;#ASMEND 25586; GFX940-NEXT: s_pack_lh_b32_b16 s8, s3, s2 25587; GFX940-NEXT: ;;#ASMSTART 25588; GFX940-NEXT: ; use s8 25589; GFX940-NEXT: ;;#ASMEND 25590; GFX940-NEXT: s_setpc_b64 s[30:31] 25591 %vec0 = call <8 x i16> asm "; def $0", "=s"() 25592 %vec1 = call <8 x i16> asm "; def $0", "=s"() 25593 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 14, i32 13> 25594 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 25595 ret void 25596} 25597 25598define void @s_shuffle_v2i16_v8i16__u_14() { 25599; GFX900-LABEL: s_shuffle_v2i16_v8i16__u_14: 25600; GFX900: ; %bb.0: 25601; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 25602; GFX900-NEXT: ;;#ASMSTART 25603; GFX900-NEXT: ; def s[4:7] 25604; GFX900-NEXT: ;;#ASMEND 25605; GFX900-NEXT: s_lshl_b32 s8, s7, 16 25606; GFX900-NEXT: ;;#ASMSTART 25607; GFX900-NEXT: ; use s8 25608; GFX900-NEXT: ;;#ASMEND 25609; GFX900-NEXT: s_setpc_b64 s[30:31] 25610; 25611; GFX90A-LABEL: s_shuffle_v2i16_v8i16__u_14: 25612; GFX90A: ; %bb.0: 25613; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 25614; GFX90A-NEXT: ;;#ASMSTART 25615; GFX90A-NEXT: ; def s[4:7] 25616; GFX90A-NEXT: ;;#ASMEND 25617; GFX90A-NEXT: s_lshl_b32 s8, s7, 16 25618; GFX90A-NEXT: ;;#ASMSTART 25619; GFX90A-NEXT: ; use s8 25620; GFX90A-NEXT: ;;#ASMEND 25621; GFX90A-NEXT: s_setpc_b64 s[30:31] 25622; 25623; GFX940-LABEL: s_shuffle_v2i16_v8i16__u_14: 25624; GFX940: ; %bb.0: 25625; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 25626; GFX940-NEXT: ;;#ASMSTART 25627; GFX940-NEXT: ; def s[0:3] 25628; GFX940-NEXT: ;;#ASMEND 25629; GFX940-NEXT: s_lshl_b32 s8, s3, 16 25630; GFX940-NEXT: ;;#ASMSTART 25631; GFX940-NEXT: ; use s8 25632; GFX940-NEXT: ;;#ASMEND 25633; GFX940-NEXT: s_setpc_b64 s[30:31] 25634 %vec0 = call <8 x i16> asm "; def $0", "=s"() 25635 %vec1 = call <8 x i16> asm "; def $0", "=s"() 25636 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 poison, i32 14> 25637 call void asm 
sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 25638 ret void 25639} 25640 25641define void @s_shuffle_v2i16_v8i16__0_14() { 25642; GFX900-LABEL: s_shuffle_v2i16_v8i16__0_14: 25643; GFX900: ; %bb.0: 25644; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 25645; GFX900-NEXT: ;;#ASMSTART 25646; GFX900-NEXT: ; def s[8:11] 25647; GFX900-NEXT: ;;#ASMEND 25648; GFX900-NEXT: ;;#ASMSTART 25649; GFX900-NEXT: ; def s[4:7] 25650; GFX900-NEXT: ;;#ASMEND 25651; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s11 25652; GFX900-NEXT: ;;#ASMSTART 25653; GFX900-NEXT: ; use s8 25654; GFX900-NEXT: ;;#ASMEND 25655; GFX900-NEXT: s_setpc_b64 s[30:31] 25656; 25657; GFX90A-LABEL: s_shuffle_v2i16_v8i16__0_14: 25658; GFX90A: ; %bb.0: 25659; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 25660; GFX90A-NEXT: ;;#ASMSTART 25661; GFX90A-NEXT: ; def s[8:11] 25662; GFX90A-NEXT: ;;#ASMEND 25663; GFX90A-NEXT: ;;#ASMSTART 25664; GFX90A-NEXT: ; def s[4:7] 25665; GFX90A-NEXT: ;;#ASMEND 25666; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s11 25667; GFX90A-NEXT: ;;#ASMSTART 25668; GFX90A-NEXT: ; use s8 25669; GFX90A-NEXT: ;;#ASMEND 25670; GFX90A-NEXT: s_setpc_b64 s[30:31] 25671; 25672; GFX940-LABEL: s_shuffle_v2i16_v8i16__0_14: 25673; GFX940: ; %bb.0: 25674; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 25675; GFX940-NEXT: ;;#ASMSTART 25676; GFX940-NEXT: ; def s[0:3] 25677; GFX940-NEXT: ;;#ASMEND 25678; GFX940-NEXT: ;;#ASMSTART 25679; GFX940-NEXT: ; def s[4:7] 25680; GFX940-NEXT: ;;#ASMEND 25681; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s7 25682; GFX940-NEXT: ;;#ASMSTART 25683; GFX940-NEXT: ; use s8 25684; GFX940-NEXT: ;;#ASMEND 25685; GFX940-NEXT: s_setpc_b64 s[30:31] 25686 %vec0 = call <8 x i16> asm "; def $0", "=s"() 25687 %vec1 = call <8 x i16> asm "; def $0", "=s"() 25688 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 0, i32 14> 25689 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 25690 ret void 25691} 25692 25693define void @s_shuffle_v2i16_v8i16__1_14() { 
25694; GFX900-LABEL: s_shuffle_v2i16_v8i16__1_14: 25695; GFX900: ; %bb.0: 25696; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 25697; GFX900-NEXT: ;;#ASMSTART 25698; GFX900-NEXT: ; def s[4:7] 25699; GFX900-NEXT: ;;#ASMEND 25700; GFX900-NEXT: ;;#ASMSTART 25701; GFX900-NEXT: ; def s[8:11] 25702; GFX900-NEXT: ;;#ASMEND 25703; GFX900-NEXT: s_lshr_b32 s4, s4, 16 25704; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s11 25705; GFX900-NEXT: ;;#ASMSTART 25706; GFX900-NEXT: ; use s8 25707; GFX900-NEXT: ;;#ASMEND 25708; GFX900-NEXT: s_setpc_b64 s[30:31] 25709; 25710; GFX90A-LABEL: s_shuffle_v2i16_v8i16__1_14: 25711; GFX90A: ; %bb.0: 25712; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 25713; GFX90A-NEXT: ;;#ASMSTART 25714; GFX90A-NEXT: ; def s[4:7] 25715; GFX90A-NEXT: ;;#ASMEND 25716; GFX90A-NEXT: ;;#ASMSTART 25717; GFX90A-NEXT: ; def s[8:11] 25718; GFX90A-NEXT: ;;#ASMEND 25719; GFX90A-NEXT: s_lshr_b32 s4, s4, 16 25720; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s11 25721; GFX90A-NEXT: ;;#ASMSTART 25722; GFX90A-NEXT: ; use s8 25723; GFX90A-NEXT: ;;#ASMEND 25724; GFX90A-NEXT: s_setpc_b64 s[30:31] 25725; 25726; GFX940-LABEL: s_shuffle_v2i16_v8i16__1_14: 25727; GFX940: ; %bb.0: 25728; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 25729; GFX940-NEXT: ;;#ASMSTART 25730; GFX940-NEXT: ; def s[0:3] 25731; GFX940-NEXT: ;;#ASMEND 25732; GFX940-NEXT: s_lshr_b32 s0, s0, 16 25733; GFX940-NEXT: ;;#ASMSTART 25734; GFX940-NEXT: ; def s[4:7] 25735; GFX940-NEXT: ;;#ASMEND 25736; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s7 25737; GFX940-NEXT: ;;#ASMSTART 25738; GFX940-NEXT: ; use s8 25739; GFX940-NEXT: ;;#ASMEND 25740; GFX940-NEXT: s_setpc_b64 s[30:31] 25741 %vec0 = call <8 x i16> asm "; def $0", "=s"() 25742 %vec1 = call <8 x i16> asm "; def $0", "=s"() 25743 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 1, i32 14> 25744 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 25745 ret void 25746} 25747 25748define void @s_shuffle_v2i16_v8i16__2_14() { 
25749; GFX900-LABEL: s_shuffle_v2i16_v8i16__2_14: 25750; GFX900: ; %bb.0: 25751; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 25752; GFX900-NEXT: ;;#ASMSTART 25753; GFX900-NEXT: ; def s[8:11] 25754; GFX900-NEXT: ;;#ASMEND 25755; GFX900-NEXT: ;;#ASMSTART 25756; GFX900-NEXT: ; def s[4:7] 25757; GFX900-NEXT: ;;#ASMEND 25758; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s11 25759; GFX900-NEXT: ;;#ASMSTART 25760; GFX900-NEXT: ; use s8 25761; GFX900-NEXT: ;;#ASMEND 25762; GFX900-NEXT: s_setpc_b64 s[30:31] 25763; 25764; GFX90A-LABEL: s_shuffle_v2i16_v8i16__2_14: 25765; GFX90A: ; %bb.0: 25766; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 25767; GFX90A-NEXT: ;;#ASMSTART 25768; GFX90A-NEXT: ; def s[8:11] 25769; GFX90A-NEXT: ;;#ASMEND 25770; GFX90A-NEXT: ;;#ASMSTART 25771; GFX90A-NEXT: ; def s[4:7] 25772; GFX90A-NEXT: ;;#ASMEND 25773; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s11 25774; GFX90A-NEXT: ;;#ASMSTART 25775; GFX90A-NEXT: ; use s8 25776; GFX90A-NEXT: ;;#ASMEND 25777; GFX90A-NEXT: s_setpc_b64 s[30:31] 25778; 25779; GFX940-LABEL: s_shuffle_v2i16_v8i16__2_14: 25780; GFX940: ; %bb.0: 25781; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 25782; GFX940-NEXT: ;;#ASMSTART 25783; GFX940-NEXT: ; def s[0:3] 25784; GFX940-NEXT: ;;#ASMEND 25785; GFX940-NEXT: ;;#ASMSTART 25786; GFX940-NEXT: ; def s[4:7] 25787; GFX940-NEXT: ;;#ASMEND 25788; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s7 25789; GFX940-NEXT: ;;#ASMSTART 25790; GFX940-NEXT: ; use s8 25791; GFX940-NEXT: ;;#ASMEND 25792; GFX940-NEXT: s_setpc_b64 s[30:31] 25793 %vec0 = call <8 x i16> asm "; def $0", "=s"() 25794 %vec1 = call <8 x i16> asm "; def $0", "=s"() 25795 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 2, i32 14> 25796 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 25797 ret void 25798} 25799 25800define void @s_shuffle_v2i16_v8i16__3_14() { 25801; GFX900-LABEL: s_shuffle_v2i16_v8i16__3_14: 25802; GFX900: ; %bb.0: 25803; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) 25804; GFX900-NEXT: ;;#ASMSTART 25805; GFX900-NEXT: ; def s[4:7] 25806; GFX900-NEXT: ;;#ASMEND 25807; GFX900-NEXT: ;;#ASMSTART 25808; GFX900-NEXT: ; def s[8:11] 25809; GFX900-NEXT: ;;#ASMEND 25810; GFX900-NEXT: s_lshr_b32 s4, s5, 16 25811; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s11 25812; GFX900-NEXT: ;;#ASMSTART 25813; GFX900-NEXT: ; use s8 25814; GFX900-NEXT: ;;#ASMEND 25815; GFX900-NEXT: s_setpc_b64 s[30:31] 25816; 25817; GFX90A-LABEL: s_shuffle_v2i16_v8i16__3_14: 25818; GFX90A: ; %bb.0: 25819; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 25820; GFX90A-NEXT: ;;#ASMSTART 25821; GFX90A-NEXT: ; def s[4:7] 25822; GFX90A-NEXT: ;;#ASMEND 25823; GFX90A-NEXT: ;;#ASMSTART 25824; GFX90A-NEXT: ; def s[8:11] 25825; GFX90A-NEXT: ;;#ASMEND 25826; GFX90A-NEXT: s_lshr_b32 s4, s5, 16 25827; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s11 25828; GFX90A-NEXT: ;;#ASMSTART 25829; GFX90A-NEXT: ; use s8 25830; GFX90A-NEXT: ;;#ASMEND 25831; GFX90A-NEXT: s_setpc_b64 s[30:31] 25832; 25833; GFX940-LABEL: s_shuffle_v2i16_v8i16__3_14: 25834; GFX940: ; %bb.0: 25835; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 25836; GFX940-NEXT: ;;#ASMSTART 25837; GFX940-NEXT: ; def s[0:3] 25838; GFX940-NEXT: ;;#ASMEND 25839; GFX940-NEXT: s_lshr_b32 s0, s1, 16 25840; GFX940-NEXT: ;;#ASMSTART 25841; GFX940-NEXT: ; def s[4:7] 25842; GFX940-NEXT: ;;#ASMEND 25843; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s7 25844; GFX940-NEXT: ;;#ASMSTART 25845; GFX940-NEXT: ; use s8 25846; GFX940-NEXT: ;;#ASMEND 25847; GFX940-NEXT: s_setpc_b64 s[30:31] 25848 %vec0 = call <8 x i16> asm "; def $0", "=s"() 25849 %vec1 = call <8 x i16> asm "; def $0", "=s"() 25850 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 3, i32 14> 25851 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 25852 ret void 25853} 25854 25855define void @s_shuffle_v2i16_v8i16__4_14() { 25856; GFX900-LABEL: s_shuffle_v2i16_v8i16__4_14: 25857; GFX900: ; %bb.0: 25858; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) 25859; GFX900-NEXT: ;;#ASMSTART 25860; GFX900-NEXT: ; def s[8:11] 25861; GFX900-NEXT: ;;#ASMEND 25862; GFX900-NEXT: ;;#ASMSTART 25863; GFX900-NEXT: ; def s[4:7] 25864; GFX900-NEXT: ;;#ASMEND 25865; GFX900-NEXT: s_pack_ll_b32_b16 s8, s6, s11 25866; GFX900-NEXT: ;;#ASMSTART 25867; GFX900-NEXT: ; use s8 25868; GFX900-NEXT: ;;#ASMEND 25869; GFX900-NEXT: s_setpc_b64 s[30:31] 25870; 25871; GFX90A-LABEL: s_shuffle_v2i16_v8i16__4_14: 25872; GFX90A: ; %bb.0: 25873; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 25874; GFX90A-NEXT: ;;#ASMSTART 25875; GFX90A-NEXT: ; def s[8:11] 25876; GFX90A-NEXT: ;;#ASMEND 25877; GFX90A-NEXT: ;;#ASMSTART 25878; GFX90A-NEXT: ; def s[4:7] 25879; GFX90A-NEXT: ;;#ASMEND 25880; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s6, s11 25881; GFX90A-NEXT: ;;#ASMSTART 25882; GFX90A-NEXT: ; use s8 25883; GFX90A-NEXT: ;;#ASMEND 25884; GFX90A-NEXT: s_setpc_b64 s[30:31] 25885; 25886; GFX940-LABEL: s_shuffle_v2i16_v8i16__4_14: 25887; GFX940: ; %bb.0: 25888; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 25889; GFX940-NEXT: ;;#ASMSTART 25890; GFX940-NEXT: ; def s[0:3] 25891; GFX940-NEXT: ;;#ASMEND 25892; GFX940-NEXT: ;;#ASMSTART 25893; GFX940-NEXT: ; def s[4:7] 25894; GFX940-NEXT: ;;#ASMEND 25895; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s7 25896; GFX940-NEXT: ;;#ASMSTART 25897; GFX940-NEXT: ; use s8 25898; GFX940-NEXT: ;;#ASMEND 25899; GFX940-NEXT: s_setpc_b64 s[30:31] 25900 %vec0 = call <8 x i16> asm "; def $0", "=s"() 25901 %vec1 = call <8 x i16> asm "; def $0", "=s"() 25902 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 4, i32 14> 25903 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 25904 ret void 25905} 25906 25907define void @s_shuffle_v2i16_v8i16__5_14() { 25908; GFX900-LABEL: s_shuffle_v2i16_v8i16__5_14: 25909; GFX900: ; %bb.0: 25910; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 25911; GFX900-NEXT: ;;#ASMSTART 25912; GFX900-NEXT: ; def s[4:7] 25913; GFX900-NEXT: ;;#ASMEND 25914; GFX900-NEXT: 
;;#ASMSTART 25915; GFX900-NEXT: ; def s[8:11] 25916; GFX900-NEXT: ;;#ASMEND 25917; GFX900-NEXT: s_lshr_b32 s4, s6, 16 25918; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s11 25919; GFX900-NEXT: ;;#ASMSTART 25920; GFX900-NEXT: ; use s8 25921; GFX900-NEXT: ;;#ASMEND 25922; GFX900-NEXT: s_setpc_b64 s[30:31] 25923; 25924; GFX90A-LABEL: s_shuffle_v2i16_v8i16__5_14: 25925; GFX90A: ; %bb.0: 25926; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 25927; GFX90A-NEXT: ;;#ASMSTART 25928; GFX90A-NEXT: ; def s[4:7] 25929; GFX90A-NEXT: ;;#ASMEND 25930; GFX90A-NEXT: ;;#ASMSTART 25931; GFX90A-NEXT: ; def s[8:11] 25932; GFX90A-NEXT: ;;#ASMEND 25933; GFX90A-NEXT: s_lshr_b32 s4, s6, 16 25934; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s11 25935; GFX90A-NEXT: ;;#ASMSTART 25936; GFX90A-NEXT: ; use s8 25937; GFX90A-NEXT: ;;#ASMEND 25938; GFX90A-NEXT: s_setpc_b64 s[30:31] 25939; 25940; GFX940-LABEL: s_shuffle_v2i16_v8i16__5_14: 25941; GFX940: ; %bb.0: 25942; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 25943; GFX940-NEXT: ;;#ASMSTART 25944; GFX940-NEXT: ; def s[0:3] 25945; GFX940-NEXT: ;;#ASMEND 25946; GFX940-NEXT: s_lshr_b32 s0, s2, 16 25947; GFX940-NEXT: ;;#ASMSTART 25948; GFX940-NEXT: ; def s[4:7] 25949; GFX940-NEXT: ;;#ASMEND 25950; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s7 25951; GFX940-NEXT: ;;#ASMSTART 25952; GFX940-NEXT: ; use s8 25953; GFX940-NEXT: ;;#ASMEND 25954; GFX940-NEXT: s_setpc_b64 s[30:31] 25955 %vec0 = call <8 x i16> asm "; def $0", "=s"() 25956 %vec1 = call <8 x i16> asm "; def $0", "=s"() 25957 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 5, i32 14> 25958 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 25959 ret void 25960} 25961 25962define void @s_shuffle_v2i16_v8i16__6_14() { 25963; GFX900-LABEL: s_shuffle_v2i16_v8i16__6_14: 25964; GFX900: ; %bb.0: 25965; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 25966; GFX900-NEXT: ;;#ASMSTART 25967; GFX900-NEXT: ; def s[8:11] 25968; GFX900-NEXT: ;;#ASMEND 25969; GFX900-NEXT: 
;;#ASMSTART 25970; GFX900-NEXT: ; def s[4:7] 25971; GFX900-NEXT: ;;#ASMEND 25972; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s11 25973; GFX900-NEXT: ;;#ASMSTART 25974; GFX900-NEXT: ; use s8 25975; GFX900-NEXT: ;;#ASMEND 25976; GFX900-NEXT: s_setpc_b64 s[30:31] 25977; 25978; GFX90A-LABEL: s_shuffle_v2i16_v8i16__6_14: 25979; GFX90A: ; %bb.0: 25980; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 25981; GFX90A-NEXT: ;;#ASMSTART 25982; GFX90A-NEXT: ; def s[8:11] 25983; GFX90A-NEXT: ;;#ASMEND 25984; GFX90A-NEXT: ;;#ASMSTART 25985; GFX90A-NEXT: ; def s[4:7] 25986; GFX90A-NEXT: ;;#ASMEND 25987; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s11 25988; GFX90A-NEXT: ;;#ASMSTART 25989; GFX90A-NEXT: ; use s8 25990; GFX90A-NEXT: ;;#ASMEND 25991; GFX90A-NEXT: s_setpc_b64 s[30:31] 25992; 25993; GFX940-LABEL: s_shuffle_v2i16_v8i16__6_14: 25994; GFX940: ; %bb.0: 25995; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 25996; GFX940-NEXT: ;;#ASMSTART 25997; GFX940-NEXT: ; def s[0:3] 25998; GFX940-NEXT: ;;#ASMEND 25999; GFX940-NEXT: ;;#ASMSTART 26000; GFX940-NEXT: ; def s[4:7] 26001; GFX940-NEXT: ;;#ASMEND 26002; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s7 26003; GFX940-NEXT: ;;#ASMSTART 26004; GFX940-NEXT: ; use s8 26005; GFX940-NEXT: ;;#ASMEND 26006; GFX940-NEXT: s_setpc_b64 s[30:31] 26007 %vec0 = call <8 x i16> asm "; def $0", "=s"() 26008 %vec1 = call <8 x i16> asm "; def $0", "=s"() 26009 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 6, i32 14> 26010 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 26011 ret void 26012} 26013 26014define void @s_shuffle_v2i16_v8i16__7_14() { 26015; GFX900-LABEL: s_shuffle_v2i16_v8i16__7_14: 26016; GFX900: ; %bb.0: 26017; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 26018; GFX900-NEXT: ;;#ASMSTART 26019; GFX900-NEXT: ; def s[4:7] 26020; GFX900-NEXT: ;;#ASMEND 26021; GFX900-NEXT: ;;#ASMSTART 26022; GFX900-NEXT: ; def s[8:11] 26023; GFX900-NEXT: ;;#ASMEND 26024; GFX900-NEXT: s_lshr_b32 s4, s7, 16 26025; 
GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s11 26026; GFX900-NEXT: ;;#ASMSTART 26027; GFX900-NEXT: ; use s8 26028; GFX900-NEXT: ;;#ASMEND 26029; GFX900-NEXT: s_setpc_b64 s[30:31] 26030; 26031; GFX90A-LABEL: s_shuffle_v2i16_v8i16__7_14: 26032; GFX90A: ; %bb.0: 26033; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 26034; GFX90A-NEXT: ;;#ASMSTART 26035; GFX90A-NEXT: ; def s[4:7] 26036; GFX90A-NEXT: ;;#ASMEND 26037; GFX90A-NEXT: ;;#ASMSTART 26038; GFX90A-NEXT: ; def s[8:11] 26039; GFX90A-NEXT: ;;#ASMEND 26040; GFX90A-NEXT: s_lshr_b32 s4, s7, 16 26041; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s11 26042; GFX90A-NEXT: ;;#ASMSTART 26043; GFX90A-NEXT: ; use s8 26044; GFX90A-NEXT: ;;#ASMEND 26045; GFX90A-NEXT: s_setpc_b64 s[30:31] 26046; 26047; GFX940-LABEL: s_shuffle_v2i16_v8i16__7_14: 26048; GFX940: ; %bb.0: 26049; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 26050; GFX940-NEXT: ;;#ASMSTART 26051; GFX940-NEXT: ; def s[0:3] 26052; GFX940-NEXT: ;;#ASMEND 26053; GFX940-NEXT: s_lshr_b32 s0, s3, 16 26054; GFX940-NEXT: ;;#ASMSTART 26055; GFX940-NEXT: ; def s[4:7] 26056; GFX940-NEXT: ;;#ASMEND 26057; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s7 26058; GFX940-NEXT: ;;#ASMSTART 26059; GFX940-NEXT: ; use s8 26060; GFX940-NEXT: ;;#ASMEND 26061; GFX940-NEXT: s_setpc_b64 s[30:31] 26062 %vec0 = call <8 x i16> asm "; def $0", "=s"() 26063 %vec1 = call <8 x i16> asm "; def $0", "=s"() 26064 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 7, i32 14> 26065 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 26066 ret void 26067} 26068 26069define void @s_shuffle_v2i16_v8i16__8_14() { 26070; GFX900-LABEL: s_shuffle_v2i16_v8i16__8_14: 26071; GFX900: ; %bb.0: 26072; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 26073; GFX900-NEXT: ;;#ASMSTART 26074; GFX900-NEXT: ; def s[4:7] 26075; GFX900-NEXT: ;;#ASMEND 26076; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s7 26077; GFX900-NEXT: ;;#ASMSTART 26078; GFX900-NEXT: ; use s8 26079; GFX900-NEXT: ;;#ASMEND 26080; 
GFX900-NEXT: s_setpc_b64 s[30:31] 26081; 26082; GFX90A-LABEL: s_shuffle_v2i16_v8i16__8_14: 26083; GFX90A: ; %bb.0: 26084; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 26085; GFX90A-NEXT: ;;#ASMSTART 26086; GFX90A-NEXT: ; def s[4:7] 26087; GFX90A-NEXT: ;;#ASMEND 26088; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s7 26089; GFX90A-NEXT: ;;#ASMSTART 26090; GFX90A-NEXT: ; use s8 26091; GFX90A-NEXT: ;;#ASMEND 26092; GFX90A-NEXT: s_setpc_b64 s[30:31] 26093; 26094; GFX940-LABEL: s_shuffle_v2i16_v8i16__8_14: 26095; GFX940: ; %bb.0: 26096; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 26097; GFX940-NEXT: ;;#ASMSTART 26098; GFX940-NEXT: ; def s[0:3] 26099; GFX940-NEXT: ;;#ASMEND 26100; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3 26101; GFX940-NEXT: ;;#ASMSTART 26102; GFX940-NEXT: ; use s8 26103; GFX940-NEXT: ;;#ASMEND 26104; GFX940-NEXT: s_setpc_b64 s[30:31] 26105 %vec0 = call <8 x i16> asm "; def $0", "=s"() 26106 %vec1 = call <8 x i16> asm "; def $0", "=s"() 26107 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 8, i32 14> 26108 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 26109 ret void 26110} 26111 26112define void @s_shuffle_v2i16_v8i16__9_14() { 26113; GFX900-LABEL: s_shuffle_v2i16_v8i16__9_14: 26114; GFX900: ; %bb.0: 26115; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 26116; GFX900-NEXT: ;;#ASMSTART 26117; GFX900-NEXT: ; def s[4:7] 26118; GFX900-NEXT: ;;#ASMEND 26119; GFX900-NEXT: s_lshr_b32 s4, s4, 16 26120; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s7 26121; GFX900-NEXT: ;;#ASMSTART 26122; GFX900-NEXT: ; use s8 26123; GFX900-NEXT: ;;#ASMEND 26124; GFX900-NEXT: s_setpc_b64 s[30:31] 26125; 26126; GFX90A-LABEL: s_shuffle_v2i16_v8i16__9_14: 26127; GFX90A: ; %bb.0: 26128; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 26129; GFX90A-NEXT: ;;#ASMSTART 26130; GFX90A-NEXT: ; def s[4:7] 26131; GFX90A-NEXT: ;;#ASMEND 26132; GFX90A-NEXT: s_lshr_b32 s4, s4, 16 26133; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s7 26134; 
GFX90A-NEXT: ;;#ASMSTART 26135; GFX90A-NEXT: ; use s8 26136; GFX90A-NEXT: ;;#ASMEND 26137; GFX90A-NEXT: s_setpc_b64 s[30:31] 26138; 26139; GFX940-LABEL: s_shuffle_v2i16_v8i16__9_14: 26140; GFX940: ; %bb.0: 26141; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 26142; GFX940-NEXT: ;;#ASMSTART 26143; GFX940-NEXT: ; def s[0:3] 26144; GFX940-NEXT: ;;#ASMEND 26145; GFX940-NEXT: s_lshr_b32 s0, s0, 16 26146; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3 26147; GFX940-NEXT: ;;#ASMSTART 26148; GFX940-NEXT: ; use s8 26149; GFX940-NEXT: ;;#ASMEND 26150; GFX940-NEXT: s_setpc_b64 s[30:31] 26151 %vec0 = call <8 x i16> asm "; def $0", "=s"() 26152 %vec1 = call <8 x i16> asm "; def $0", "=s"() 26153 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 9, i32 14> 26154 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 26155 ret void 26156} 26157 26158define void @s_shuffle_v2i16_v8i16__10_14() { 26159; GFX900-LABEL: s_shuffle_v2i16_v8i16__10_14: 26160; GFX900: ; %bb.0: 26161; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 26162; GFX900-NEXT: ;;#ASMSTART 26163; GFX900-NEXT: ; def s[4:7] 26164; GFX900-NEXT: ;;#ASMEND 26165; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s7 26166; GFX900-NEXT: ;;#ASMSTART 26167; GFX900-NEXT: ; use s8 26168; GFX900-NEXT: ;;#ASMEND 26169; GFX900-NEXT: s_setpc_b64 s[30:31] 26170; 26171; GFX90A-LABEL: s_shuffle_v2i16_v8i16__10_14: 26172; GFX90A: ; %bb.0: 26173; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 26174; GFX90A-NEXT: ;;#ASMSTART 26175; GFX90A-NEXT: ; def s[4:7] 26176; GFX90A-NEXT: ;;#ASMEND 26177; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s7 26178; GFX90A-NEXT: ;;#ASMSTART 26179; GFX90A-NEXT: ; use s8 26180; GFX90A-NEXT: ;;#ASMEND 26181; GFX90A-NEXT: s_setpc_b64 s[30:31] 26182; 26183; GFX940-LABEL: s_shuffle_v2i16_v8i16__10_14: 26184; GFX940: ; %bb.0: 26185; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 26186; GFX940-NEXT: ;;#ASMSTART 26187; GFX940-NEXT: ; def s[0:3] 26188; GFX940-NEXT: ;;#ASMEND 26189; 
GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s3 26190; GFX940-NEXT: ;;#ASMSTART 26191; GFX940-NEXT: ; use s8 26192; GFX940-NEXT: ;;#ASMEND 26193; GFX940-NEXT: s_setpc_b64 s[30:31] 26194 %vec0 = call <8 x i16> asm "; def $0", "=s"() 26195 %vec1 = call <8 x i16> asm "; def $0", "=s"() 26196 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 10, i32 14> 26197 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 26198 ret void 26199} 26200 26201define void @s_shuffle_v2i16_v8i16__11_14() { 26202; GFX900-LABEL: s_shuffle_v2i16_v8i16__11_14: 26203; GFX900: ; %bb.0: 26204; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 26205; GFX900-NEXT: ;;#ASMSTART 26206; GFX900-NEXT: ; def s[4:7] 26207; GFX900-NEXT: ;;#ASMEND 26208; GFX900-NEXT: s_lshr_b32 s4, s5, 16 26209; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s7 26210; GFX900-NEXT: ;;#ASMSTART 26211; GFX900-NEXT: ; use s8 26212; GFX900-NEXT: ;;#ASMEND 26213; GFX900-NEXT: s_setpc_b64 s[30:31] 26214; 26215; GFX90A-LABEL: s_shuffle_v2i16_v8i16__11_14: 26216; GFX90A: ; %bb.0: 26217; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 26218; GFX90A-NEXT: ;;#ASMSTART 26219; GFX90A-NEXT: ; def s[4:7] 26220; GFX90A-NEXT: ;;#ASMEND 26221; GFX90A-NEXT: s_lshr_b32 s4, s5, 16 26222; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s7 26223; GFX90A-NEXT: ;;#ASMSTART 26224; GFX90A-NEXT: ; use s8 26225; GFX90A-NEXT: ;;#ASMEND 26226; GFX90A-NEXT: s_setpc_b64 s[30:31] 26227; 26228; GFX940-LABEL: s_shuffle_v2i16_v8i16__11_14: 26229; GFX940: ; %bb.0: 26230; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 26231; GFX940-NEXT: ;;#ASMSTART 26232; GFX940-NEXT: ; def s[0:3] 26233; GFX940-NEXT: ;;#ASMEND 26234; GFX940-NEXT: s_lshr_b32 s0, s1, 16 26235; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3 26236; GFX940-NEXT: ;;#ASMSTART 26237; GFX940-NEXT: ; use s8 26238; GFX940-NEXT: ;;#ASMEND 26239; GFX940-NEXT: s_setpc_b64 s[30:31] 26240 %vec0 = call <8 x i16> asm "; def $0", "=s"() 26241 %vec1 = call <8 x i16> asm "; def $0", "=s"() 26242 
%shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 11, i32 14> 26243 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 26244 ret void 26245} 26246 26247define void @s_shuffle_v2i16_v8i16__12_14() { 26248; GFX900-LABEL: s_shuffle_v2i16_v8i16__12_14: 26249; GFX900: ; %bb.0: 26250; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 26251; GFX900-NEXT: ;;#ASMSTART 26252; GFX900-NEXT: ; def s[4:7] 26253; GFX900-NEXT: ;;#ASMEND 26254; GFX900-NEXT: s_pack_ll_b32_b16 s8, s6, s7 26255; GFX900-NEXT: ;;#ASMSTART 26256; GFX900-NEXT: ; use s8 26257; GFX900-NEXT: ;;#ASMEND 26258; GFX900-NEXT: s_setpc_b64 s[30:31] 26259; 26260; GFX90A-LABEL: s_shuffle_v2i16_v8i16__12_14: 26261; GFX90A: ; %bb.0: 26262; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 26263; GFX90A-NEXT: ;;#ASMSTART 26264; GFX90A-NEXT: ; def s[4:7] 26265; GFX90A-NEXT: ;;#ASMEND 26266; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s6, s7 26267; GFX90A-NEXT: ;;#ASMSTART 26268; GFX90A-NEXT: ; use s8 26269; GFX90A-NEXT: ;;#ASMEND 26270; GFX90A-NEXT: s_setpc_b64 s[30:31] 26271; 26272; GFX940-LABEL: s_shuffle_v2i16_v8i16__12_14: 26273; GFX940: ; %bb.0: 26274; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 26275; GFX940-NEXT: ;;#ASMSTART 26276; GFX940-NEXT: ; def s[0:3] 26277; GFX940-NEXT: ;;#ASMEND 26278; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s3 26279; GFX940-NEXT: ;;#ASMSTART 26280; GFX940-NEXT: ; use s8 26281; GFX940-NEXT: ;;#ASMEND 26282; GFX940-NEXT: s_setpc_b64 s[30:31] 26283 %vec0 = call <8 x i16> asm "; def $0", "=s"() 26284 %vec1 = call <8 x i16> asm "; def $0", "=s"() 26285 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 12, i32 14> 26286 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 26287 ret void 26288} 26289 26290define void @s_shuffle_v2i16_v8i16__13_14() { 26291; GFX900-LABEL: s_shuffle_v2i16_v8i16__13_14: 26292; GFX900: ; %bb.0: 26293; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 26294; GFX900-NEXT: ;;#ASMSTART 26295; 
GFX900-NEXT: ; def s[4:7] 26296; GFX900-NEXT: ;;#ASMEND 26297; GFX900-NEXT: s_lshr_b32 s4, s6, 16 26298; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s7 26299; GFX900-NEXT: ;;#ASMSTART 26300; GFX900-NEXT: ; use s8 26301; GFX900-NEXT: ;;#ASMEND 26302; GFX900-NEXT: s_setpc_b64 s[30:31] 26303; 26304; GFX90A-LABEL: s_shuffle_v2i16_v8i16__13_14: 26305; GFX90A: ; %bb.0: 26306; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 26307; GFX90A-NEXT: ;;#ASMSTART 26308; GFX90A-NEXT: ; def s[4:7] 26309; GFX90A-NEXT: ;;#ASMEND 26310; GFX90A-NEXT: s_lshr_b32 s4, s6, 16 26311; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s7 26312; GFX90A-NEXT: ;;#ASMSTART 26313; GFX90A-NEXT: ; use s8 26314; GFX90A-NEXT: ;;#ASMEND 26315; GFX90A-NEXT: s_setpc_b64 s[30:31] 26316; 26317; GFX940-LABEL: s_shuffle_v2i16_v8i16__13_14: 26318; GFX940: ; %bb.0: 26319; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 26320; GFX940-NEXT: ;;#ASMSTART 26321; GFX940-NEXT: ; def s[0:3] 26322; GFX940-NEXT: ;;#ASMEND 26323; GFX940-NEXT: s_lshr_b32 s0, s2, 16 26324; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3 26325; GFX940-NEXT: ;;#ASMSTART 26326; GFX940-NEXT: ; use s8 26327; GFX940-NEXT: ;;#ASMEND 26328; GFX940-NEXT: s_setpc_b64 s[30:31] 26329 %vec0 = call <8 x i16> asm "; def $0", "=s"() 26330 %vec1 = call <8 x i16> asm "; def $0", "=s"() 26331 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 13, i32 14> 26332 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 26333 ret void 26334} 26335 26336define void @s_shuffle_v2i16_v8i16__14_14() { 26337; GFX900-LABEL: s_shuffle_v2i16_v8i16__14_14: 26338; GFX900: ; %bb.0: 26339; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 26340; GFX900-NEXT: ;;#ASMSTART 26341; GFX900-NEXT: ; def s[4:7] 26342; GFX900-NEXT: ;;#ASMEND 26343; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s7 26344; GFX900-NEXT: ;;#ASMSTART 26345; GFX900-NEXT: ; use s8 26346; GFX900-NEXT: ;;#ASMEND 26347; GFX900-NEXT: s_setpc_b64 s[30:31] 26348; 26349; GFX90A-LABEL: 
s_shuffle_v2i16_v8i16__14_14: 26350; GFX90A: ; %bb.0: 26351; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 26352; GFX90A-NEXT: ;;#ASMSTART 26353; GFX90A-NEXT: ; def s[4:7] 26354; GFX90A-NEXT: ;;#ASMEND 26355; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s7 26356; GFX90A-NEXT: ;;#ASMSTART 26357; GFX90A-NEXT: ; use s8 26358; GFX90A-NEXT: ;;#ASMEND 26359; GFX90A-NEXT: s_setpc_b64 s[30:31] 26360; 26361; GFX940-LABEL: s_shuffle_v2i16_v8i16__14_14: 26362; GFX940: ; %bb.0: 26363; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 26364; GFX940-NEXT: ;;#ASMSTART 26365; GFX940-NEXT: ; def s[0:3] 26366; GFX940-NEXT: ;;#ASMEND 26367; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s3 26368; GFX940-NEXT: ;;#ASMSTART 26369; GFX940-NEXT: ; use s8 26370; GFX940-NEXT: ;;#ASMEND 26371; GFX940-NEXT: s_setpc_b64 s[30:31] 26372 %vec0 = call <8 x i16> asm "; def $0", "=s"() 26373 %vec1 = call <8 x i16> asm "; def $0", "=s"() 26374 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 14, i32 14> 26375 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 26376 ret void 26377} 26378 26379define void @s_shuffle_v2i16_v8i16__u_15() { 26380; GFX900-LABEL: s_shuffle_v2i16_v8i16__u_15: 26381; GFX900: ; %bb.0: 26382; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 26383; GFX900-NEXT: ;;#ASMSTART 26384; GFX900-NEXT: ; def s[4:7] 26385; GFX900-NEXT: ;;#ASMEND 26386; GFX900-NEXT: s_mov_b32 s8, s7 26387; GFX900-NEXT: ;;#ASMSTART 26388; GFX900-NEXT: ; use s8 26389; GFX900-NEXT: ;;#ASMEND 26390; GFX900-NEXT: s_setpc_b64 s[30:31] 26391; 26392; GFX90A-LABEL: s_shuffle_v2i16_v8i16__u_15: 26393; GFX90A: ; %bb.0: 26394; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 26395; GFX90A-NEXT: ;;#ASMSTART 26396; GFX90A-NEXT: ; def s[4:7] 26397; GFX90A-NEXT: ;;#ASMEND 26398; GFX90A-NEXT: s_mov_b32 s8, s7 26399; GFX90A-NEXT: ;;#ASMSTART 26400; GFX90A-NEXT: ; use s8 26401; GFX90A-NEXT: ;;#ASMEND 26402; GFX90A-NEXT: s_setpc_b64 s[30:31] 26403; 26404; GFX940-LABEL: 
s_shuffle_v2i16_v8i16__u_15: 26405; GFX940: ; %bb.0: 26406; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 26407; GFX940-NEXT: ;;#ASMSTART 26408; GFX940-NEXT: ; def s[0:3] 26409; GFX940-NEXT: ;;#ASMEND 26410; GFX940-NEXT: s_mov_b32 s8, s3 26411; GFX940-NEXT: ;;#ASMSTART 26412; GFX940-NEXT: ; use s8 26413; GFX940-NEXT: ;;#ASMEND 26414; GFX940-NEXT: s_setpc_b64 s[30:31] 26415 %vec0 = call <8 x i16> asm "; def $0", "=s"() 26416 %vec1 = call <8 x i16> asm "; def $0", "=s"() 26417 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 poison, i32 15> 26418 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 26419 ret void 26420} 26421 26422define void @s_shuffle_v2i16_v8i16__0_15() { 26423; GFX900-LABEL: s_shuffle_v2i16_v8i16__0_15: 26424; GFX900: ; %bb.0: 26425; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 26426; GFX900-NEXT: ;;#ASMSTART 26427; GFX900-NEXT: ; def s[8:11] 26428; GFX900-NEXT: ;;#ASMEND 26429; GFX900-NEXT: ;;#ASMSTART 26430; GFX900-NEXT: ; def s[4:7] 26431; GFX900-NEXT: ;;#ASMEND 26432; GFX900-NEXT: s_pack_lh_b32_b16 s8, s4, s11 26433; GFX900-NEXT: ;;#ASMSTART 26434; GFX900-NEXT: ; use s8 26435; GFX900-NEXT: ;;#ASMEND 26436; GFX900-NEXT: s_setpc_b64 s[30:31] 26437; 26438; GFX90A-LABEL: s_shuffle_v2i16_v8i16__0_15: 26439; GFX90A: ; %bb.0: 26440; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 26441; GFX90A-NEXT: ;;#ASMSTART 26442; GFX90A-NEXT: ; def s[8:11] 26443; GFX90A-NEXT: ;;#ASMEND 26444; GFX90A-NEXT: ;;#ASMSTART 26445; GFX90A-NEXT: ; def s[4:7] 26446; GFX90A-NEXT: ;;#ASMEND 26447; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s4, s11 26448; GFX90A-NEXT: ;;#ASMSTART 26449; GFX90A-NEXT: ; use s8 26450; GFX90A-NEXT: ;;#ASMEND 26451; GFX90A-NEXT: s_setpc_b64 s[30:31] 26452; 26453; GFX940-LABEL: s_shuffle_v2i16_v8i16__0_15: 26454; GFX940: ; %bb.0: 26455; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 26456; GFX940-NEXT: ;;#ASMSTART 26457; GFX940-NEXT: ; def s[0:3] 26458; GFX940-NEXT: ;;#ASMEND 26459; GFX940-NEXT: 
;;#ASMSTART 26460; GFX940-NEXT: ; def s[4:7] 26461; GFX940-NEXT: ;;#ASMEND 26462; GFX940-NEXT: s_pack_lh_b32_b16 s8, s0, s7 26463; GFX940-NEXT: ;;#ASMSTART 26464; GFX940-NEXT: ; use s8 26465; GFX940-NEXT: ;;#ASMEND 26466; GFX940-NEXT: s_setpc_b64 s[30:31] 26467 %vec0 = call <8 x i16> asm "; def $0", "=s"() 26468 %vec1 = call <8 x i16> asm "; def $0", "=s"() 26469 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 0, i32 15> 26470 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 26471 ret void 26472} 26473 26474define void @s_shuffle_v2i16_v8i16__1_15() { 26475; GFX900-LABEL: s_shuffle_v2i16_v8i16__1_15: 26476; GFX900: ; %bb.0: 26477; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 26478; GFX900-NEXT: ;;#ASMSTART 26479; GFX900-NEXT: ; def s[8:11] 26480; GFX900-NEXT: ;;#ASMEND 26481; GFX900-NEXT: ;;#ASMSTART 26482; GFX900-NEXT: ; def s[4:7] 26483; GFX900-NEXT: ;;#ASMEND 26484; GFX900-NEXT: s_pack_hh_b32_b16 s8, s4, s11 26485; GFX900-NEXT: ;;#ASMSTART 26486; GFX900-NEXT: ; use s8 26487; GFX900-NEXT: ;;#ASMEND 26488; GFX900-NEXT: s_setpc_b64 s[30:31] 26489; 26490; GFX90A-LABEL: s_shuffle_v2i16_v8i16__1_15: 26491; GFX90A: ; %bb.0: 26492; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 26493; GFX90A-NEXT: ;;#ASMSTART 26494; GFX90A-NEXT: ; def s[8:11] 26495; GFX90A-NEXT: ;;#ASMEND 26496; GFX90A-NEXT: ;;#ASMSTART 26497; GFX90A-NEXT: ; def s[4:7] 26498; GFX90A-NEXT: ;;#ASMEND 26499; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s4, s11 26500; GFX90A-NEXT: ;;#ASMSTART 26501; GFX90A-NEXT: ; use s8 26502; GFX90A-NEXT: ;;#ASMEND 26503; GFX90A-NEXT: s_setpc_b64 s[30:31] 26504; 26505; GFX940-LABEL: s_shuffle_v2i16_v8i16__1_15: 26506; GFX940: ; %bb.0: 26507; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 26508; GFX940-NEXT: ;;#ASMSTART 26509; GFX940-NEXT: ; def s[0:3] 26510; GFX940-NEXT: ;;#ASMEND 26511; GFX940-NEXT: ;;#ASMSTART 26512; GFX940-NEXT: ; def s[4:7] 26513; GFX940-NEXT: ;;#ASMEND 26514; GFX940-NEXT: s_pack_hh_b32_b16 s8, s0, s7 
26515; GFX940-NEXT: ;;#ASMSTART 26516; GFX940-NEXT: ; use s8 26517; GFX940-NEXT: ;;#ASMEND 26518; GFX940-NEXT: s_setpc_b64 s[30:31] 26519 %vec0 = call <8 x i16> asm "; def $0", "=s"() 26520 %vec1 = call <8 x i16> asm "; def $0", "=s"() 26521 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 1, i32 15> 26522 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 26523 ret void 26524} 26525 26526define void @s_shuffle_v2i16_v8i16__2_15() { 26527; GFX900-LABEL: s_shuffle_v2i16_v8i16__2_15: 26528; GFX900: ; %bb.0: 26529; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 26530; GFX900-NEXT: ;;#ASMSTART 26531; GFX900-NEXT: ; def s[8:11] 26532; GFX900-NEXT: ;;#ASMEND 26533; GFX900-NEXT: ;;#ASMSTART 26534; GFX900-NEXT: ; def s[4:7] 26535; GFX900-NEXT: ;;#ASMEND 26536; GFX900-NEXT: s_pack_lh_b32_b16 s8, s5, s11 26537; GFX900-NEXT: ;;#ASMSTART 26538; GFX900-NEXT: ; use s8 26539; GFX900-NEXT: ;;#ASMEND 26540; GFX900-NEXT: s_setpc_b64 s[30:31] 26541; 26542; GFX90A-LABEL: s_shuffle_v2i16_v8i16__2_15: 26543; GFX90A: ; %bb.0: 26544; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 26545; GFX90A-NEXT: ;;#ASMSTART 26546; GFX90A-NEXT: ; def s[8:11] 26547; GFX90A-NEXT: ;;#ASMEND 26548; GFX90A-NEXT: ;;#ASMSTART 26549; GFX90A-NEXT: ; def s[4:7] 26550; GFX90A-NEXT: ;;#ASMEND 26551; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s5, s11 26552; GFX90A-NEXT: ;;#ASMSTART 26553; GFX90A-NEXT: ; use s8 26554; GFX90A-NEXT: ;;#ASMEND 26555; GFX90A-NEXT: s_setpc_b64 s[30:31] 26556; 26557; GFX940-LABEL: s_shuffle_v2i16_v8i16__2_15: 26558; GFX940: ; %bb.0: 26559; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 26560; GFX940-NEXT: ;;#ASMSTART 26561; GFX940-NEXT: ; def s[0:3] 26562; GFX940-NEXT: ;;#ASMEND 26563; GFX940-NEXT: ;;#ASMSTART 26564; GFX940-NEXT: ; def s[4:7] 26565; GFX940-NEXT: ;;#ASMEND 26566; GFX940-NEXT: s_pack_lh_b32_b16 s8, s1, s7 26567; GFX940-NEXT: ;;#ASMSTART 26568; GFX940-NEXT: ; use s8 26569; GFX940-NEXT: ;;#ASMEND 26570; GFX940-NEXT: s_setpc_b64 
s[30:31] 26571 %vec0 = call <8 x i16> asm "; def $0", "=s"() 26572 %vec1 = call <8 x i16> asm "; def $0", "=s"() 26573 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 2, i32 15> 26574 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 26575 ret void 26576} 26577 26578define void @s_shuffle_v2i16_v8i16__3_15() { 26579; GFX900-LABEL: s_shuffle_v2i16_v8i16__3_15: 26580; GFX900: ; %bb.0: 26581; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 26582; GFX900-NEXT: ;;#ASMSTART 26583; GFX900-NEXT: ; def s[8:11] 26584; GFX900-NEXT: ;;#ASMEND 26585; GFX900-NEXT: ;;#ASMSTART 26586; GFX900-NEXT: ; def s[4:7] 26587; GFX900-NEXT: ;;#ASMEND 26588; GFX900-NEXT: s_pack_hh_b32_b16 s8, s5, s11 26589; GFX900-NEXT: ;;#ASMSTART 26590; GFX900-NEXT: ; use s8 26591; GFX900-NEXT: ;;#ASMEND 26592; GFX900-NEXT: s_setpc_b64 s[30:31] 26593; 26594; GFX90A-LABEL: s_shuffle_v2i16_v8i16__3_15: 26595; GFX90A: ; %bb.0: 26596; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 26597; GFX90A-NEXT: ;;#ASMSTART 26598; GFX90A-NEXT: ; def s[8:11] 26599; GFX90A-NEXT: ;;#ASMEND 26600; GFX90A-NEXT: ;;#ASMSTART 26601; GFX90A-NEXT: ; def s[4:7] 26602; GFX90A-NEXT: ;;#ASMEND 26603; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s5, s11 26604; GFX90A-NEXT: ;;#ASMSTART 26605; GFX90A-NEXT: ; use s8 26606; GFX90A-NEXT: ;;#ASMEND 26607; GFX90A-NEXT: s_setpc_b64 s[30:31] 26608; 26609; GFX940-LABEL: s_shuffle_v2i16_v8i16__3_15: 26610; GFX940: ; %bb.0: 26611; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 26612; GFX940-NEXT: ;;#ASMSTART 26613; GFX940-NEXT: ; def s[0:3] 26614; GFX940-NEXT: ;;#ASMEND 26615; GFX940-NEXT: ;;#ASMSTART 26616; GFX940-NEXT: ; def s[4:7] 26617; GFX940-NEXT: ;;#ASMEND 26618; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s7 26619; GFX940-NEXT: ;;#ASMSTART 26620; GFX940-NEXT: ; use s8 26621; GFX940-NEXT: ;;#ASMEND 26622; GFX940-NEXT: s_setpc_b64 s[30:31] 26623 %vec0 = call <8 x i16> asm "; def $0", "=s"() 26624 %vec1 = call <8 x i16> asm "; def $0", "=s"() 26625 %shuf = 
shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 3, i32 15> 26626 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 26627 ret void 26628} 26629 26630define void @s_shuffle_v2i16_v8i16__4_15() { 26631; GFX900-LABEL: s_shuffle_v2i16_v8i16__4_15: 26632; GFX900: ; %bb.0: 26633; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 26634; GFX900-NEXT: ;;#ASMSTART 26635; GFX900-NEXT: ; def s[8:11] 26636; GFX900-NEXT: ;;#ASMEND 26637; GFX900-NEXT: ;;#ASMSTART 26638; GFX900-NEXT: ; def s[4:7] 26639; GFX900-NEXT: ;;#ASMEND 26640; GFX900-NEXT: s_pack_lh_b32_b16 s8, s6, s11 26641; GFX900-NEXT: ;;#ASMSTART 26642; GFX900-NEXT: ; use s8 26643; GFX900-NEXT: ;;#ASMEND 26644; GFX900-NEXT: s_setpc_b64 s[30:31] 26645; 26646; GFX90A-LABEL: s_shuffle_v2i16_v8i16__4_15: 26647; GFX90A: ; %bb.0: 26648; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 26649; GFX90A-NEXT: ;;#ASMSTART 26650; GFX90A-NEXT: ; def s[8:11] 26651; GFX90A-NEXT: ;;#ASMEND 26652; GFX90A-NEXT: ;;#ASMSTART 26653; GFX90A-NEXT: ; def s[4:7] 26654; GFX90A-NEXT: ;;#ASMEND 26655; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s6, s11 26656; GFX90A-NEXT: ;;#ASMSTART 26657; GFX90A-NEXT: ; use s8 26658; GFX90A-NEXT: ;;#ASMEND 26659; GFX90A-NEXT: s_setpc_b64 s[30:31] 26660; 26661; GFX940-LABEL: s_shuffle_v2i16_v8i16__4_15: 26662; GFX940: ; %bb.0: 26663; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 26664; GFX940-NEXT: ;;#ASMSTART 26665; GFX940-NEXT: ; def s[0:3] 26666; GFX940-NEXT: ;;#ASMEND 26667; GFX940-NEXT: ;;#ASMSTART 26668; GFX940-NEXT: ; def s[4:7] 26669; GFX940-NEXT: ;;#ASMEND 26670; GFX940-NEXT: s_pack_lh_b32_b16 s8, s2, s7 26671; GFX940-NEXT: ;;#ASMSTART 26672; GFX940-NEXT: ; use s8 26673; GFX940-NEXT: ;;#ASMEND 26674; GFX940-NEXT: s_setpc_b64 s[30:31] 26675 %vec0 = call <8 x i16> asm "; def $0", "=s"() 26676 %vec1 = call <8 x i16> asm "; def $0", "=s"() 26677 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 4, i32 15> 26678 call void asm sideeffect "; use $0", "{s8}"(<2 x 
i16> %shuf) 26679 ret void 26680} 26681 26682define void @s_shuffle_v2i16_v8i16__5_15() { 26683; GFX900-LABEL: s_shuffle_v2i16_v8i16__5_15: 26684; GFX900: ; %bb.0: 26685; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 26686; GFX900-NEXT: ;;#ASMSTART 26687; GFX900-NEXT: ; def s[8:11] 26688; GFX900-NEXT: ;;#ASMEND 26689; GFX900-NEXT: ;;#ASMSTART 26690; GFX900-NEXT: ; def s[4:7] 26691; GFX900-NEXT: ;;#ASMEND 26692; GFX900-NEXT: s_pack_hh_b32_b16 s8, s6, s11 26693; GFX900-NEXT: ;;#ASMSTART 26694; GFX900-NEXT: ; use s8 26695; GFX900-NEXT: ;;#ASMEND 26696; GFX900-NEXT: s_setpc_b64 s[30:31] 26697; 26698; GFX90A-LABEL: s_shuffle_v2i16_v8i16__5_15: 26699; GFX90A: ; %bb.0: 26700; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 26701; GFX90A-NEXT: ;;#ASMSTART 26702; GFX90A-NEXT: ; def s[8:11] 26703; GFX90A-NEXT: ;;#ASMEND 26704; GFX90A-NEXT: ;;#ASMSTART 26705; GFX90A-NEXT: ; def s[4:7] 26706; GFX90A-NEXT: ;;#ASMEND 26707; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s6, s11 26708; GFX90A-NEXT: ;;#ASMSTART 26709; GFX90A-NEXT: ; use s8 26710; GFX90A-NEXT: ;;#ASMEND 26711; GFX90A-NEXT: s_setpc_b64 s[30:31] 26712; 26713; GFX940-LABEL: s_shuffle_v2i16_v8i16__5_15: 26714; GFX940: ; %bb.0: 26715; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 26716; GFX940-NEXT: ;;#ASMSTART 26717; GFX940-NEXT: ; def s[0:3] 26718; GFX940-NEXT: ;;#ASMEND 26719; GFX940-NEXT: ;;#ASMSTART 26720; GFX940-NEXT: ; def s[4:7] 26721; GFX940-NEXT: ;;#ASMEND 26722; GFX940-NEXT: s_pack_hh_b32_b16 s8, s2, s7 26723; GFX940-NEXT: ;;#ASMSTART 26724; GFX940-NEXT: ; use s8 26725; GFX940-NEXT: ;;#ASMEND 26726; GFX940-NEXT: s_setpc_b64 s[30:31] 26727 %vec0 = call <8 x i16> asm "; def $0", "=s"() 26728 %vec1 = call <8 x i16> asm "; def $0", "=s"() 26729 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 5, i32 15> 26730 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 26731 ret void 26732} 26733 26734define void @s_shuffle_v2i16_v8i16__6_15() { 26735; GFX900-LABEL: 
s_shuffle_v2i16_v8i16__6_15: 26736; GFX900: ; %bb.0: 26737; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 26738; GFX900-NEXT: ;;#ASMSTART 26739; GFX900-NEXT: ; def s[8:11] 26740; GFX900-NEXT: ;;#ASMEND 26741; GFX900-NEXT: ;;#ASMSTART 26742; GFX900-NEXT: ; def s[4:7] 26743; GFX900-NEXT: ;;#ASMEND 26744; GFX900-NEXT: s_pack_lh_b32_b16 s8, s7, s11 26745; GFX900-NEXT: ;;#ASMSTART 26746; GFX900-NEXT: ; use s8 26747; GFX900-NEXT: ;;#ASMEND 26748; GFX900-NEXT: s_setpc_b64 s[30:31] 26749; 26750; GFX90A-LABEL: s_shuffle_v2i16_v8i16__6_15: 26751; GFX90A: ; %bb.0: 26752; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 26753; GFX90A-NEXT: ;;#ASMSTART 26754; GFX90A-NEXT: ; def s[8:11] 26755; GFX90A-NEXT: ;;#ASMEND 26756; GFX90A-NEXT: ;;#ASMSTART 26757; GFX90A-NEXT: ; def s[4:7] 26758; GFX90A-NEXT: ;;#ASMEND 26759; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s7, s11 26760; GFX90A-NEXT: ;;#ASMSTART 26761; GFX90A-NEXT: ; use s8 26762; GFX90A-NEXT: ;;#ASMEND 26763; GFX90A-NEXT: s_setpc_b64 s[30:31] 26764; 26765; GFX940-LABEL: s_shuffle_v2i16_v8i16__6_15: 26766; GFX940: ; %bb.0: 26767; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 26768; GFX940-NEXT: ;;#ASMSTART 26769; GFX940-NEXT: ; def s[0:3] 26770; GFX940-NEXT: ;;#ASMEND 26771; GFX940-NEXT: ;;#ASMSTART 26772; GFX940-NEXT: ; def s[4:7] 26773; GFX940-NEXT: ;;#ASMEND 26774; GFX940-NEXT: s_pack_lh_b32_b16 s8, s3, s7 26775; GFX940-NEXT: ;;#ASMSTART 26776; GFX940-NEXT: ; use s8 26777; GFX940-NEXT: ;;#ASMEND 26778; GFX940-NEXT: s_setpc_b64 s[30:31] 26779 %vec0 = call <8 x i16> asm "; def $0", "=s"() 26780 %vec1 = call <8 x i16> asm "; def $0", "=s"() 26781 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 6, i32 15> 26782 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 26783 ret void 26784} 26785 26786define void @s_shuffle_v2i16_v8i16__7_15() { 26787; GFX900-LABEL: s_shuffle_v2i16_v8i16__7_15: 26788; GFX900: ; %bb.0: 26789; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 26790; 
GFX900-NEXT: ;;#ASMSTART 26791; GFX900-NEXT: ; def s[8:11] 26792; GFX900-NEXT: ;;#ASMEND 26793; GFX900-NEXT: ;;#ASMSTART 26794; GFX900-NEXT: ; def s[4:7] 26795; GFX900-NEXT: ;;#ASMEND 26796; GFX900-NEXT: s_pack_hh_b32_b16 s8, s7, s11 26797; GFX900-NEXT: ;;#ASMSTART 26798; GFX900-NEXT: ; use s8 26799; GFX900-NEXT: ;;#ASMEND 26800; GFX900-NEXT: s_setpc_b64 s[30:31] 26801; 26802; GFX90A-LABEL: s_shuffle_v2i16_v8i16__7_15: 26803; GFX90A: ; %bb.0: 26804; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 26805; GFX90A-NEXT: ;;#ASMSTART 26806; GFX90A-NEXT: ; def s[8:11] 26807; GFX90A-NEXT: ;;#ASMEND 26808; GFX90A-NEXT: ;;#ASMSTART 26809; GFX90A-NEXT: ; def s[4:7] 26810; GFX90A-NEXT: ;;#ASMEND 26811; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s7, s11 26812; GFX90A-NEXT: ;;#ASMSTART 26813; GFX90A-NEXT: ; use s8 26814; GFX90A-NEXT: ;;#ASMEND 26815; GFX90A-NEXT: s_setpc_b64 s[30:31] 26816; 26817; GFX940-LABEL: s_shuffle_v2i16_v8i16__7_15: 26818; GFX940: ; %bb.0: 26819; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 26820; GFX940-NEXT: ;;#ASMSTART 26821; GFX940-NEXT: ; def s[0:3] 26822; GFX940-NEXT: ;;#ASMEND 26823; GFX940-NEXT: ;;#ASMSTART 26824; GFX940-NEXT: ; def s[4:7] 26825; GFX940-NEXT: ;;#ASMEND 26826; GFX940-NEXT: s_pack_hh_b32_b16 s8, s3, s7 26827; GFX940-NEXT: ;;#ASMSTART 26828; GFX940-NEXT: ; use s8 26829; GFX940-NEXT: ;;#ASMEND 26830; GFX940-NEXT: s_setpc_b64 s[30:31] 26831 %vec0 = call <8 x i16> asm "; def $0", "=s"() 26832 %vec1 = call <8 x i16> asm "; def $0", "=s"() 26833 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 7, i32 15> 26834 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 26835 ret void 26836} 26837 26838define void @s_shuffle_v2i16_v8i16__8_15() { 26839; GFX900-LABEL: s_shuffle_v2i16_v8i16__8_15: 26840; GFX900: ; %bb.0: 26841; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 26842; GFX900-NEXT: ;;#ASMSTART 26843; GFX900-NEXT: ; def s[4:7] 26844; GFX900-NEXT: ;;#ASMEND 26845; GFX900-NEXT: s_pack_lh_b32_b16 s8, 
s4, s7 26846; GFX900-NEXT: ;;#ASMSTART 26847; GFX900-NEXT: ; use s8 26848; GFX900-NEXT: ;;#ASMEND 26849; GFX900-NEXT: s_setpc_b64 s[30:31] 26850; 26851; GFX90A-LABEL: s_shuffle_v2i16_v8i16__8_15: 26852; GFX90A: ; %bb.0: 26853; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 26854; GFX90A-NEXT: ;;#ASMSTART 26855; GFX90A-NEXT: ; def s[4:7] 26856; GFX90A-NEXT: ;;#ASMEND 26857; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s4, s7 26858; GFX90A-NEXT: ;;#ASMSTART 26859; GFX90A-NEXT: ; use s8 26860; GFX90A-NEXT: ;;#ASMEND 26861; GFX90A-NEXT: s_setpc_b64 s[30:31] 26862; 26863; GFX940-LABEL: s_shuffle_v2i16_v8i16__8_15: 26864; GFX940: ; %bb.0: 26865; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 26866; GFX940-NEXT: ;;#ASMSTART 26867; GFX940-NEXT: ; def s[0:3] 26868; GFX940-NEXT: ;;#ASMEND 26869; GFX940-NEXT: s_pack_lh_b32_b16 s8, s0, s3 26870; GFX940-NEXT: ;;#ASMSTART 26871; GFX940-NEXT: ; use s8 26872; GFX940-NEXT: ;;#ASMEND 26873; GFX940-NEXT: s_setpc_b64 s[30:31] 26874 %vec0 = call <8 x i16> asm "; def $0", "=s"() 26875 %vec1 = call <8 x i16> asm "; def $0", "=s"() 26876 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 8, i32 15> 26877 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 26878 ret void 26879} 26880 26881define void @s_shuffle_v2i16_v8i16__9_15() { 26882; GFX900-LABEL: s_shuffle_v2i16_v8i16__9_15: 26883; GFX900: ; %bb.0: 26884; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 26885; GFX900-NEXT: ;;#ASMSTART 26886; GFX900-NEXT: ; def s[4:7] 26887; GFX900-NEXT: ;;#ASMEND 26888; GFX900-NEXT: s_pack_hh_b32_b16 s8, s4, s7 26889; GFX900-NEXT: ;;#ASMSTART 26890; GFX900-NEXT: ; use s8 26891; GFX900-NEXT: ;;#ASMEND 26892; GFX900-NEXT: s_setpc_b64 s[30:31] 26893; 26894; GFX90A-LABEL: s_shuffle_v2i16_v8i16__9_15: 26895; GFX90A: ; %bb.0: 26896; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 26897; GFX90A-NEXT: ;;#ASMSTART 26898; GFX90A-NEXT: ; def s[4:7] 26899; GFX90A-NEXT: ;;#ASMEND 26900; GFX90A-NEXT: s_pack_hh_b32_b16 s8, 
s4, s7 26901; GFX90A-NEXT: ;;#ASMSTART 26902; GFX90A-NEXT: ; use s8 26903; GFX90A-NEXT: ;;#ASMEND 26904; GFX90A-NEXT: s_setpc_b64 s[30:31] 26905; 26906; GFX940-LABEL: s_shuffle_v2i16_v8i16__9_15: 26907; GFX940: ; %bb.0: 26908; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 26909; GFX940-NEXT: ;;#ASMSTART 26910; GFX940-NEXT: ; def s[0:3] 26911; GFX940-NEXT: ;;#ASMEND 26912; GFX940-NEXT: s_pack_hh_b32_b16 s8, s0, s3 26913; GFX940-NEXT: ;;#ASMSTART 26914; GFX940-NEXT: ; use s8 26915; GFX940-NEXT: ;;#ASMEND 26916; GFX940-NEXT: s_setpc_b64 s[30:31] 26917 %vec0 = call <8 x i16> asm "; def $0", "=s"() 26918 %vec1 = call <8 x i16> asm "; def $0", "=s"() 26919 %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 9, i32 15> 26920 call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf) 26921 ret void 26922} 26923 26924define void @s_shuffle_v2i16_v8i16__10_15() { 26925; GFX900-LABEL: s_shuffle_v2i16_v8i16__10_15: 26926; GFX900: ; %bb.0: 26927; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 26928; GFX900-NEXT: ;;#ASMSTART 26929; GFX900-NEXT: ; def s[4:7] 26930; GFX900-NEXT: ;;#ASMEND 26931; GFX900-NEXT: s_pack_lh_b32_b16 s8, s5, s7 26932; GFX900-NEXT: ;;#ASMSTART 26933; GFX900-NEXT: ; use s8 26934; GFX900-NEXT: ;;#ASMEND 26935; GFX900-NEXT: s_setpc_b64 s[30:31] 26936; 26937; GFX90A-LABEL: s_shuffle_v2i16_v8i16__10_15: 26938; GFX90A: ; %bb.0: 26939; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 26940; GFX90A-NEXT: ;;#ASMSTART 26941; GFX90A-NEXT: ; def s[4:7] 26942; GFX90A-NEXT: ;;#ASMEND 26943; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s5, s7 26944; GFX90A-NEXT: ;;#ASMSTART 26945; GFX90A-NEXT: ; use s8 26946; GFX90A-NEXT: ;;#ASMEND 26947; GFX90A-NEXT: s_setpc_b64 s[30:31] 26948; 26949; GFX940-LABEL: s_shuffle_v2i16_v8i16__10_15: 26950; GFX940: ; %bb.0: 26951; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 26952; GFX940-NEXT: ;;#ASMSTART 26953; GFX940-NEXT: ; def s[0:3] 26954; GFX940-NEXT: ;;#ASMEND 26955; GFX940-NEXT: s_pack_lh_b32_b16 
s8, s1, s3
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; use s8
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <8 x i16> asm "; def $0", "=s"()
  %vec1 = call <8 x i16> asm "; def $0", "=s"()
  %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 10, i32 15>
  call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf)
  ret void
}

; SGPR shuffle with mask <11, 15>. Both indices are >= 8, so they select
; elements 3 and 7 of %vec1; %vec0 is not referenced by the mask. The
; <2 x i16> result feeds an inline-asm use constrained to s8, and every
; configuration expects a single s_pack_hh_b32_b16 into s8.
; Only one "def" appears in the expected output -- presumably the unused
; %vec0 asm (not marked sideeffect) is dropped; confirm if this matters.
; The gfx940 run expects the def in s[0:3] where the other runs use s[4:7].
define void @s_shuffle_v2i16_v8i16__11_15() {
; GFX900-LABEL: s_shuffle_v2i16_v8i16__11_15:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_pack_hh_b32_b16 s8, s5, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s8
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v2i16_v8i16__11_15:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s5, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s8
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v2i16_v8i16__11_15:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[0:3]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_pack_hh_b32_b16 s8, s1, s3
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; use s8
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <8 x i16> asm "; def $0", "=s"()
  %vec1 = call <8 x i16> asm "; def $0", "=s"()
  %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 11, i32 15>
  call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf)
  ret void
}

; SGPR shuffle with mask <12, 15>. Both indices are >= 8, so they select
; elements 4 and 7 of %vec1; %vec0 is not referenced by the mask. The
; <2 x i16> result feeds an inline-asm use constrained to s8, and every
; configuration expects a single s_pack_lh_b32_b16 into s8.
; Only one "def" appears in the expected output -- presumably the unused
; %vec0 asm (not marked sideeffect) is dropped; confirm if this matters.
; The gfx940 run expects the def in s[0:3] where the other runs use s[4:7].
define void @s_shuffle_v2i16_v8i16__12_15() {
; GFX900-LABEL: s_shuffle_v2i16_v8i16__12_15:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_pack_lh_b32_b16 s8, s6, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s8
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v2i16_v8i16__12_15:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_pack_lh_b32_b16 s8, s6, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s8
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v2i16_v8i16__12_15:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[0:3]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_pack_lh_b32_b16 s8, s2, s3
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; use s8
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <8 x i16> asm "; def $0", "=s"()
  %vec1 = call <8 x i16> asm "; def $0", "=s"()
  %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 12, i32 15>
  call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf)
  ret void
}

; SGPR shuffle with mask <13, 15>. Both indices are >= 8, so they select
; elements 5 and 7 of %vec1; %vec0 is not referenced by the mask. The
; <2 x i16> result feeds an inline-asm use constrained to s8, and every
; configuration expects a single s_pack_hh_b32_b16 into s8.
; Only one "def" appears in the expected output -- presumably the unused
; %vec0 asm (not marked sideeffect) is dropped; confirm if this matters.
; The gfx940 run expects the def in s[0:3] where the other runs use s[4:7].
define void @s_shuffle_v2i16_v8i16__13_15() {
; GFX900-LABEL: s_shuffle_v2i16_v8i16__13_15:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_pack_hh_b32_b16 s8, s6, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s8
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v2i16_v8i16__13_15:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_pack_hh_b32_b16 s8, s6, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s8
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v2i16_v8i16__13_15:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[0:3]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_pack_hh_b32_b16 s8, s2, s3
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; use s8
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <8 x i16> asm "; def $0", "=s"()
  %vec1 = call <8 x i16> asm "; def $0", "=s"()
  %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 13, i32 15>
  call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf)
  ret void
}

; SGPR shuffle with mask <14, 15>. Both indices are >= 8, so they select
; elements 6 and 7 of %vec1; %vec0 is not referenced by the mask. The
; <2 x i16> result feeds an inline-asm use constrained to s8. No pack
; instruction is expected here: every configuration expects one s_mov_b32
; copying the last register of the def (s7, or s3 on gfx940) into s8.
; Only one "def" appears in the expected output -- presumably the unused
; %vec0 asm (not marked sideeffect) is dropped; confirm if this matters.
define void @s_shuffle_v2i16_v8i16__14_15() {
; GFX900-LABEL: s_shuffle_v2i16_v8i16__14_15:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s8
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v2i16_v8i16__14_15:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s8
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v2i16_v8i16__14_15:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[0:3]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_mov_b32 s8, s3
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; use s8
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <8 x i16> asm "; def $0", "=s"()
  %vec1 = call <8 x i16> asm "; def $0", "=s"()
  %shuf = shufflevector <8 x i16> %vec0, <8 x i16> %vec1, <2 x i32> <i32 14, i32 15>
  call void asm sideeffect "; use $0", "{s8}"(<2 x i16> %shuf)
  ret void
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GFX90APLUS: {{.*}}