; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX90A %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX940 %s

; Shuffle tests: each function builds a <4 x i64> from <3 x i64> inputs with
; shufflevector and stores it through %ptr. The inputs come from inline asm
; with an "=v" output constraint. Each function name ends in its shuffle mask,
; one field per result lane, with 'u' standing for a poison lane. Per the
; shufflevector rules, mask values 0-2 pick lanes of the first operand and
; values 3-5 pick lanes of the second operand.

; All four mask lanes are poison; the expected output has no asm def and no
; store, only the entry wait and the return.
define void @v_shuffle_v4i64_v3i64__u_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX9-LABEL: v_shuffle_v4i64_v3i64__u_u_u_u:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <3 x i64> asm "; def $0", "=v"()
  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> poison
  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
  ret void
}

; Mask <0, u, u, u> - result lane 0 takes %vec0[0]; second operand is poison.
define void @v_shuffle_v4i64_v3i64__0_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v3i64__0_u_u_u:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    v_mov_b32_e32 v6, 0
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:5]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    global_store_dwordx4 v6, v[4:7], s[16:17] offset:16
; GFX900-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v3i64__0_u_u_u:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:5]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    global_store_dwordx4 v6, v[4:7], s[16:17] offset:16
; GFX90A-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v4i64_v3i64__0_u_u_u:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    v_mov_b32_e32 v6, 0
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:5]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    global_store_dwordx4 v6, v[4:7], s[0:1] offset:16 sc0 sc1
; GFX940-NEXT:    global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <3 x i64> asm "; def $0", "=v"()
  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
  ret void
}

; Mask <1, u, u, u> - result lane 0 takes %vec0[1]; second operand is poison.
define void @v_shuffle_v4i64_v3i64__1_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v3i64__1_u_u_u:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    v_mov_b32_e32 v6, 0
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:5]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v3i64__1_u_u_u:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:5]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v4i64_v3i64__1_u_u_u:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    v_mov_b32_e32 v6, 0
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:5]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <3 x i64> asm "; def $0", "=v"()
  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
  ret void
}

; Mask <2, u, u, u> - result lane 0 takes %vec0[2]; second operand is poison.
define void @v_shuffle_v4i64_v3i64__2_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v3i64__2_u_u_u:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:5]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_mov_b32_e32 v6, 0
; GFX900-NEXT:    v_mov_b32_e32 v0, v4
; GFX900-NEXT:    v_mov_b32_e32 v1, v5
; GFX900-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v3i64__2_u_u_u:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:5]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
; GFX90A-NEXT:    v_mov_b32_e32 v0, v4
; GFX90A-NEXT:    v_mov_b32_e32 v1, v5
; GFX90A-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v4i64_v3i64__2_u_u_u:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:5]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    v_mov_b32_e32 v6, 0
; GFX940-NEXT:    v_mov_b32_e32 v0, v4
; GFX940-NEXT:    v_mov_b32_e32 v1, v5
; GFX940-NEXT:    global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <3 x i64> asm "; def $0", "=v"()
  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 2, i32 poison, i32 poison, i32 poison>
  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
  ret void
}

; Mask <3, u, u, u> - index 3 names lane 0 of the second operand, which is
; poison here; the expected output has no asm def and no store.
define void @v_shuffle_v4i64_v3i64__3_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX9-LABEL: v_shuffle_v4i64_v3i64__3_u_u_u:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <3 x i64> asm "; def $0", "=v"()
  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 3, i32 poison, i32 poison, i32 poison>
  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
  ret void
}

; Mask <4, u, u, u> - result lane 0 takes %vec1[1].
define void @v_shuffle_v4i64_v3i64__4_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v3i64__4_u_u_u:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    v_mov_b32_e32 v6, 0
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:5]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v3i64__4_u_u_u:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:5]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v4i64_v3i64__4_u_u_u:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    v_mov_b32_e32 v6, 0
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:5]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <3 x i64> asm "; def $0", "=v"()
  %vec1 = call <3 x i64> asm "; def $0", "=v"()
  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 4, i32 poison, i32 poison, i32 poison>
  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
  ret void
}

; Mask <5, u, u, u> - result lane 0 takes %vec1[2].
define void @v_shuffle_v4i64_v3i64__5_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_u_u_u:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:5]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_mov_b32_e32 v6, 0
; GFX900-NEXT:    v_mov_b32_e32 v0, v4
; GFX900-NEXT:    v_mov_b32_e32 v1, v5
; GFX900-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_u_u_u:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:5]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
; GFX90A-NEXT:    v_mov_b32_e32 v0, v4
; GFX90A-NEXT:    v_mov_b32_e32 v1, v5
; GFX90A-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_u_u_u:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:5]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    v_mov_b32_e32 v6, 0
; GFX940-NEXT:    v_mov_b32_e32 v0, v4
; GFX940-NEXT:    v_mov_b32_e32 v1, v5
; GFX940-NEXT:    global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <3 x i64> asm "; def $0", "=v"()
  %vec1 = call <3 x i64> asm "; def $0", "=v"()
  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 poison, i32 poison, i32 poison>
  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
  ret void
}

; Mask <5, 0, u, u> - lane 0 takes %vec1[2], lane 1 takes %vec0[0].
define void @v_shuffle_v4i64_v3i64__5_0_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_0_u_u:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:5]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[2:7]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_mov_b32_e32 v8, 0
; GFX900-NEXT:    v_mov_b32_e32 v2, v6
; GFX900-NEXT:    v_mov_b32_e32 v3, v7
; GFX900-NEXT:    v_mov_b32_e32 v4, v0
; GFX900-NEXT:    v_mov_b32_e32 v5, v1
; GFX900-NEXT:    global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_0_u_u:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:5]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[2:7]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_mov_b32_e32 v8, 0
; GFX90A-NEXT:    v_mov_b32_e32 v2, v6
; GFX90A-NEXT:    v_mov_b32_e32 v3, v7
; GFX90A-NEXT:    v_mov_b32_e32 v4, v0
; GFX90A-NEXT:    v_mov_b32_e32 v5, v1
; GFX90A-NEXT:    global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_0_u_u:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:5]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    v_mov_b32_e32 v8, 0
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[2:7]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_nop 0
; GFX940-NEXT:    v_mov_b32_e32 v2, v6
; GFX940-NEXT:    v_mov_b32_e32 v3, v7
; GFX940-NEXT:    v_mov_b32_e32 v4, v0
; GFX940-NEXT:    v_mov_b32_e32 v5, v1
; GFX940-NEXT:    global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <3 x i64> asm "; def $0", "=v"()
  %vec1 = call <3 x i64> asm "; def $0", "=v"()
  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 0, i32 poison, i32 poison>
  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
  ret void
}

; Mask <5, 1, u, u> - lane 0 takes %vec1[2], lane 1 takes %vec0[1].
define void @v_shuffle_v4i64_v3i64__5_1_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_1_u_u:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:5]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_mov_b32_e32 v10, 0
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[4:9]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_mov_b32_e32 v0, v8
; GFX900-NEXT:    v_mov_b32_e32 v1, v9
; GFX900-NEXT:    global_store_dwordx4 v10, v[0:3], s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_1_u_u:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:5]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_mov_b32_e32 v10, 0
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[4:9]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_mov_b32_e32 v0, v8
; GFX90A-NEXT:    v_mov_b32_e32 v1, v9
; GFX90A-NEXT:    global_store_dwordx4 v10, v[0:3], s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_1_u_u:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:5]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    v_mov_b32_e32 v10, 0
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[4:9]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_nop 0
; GFX940-NEXT:    v_mov_b32_e32 v0, v8
; GFX940-NEXT:    v_mov_b32_e32 v1, v9
; GFX940-NEXT:    global_store_dwordx4 v10, v[0:3], s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <3 x i64> asm "; def $0", "=v"()
  %vec1 = call <3 x i64> asm "; def $0", "=v"()
  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 1, i32 poison, i32 poison>
  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
  ret void
}

; Mask <5, 2, u, u> - lane 0 takes %vec1[2], lane 1 takes %vec0[2].
define void @v_shuffle_v4i64_v3i64__5_2_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_2_u_u:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:5]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_mov_b32_e32 v12, 0
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[6:11]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_mov_b32_e32 v2, v10
; GFX900-NEXT:    v_mov_b32_e32 v3, v11
; GFX900-NEXT:    global_store_dwordx4 v12, v[2:5], s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_2_u_u:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:5]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_mov_b32_e32 v12, 0
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[6:11]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_mov_b32_e32 v2, v10
; GFX90A-NEXT:    v_mov_b32_e32 v3, v11
; GFX90A-NEXT:    global_store_dwordx4 v12, v[2:5], s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_2_u_u:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:5]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    v_mov_b32_e32 v12, 0
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[6:11]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_nop 0
; GFX940-NEXT:    v_mov_b32_e32 v2, v10
; GFX940-NEXT:    v_mov_b32_e32 v3, v11
; GFX940-NEXT:    global_store_dwordx4 v12, v[2:5], s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <3 x i64> asm "; def $0", "=v"()
  %vec1 = call <3 x i64> asm "; def $0", "=v"()
  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 2, i32 poison, i32 poison>
  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
  ret void
}

; Mask <5, 3, u, u> - lane 0 takes %vec1[2], lane 1 takes %vec1[0].
define void @v_shuffle_v4i64_v3i64__5_3_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_3_u_u:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:5]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_mov_b32_e32 v6, 0
; GFX900-NEXT:    v_mov_b32_e32 v2, v4
; GFX900-NEXT:    v_mov_b32_e32 v3, v5
; GFX900-NEXT:    v_mov_b32_e32 v4, v0
; GFX900-NEXT:    v_mov_b32_e32 v5, v1
; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_3_u_u:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:5]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
; GFX90A-NEXT:    v_mov_b32_e32 v2, v4
; GFX90A-NEXT:    v_mov_b32_e32 v3, v5
; GFX90A-NEXT:    v_mov_b32_e32 v4, v0
; GFX90A-NEXT:    v_mov_b32_e32 v5, v1
; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_3_u_u:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:5]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    v_mov_b32_e32 v6, 0
; GFX940-NEXT:    v_mov_b32_e32 v2, v4
; GFX940-NEXT:    v_mov_b32_e32 v3, v5
; GFX940-NEXT:    v_mov_b32_e32 v4, v0
; GFX940-NEXT:    v_mov_b32_e32 v5, v1
; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <3 x i64> asm "; def $0", "=v"()
  %vec1 = call <3 x i64> asm "; def $0", "=v"()
  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 3, i32 poison, i32 poison>
  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
  ret void
}

; Mask <5, 4, u, u> - lane 0 takes %vec1[2], lane 1 takes %vec1[1].
define void @v_shuffle_v4i64_v3i64__5_4_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_4_u_u:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:5]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_mov_b32_e32 v6, 0
; GFX900-NEXT:    v_mov_b32_e32 v0, v4
; GFX900-NEXT:    v_mov_b32_e32 v1, v5
; GFX900-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_4_u_u:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:5]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
; GFX90A-NEXT:    v_mov_b32_e32 v0, v4
; GFX90A-NEXT:    v_mov_b32_e32 v1, v5
; GFX90A-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_4_u_u:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:5]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    v_mov_b32_e32 v6, 0
; GFX940-NEXT:    v_mov_b32_e32 v0, v4
; GFX940-NEXT:    v_mov_b32_e32 v1, v5
; GFX940-NEXT:    global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <3 x i64> asm "; def $0", "=v"()
  %vec1 = call <3 x i64> asm "; def $0", "=v"()
  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 4, i32 poison, i32 poison>
  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
  ret void
}

; Mask <5, 5, u, u> - lanes 0 and 1 both take %vec1[2].
define void @v_shuffle_v4i64_v3i64__5_5_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_u_u:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:5]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_mov_b32_e32 v6, 0
; GFX900-NEXT:    v_mov_b32_e32 v2, v4
; GFX900-NEXT:    v_mov_b32_e32 v3, v5
; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_u_u:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:5]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
; GFX90A-NEXT:    v_mov_b32_e32 v2, v4
; GFX90A-NEXT:    v_mov_b32_e32 v3, v5
; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_u_u:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:5]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    v_mov_b32_e32 v6, 0
; GFX940-NEXT:    v_mov_b32_e32 v2, v4
; GFX940-NEXT:    v_mov_b32_e32 v3, v5
; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <3 x i64> asm "; def $0", "=v"()
  %vec1 = call <3 x i64> asm "; def $0", "=v"()
  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 poison, i32 poison>
  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
  ret void
}

; Mask <5, 5, 0, u> - lanes 0-1 take %vec1[2], lane 2 takes %vec0[0].
define void @v_shuffle_v4i64_v3i64__5_5_0_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_0_u:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:5]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[2:7]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_mov_b32_e32 v8, 0
; GFX900-NEXT:    v_mov_b32_e32 v4, v6
; GFX900-NEXT:    v_mov_b32_e32 v5, v7
; GFX900-NEXT:    global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
; GFX900-NEXT:    global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_0_u:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:5]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[2:7]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_mov_b32_e32 v8, 0
; GFX90A-NEXT:    v_mov_b32_e32 v4, v6
; GFX90A-NEXT:    v_mov_b32_e32 v5, v7
; GFX90A-NEXT:    global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
; GFX90A-NEXT:    global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_0_u:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:5]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    v_mov_b32_e32 v8, 0
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[2:7]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1
; GFX940-NEXT:    v_mov_b32_e32 v4, v6
; GFX940-NEXT:    v_mov_b32_e32 v5, v7
; GFX940-NEXT:    global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <3 x i64> asm "; def $0", "=v"()
  %vec1 = call <3 x i64> asm "; def $0", "=v"()
  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 0, i32 poison>
  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
  ret void
}

; Mask <5, 5, 1, u> - lanes 0-1 take %vec1[2], lane 2 takes %vec0[1].
define void @v_shuffle_v4i64_v3i64__5_5_1_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_1_u:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:5]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[4:9]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_mov_b32_e32 v10, 0
; GFX900-NEXT:    v_mov_b32_e32 v6, v8
; GFX900-NEXT:    v_mov_b32_e32 v7, v9
; GFX900-NEXT:    global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
; GFX900-NEXT:    global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_1_u:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:5]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[4:9]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_mov_b32_e32 v10, 0
; GFX90A-NEXT:    v_mov_b32_e32 v6, v8
; GFX90A-NEXT:    v_mov_b32_e32 v7, v9
; GFX90A-NEXT:    global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
; GFX90A-NEXT:    global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_1_u:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:5]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    v_mov_b32_e32 v10, 0
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[4:9]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    global_store_dwordx4 v10, v[2:5], s[0:1] offset:16 sc0 sc1
; GFX940-NEXT:    v_mov_b32_e32 v6, v8
; GFX940-NEXT:    v_mov_b32_e32 v7, v9
; GFX940-NEXT:    global_store_dwordx4 v10, v[6:9], s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <3 x i64> asm "; def $0", "=v"()
  %vec1 = call <3 x i64> asm "; def $0", "=v"()
  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 1, i32 poison>
  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
  ret void
}

; Mask <5, 5, 2, u> - lanes 0-1 take %vec1[2], lane 2 takes %vec0[2].
define void @v_shuffle_v4i64_v3i64__5_5_2_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_2_u:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:5]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[6:11]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_mov_b32_e32 v12, 0
; GFX900-NEXT:    v_mov_b32_e32 v0, v4
; GFX900-NEXT:    v_mov_b32_e32 v1, v5
; GFX900-NEXT:    v_mov_b32_e32 v8, v10
; GFX900-NEXT:    v_mov_b32_e32 v9, v11
; GFX900-NEXT:    global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
; GFX900-NEXT:    global_store_dwordx4 v12, v[8:11], s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_2_u:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:5]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[6:11]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_mov_b32_e32 v12, 0
; GFX90A-NEXT:    v_mov_b32_e32 v0, v4
; GFX90A-NEXT:    v_mov_b32_e32 v1, v5
; GFX90A-NEXT:    v_mov_b32_e32 v8, v10
; GFX90A-NEXT:    v_mov_b32_e32 v9, v11
; GFX90A-NEXT:    global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
; GFX90A-NEXT:    global_store_dwordx4 v12, v[8:11], s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_2_u:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:5]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[6:11]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    v_mov_b32_e32 v12, 0
; GFX940-NEXT:    v_mov_b32_e32 v0, v4
; GFX940-NEXT:    v_mov_b32_e32 v1, v5
; GFX940-NEXT:    v_mov_b32_e32 v8, v10
; GFX940-NEXT:    v_mov_b32_e32 v9, v11
; GFX940-NEXT:    global_store_dwordx4 v12, v[0:3], s[0:1] offset:16 sc0 sc1
; GFX940-NEXT:    global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <3 x i64> asm "; def $0", "=v"()
  %vec1 = call <3 x i64> asm "; def $0", "=v"()
  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 2, i32 poison>
  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
  ret void
}

; Mask <5, 5, 3, u> - lanes 0-1 take %vec1[2], lane 2 takes %vec1[0].
define void @v_shuffle_v4i64_v3i64__5_5_3_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_3_u:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    v_mov_b32_e32 v6, 0
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:5]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
; GFX900-NEXT:    s_nop 0
; GFX900-NEXT:    v_mov_b32_e32 v2, v4
; GFX900-NEXT:    v_mov_b32_e32 v3, v5
; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_3_u:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:5]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
; GFX90A-NEXT:    s_nop 0
; GFX90A-NEXT:    v_mov_b32_e32 v2, v4
; GFX90A-NEXT:    v_mov_b32_e32 v3, v5
; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_3_u:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    v_mov_b32_e32 v6, 0
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:5]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 sc0 sc1
; GFX940-NEXT:    s_nop 1
; GFX940-NEXT:    v_mov_b32_e32 v2, v4
; GFX940-NEXT:    v_mov_b32_e32 v3, v5
; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <3 x i64> asm "; def $0", "=v"()
  %vec1 = call <3 x i64> asm "; def $0", "=v"()
  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 3, i32 poison>
  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
  ret void
}

; Mask <5, 5, 4, u> - lanes 0-1 take %vec1[2], lane 2 takes %vec1[1].
define void @v_shuffle_v4i64_v3i64__5_5_4_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_4_u:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    v_mov_b32_e32 v6, 0
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:5]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
; GFX900-NEXT:    s_nop 0
; GFX900-NEXT:    v_mov_b32_e32 v2, v4
; GFX900-NEXT:    v_mov_b32_e32 v3, v5
; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_4_u:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:5]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
; GFX90A-NEXT:    s_nop 0
; GFX90A-NEXT:    v_mov_b32_e32 v2, v4
; GFX90A-NEXT:    v_mov_b32_e32 v3, v5
; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_4_u:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    v_mov_b32_e32 v6, 0
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:5]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1
; GFX940-NEXT:    s_nop 1
; GFX940-NEXT:    v_mov_b32_e32 v2, v4
; GFX940-NEXT:    v_mov_b32_e32 v3, v5
; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <3 x i64> asm "; def $0", "=v"()
  %vec1 = call <3 x i64> asm "; def $0", "=v"()
  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 4, i32 poison>
  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
  ret void
}

; Mask <5, 5, 5, u> - lanes 0, 1 and 2 all take %vec1[2].
define void @v_shuffle_v4i64_v3i64__5_5_5_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_5_u:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:5]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_mov_b32_e32 v6, 0
; GFX900-NEXT:    v_mov_b32_e32 v0, v4
; GFX900-NEXT:    v_mov_b32_e32 v1, v5
; GFX900-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
; GFX900-NEXT:    s_nop 0
; GFX900-NEXT:    v_mov_b32_e32 v2, v4
; GFX900-NEXT:    v_mov_b32_e32 v3, v5
; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_5_u:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:5]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
; GFX90A-NEXT:    v_mov_b32_e32 v0, v4
; GFX90A-NEXT:    v_mov_b32_e32 v1, v5
; GFX90A-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
; GFX90A-NEXT:    s_nop 0
; GFX90A-NEXT:    v_mov_b32_e32 v2, v4
; GFX90A-NEXT:    v_mov_b32_e32 v3, v5
; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_5_u:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:5]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    v_mov_b32_e32 v6, 0
; GFX940-NEXT:    v_mov_b32_e32 v0, v4
; GFX940-NEXT:    v_mov_b32_e32 v1, v5
; GFX940-NEXT:    global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 sc0 sc1
; GFX940-NEXT:    s_nop 1
; GFX940-NEXT:    v_mov_b32_e32 v2, v4
; GFX940-NEXT:    v_mov_b32_e32 v3, v5
; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <3 x i64> asm "; def $0", "=v"()
  %vec1 = call <3 x i64> asm "; def $0", "=v"()
  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 5, i32 poison>
  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
  ret void
}
900 901define void @v_shuffle_v4i64_v3i64__5_5_5_0(ptr addrspace(1) inreg %ptr) { 902; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_5_0: 903; GFX900: ; %bb.0: 904; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 905; GFX900-NEXT: ;;#ASMSTART 906; GFX900-NEXT: ; def v[0:5] 907; GFX900-NEXT: ;;#ASMEND 908; GFX900-NEXT: ;;#ASMSTART 909; GFX900-NEXT: ; def v[2:7] 910; GFX900-NEXT: ;;#ASMEND 911; GFX900-NEXT: v_mov_b32_e32 v8, 0 912; GFX900-NEXT: v_mov_b32_e32 v2, v6 913; GFX900-NEXT: v_mov_b32_e32 v3, v7 914; GFX900-NEXT: 
v_mov_b32_e32 v4, v0 915; GFX900-NEXT: v_mov_b32_e32 v5, v1 916; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16 917; GFX900-NEXT: s_nop 0 918; GFX900-NEXT: v_mov_b32_e32 v4, v6 919; GFX900-NEXT: v_mov_b32_e32 v5, v7 920; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] 921; GFX900-NEXT: s_waitcnt vmcnt(0) 922; GFX900-NEXT: s_setpc_b64 s[30:31] 923; 924; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_5_0: 925; GFX90A: ; %bb.0: 926; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 927; GFX90A-NEXT: ;;#ASMSTART 928; GFX90A-NEXT: ; def v[0:5] 929; GFX90A-NEXT: ;;#ASMEND 930; GFX90A-NEXT: ;;#ASMSTART 931; GFX90A-NEXT: ; def v[2:7] 932; GFX90A-NEXT: ;;#ASMEND 933; GFX90A-NEXT: v_mov_b32_e32 v8, 0 934; GFX90A-NEXT: v_mov_b32_e32 v2, v6 935; GFX90A-NEXT: v_mov_b32_e32 v3, v7 936; GFX90A-NEXT: v_mov_b32_e32 v4, v0 937; GFX90A-NEXT: v_mov_b32_e32 v5, v1 938; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16 939; GFX90A-NEXT: s_nop 0 940; GFX90A-NEXT: v_mov_b32_e32 v4, v6 941; GFX90A-NEXT: v_mov_b32_e32 v5, v7 942; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] 943; GFX90A-NEXT: s_waitcnt vmcnt(0) 944; GFX90A-NEXT: s_setpc_b64 s[30:31] 945; 946; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_5_0: 947; GFX940: ; %bb.0: 948; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 949; GFX940-NEXT: ;;#ASMSTART 950; GFX940-NEXT: ; def v[0:5] 951; GFX940-NEXT: ;;#ASMEND 952; GFX940-NEXT: v_mov_b32_e32 v8, 0 953; GFX940-NEXT: ;;#ASMSTART 954; GFX940-NEXT: ; def v[2:7] 955; GFX940-NEXT: ;;#ASMEND 956; GFX940-NEXT: s_nop 0 957; GFX940-NEXT: v_mov_b32_e32 v2, v6 958; GFX940-NEXT: v_mov_b32_e32 v3, v7 959; GFX940-NEXT: v_mov_b32_e32 v4, v0 960; GFX940-NEXT: v_mov_b32_e32 v5, v1 961; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 sc0 sc1 962; GFX940-NEXT: s_nop 1 963; GFX940-NEXT: v_mov_b32_e32 v4, v6 964; GFX940-NEXT: v_mov_b32_e32 v5, v7 965; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 966; GFX940-NEXT: s_waitcnt 
vmcnt(0) 967; GFX940-NEXT: s_setpc_b64 s[30:31] 968 %vec0 = call <3 x i64> asm "; def $0", "=v"() 969 %vec1 = call <3 x i64> asm "; def $0", "=v"() 970 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 5, i32 0> 971 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 972 ret void 973} 974 975define void @v_shuffle_v4i64_v3i64__5_5_5_1(ptr addrspace(1) inreg %ptr) { 976; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_5_1: 977; GFX900: ; %bb.0: 978; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 979; GFX900-NEXT: ;;#ASMSTART 980; GFX900-NEXT: ; def v[0:5] 981; GFX900-NEXT: ;;#ASMEND 982; GFX900-NEXT: ;;#ASMSTART 983; GFX900-NEXT: ; def v[4:9] 984; GFX900-NEXT: ;;#ASMEND 985; GFX900-NEXT: v_mov_b32_e32 v10, 0 986; GFX900-NEXT: v_mov_b32_e32 v0, v8 987; GFX900-NEXT: v_mov_b32_e32 v1, v9 988; GFX900-NEXT: v_mov_b32_e32 v6, v8 989; GFX900-NEXT: v_mov_b32_e32 v7, v9 990; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16 991; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] 992; GFX900-NEXT: s_waitcnt vmcnt(0) 993; GFX900-NEXT: s_setpc_b64 s[30:31] 994; 995; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_5_1: 996; GFX90A: ; %bb.0: 997; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 998; GFX90A-NEXT: ;;#ASMSTART 999; GFX90A-NEXT: ; def v[0:5] 1000; GFX90A-NEXT: ;;#ASMEND 1001; GFX90A-NEXT: ;;#ASMSTART 1002; GFX90A-NEXT: ; def v[4:9] 1003; GFX90A-NEXT: ;;#ASMEND 1004; GFX90A-NEXT: v_mov_b32_e32 v10, 0 1005; GFX90A-NEXT: v_mov_b32_e32 v0, v8 1006; GFX90A-NEXT: v_mov_b32_e32 v1, v9 1007; GFX90A-NEXT: v_mov_b32_e32 v6, v8 1008; GFX90A-NEXT: v_mov_b32_e32 v7, v9 1009; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16 1010; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] 1011; GFX90A-NEXT: s_waitcnt vmcnt(0) 1012; GFX90A-NEXT: s_setpc_b64 s[30:31] 1013; 1014; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_5_1: 1015; GFX940: ; %bb.0: 1016; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1017; 
GFX940-NEXT: ;;#ASMSTART 1018; GFX940-NEXT: ; def v[0:5] 1019; GFX940-NEXT: ;;#ASMEND 1020; GFX940-NEXT: v_mov_b32_e32 v10, 0 1021; GFX940-NEXT: ;;#ASMSTART 1022; GFX940-NEXT: ; def v[4:9] 1023; GFX940-NEXT: ;;#ASMEND 1024; GFX940-NEXT: s_nop 0 1025; GFX940-NEXT: v_mov_b32_e32 v0, v8 1026; GFX940-NEXT: v_mov_b32_e32 v1, v9 1027; GFX940-NEXT: v_mov_b32_e32 v6, v8 1028; GFX940-NEXT: v_mov_b32_e32 v7, v9 1029; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16 sc0 sc1 1030; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] sc0 sc1 1031; GFX940-NEXT: s_waitcnt vmcnt(0) 1032; GFX940-NEXT: s_setpc_b64 s[30:31] 1033 %vec0 = call <3 x i64> asm "; def $0", "=v"() 1034 %vec1 = call <3 x i64> asm "; def $0", "=v"() 1035 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 5, i32 1> 1036 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 1037 ret void 1038} 1039 1040define void @v_shuffle_v4i64_v3i64__5_5_5_2(ptr addrspace(1) inreg %ptr) { 1041; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_5_2: 1042; GFX900: ; %bb.0: 1043; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1044; GFX900-NEXT: ;;#ASMSTART 1045; GFX900-NEXT: ; def v[0:5] 1046; GFX900-NEXT: ;;#ASMEND 1047; GFX900-NEXT: ;;#ASMSTART 1048; GFX900-NEXT: ; def v[6:11] 1049; GFX900-NEXT: ;;#ASMEND 1050; GFX900-NEXT: v_mov_b32_e32 v12, 0 1051; GFX900-NEXT: v_mov_b32_e32 v2, v10 1052; GFX900-NEXT: v_mov_b32_e32 v3, v11 1053; GFX900-NEXT: v_mov_b32_e32 v8, v10 1054; GFX900-NEXT: v_mov_b32_e32 v9, v11 1055; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16 1056; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] 1057; GFX900-NEXT: s_waitcnt vmcnt(0) 1058; GFX900-NEXT: s_setpc_b64 s[30:31] 1059; 1060; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_5_2: 1061; GFX90A: ; %bb.0: 1062; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1063; GFX90A-NEXT: ;;#ASMSTART 1064; GFX90A-NEXT: ; def v[0:5] 1065; GFX90A-NEXT: ;;#ASMEND 1066; GFX90A-NEXT: ;;#ASMSTART 1067; 
GFX90A-NEXT: ; def v[6:11] 1068; GFX90A-NEXT: ;;#ASMEND 1069; GFX90A-NEXT: v_mov_b32_e32 v12, 0 1070; GFX90A-NEXT: v_mov_b32_e32 v2, v10 1071; GFX90A-NEXT: v_mov_b32_e32 v3, v11 1072; GFX90A-NEXT: v_mov_b32_e32 v8, v10 1073; GFX90A-NEXT: v_mov_b32_e32 v9, v11 1074; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16 1075; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] 1076; GFX90A-NEXT: s_waitcnt vmcnt(0) 1077; GFX90A-NEXT: s_setpc_b64 s[30:31] 1078; 1079; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_5_2: 1080; GFX940: ; %bb.0: 1081; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1082; GFX940-NEXT: ;;#ASMSTART 1083; GFX940-NEXT: ; def v[0:5] 1084; GFX940-NEXT: ;;#ASMEND 1085; GFX940-NEXT: ;;#ASMSTART 1086; GFX940-NEXT: ; def v[6:11] 1087; GFX940-NEXT: ;;#ASMEND 1088; GFX940-NEXT: v_mov_b32_e32 v12, 0 1089; GFX940-NEXT: v_mov_b32_e32 v2, v10 1090; GFX940-NEXT: v_mov_b32_e32 v3, v11 1091; GFX940-NEXT: v_mov_b32_e32 v8, v10 1092; GFX940-NEXT: v_mov_b32_e32 v9, v11 1093; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 sc0 sc1 1094; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1 1095; GFX940-NEXT: s_waitcnt vmcnt(0) 1096; GFX940-NEXT: s_setpc_b64 s[30:31] 1097 %vec0 = call <3 x i64> asm "; def $0", "=v"() 1098 %vec1 = call <3 x i64> asm "; def $0", "=v"() 1099 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 5, i32 2> 1100 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 1101 ret void 1102} 1103 1104define void @v_shuffle_v4i64_v3i64__5_5_5_3(ptr addrspace(1) inreg %ptr) { 1105; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_5_3: 1106; GFX900: ; %bb.0: 1107; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1108; GFX900-NEXT: ;;#ASMSTART 1109; GFX900-NEXT: ; def v[0:5] 1110; GFX900-NEXT: ;;#ASMEND 1111; GFX900-NEXT: v_mov_b32_e32 v10, 0 1112; GFX900-NEXT: v_mov_b32_e32 v6, v4 1113; GFX900-NEXT: v_mov_b32_e32 v7, v5 1114; GFX900-NEXT: v_mov_b32_e32 v8, v0 1115; GFX900-NEXT: 
v_mov_b32_e32 v9, v1 1116; GFX900-NEXT: v_mov_b32_e32 v2, v4 1117; GFX900-NEXT: v_mov_b32_e32 v3, v5 1118; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16 1119; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] 1120; GFX900-NEXT: s_waitcnt vmcnt(0) 1121; GFX900-NEXT: s_setpc_b64 s[30:31] 1122; 1123; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_5_3: 1124; GFX90A: ; %bb.0: 1125; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1126; GFX90A-NEXT: ;;#ASMSTART 1127; GFX90A-NEXT: ; def v[0:5] 1128; GFX90A-NEXT: ;;#ASMEND 1129; GFX90A-NEXT: v_mov_b32_e32 v10, 0 1130; GFX90A-NEXT: v_mov_b32_e32 v6, v4 1131; GFX90A-NEXT: v_mov_b32_e32 v7, v5 1132; GFX90A-NEXT: v_mov_b32_e32 v8, v0 1133; GFX90A-NEXT: v_mov_b32_e32 v9, v1 1134; GFX90A-NEXT: v_mov_b32_e32 v2, v4 1135; GFX90A-NEXT: v_mov_b32_e32 v3, v5 1136; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16 1137; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] 1138; GFX90A-NEXT: s_waitcnt vmcnt(0) 1139; GFX90A-NEXT: s_setpc_b64 s[30:31] 1140; 1141; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_5_3: 1142; GFX940: ; %bb.0: 1143; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1144; GFX940-NEXT: ;;#ASMSTART 1145; GFX940-NEXT: ; def v[0:5] 1146; GFX940-NEXT: ;;#ASMEND 1147; GFX940-NEXT: v_mov_b32_e32 v10, 0 1148; GFX940-NEXT: v_mov_b32_e32 v6, v4 1149; GFX940-NEXT: v_mov_b32_e32 v7, v5 1150; GFX940-NEXT: v_mov_b32_e32 v8, v0 1151; GFX940-NEXT: v_mov_b32_e32 v9, v1 1152; GFX940-NEXT: v_mov_b32_e32 v2, v4 1153; GFX940-NEXT: v_mov_b32_e32 v3, v5 1154; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16 sc0 sc1 1155; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] sc0 sc1 1156; GFX940-NEXT: s_waitcnt vmcnt(0) 1157; GFX940-NEXT: s_setpc_b64 s[30:31] 1158 %vec0 = call <3 x i64> asm "; def $0", "=v"() 1159 %vec1 = call <3 x i64> asm "; def $0", "=v"() 1160 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 5, i32 3> 1161 store <4 x i64> 
%shuf, ptr addrspace(1) %ptr, align 32 1162 ret void 1163} 1164 1165define void @v_shuffle_v4i64_v3i64__5_5_5_4(ptr addrspace(1) inreg %ptr) { 1166; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_5_4: 1167; GFX900: ; %bb.0: 1168; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1169; GFX900-NEXT: ;;#ASMSTART 1170; GFX900-NEXT: ; def v[0:5] 1171; GFX900-NEXT: ;;#ASMEND 1172; GFX900-NEXT: v_mov_b32_e32 v6, 0 1173; GFX900-NEXT: v_mov_b32_e32 v0, v4 1174; GFX900-NEXT: v_mov_b32_e32 v1, v5 1175; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16 1176; GFX900-NEXT: s_nop 0 1177; GFX900-NEXT: v_mov_b32_e32 v2, v4 1178; GFX900-NEXT: v_mov_b32_e32 v3, v5 1179; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] 1180; GFX900-NEXT: s_waitcnt vmcnt(0) 1181; GFX900-NEXT: s_setpc_b64 s[30:31] 1182; 1183; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_5_4: 1184; GFX90A: ; %bb.0: 1185; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1186; GFX90A-NEXT: ;;#ASMSTART 1187; GFX90A-NEXT: ; def v[0:5] 1188; GFX90A-NEXT: ;;#ASMEND 1189; GFX90A-NEXT: v_mov_b32_e32 v6, 0 1190; GFX90A-NEXT: v_mov_b32_e32 v0, v4 1191; GFX90A-NEXT: v_mov_b32_e32 v1, v5 1192; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16 1193; GFX90A-NEXT: s_nop 0 1194; GFX90A-NEXT: v_mov_b32_e32 v2, v4 1195; GFX90A-NEXT: v_mov_b32_e32 v3, v5 1196; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] 1197; GFX90A-NEXT: s_waitcnt vmcnt(0) 1198; GFX90A-NEXT: s_setpc_b64 s[30:31] 1199; 1200; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_5_4: 1201; GFX940: ; %bb.0: 1202; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1203; GFX940-NEXT: ;;#ASMSTART 1204; GFX940-NEXT: ; def v[0:5] 1205; GFX940-NEXT: ;;#ASMEND 1206; GFX940-NEXT: v_mov_b32_e32 v6, 0 1207; GFX940-NEXT: v_mov_b32_e32 v0, v4 1208; GFX940-NEXT: v_mov_b32_e32 v1, v5 1209; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 sc0 sc1 1210; GFX940-NEXT: s_nop 1 1211; GFX940-NEXT: v_mov_b32_e32 v2, v4 1212; GFX940-NEXT: 
v_mov_b32_e32 v3, v5 1213; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 1214; GFX940-NEXT: s_waitcnt vmcnt(0) 1215; GFX940-NEXT: s_setpc_b64 s[30:31] 1216 %vec0 = call <3 x i64> asm "; def $0", "=v"() 1217 %vec1 = call <3 x i64> asm "; def $0", "=v"() 1218 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 5, i32 4> 1219 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 1220 ret void 1221} 1222 1223define void @v_shuffle_v4i64_v3i64__5_5_5_5(ptr addrspace(1) inreg %ptr) { 1224; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_5_5: 1225; GFX900: ; %bb.0: 1226; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1227; GFX900-NEXT: ;;#ASMSTART 1228; GFX900-NEXT: ; def v[0:5] 1229; GFX900-NEXT: ;;#ASMEND 1230; GFX900-NEXT: v_mov_b32_e32 v6, 0 1231; GFX900-NEXT: v_mov_b32_e32 v2, v4 1232; GFX900-NEXT: v_mov_b32_e32 v3, v5 1233; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16 1234; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] 1235; GFX900-NEXT: s_waitcnt vmcnt(0) 1236; GFX900-NEXT: s_setpc_b64 s[30:31] 1237; 1238; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_5_5: 1239; GFX90A: ; %bb.0: 1240; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1241; GFX90A-NEXT: ;;#ASMSTART 1242; GFX90A-NEXT: ; def v[0:5] 1243; GFX90A-NEXT: ;;#ASMEND 1244; GFX90A-NEXT: v_mov_b32_e32 v6, 0 1245; GFX90A-NEXT: v_mov_b32_e32 v2, v4 1246; GFX90A-NEXT: v_mov_b32_e32 v3, v5 1247; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16 1248; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] 1249; GFX90A-NEXT: s_waitcnt vmcnt(0) 1250; GFX90A-NEXT: s_setpc_b64 s[30:31] 1251; 1252; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_5_5: 1253; GFX940: ; %bb.0: 1254; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1255; GFX940-NEXT: ;;#ASMSTART 1256; GFX940-NEXT: ; def v[0:5] 1257; GFX940-NEXT: ;;#ASMEND 1258; GFX940-NEXT: v_mov_b32_e32 v6, 0 1259; GFX940-NEXT: v_mov_b32_e32 v2, v4 1260; GFX940-NEXT: v_mov_b32_e32 v3, v5 
1261; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1 1262; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 1263; GFX940-NEXT: s_waitcnt vmcnt(0) 1264; GFX940-NEXT: s_setpc_b64 s[30:31] 1265 %vec0 = call <3 x i64> asm "; def $0", "=v"() 1266 %vec1 = call <3 x i64> asm "; def $0", "=v"() 1267 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 5, i32 5> 1268 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 1269 ret void 1270} 1271 1272define void @v_shuffle_v4i64_v3i64__u_0_0_0(ptr addrspace(1) inreg %ptr) { 1273; GFX900-LABEL: v_shuffle_v4i64_v3i64__u_0_0_0: 1274; GFX900: ; %bb.0: 1275; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1276; GFX900-NEXT: ;;#ASMSTART 1277; GFX900-NEXT: ; def v[0:5] 1278; GFX900-NEXT: ;;#ASMEND 1279; GFX900-NEXT: v_mov_b32_e32 v6, 0 1280; GFX900-NEXT: v_mov_b32_e32 v2, v0 1281; GFX900-NEXT: v_mov_b32_e32 v3, v1 1282; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16 1283; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] 1284; GFX900-NEXT: s_waitcnt vmcnt(0) 1285; GFX900-NEXT: s_setpc_b64 s[30:31] 1286; 1287; GFX90A-LABEL: v_shuffle_v4i64_v3i64__u_0_0_0: 1288; GFX90A: ; %bb.0: 1289; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1290; GFX90A-NEXT: ;;#ASMSTART 1291; GFX90A-NEXT: ; def v[0:5] 1292; GFX90A-NEXT: ;;#ASMEND 1293; GFX90A-NEXT: v_mov_b32_e32 v6, 0 1294; GFX90A-NEXT: v_mov_b32_e32 v2, v0 1295; GFX90A-NEXT: v_mov_b32_e32 v3, v1 1296; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16 1297; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] 1298; GFX90A-NEXT: s_waitcnt vmcnt(0) 1299; GFX90A-NEXT: s_setpc_b64 s[30:31] 1300; 1301; GFX940-LABEL: v_shuffle_v4i64_v3i64__u_0_0_0: 1302; GFX940: ; %bb.0: 1303; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1304; GFX940-NEXT: ;;#ASMSTART 1305; GFX940-NEXT: ; def v[0:5] 1306; GFX940-NEXT: ;;#ASMEND 1307; GFX940-NEXT: v_mov_b32_e32 v6, 0 1308; GFX940-NEXT: 
v_mov_b32_e32 v2, v0 1309; GFX940-NEXT: v_mov_b32_e32 v3, v1 1310; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 sc0 sc1 1311; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 1312; GFX940-NEXT: s_waitcnt vmcnt(0) 1313; GFX940-NEXT: s_setpc_b64 s[30:31] 1314 %vec0 = call <3 x i64> asm "; def $0", "=v"() 1315 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 poison, i32 0, i32 0, i32 0> 1316 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 1317 ret void 1318} 1319 1320define void @v_shuffle_v4i64_v3i64__0_0_0_0(ptr addrspace(1) inreg %ptr) { 1321; GFX900-LABEL: v_shuffle_v4i64_v3i64__0_0_0_0: 1322; GFX900: ; %bb.0: 1323; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1324; GFX900-NEXT: ;;#ASMSTART 1325; GFX900-NEXT: ; def v[0:5] 1326; GFX900-NEXT: ;;#ASMEND 1327; GFX900-NEXT: v_mov_b32_e32 v6, 0 1328; GFX900-NEXT: v_mov_b32_e32 v2, v0 1329; GFX900-NEXT: v_mov_b32_e32 v3, v1 1330; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16 1331; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] 1332; GFX900-NEXT: s_waitcnt vmcnt(0) 1333; GFX900-NEXT: s_setpc_b64 s[30:31] 1334; 1335; GFX90A-LABEL: v_shuffle_v4i64_v3i64__0_0_0_0: 1336; GFX90A: ; %bb.0: 1337; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1338; GFX90A-NEXT: ;;#ASMSTART 1339; GFX90A-NEXT: ; def v[0:5] 1340; GFX90A-NEXT: ;;#ASMEND 1341; GFX90A-NEXT: v_mov_b32_e32 v6, 0 1342; GFX90A-NEXT: v_mov_b32_e32 v2, v0 1343; GFX90A-NEXT: v_mov_b32_e32 v3, v1 1344; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16 1345; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] 1346; GFX90A-NEXT: s_waitcnt vmcnt(0) 1347; GFX90A-NEXT: s_setpc_b64 s[30:31] 1348; 1349; GFX940-LABEL: v_shuffle_v4i64_v3i64__0_0_0_0: 1350; GFX940: ; %bb.0: 1351; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1352; GFX940-NEXT: ;;#ASMSTART 1353; GFX940-NEXT: ; def v[0:5] 1354; GFX940-NEXT: ;;#ASMEND 1355; GFX940-NEXT: v_mov_b32_e32 v6, 0 1356; 
GFX940-NEXT: v_mov_b32_e32 v2, v0 1357; GFX940-NEXT: v_mov_b32_e32 v3, v1 1358; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 sc0 sc1 1359; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 1360; GFX940-NEXT: s_waitcnt vmcnt(0) 1361; GFX940-NEXT: s_setpc_b64 s[30:31] 1362 %vec0 = call <3 x i64> asm "; def $0", "=v"() 1363 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> zeroinitializer 1364 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 1365 ret void 1366} 1367 1368define void @v_shuffle_v4i64_v3i64__1_0_0_0(ptr addrspace(1) inreg %ptr) { 1369; GFX900-LABEL: v_shuffle_v4i64_v3i64__1_0_0_0: 1370; GFX900: ; %bb.0: 1371; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1372; GFX900-NEXT: ;;#ASMSTART 1373; GFX900-NEXT: ; def v[0:5] 1374; GFX900-NEXT: ;;#ASMEND 1375; GFX900-NEXT: v_mov_b32_e32 v8, 0 1376; GFX900-NEXT: v_mov_b32_e32 v4, v0 1377; GFX900-NEXT: v_mov_b32_e32 v5, v1 1378; GFX900-NEXT: v_mov_b32_e32 v6, v0 1379; GFX900-NEXT: v_mov_b32_e32 v7, v1 1380; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16 1381; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] 1382; GFX900-NEXT: s_waitcnt vmcnt(0) 1383; GFX900-NEXT: s_setpc_b64 s[30:31] 1384; 1385; GFX90A-LABEL: v_shuffle_v4i64_v3i64__1_0_0_0: 1386; GFX90A: ; %bb.0: 1387; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1388; GFX90A-NEXT: ;;#ASMSTART 1389; GFX90A-NEXT: ; def v[0:5] 1390; GFX90A-NEXT: ;;#ASMEND 1391; GFX90A-NEXT: v_mov_b32_e32 v8, 0 1392; GFX90A-NEXT: v_mov_b32_e32 v4, v0 1393; GFX90A-NEXT: v_mov_b32_e32 v5, v1 1394; GFX90A-NEXT: v_mov_b32_e32 v6, v0 1395; GFX90A-NEXT: v_mov_b32_e32 v7, v1 1396; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16 1397; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] 1398; GFX90A-NEXT: s_waitcnt vmcnt(0) 1399; GFX90A-NEXT: s_setpc_b64 s[30:31] 1400; 1401; GFX940-LABEL: v_shuffle_v4i64_v3i64__1_0_0_0: 1402; GFX940: ; %bb.0: 1403; GFX940-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) 1404; GFX940-NEXT: ;;#ASMSTART 1405; GFX940-NEXT: ; def v[0:5] 1406; GFX940-NEXT: ;;#ASMEND 1407; GFX940-NEXT: v_mov_b32_e32 v8, 0 1408; GFX940-NEXT: v_mov_b32_e32 v4, v0 1409; GFX940-NEXT: v_mov_b32_e32 v5, v1 1410; GFX940-NEXT: v_mov_b32_e32 v6, v0 1411; GFX940-NEXT: v_mov_b32_e32 v7, v1 1412; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 sc0 sc1 1413; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 1414; GFX940-NEXT: s_waitcnt vmcnt(0) 1415; GFX940-NEXT: s_setpc_b64 s[30:31] 1416 %vec0 = call <3 x i64> asm "; def $0", "=v"() 1417 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 1, i32 0, i32 0, i32 0> 1418 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 1419 ret void 1420} 1421 1422define void @v_shuffle_v4i64_v3i64__2_0_0_0(ptr addrspace(1) inreg %ptr) { 1423; GFX900-LABEL: v_shuffle_v4i64_v3i64__2_0_0_0: 1424; GFX900: ; %bb.0: 1425; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1426; GFX900-NEXT: ;;#ASMSTART 1427; GFX900-NEXT: ; def v[0:5] 1428; GFX900-NEXT: ;;#ASMEND 1429; GFX900-NEXT: v_mov_b32_e32 v6, 0 1430; GFX900-NEXT: v_mov_b32_e32 v2, v0 1431; GFX900-NEXT: v_mov_b32_e32 v3, v1 1432; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16 1433; GFX900-NEXT: s_nop 0 1434; GFX900-NEXT: v_mov_b32_e32 v2, v4 1435; GFX900-NEXT: v_mov_b32_e32 v3, v5 1436; GFX900-NEXT: v_mov_b32_e32 v4, v0 1437; GFX900-NEXT: v_mov_b32_e32 v5, v1 1438; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] 1439; GFX900-NEXT: s_waitcnt vmcnt(0) 1440; GFX900-NEXT: s_setpc_b64 s[30:31] 1441; 1442; GFX90A-LABEL: v_shuffle_v4i64_v3i64__2_0_0_0: 1443; GFX90A: ; %bb.0: 1444; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1445; GFX90A-NEXT: ;;#ASMSTART 1446; GFX90A-NEXT: ; def v[0:5] 1447; GFX90A-NEXT: ;;#ASMEND 1448; GFX90A-NEXT: v_mov_b32_e32 v6, 0 1449; GFX90A-NEXT: v_mov_b32_e32 v2, v0 1450; GFX90A-NEXT: v_mov_b32_e32 v3, v1 1451; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], 
s[16:17] offset:16 1452; GFX90A-NEXT: s_nop 0 1453; GFX90A-NEXT: v_mov_b32_e32 v2, v4 1454; GFX90A-NEXT: v_mov_b32_e32 v3, v5 1455; GFX90A-NEXT: v_mov_b32_e32 v4, v0 1456; GFX90A-NEXT: v_mov_b32_e32 v5, v1 1457; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] 1458; GFX90A-NEXT: s_waitcnt vmcnt(0) 1459; GFX90A-NEXT: s_setpc_b64 s[30:31] 1460; 1461; GFX940-LABEL: v_shuffle_v4i64_v3i64__2_0_0_0: 1462; GFX940: ; %bb.0: 1463; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1464; GFX940-NEXT: ;;#ASMSTART 1465; GFX940-NEXT: ; def v[0:5] 1466; GFX940-NEXT: ;;#ASMEND 1467; GFX940-NEXT: v_mov_b32_e32 v6, 0 1468; GFX940-NEXT: v_mov_b32_e32 v2, v0 1469; GFX940-NEXT: v_mov_b32_e32 v3, v1 1470; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 sc0 sc1 1471; GFX940-NEXT: s_nop 1 1472; GFX940-NEXT: v_mov_b32_e32 v2, v4 1473; GFX940-NEXT: v_mov_b32_e32 v3, v5 1474; GFX940-NEXT: v_mov_b32_e32 v4, v0 1475; GFX940-NEXT: v_mov_b32_e32 v5, v1 1476; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 1477; GFX940-NEXT: s_waitcnt vmcnt(0) 1478; GFX940-NEXT: s_setpc_b64 s[30:31] 1479 %vec0 = call <3 x i64> asm "; def $0", "=v"() 1480 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 2, i32 0, i32 0, i32 0> 1481 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 1482 ret void 1483} 1484 1485define void @v_shuffle_v4i64_v3i64__3_0_0_0(ptr addrspace(1) inreg %ptr) { 1486; GFX900-LABEL: v_shuffle_v4i64_v3i64__3_0_0_0: 1487; GFX900: ; %bb.0: 1488; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1489; GFX900-NEXT: ;;#ASMSTART 1490; GFX900-NEXT: ; def v[0:5] 1491; GFX900-NEXT: ;;#ASMEND 1492; GFX900-NEXT: v_mov_b32_e32 v6, 0 1493; GFX900-NEXT: v_mov_b32_e32 v2, v0 1494; GFX900-NEXT: v_mov_b32_e32 v3, v1 1495; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16 1496; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] 1497; GFX900-NEXT: s_waitcnt vmcnt(0) 1498; GFX900-NEXT: s_setpc_b64 s[30:31] 1499; 1500; 
GFX90A-LABEL: v_shuffle_v4i64_v3i64__3_0_0_0: 1501; GFX90A: ; %bb.0: 1502; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1503; GFX90A-NEXT: ;;#ASMSTART 1504; GFX90A-NEXT: ; def v[0:5] 1505; GFX90A-NEXT: ;;#ASMEND 1506; GFX90A-NEXT: v_mov_b32_e32 v6, 0 1507; GFX90A-NEXT: v_mov_b32_e32 v2, v0 1508; GFX90A-NEXT: v_mov_b32_e32 v3, v1 1509; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16 1510; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] 1511; GFX90A-NEXT: s_waitcnt vmcnt(0) 1512; GFX90A-NEXT: s_setpc_b64 s[30:31] 1513; 1514; GFX940-LABEL: v_shuffle_v4i64_v3i64__3_0_0_0: 1515; GFX940: ; %bb.0: 1516; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1517; GFX940-NEXT: ;;#ASMSTART 1518; GFX940-NEXT: ; def v[0:5] 1519; GFX940-NEXT: ;;#ASMEND 1520; GFX940-NEXT: v_mov_b32_e32 v6, 0 1521; GFX940-NEXT: v_mov_b32_e32 v2, v0 1522; GFX940-NEXT: v_mov_b32_e32 v3, v1 1523; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 sc0 sc1 1524; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 1525; GFX940-NEXT: s_waitcnt vmcnt(0) 1526; GFX940-NEXT: s_setpc_b64 s[30:31] 1527 %vec0 = call <3 x i64> asm "; def $0", "=v"() 1528 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 3, i32 0, i32 0, i32 0> 1529 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 1530 ret void 1531} 1532 1533define void @v_shuffle_v4i64_v3i64__4_0_0_0(ptr addrspace(1) inreg %ptr) { 1534; GFX900-LABEL: v_shuffle_v4i64_v3i64__4_0_0_0: 1535; GFX900: ; %bb.0: 1536; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1537; GFX900-NEXT: ;;#ASMSTART 1538; GFX900-NEXT: ; def v[0:5] 1539; GFX900-NEXT: ;;#ASMEND 1540; GFX900-NEXT: ;;#ASMSTART 1541; GFX900-NEXT: ; def v[2:7] 1542; GFX900-NEXT: ;;#ASMEND 1543; GFX900-NEXT: v_mov_b32_e32 v8, 0 1544; GFX900-NEXT: v_mov_b32_e32 v2, v0 1545; GFX900-NEXT: v_mov_b32_e32 v3, v1 1546; GFX900-NEXT: v_mov_b32_e32 v6, v0 1547; GFX900-NEXT: v_mov_b32_e32 v7, v1 1548; GFX900-NEXT: global_store_dwordx4 v8, 
v[0:3], s[16:17] offset:16 1549; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] 1550; GFX900-NEXT: s_waitcnt vmcnt(0) 1551; GFX900-NEXT: s_setpc_b64 s[30:31] 1552; 1553; GFX90A-LABEL: v_shuffle_v4i64_v3i64__4_0_0_0: 1554; GFX90A: ; %bb.0: 1555; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1556; GFX90A-NEXT: ;;#ASMSTART 1557; GFX90A-NEXT: ; def v[0:5] 1558; GFX90A-NEXT: ;;#ASMEND 1559; GFX90A-NEXT: ;;#ASMSTART 1560; GFX90A-NEXT: ; def v[2:7] 1561; GFX90A-NEXT: ;;#ASMEND 1562; GFX90A-NEXT: v_mov_b32_e32 v8, 0 1563; GFX90A-NEXT: v_mov_b32_e32 v2, v0 1564; GFX90A-NEXT: v_mov_b32_e32 v3, v1 1565; GFX90A-NEXT: v_mov_b32_e32 v6, v0 1566; GFX90A-NEXT: v_mov_b32_e32 v7, v1 1567; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16 1568; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] 1569; GFX90A-NEXT: s_waitcnt vmcnt(0) 1570; GFX90A-NEXT: s_setpc_b64 s[30:31] 1571; 1572; GFX940-LABEL: v_shuffle_v4i64_v3i64__4_0_0_0: 1573; GFX940: ; %bb.0: 1574; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1575; GFX940-NEXT: ;;#ASMSTART 1576; GFX940-NEXT: ; def v[0:5] 1577; GFX940-NEXT: ;;#ASMEND 1578; GFX940-NEXT: v_mov_b32_e32 v8, 0 1579; GFX940-NEXT: ;;#ASMSTART 1580; GFX940-NEXT: ; def v[2:7] 1581; GFX940-NEXT: ;;#ASMEND 1582; GFX940-NEXT: s_nop 0 1583; GFX940-NEXT: v_mov_b32_e32 v2, v0 1584; GFX940-NEXT: v_mov_b32_e32 v3, v1 1585; GFX940-NEXT: v_mov_b32_e32 v6, v0 1586; GFX940-NEXT: v_mov_b32_e32 v7, v1 1587; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1 1588; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 1589; GFX940-NEXT: s_waitcnt vmcnt(0) 1590; GFX940-NEXT: s_setpc_b64 s[30:31] 1591 %vec0 = call <3 x i64> asm "; def $0", "=v"() 1592 %vec1 = call <3 x i64> asm "; def $0", "=v"() 1593 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 4, i32 0, i32 0, i32 0> 1594 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 1595 ret void 1596} 1597 1598define void 
@v_shuffle_v4i64_v3i64__5_0_0_0(ptr addrspace(1) inreg %ptr) { 1599; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_0_0_0: 1600; GFX900: ; %bb.0: 1601; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1602; GFX900-NEXT: ;;#ASMSTART 1603; GFX900-NEXT: ; def v[0:5] 1604; GFX900-NEXT: ;;#ASMEND 1605; GFX900-NEXT: ;;#ASMSTART 1606; GFX900-NEXT: ; def v[2:7] 1607; GFX900-NEXT: ;;#ASMEND 1608; GFX900-NEXT: v_mov_b32_e32 v8, 0 1609; GFX900-NEXT: v_mov_b32_e32 v2, v0 1610; GFX900-NEXT: v_mov_b32_e32 v3, v1 1611; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16 1612; GFX900-NEXT: v_mov_b32_e32 v4, v0 1613; GFX900-NEXT: v_mov_b32_e32 v2, v6 1614; GFX900-NEXT: v_mov_b32_e32 v3, v7 1615; GFX900-NEXT: v_mov_b32_e32 v5, v1 1616; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] 1617; GFX900-NEXT: s_waitcnt vmcnt(0) 1618; GFX900-NEXT: s_setpc_b64 s[30:31] 1619; 1620; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_0_0_0: 1621; GFX90A: ; %bb.0: 1622; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1623; GFX90A-NEXT: ;;#ASMSTART 1624; GFX90A-NEXT: ; def v[0:5] 1625; GFX90A-NEXT: ;;#ASMEND 1626; GFX90A-NEXT: ;;#ASMSTART 1627; GFX90A-NEXT: ; def v[2:7] 1628; GFX90A-NEXT: ;;#ASMEND 1629; GFX90A-NEXT: v_mov_b32_e32 v8, 0 1630; GFX90A-NEXT: v_mov_b32_e32 v2, v0 1631; GFX90A-NEXT: v_mov_b32_e32 v3, v1 1632; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16 1633; GFX90A-NEXT: v_mov_b32_e32 v4, v0 1634; GFX90A-NEXT: v_mov_b32_e32 v2, v6 1635; GFX90A-NEXT: v_mov_b32_e32 v3, v7 1636; GFX90A-NEXT: v_mov_b32_e32 v5, v1 1637; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] 1638; GFX90A-NEXT: s_waitcnt vmcnt(0) 1639; GFX90A-NEXT: s_setpc_b64 s[30:31] 1640; 1641; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_0_0_0: 1642; GFX940: ; %bb.0: 1643; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1644; GFX940-NEXT: ;;#ASMSTART 1645; GFX940-NEXT: ; def v[0:5] 1646; GFX940-NEXT: ;;#ASMEND 1647; GFX940-NEXT: v_mov_b32_e32 v8, 0 1648; GFX940-NEXT: ;;#ASMSTART 1649; 
GFX940-NEXT: ; def v[2:7] 1650; GFX940-NEXT: ;;#ASMEND 1651; GFX940-NEXT: s_nop 0 1652; GFX940-NEXT: v_mov_b32_e32 v2, v0 1653; GFX940-NEXT: v_mov_b32_e32 v3, v1 1654; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1 1655; GFX940-NEXT: v_mov_b32_e32 v4, v0 1656; GFX940-NEXT: v_mov_b32_e32 v5, v1 1657; GFX940-NEXT: v_mov_b32_e32 v2, v6 1658; GFX940-NEXT: v_mov_b32_e32 v3, v7 1659; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 1660; GFX940-NEXT: s_waitcnt vmcnt(0) 1661; GFX940-NEXT: s_setpc_b64 s[30:31] 1662 %vec0 = call <3 x i64> asm "; def $0", "=v"() 1663 %vec1 = call <3 x i64> asm "; def $0", "=v"() 1664 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 0, i32 0, i32 0> 1665 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 1666 ret void 1667} 1668 1669define void @v_shuffle_v4i64_v3i64__5_u_0_0(ptr addrspace(1) inreg %ptr) { 1670; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_u_0_0: 1671; GFX900: ; %bb.0: 1672; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1673; GFX900-NEXT: ;;#ASMSTART 1674; GFX900-NEXT: ; def v[0:5] 1675; GFX900-NEXT: ;;#ASMEND 1676; GFX900-NEXT: ;;#ASMSTART 1677; GFX900-NEXT: ; def v[2:7] 1678; GFX900-NEXT: ;;#ASMEND 1679; GFX900-NEXT: v_mov_b32_e32 v8, 0 1680; GFX900-NEXT: v_mov_b32_e32 v2, v0 1681; GFX900-NEXT: v_mov_b32_e32 v3, v1 1682; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16 1683; GFX900-NEXT: s_nop 0 1684; GFX900-NEXT: v_mov_b32_e32 v0, v6 1685; GFX900-NEXT: v_mov_b32_e32 v1, v7 1686; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] 1687; GFX900-NEXT: s_waitcnt vmcnt(0) 1688; GFX900-NEXT: s_setpc_b64 s[30:31] 1689; 1690; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_u_0_0: 1691; GFX90A: ; %bb.0: 1692; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1693; GFX90A-NEXT: ;;#ASMSTART 1694; GFX90A-NEXT: ; def v[0:5] 1695; GFX90A-NEXT: ;;#ASMEND 1696; GFX90A-NEXT: ;;#ASMSTART 1697; GFX90A-NEXT: ; def v[2:7] 1698; GFX90A-NEXT: ;;#ASMEND 1699; 
GFX90A-NEXT: v_mov_b32_e32 v8, 0 1700; GFX90A-NEXT: v_mov_b32_e32 v2, v0 1701; GFX90A-NEXT: v_mov_b32_e32 v3, v1 1702; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16 1703; GFX90A-NEXT: s_nop 0 1704; GFX90A-NEXT: v_mov_b32_e32 v0, v6 1705; GFX90A-NEXT: v_mov_b32_e32 v1, v7 1706; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] 1707; GFX90A-NEXT: s_waitcnt vmcnt(0) 1708; GFX90A-NEXT: s_setpc_b64 s[30:31] 1709; 1710; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_u_0_0: 1711; GFX940: ; %bb.0: 1712; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1713; GFX940-NEXT: ;;#ASMSTART 1714; GFX940-NEXT: ; def v[0:5] 1715; GFX940-NEXT: ;;#ASMEND 1716; GFX940-NEXT: v_mov_b32_e32 v8, 0 1717; GFX940-NEXT: ;;#ASMSTART 1718; GFX940-NEXT: ; def v[2:7] 1719; GFX940-NEXT: ;;#ASMEND 1720; GFX940-NEXT: s_nop 0 1721; GFX940-NEXT: v_mov_b32_e32 v2, v0 1722; GFX940-NEXT: v_mov_b32_e32 v3, v1 1723; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1 1724; GFX940-NEXT: s_nop 1 1725; GFX940-NEXT: v_mov_b32_e32 v0, v6 1726; GFX940-NEXT: v_mov_b32_e32 v1, v7 1727; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 1728; GFX940-NEXT: s_waitcnt vmcnt(0) 1729; GFX940-NEXT: s_setpc_b64 s[30:31] 1730 %vec0 = call <3 x i64> asm "; def $0", "=v"() 1731 %vec1 = call <3 x i64> asm "; def $0", "=v"() 1732 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 poison, i32 0, i32 0> 1733 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 1734 ret void 1735} 1736 1737define void @v_shuffle_v4i64_v3i64__5_1_0_0(ptr addrspace(1) inreg %ptr) { 1738; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_1_0_0: 1739; GFX900: ; %bb.0: 1740; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1741; GFX900-NEXT: ;;#ASMSTART 1742; GFX900-NEXT: ; def v[0:5] 1743; GFX900-NEXT: ;;#ASMEND 1744; GFX900-NEXT: ;;#ASMSTART 1745; GFX900-NEXT: ; def v[4:9] 1746; GFX900-NEXT: ;;#ASMEND 1747; GFX900-NEXT: v_mov_b32_e32 v10, 0 1748; GFX900-NEXT: v_mov_b32_e32 v4, v0 
1749; GFX900-NEXT: v_mov_b32_e32 v5, v1 1750; GFX900-NEXT: v_mov_b32_e32 v6, v0 1751; GFX900-NEXT: v_mov_b32_e32 v7, v1 1752; GFX900-NEXT: v_mov_b32_e32 v0, v8 1753; GFX900-NEXT: v_mov_b32_e32 v1, v9 1754; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17] offset:16 1755; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] 1756; GFX900-NEXT: s_waitcnt vmcnt(0) 1757; GFX900-NEXT: s_setpc_b64 s[30:31] 1758; 1759; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_1_0_0: 1760; GFX90A: ; %bb.0: 1761; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1762; GFX90A-NEXT: ;;#ASMSTART 1763; GFX90A-NEXT: ; def v[0:5] 1764; GFX90A-NEXT: ;;#ASMEND 1765; GFX90A-NEXT: ;;#ASMSTART 1766; GFX90A-NEXT: ; def v[4:9] 1767; GFX90A-NEXT: ;;#ASMEND 1768; GFX90A-NEXT: v_mov_b32_e32 v10, 0 1769; GFX90A-NEXT: v_mov_b32_e32 v4, v0 1770; GFX90A-NEXT: v_mov_b32_e32 v5, v1 1771; GFX90A-NEXT: v_mov_b32_e32 v6, v0 1772; GFX90A-NEXT: v_mov_b32_e32 v7, v1 1773; GFX90A-NEXT: v_mov_b32_e32 v0, v8 1774; GFX90A-NEXT: v_mov_b32_e32 v1, v9 1775; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17] offset:16 1776; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] 1777; GFX90A-NEXT: s_waitcnt vmcnt(0) 1778; GFX90A-NEXT: s_setpc_b64 s[30:31] 1779; 1780; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_1_0_0: 1781; GFX940: ; %bb.0: 1782; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1783; GFX940-NEXT: ;;#ASMSTART 1784; GFX940-NEXT: ; def v[0:5] 1785; GFX940-NEXT: ;;#ASMEND 1786; GFX940-NEXT: v_mov_b32_e32 v10, 0 1787; GFX940-NEXT: ;;#ASMSTART 1788; GFX940-NEXT: ; def v[4:9] 1789; GFX940-NEXT: ;;#ASMEND 1790; GFX940-NEXT: s_nop 0 1791; GFX940-NEXT: v_mov_b32_e32 v4, v0 1792; GFX940-NEXT: v_mov_b32_e32 v5, v1 1793; GFX940-NEXT: v_mov_b32_e32 v6, v0 1794; GFX940-NEXT: v_mov_b32_e32 v7, v1 1795; GFX940-NEXT: v_mov_b32_e32 v0, v8 1796; GFX940-NEXT: v_mov_b32_e32 v1, v9 1797; GFX940-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1] offset:16 sc0 sc1 1798; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] 
sc0 sc1 1799; GFX940-NEXT: s_waitcnt vmcnt(0) 1800; GFX940-NEXT: s_setpc_b64 s[30:31] 1801 %vec0 = call <3 x i64> asm "; def $0", "=v"() 1802 %vec1 = call <3 x i64> asm "; def $0", "=v"() 1803 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 1, i32 0, i32 0> 1804 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 1805 ret void 1806} 1807 1808define void @v_shuffle_v4i64_v3i64__5_2_0_0(ptr addrspace(1) inreg %ptr) { 1809; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_2_0_0: 1810; GFX900: ; %bb.0: 1811; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1812; GFX900-NEXT: ;;#ASMSTART 1813; GFX900-NEXT: ; def v[0:5] 1814; GFX900-NEXT: ;;#ASMEND 1815; GFX900-NEXT: v_mov_b32_e32 v12, 0 1816; GFX900-NEXT: v_mov_b32_e32 v2, v0 1817; GFX900-NEXT: v_mov_b32_e32 v3, v1 1818; GFX900-NEXT: ;;#ASMSTART 1819; GFX900-NEXT: ; def v[6:11] 1820; GFX900-NEXT: ;;#ASMEND 1821; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] offset:16 1822; GFX900-NEXT: s_nop 0 1823; GFX900-NEXT: v_mov_b32_e32 v2, v10 1824; GFX900-NEXT: v_mov_b32_e32 v3, v11 1825; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] 1826; GFX900-NEXT: s_waitcnt vmcnt(0) 1827; GFX900-NEXT: s_setpc_b64 s[30:31] 1828; 1829; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_2_0_0: 1830; GFX90A: ; %bb.0: 1831; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1832; GFX90A-NEXT: ;;#ASMSTART 1833; GFX90A-NEXT: ; def v[0:5] 1834; GFX90A-NEXT: ;;#ASMEND 1835; GFX90A-NEXT: v_mov_b32_e32 v12, 0 1836; GFX90A-NEXT: v_mov_b32_e32 v2, v0 1837; GFX90A-NEXT: v_mov_b32_e32 v3, v1 1838; GFX90A-NEXT: ;;#ASMSTART 1839; GFX90A-NEXT: ; def v[6:11] 1840; GFX90A-NEXT: ;;#ASMEND 1841; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] offset:16 1842; GFX90A-NEXT: s_nop 0 1843; GFX90A-NEXT: v_mov_b32_e32 v2, v10 1844; GFX90A-NEXT: v_mov_b32_e32 v3, v11 1845; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] 1846; GFX90A-NEXT: s_waitcnt vmcnt(0) 1847; GFX90A-NEXT: s_setpc_b64 s[30:31] 1848; 1849; 
GFX940-LABEL: v_shuffle_v4i64_v3i64__5_2_0_0: 1850; GFX940: ; %bb.0: 1851; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1852; GFX940-NEXT: ;;#ASMSTART 1853; GFX940-NEXT: ; def v[0:5] 1854; GFX940-NEXT: ;;#ASMEND 1855; GFX940-NEXT: v_mov_b32_e32 v12, 0 1856; GFX940-NEXT: v_mov_b32_e32 v2, v0 1857; GFX940-NEXT: v_mov_b32_e32 v3, v1 1858; GFX940-NEXT: ;;#ASMSTART 1859; GFX940-NEXT: ; def v[6:11] 1860; GFX940-NEXT: ;;#ASMEND 1861; GFX940-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] offset:16 sc0 sc1 1862; GFX940-NEXT: s_nop 1 1863; GFX940-NEXT: v_mov_b32_e32 v2, v10 1864; GFX940-NEXT: v_mov_b32_e32 v3, v11 1865; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] sc0 sc1 1866; GFX940-NEXT: s_waitcnt vmcnt(0) 1867; GFX940-NEXT: s_setpc_b64 s[30:31] 1868 %vec0 = call <3 x i64> asm "; def $0", "=v"() 1869 %vec1 = call <3 x i64> asm "; def $0", "=v"() 1870 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 2, i32 0, i32 0> 1871 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 1872 ret void 1873} 1874 1875define void @v_shuffle_v4i64_v3i64__5_3_0_0(ptr addrspace(1) inreg %ptr) { 1876; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_3_0_0: 1877; GFX900: ; %bb.0: 1878; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1879; GFX900-NEXT: ;;#ASMSTART 1880; GFX900-NEXT: ; def v[0:5] 1881; GFX900-NEXT: ;;#ASMEND 1882; GFX900-NEXT: v_mov_b32_e32 v10, 0 1883; GFX900-NEXT: v_mov_b32_e32 v2, v0 1884; GFX900-NEXT: v_mov_b32_e32 v3, v1 1885; GFX900-NEXT: ;;#ASMSTART 1886; GFX900-NEXT: ; def v[4:9] 1887; GFX900-NEXT: ;;#ASMEND 1888; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16 1889; GFX900-NEXT: s_nop 0 1890; GFX900-NEXT: v_mov_b32_e32 v0, v8 1891; GFX900-NEXT: v_mov_b32_e32 v1, v9 1892; GFX900-NEXT: v_mov_b32_e32 v2, v4 1893; GFX900-NEXT: v_mov_b32_e32 v3, v5 1894; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] 1895; GFX900-NEXT: s_waitcnt vmcnt(0) 1896; GFX900-NEXT: s_setpc_b64 s[30:31] 1897; 1898; GFX90A-LABEL: 
v_shuffle_v4i64_v3i64__5_3_0_0: 1899; GFX90A: ; %bb.0: 1900; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1901; GFX90A-NEXT: ;;#ASMSTART 1902; GFX90A-NEXT: ; def v[0:5] 1903; GFX90A-NEXT: ;;#ASMEND 1904; GFX90A-NEXT: v_mov_b32_e32 v10, 0 1905; GFX90A-NEXT: v_mov_b32_e32 v2, v0 1906; GFX90A-NEXT: v_mov_b32_e32 v3, v1 1907; GFX90A-NEXT: ;;#ASMSTART 1908; GFX90A-NEXT: ; def v[4:9] 1909; GFX90A-NEXT: ;;#ASMEND 1910; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16 1911; GFX90A-NEXT: s_nop 0 1912; GFX90A-NEXT: v_mov_b32_e32 v0, v8 1913; GFX90A-NEXT: v_mov_b32_e32 v1, v9 1914; GFX90A-NEXT: v_mov_b32_e32 v2, v4 1915; GFX90A-NEXT: v_mov_b32_e32 v3, v5 1916; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] 1917; GFX90A-NEXT: s_waitcnt vmcnt(0) 1918; GFX90A-NEXT: s_setpc_b64 s[30:31] 1919; 1920; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_3_0_0: 1921; GFX940: ; %bb.0: 1922; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1923; GFX940-NEXT: ;;#ASMSTART 1924; GFX940-NEXT: ; def v[0:5] 1925; GFX940-NEXT: ;;#ASMEND 1926; GFX940-NEXT: v_mov_b32_e32 v10, 0 1927; GFX940-NEXT: v_mov_b32_e32 v2, v0 1928; GFX940-NEXT: v_mov_b32_e32 v3, v1 1929; GFX940-NEXT: ;;#ASMSTART 1930; GFX940-NEXT: ; def v[4:9] 1931; GFX940-NEXT: ;;#ASMEND 1932; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16 sc0 sc1 1933; GFX940-NEXT: s_nop 1 1934; GFX940-NEXT: v_mov_b32_e32 v0, v8 1935; GFX940-NEXT: v_mov_b32_e32 v1, v9 1936; GFX940-NEXT: v_mov_b32_e32 v2, v4 1937; GFX940-NEXT: v_mov_b32_e32 v3, v5 1938; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] sc0 sc1 1939; GFX940-NEXT: s_waitcnt vmcnt(0) 1940; GFX940-NEXT: s_setpc_b64 s[30:31] 1941 %vec0 = call <3 x i64> asm "; def $0", "=v"() 1942 %vec1 = call <3 x i64> asm "; def $0", "=v"() 1943 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 3, i32 0, i32 0> 1944 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 1945 ret void 1946} 1947 1948define void 
@v_shuffle_v4i64_v3i64__5_4_0_0(ptr addrspace(1) inreg %ptr) { 1949; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_4_0_0: 1950; GFX900: ; %bb.0: 1951; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1952; GFX900-NEXT: ;;#ASMSTART 1953; GFX900-NEXT: ; def v[0:5] 1954; GFX900-NEXT: ;;#ASMEND 1955; GFX900-NEXT: ;;#ASMSTART 1956; GFX900-NEXT: ; def v[2:7] 1957; GFX900-NEXT: ;;#ASMEND 1958; GFX900-NEXT: v_mov_b32_e32 v8, 0 1959; GFX900-NEXT: v_mov_b32_e32 v2, v0 1960; GFX900-NEXT: v_mov_b32_e32 v3, v1 1961; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16 1962; GFX900-NEXT: s_nop 0 1963; GFX900-NEXT: v_mov_b32_e32 v2, v6 1964; GFX900-NEXT: v_mov_b32_e32 v3, v7 1965; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] 1966; GFX900-NEXT: s_waitcnt vmcnt(0) 1967; GFX900-NEXT: s_setpc_b64 s[30:31] 1968; 1969; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_4_0_0: 1970; GFX90A: ; %bb.0: 1971; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1972; GFX90A-NEXT: ;;#ASMSTART 1973; GFX90A-NEXT: ; def v[0:5] 1974; GFX90A-NEXT: ;;#ASMEND 1975; GFX90A-NEXT: ;;#ASMSTART 1976; GFX90A-NEXT: ; def v[2:7] 1977; GFX90A-NEXT: ;;#ASMEND 1978; GFX90A-NEXT: v_mov_b32_e32 v8, 0 1979; GFX90A-NEXT: v_mov_b32_e32 v2, v0 1980; GFX90A-NEXT: v_mov_b32_e32 v3, v1 1981; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16 1982; GFX90A-NEXT: s_nop 0 1983; GFX90A-NEXT: v_mov_b32_e32 v2, v6 1984; GFX90A-NEXT: v_mov_b32_e32 v3, v7 1985; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] 1986; GFX90A-NEXT: s_waitcnt vmcnt(0) 1987; GFX90A-NEXT: s_setpc_b64 s[30:31] 1988; 1989; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_4_0_0: 1990; GFX940: ; %bb.0: 1991; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1992; GFX940-NEXT: ;;#ASMSTART 1993; GFX940-NEXT: ; def v[0:5] 1994; GFX940-NEXT: ;;#ASMEND 1995; GFX940-NEXT: v_mov_b32_e32 v8, 0 1996; GFX940-NEXT: ;;#ASMSTART 1997; GFX940-NEXT: ; def v[2:7] 1998; GFX940-NEXT: ;;#ASMEND 1999; GFX940-NEXT: s_nop 0 2000; GFX940-NEXT: v_mov_b32_e32 
v2, v0 2001; GFX940-NEXT: v_mov_b32_e32 v3, v1 2002; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1 2003; GFX940-NEXT: s_nop 1 2004; GFX940-NEXT: v_mov_b32_e32 v2, v6 2005; GFX940-NEXT: v_mov_b32_e32 v3, v7 2006; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 2007; GFX940-NEXT: s_waitcnt vmcnt(0) 2008; GFX940-NEXT: s_setpc_b64 s[30:31] 2009 %vec0 = call <3 x i64> asm "; def $0", "=v"() 2010 %vec1 = call <3 x i64> asm "; def $0", "=v"() 2011 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 4, i32 0, i32 0> 2012 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 2013 ret void 2014} 2015 2016define void @v_shuffle_v4i64_v3i64__5_5_0_0(ptr addrspace(1) inreg %ptr) { 2017; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_0_0: 2018; GFX900: ; %bb.0: 2019; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2020; GFX900-NEXT: ;;#ASMSTART 2021; GFX900-NEXT: ; def v[0:5] 2022; GFX900-NEXT: ;;#ASMEND 2023; GFX900-NEXT: ;;#ASMSTART 2024; GFX900-NEXT: ; def v[2:7] 2025; GFX900-NEXT: ;;#ASMEND 2026; GFX900-NEXT: v_mov_b32_e32 v8, 0 2027; GFX900-NEXT: v_mov_b32_e32 v2, v0 2028; GFX900-NEXT: v_mov_b32_e32 v3, v1 2029; GFX900-NEXT: v_mov_b32_e32 v4, v6 2030; GFX900-NEXT: v_mov_b32_e32 v5, v7 2031; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16 2032; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] 2033; GFX900-NEXT: s_waitcnt vmcnt(0) 2034; GFX900-NEXT: s_setpc_b64 s[30:31] 2035; 2036; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_0_0: 2037; GFX90A: ; %bb.0: 2038; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2039; GFX90A-NEXT: ;;#ASMSTART 2040; GFX90A-NEXT: ; def v[0:5] 2041; GFX90A-NEXT: ;;#ASMEND 2042; GFX90A-NEXT: ;;#ASMSTART 2043; GFX90A-NEXT: ; def v[2:7] 2044; GFX90A-NEXT: ;;#ASMEND 2045; GFX90A-NEXT: v_mov_b32_e32 v8, 0 2046; GFX90A-NEXT: v_mov_b32_e32 v2, v0 2047; GFX90A-NEXT: v_mov_b32_e32 v3, v1 2048; GFX90A-NEXT: v_mov_b32_e32 v4, v6 2049; GFX90A-NEXT: v_mov_b32_e32 v5, v7 2050; 
GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16 2051; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] 2052; GFX90A-NEXT: s_waitcnt vmcnt(0) 2053; GFX90A-NEXT: s_setpc_b64 s[30:31] 2054; 2055; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_0_0: 2056; GFX940: ; %bb.0: 2057; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2058; GFX940-NEXT: ;;#ASMSTART 2059; GFX940-NEXT: ; def v[0:5] 2060; GFX940-NEXT: ;;#ASMEND 2061; GFX940-NEXT: v_mov_b32_e32 v8, 0 2062; GFX940-NEXT: ;;#ASMSTART 2063; GFX940-NEXT: ; def v[2:7] 2064; GFX940-NEXT: ;;#ASMEND 2065; GFX940-NEXT: s_nop 0 2066; GFX940-NEXT: v_mov_b32_e32 v2, v0 2067; GFX940-NEXT: v_mov_b32_e32 v3, v1 2068; GFX940-NEXT: v_mov_b32_e32 v4, v6 2069; GFX940-NEXT: v_mov_b32_e32 v5, v7 2070; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1 2071; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 2072; GFX940-NEXT: s_waitcnt vmcnt(0) 2073; GFX940-NEXT: s_setpc_b64 s[30:31] 2074 %vec0 = call <3 x i64> asm "; def $0", "=v"() 2075 %vec1 = call <3 x i64> asm "; def $0", "=v"() 2076 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 0, i32 0> 2077 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 2078 ret void 2079} 2080 2081define void @v_shuffle_v4i64_v3i64__5_5_u_0(ptr addrspace(1) inreg %ptr) { 2082; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_u_0: 2083; GFX900: ; %bb.0: 2084; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2085; GFX900-NEXT: ;;#ASMSTART 2086; GFX900-NEXT: ; def v[0:5] 2087; GFX900-NEXT: ;;#ASMEND 2088; GFX900-NEXT: ;;#ASMSTART 2089; GFX900-NEXT: ; def v[2:7] 2090; GFX900-NEXT: ;;#ASMEND 2091; GFX900-NEXT: v_mov_b32_e32 v8, 0 2092; GFX900-NEXT: v_mov_b32_e32 v2, v0 2093; GFX900-NEXT: v_mov_b32_e32 v3, v1 2094; GFX900-NEXT: v_mov_b32_e32 v4, v6 2095; GFX900-NEXT: v_mov_b32_e32 v5, v7 2096; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16 2097; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] 2098; 
GFX900-NEXT: s_waitcnt vmcnt(0) 2099; GFX900-NEXT: s_setpc_b64 s[30:31] 2100; 2101; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_u_0: 2102; GFX90A: ; %bb.0: 2103; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2104; GFX90A-NEXT: ;;#ASMSTART 2105; GFX90A-NEXT: ; def v[0:5] 2106; GFX90A-NEXT: ;;#ASMEND 2107; GFX90A-NEXT: ;;#ASMSTART 2108; GFX90A-NEXT: ; def v[2:7] 2109; GFX90A-NEXT: ;;#ASMEND 2110; GFX90A-NEXT: v_mov_b32_e32 v8, 0 2111; GFX90A-NEXT: v_mov_b32_e32 v2, v0 2112; GFX90A-NEXT: v_mov_b32_e32 v3, v1 2113; GFX90A-NEXT: v_mov_b32_e32 v4, v6 2114; GFX90A-NEXT: v_mov_b32_e32 v5, v7 2115; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16 2116; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] 2117; GFX90A-NEXT: s_waitcnt vmcnt(0) 2118; GFX90A-NEXT: s_setpc_b64 s[30:31] 2119; 2120; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_u_0: 2121; GFX940: ; %bb.0: 2122; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2123; GFX940-NEXT: ;;#ASMSTART 2124; GFX940-NEXT: ; def v[0:5] 2125; GFX940-NEXT: ;;#ASMEND 2126; GFX940-NEXT: v_mov_b32_e32 v8, 0 2127; GFX940-NEXT: ;;#ASMSTART 2128; GFX940-NEXT: ; def v[2:7] 2129; GFX940-NEXT: ;;#ASMEND 2130; GFX940-NEXT: s_nop 0 2131; GFX940-NEXT: v_mov_b32_e32 v2, v0 2132; GFX940-NEXT: v_mov_b32_e32 v3, v1 2133; GFX940-NEXT: v_mov_b32_e32 v4, v6 2134; GFX940-NEXT: v_mov_b32_e32 v5, v7 2135; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1 2136; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 2137; GFX940-NEXT: s_waitcnt vmcnt(0) 2138; GFX940-NEXT: s_setpc_b64 s[30:31] 2139 %vec0 = call <3 x i64> asm "; def $0", "=v"() 2140 %vec1 = call <3 x i64> asm "; def $0", "=v"() 2141 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 poison, i32 0> 2142 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 2143 ret void 2144} 2145 2146define void @v_shuffle_v4i64_v3i64__5_5_1_0(ptr addrspace(1) inreg %ptr) { 2147; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_1_0: 
2148; GFX900: ; %bb.0: 2149; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2150; GFX900-NEXT: ;;#ASMSTART 2151; GFX900-NEXT: ; def v[0:5] 2152; GFX900-NEXT: ;;#ASMEND 2153; GFX900-NEXT: ;;#ASMSTART 2154; GFX900-NEXT: ; def v[4:9] 2155; GFX900-NEXT: ;;#ASMEND 2156; GFX900-NEXT: v_mov_b32_e32 v10, 0 2157; GFX900-NEXT: v_mov_b32_e32 v4, v0 2158; GFX900-NEXT: v_mov_b32_e32 v5, v1 2159; GFX900-NEXT: v_mov_b32_e32 v6, v8 2160; GFX900-NEXT: v_mov_b32_e32 v7, v9 2161; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16 2162; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] 2163; GFX900-NEXT: s_waitcnt vmcnt(0) 2164; GFX900-NEXT: s_setpc_b64 s[30:31] 2165; 2166; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_1_0: 2167; GFX90A: ; %bb.0: 2168; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2169; GFX90A-NEXT: ;;#ASMSTART 2170; GFX90A-NEXT: ; def v[0:5] 2171; GFX90A-NEXT: ;;#ASMEND 2172; GFX90A-NEXT: ;;#ASMSTART 2173; GFX90A-NEXT: ; def v[4:9] 2174; GFX90A-NEXT: ;;#ASMEND 2175; GFX90A-NEXT: v_mov_b32_e32 v10, 0 2176; GFX90A-NEXT: v_mov_b32_e32 v4, v0 2177; GFX90A-NEXT: v_mov_b32_e32 v5, v1 2178; GFX90A-NEXT: v_mov_b32_e32 v6, v8 2179; GFX90A-NEXT: v_mov_b32_e32 v7, v9 2180; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16 2181; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] 2182; GFX90A-NEXT: s_waitcnt vmcnt(0) 2183; GFX90A-NEXT: s_setpc_b64 s[30:31] 2184; 2185; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_1_0: 2186; GFX940: ; %bb.0: 2187; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2188; GFX940-NEXT: ;;#ASMSTART 2189; GFX940-NEXT: ; def v[0:5] 2190; GFX940-NEXT: ;;#ASMEND 2191; GFX940-NEXT: v_mov_b32_e32 v10, 0 2192; GFX940-NEXT: ;;#ASMSTART 2193; GFX940-NEXT: ; def v[4:9] 2194; GFX940-NEXT: ;;#ASMEND 2195; GFX940-NEXT: s_nop 0 2196; GFX940-NEXT: v_mov_b32_e32 v4, v0 2197; GFX940-NEXT: v_mov_b32_e32 v5, v1 2198; GFX940-NEXT: v_mov_b32_e32 v6, v8 2199; GFX940-NEXT: v_mov_b32_e32 v7, v9 2200; GFX940-NEXT: 
global_store_dwordx4 v10, v[2:5], s[0:1] offset:16 sc0 sc1 2201; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] sc0 sc1 2202; GFX940-NEXT: s_waitcnt vmcnt(0) 2203; GFX940-NEXT: s_setpc_b64 s[30:31] 2204 %vec0 = call <3 x i64> asm "; def $0", "=v"() 2205 %vec1 = call <3 x i64> asm "; def $0", "=v"() 2206 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 1, i32 0> 2207 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 2208 ret void 2209} 2210 2211define void @v_shuffle_v4i64_v3i64__5_5_2_0(ptr addrspace(1) inreg %ptr) { 2212; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_2_0: 2213; GFX900: ; %bb.0: 2214; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2215; GFX900-NEXT: ;;#ASMSTART 2216; GFX900-NEXT: ; def v[0:5] 2217; GFX900-NEXT: ;;#ASMEND 2218; GFX900-NEXT: ;;#ASMSTART 2219; GFX900-NEXT: ; def v[6:11] 2220; GFX900-NEXT: ;;#ASMEND 2221; GFX900-NEXT: v_mov_b32_e32 v12, 0 2222; GFX900-NEXT: v_mov_b32_e32 v2, v4 2223; GFX900-NEXT: v_mov_b32_e32 v3, v5 2224; GFX900-NEXT: v_mov_b32_e32 v4, v0 2225; GFX900-NEXT: v_mov_b32_e32 v5, v1 2226; GFX900-NEXT: v_mov_b32_e32 v8, v10 2227; GFX900-NEXT: v_mov_b32_e32 v9, v11 2228; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16 2229; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] 2230; GFX900-NEXT: s_waitcnt vmcnt(0) 2231; GFX900-NEXT: s_setpc_b64 s[30:31] 2232; 2233; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_2_0: 2234; GFX90A: ; %bb.0: 2235; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2236; GFX90A-NEXT: ;;#ASMSTART 2237; GFX90A-NEXT: ; def v[0:5] 2238; GFX90A-NEXT: ;;#ASMEND 2239; GFX90A-NEXT: ;;#ASMSTART 2240; GFX90A-NEXT: ; def v[6:11] 2241; GFX90A-NEXT: ;;#ASMEND 2242; GFX90A-NEXT: v_mov_b32_e32 v12, 0 2243; GFX90A-NEXT: v_mov_b32_e32 v2, v4 2244; GFX90A-NEXT: v_mov_b32_e32 v3, v5 2245; GFX90A-NEXT: v_mov_b32_e32 v4, v0 2246; GFX90A-NEXT: v_mov_b32_e32 v5, v1 2247; GFX90A-NEXT: v_mov_b32_e32 v8, v10 2248; GFX90A-NEXT: v_mov_b32_e32 v9, v11 2249; 
GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16 2250; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] 2251; GFX90A-NEXT: s_waitcnt vmcnt(0) 2252; GFX90A-NEXT: s_setpc_b64 s[30:31] 2253; 2254; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_2_0: 2255; GFX940: ; %bb.0: 2256; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2257; GFX940-NEXT: ;;#ASMSTART 2258; GFX940-NEXT: ; def v[0:5] 2259; GFX940-NEXT: ;;#ASMEND 2260; GFX940-NEXT: ;;#ASMSTART 2261; GFX940-NEXT: ; def v[6:11] 2262; GFX940-NEXT: ;;#ASMEND 2263; GFX940-NEXT: v_mov_b32_e32 v12, 0 2264; GFX940-NEXT: v_mov_b32_e32 v2, v4 2265; GFX940-NEXT: v_mov_b32_e32 v3, v5 2266; GFX940-NEXT: v_mov_b32_e32 v4, v0 2267; GFX940-NEXT: v_mov_b32_e32 v5, v1 2268; GFX940-NEXT: v_mov_b32_e32 v8, v10 2269; GFX940-NEXT: v_mov_b32_e32 v9, v11 2270; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 sc0 sc1 2271; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1 2272; GFX940-NEXT: s_waitcnt vmcnt(0) 2273; GFX940-NEXT: s_setpc_b64 s[30:31] 2274 %vec0 = call <3 x i64> asm "; def $0", "=v"() 2275 %vec1 = call <3 x i64> asm "; def $0", "=v"() 2276 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 2, i32 0> 2277 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 2278 ret void 2279} 2280 2281define void @v_shuffle_v4i64_v3i64__5_5_3_0(ptr addrspace(1) inreg %ptr) { 2282; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_3_0: 2283; GFX900: ; %bb.0: 2284; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2285; GFX900-NEXT: ;;#ASMSTART 2286; GFX900-NEXT: ; def v[0:5] 2287; GFX900-NEXT: ;;#ASMEND 2288; GFX900-NEXT: ;;#ASMSTART 2289; GFX900-NEXT: ; def v[2:7] 2290; GFX900-NEXT: ;;#ASMEND 2291; GFX900-NEXT: v_mov_b32_e32 v8, 0 2292; GFX900-NEXT: v_mov_b32_e32 v4, v0 2293; GFX900-NEXT: v_mov_b32_e32 v5, v1 2294; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16 2295; GFX900-NEXT: s_nop 0 2296; GFX900-NEXT: v_mov_b32_e32 v4, v6 2297; GFX900-NEXT: 
v_mov_b32_e32 v5, v7 2298; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] 2299; GFX900-NEXT: s_waitcnt vmcnt(0) 2300; GFX900-NEXT: s_setpc_b64 s[30:31] 2301; 2302; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_3_0: 2303; GFX90A: ; %bb.0: 2304; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2305; GFX90A-NEXT: ;;#ASMSTART 2306; GFX90A-NEXT: ; def v[0:5] 2307; GFX90A-NEXT: ;;#ASMEND 2308; GFX90A-NEXT: ;;#ASMSTART 2309; GFX90A-NEXT: ; def v[2:7] 2310; GFX90A-NEXT: ;;#ASMEND 2311; GFX90A-NEXT: v_mov_b32_e32 v8, 0 2312; GFX90A-NEXT: v_mov_b32_e32 v4, v0 2313; GFX90A-NEXT: v_mov_b32_e32 v5, v1 2314; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16 2315; GFX90A-NEXT: s_nop 0 2316; GFX90A-NEXT: v_mov_b32_e32 v4, v6 2317; GFX90A-NEXT: v_mov_b32_e32 v5, v7 2318; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] 2319; GFX90A-NEXT: s_waitcnt vmcnt(0) 2320; GFX90A-NEXT: s_setpc_b64 s[30:31] 2321; 2322; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_3_0: 2323; GFX940: ; %bb.0: 2324; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2325; GFX940-NEXT: ;;#ASMSTART 2326; GFX940-NEXT: ; def v[0:5] 2327; GFX940-NEXT: ;;#ASMEND 2328; GFX940-NEXT: v_mov_b32_e32 v8, 0 2329; GFX940-NEXT: ;;#ASMSTART 2330; GFX940-NEXT: ; def v[2:7] 2331; GFX940-NEXT: ;;#ASMEND 2332; GFX940-NEXT: s_nop 0 2333; GFX940-NEXT: v_mov_b32_e32 v4, v0 2334; GFX940-NEXT: v_mov_b32_e32 v5, v1 2335; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 sc0 sc1 2336; GFX940-NEXT: s_nop 1 2337; GFX940-NEXT: v_mov_b32_e32 v4, v6 2338; GFX940-NEXT: v_mov_b32_e32 v5, v7 2339; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 2340; GFX940-NEXT: s_waitcnt vmcnt(0) 2341; GFX940-NEXT: s_setpc_b64 s[30:31] 2342 %vec0 = call <3 x i64> asm "; def $0", "=v"() 2343 %vec1 = call <3 x i64> asm "; def $0", "=v"() 2344 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 3, i32 0> 2345 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 2346 ret void 
2347} 2348 2349define void @v_shuffle_v4i64_v3i64__5_5_4_0(ptr addrspace(1) inreg %ptr) { 2350; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_4_0: 2351; GFX900: ; %bb.0: 2352; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2353; GFX900-NEXT: ;;#ASMSTART 2354; GFX900-NEXT: ; def v[0:5] 2355; GFX900-NEXT: ;;#ASMEND 2356; GFX900-NEXT: ;;#ASMSTART 2357; GFX900-NEXT: ; def v[2:7] 2358; GFX900-NEXT: ;;#ASMEND 2359; GFX900-NEXT: v_mov_b32_e32 v8, 0 2360; GFX900-NEXT: v_mov_b32_e32 v2, v4 2361; GFX900-NEXT: v_mov_b32_e32 v3, v5 2362; GFX900-NEXT: v_mov_b32_e32 v4, v0 2363; GFX900-NEXT: v_mov_b32_e32 v5, v1 2364; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16 2365; GFX900-NEXT: s_nop 0 2366; GFX900-NEXT: v_mov_b32_e32 v4, v6 2367; GFX900-NEXT: v_mov_b32_e32 v5, v7 2368; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] 2369; GFX900-NEXT: s_waitcnt vmcnt(0) 2370; GFX900-NEXT: s_setpc_b64 s[30:31] 2371; 2372; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_4_0: 2373; GFX90A: ; %bb.0: 2374; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2375; GFX90A-NEXT: ;;#ASMSTART 2376; GFX90A-NEXT: ; def v[0:5] 2377; GFX90A-NEXT: ;;#ASMEND 2378; GFX90A-NEXT: ;;#ASMSTART 2379; GFX90A-NEXT: ; def v[2:7] 2380; GFX90A-NEXT: ;;#ASMEND 2381; GFX90A-NEXT: v_mov_b32_e32 v8, 0 2382; GFX90A-NEXT: v_mov_b32_e32 v2, v4 2383; GFX90A-NEXT: v_mov_b32_e32 v3, v5 2384; GFX90A-NEXT: v_mov_b32_e32 v4, v0 2385; GFX90A-NEXT: v_mov_b32_e32 v5, v1 2386; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16 2387; GFX90A-NEXT: s_nop 0 2388; GFX90A-NEXT: v_mov_b32_e32 v4, v6 2389; GFX90A-NEXT: v_mov_b32_e32 v5, v7 2390; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] 2391; GFX90A-NEXT: s_waitcnt vmcnt(0) 2392; GFX90A-NEXT: s_setpc_b64 s[30:31] 2393; 2394; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_4_0: 2395; GFX940: ; %bb.0: 2396; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2397; GFX940-NEXT: ;;#ASMSTART 2398; GFX940-NEXT: ; def v[0:5] 2399; GFX940-NEXT: ;;#ASMEND 
2400; GFX940-NEXT: v_mov_b32_e32 v8, 0 2401; GFX940-NEXT: ;;#ASMSTART 2402; GFX940-NEXT: ; def v[2:7] 2403; GFX940-NEXT: ;;#ASMEND 2404; GFX940-NEXT: s_nop 0 2405; GFX940-NEXT: v_mov_b32_e32 v2, v4 2406; GFX940-NEXT: v_mov_b32_e32 v3, v5 2407; GFX940-NEXT: v_mov_b32_e32 v4, v0 2408; GFX940-NEXT: v_mov_b32_e32 v5, v1 2409; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 sc0 sc1 2410; GFX940-NEXT: s_nop 1 2411; GFX940-NEXT: v_mov_b32_e32 v4, v6 2412; GFX940-NEXT: v_mov_b32_e32 v5, v7 2413; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 2414; GFX940-NEXT: s_waitcnt vmcnt(0) 2415; GFX940-NEXT: s_setpc_b64 s[30:31] 2416 %vec0 = call <3 x i64> asm "; def $0", "=v"() 2417 %vec1 = call <3 x i64> asm "; def $0", "=v"() 2418 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 4, i32 0> 2419 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 2420 ret void 2421} 2422 2423define void @v_shuffle_v4i64_v3i64__u_1_1_1(ptr addrspace(1) inreg %ptr) { 2424; GFX900-LABEL: v_shuffle_v4i64_v3i64__u_1_1_1: 2425; GFX900: ; %bb.0: 2426; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2427; GFX900-NEXT: ;;#ASMSTART 2428; GFX900-NEXT: ; def v[0:5] 2429; GFX900-NEXT: ;;#ASMEND 2430; GFX900-NEXT: v_mov_b32_e32 v6, 0 2431; GFX900-NEXT: v_mov_b32_e32 v4, v2 2432; GFX900-NEXT: v_mov_b32_e32 v5, v3 2433; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16 2434; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] 2435; GFX900-NEXT: s_waitcnt vmcnt(0) 2436; GFX900-NEXT: s_setpc_b64 s[30:31] 2437; 2438; GFX90A-LABEL: v_shuffle_v4i64_v3i64__u_1_1_1: 2439; GFX90A: ; %bb.0: 2440; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2441; GFX90A-NEXT: ;;#ASMSTART 2442; GFX90A-NEXT: ; def v[0:5] 2443; GFX90A-NEXT: ;;#ASMEND 2444; GFX90A-NEXT: v_mov_b32_e32 v6, 0 2445; GFX90A-NEXT: v_mov_b32_e32 v4, v2 2446; GFX90A-NEXT: v_mov_b32_e32 v5, v3 2447; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16 2448; 
GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] 2449; GFX90A-NEXT: s_waitcnt vmcnt(0) 2450; GFX90A-NEXT: s_setpc_b64 s[30:31] 2451; 2452; GFX940-LABEL: v_shuffle_v4i64_v3i64__u_1_1_1: 2453; GFX940: ; %bb.0: 2454; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2455; GFX940-NEXT: ;;#ASMSTART 2456; GFX940-NEXT: ; def v[0:5] 2457; GFX940-NEXT: ;;#ASMEND 2458; GFX940-NEXT: v_mov_b32_e32 v6, 0 2459; GFX940-NEXT: v_mov_b32_e32 v4, v2 2460; GFX940-NEXT: v_mov_b32_e32 v5, v3 2461; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1 2462; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 2463; GFX940-NEXT: s_waitcnt vmcnt(0) 2464; GFX940-NEXT: s_setpc_b64 s[30:31] 2465 %vec0 = call <3 x i64> asm "; def $0", "=v"() 2466 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 poison, i32 1, i32 1, i32 1> 2467 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 2468 ret void 2469} 2470 2471define void @v_shuffle_v4i64_v3i64__0_1_1_1(ptr addrspace(1) inreg %ptr) { 2472; GFX900-LABEL: v_shuffle_v4i64_v3i64__0_1_1_1: 2473; GFX900: ; %bb.0: 2474; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2475; GFX900-NEXT: ;;#ASMSTART 2476; GFX900-NEXT: ; def v[0:5] 2477; GFX900-NEXT: ;;#ASMEND 2478; GFX900-NEXT: v_mov_b32_e32 v6, 0 2479; GFX900-NEXT: v_mov_b32_e32 v4, v2 2480; GFX900-NEXT: v_mov_b32_e32 v5, v3 2481; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16 2482; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] 2483; GFX900-NEXT: s_waitcnt vmcnt(0) 2484; GFX900-NEXT: s_setpc_b64 s[30:31] 2485; 2486; GFX90A-LABEL: v_shuffle_v4i64_v3i64__0_1_1_1: 2487; GFX90A: ; %bb.0: 2488; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2489; GFX90A-NEXT: ;;#ASMSTART 2490; GFX90A-NEXT: ; def v[0:5] 2491; GFX90A-NEXT: ;;#ASMEND 2492; GFX90A-NEXT: v_mov_b32_e32 v6, 0 2493; GFX90A-NEXT: v_mov_b32_e32 v4, v2 2494; GFX90A-NEXT: v_mov_b32_e32 v5, v3 2495; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] 
offset:16 2496; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] 2497; GFX90A-NEXT: s_waitcnt vmcnt(0) 2498; GFX90A-NEXT: s_setpc_b64 s[30:31] 2499; 2500; GFX940-LABEL: v_shuffle_v4i64_v3i64__0_1_1_1: 2501; GFX940: ; %bb.0: 2502; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2503; GFX940-NEXT: ;;#ASMSTART 2504; GFX940-NEXT: ; def v[0:5] 2505; GFX940-NEXT: ;;#ASMEND 2506; GFX940-NEXT: v_mov_b32_e32 v6, 0 2507; GFX940-NEXT: v_mov_b32_e32 v4, v2 2508; GFX940-NEXT: v_mov_b32_e32 v5, v3 2509; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1 2510; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 2511; GFX940-NEXT: s_waitcnt vmcnt(0) 2512; GFX940-NEXT: s_setpc_b64 s[30:31] 2513 %vec0 = call <3 x i64> asm "; def $0", "=v"() 2514 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 1> 2515 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 2516 ret void 2517} 2518 2519define void @v_shuffle_v4i64_v3i64__1_1_1_1(ptr addrspace(1) inreg %ptr) { 2520; GFX900-LABEL: v_shuffle_v4i64_v3i64__1_1_1_1: 2521; GFX900: ; %bb.0: 2522; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2523; GFX900-NEXT: ;;#ASMSTART 2524; GFX900-NEXT: ; def v[0:5] 2525; GFX900-NEXT: ;;#ASMEND 2526; GFX900-NEXT: v_mov_b32_e32 v6, 0 2527; GFX900-NEXT: v_mov_b32_e32 v4, v2 2528; GFX900-NEXT: v_mov_b32_e32 v5, v3 2529; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16 2530; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] 2531; GFX900-NEXT: s_waitcnt vmcnt(0) 2532; GFX900-NEXT: s_setpc_b64 s[30:31] 2533; 2534; GFX90A-LABEL: v_shuffle_v4i64_v3i64__1_1_1_1: 2535; GFX90A: ; %bb.0: 2536; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2537; GFX90A-NEXT: ;;#ASMSTART 2538; GFX90A-NEXT: ; def v[0:5] 2539; GFX90A-NEXT: ;;#ASMEND 2540; GFX90A-NEXT: v_mov_b32_e32 v6, 0 2541; GFX90A-NEXT: v_mov_b32_e32 v4, v2 2542; GFX90A-NEXT: v_mov_b32_e32 v5, v3 2543; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], 
s[16:17] offset:16 2544; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] 2545; GFX90A-NEXT: s_waitcnt vmcnt(0) 2546; GFX90A-NEXT: s_setpc_b64 s[30:31] 2547; 2548; GFX940-LABEL: v_shuffle_v4i64_v3i64__1_1_1_1: 2549; GFX940: ; %bb.0: 2550; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2551; GFX940-NEXT: ;;#ASMSTART 2552; GFX940-NEXT: ; def v[0:5] 2553; GFX940-NEXT: ;;#ASMEND 2554; GFX940-NEXT: v_mov_b32_e32 v6, 0 2555; GFX940-NEXT: v_mov_b32_e32 v4, v2 2556; GFX940-NEXT: v_mov_b32_e32 v5, v3 2557; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1 2558; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 2559; GFX940-NEXT: s_waitcnt vmcnt(0) 2560; GFX940-NEXT: s_setpc_b64 s[30:31] 2561 %vec0 = call <3 x i64> asm "; def $0", "=v"() 2562 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1> 2563 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 2564 ret void 2565} 2566 2567define void @v_shuffle_v4i64_v3i64__2_1_1_1(ptr addrspace(1) inreg %ptr) { 2568; GFX900-LABEL: v_shuffle_v4i64_v3i64__2_1_1_1: 2569; GFX900: ; %bb.0: 2570; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2571; GFX900-NEXT: ;;#ASMSTART 2572; GFX900-NEXT: ; def v[0:5] 2573; GFX900-NEXT: ;;#ASMEND 2574; GFX900-NEXT: v_mov_b32_e32 v6, 0 2575; GFX900-NEXT: v_mov_b32_e32 v0, v2 2576; GFX900-NEXT: v_mov_b32_e32 v1, v3 2577; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16 2578; GFX900-NEXT: s_nop 0 2579; GFX900-NEXT: v_mov_b32_e32 v0, v4 2580; GFX900-NEXT: v_mov_b32_e32 v1, v5 2581; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] 2582; GFX900-NEXT: s_waitcnt vmcnt(0) 2583; GFX900-NEXT: s_setpc_b64 s[30:31] 2584; 2585; GFX90A-LABEL: v_shuffle_v4i64_v3i64__2_1_1_1: 2586; GFX90A: ; %bb.0: 2587; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2588; GFX90A-NEXT: ;;#ASMSTART 2589; GFX90A-NEXT: ; def v[0:5] 2590; GFX90A-NEXT: ;;#ASMEND 2591; GFX90A-NEXT: v_mov_b32_e32 v6, 0 2592; GFX90A-NEXT: 
v_mov_b32_e32 v0, v2 2593; GFX90A-NEXT: v_mov_b32_e32 v1, v3 2594; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16 2595; GFX90A-NEXT: s_nop 0 2596; GFX90A-NEXT: v_mov_b32_e32 v0, v4 2597; GFX90A-NEXT: v_mov_b32_e32 v1, v5 2598; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] 2599; GFX90A-NEXT: s_waitcnt vmcnt(0) 2600; GFX90A-NEXT: s_setpc_b64 s[30:31] 2601; 2602; GFX940-LABEL: v_shuffle_v4i64_v3i64__2_1_1_1: 2603; GFX940: ; %bb.0: 2604; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2605; GFX940-NEXT: ;;#ASMSTART 2606; GFX940-NEXT: ; def v[0:5] 2607; GFX940-NEXT: ;;#ASMEND 2608; GFX940-NEXT: v_mov_b32_e32 v6, 0 2609; GFX940-NEXT: v_mov_b32_e32 v0, v2 2610; GFX940-NEXT: v_mov_b32_e32 v1, v3 2611; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 sc0 sc1 2612; GFX940-NEXT: s_nop 1 2613; GFX940-NEXT: v_mov_b32_e32 v0, v4 2614; GFX940-NEXT: v_mov_b32_e32 v1, v5 2615; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 2616; GFX940-NEXT: s_waitcnt vmcnt(0) 2617; GFX940-NEXT: s_setpc_b64 s[30:31] 2618 %vec0 = call <3 x i64> asm "; def $0", "=v"() 2619 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 2, i32 1, i32 1, i32 1> 2620 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 2621 ret void 2622} 2623 2624define void @v_shuffle_v4i64_v3i64__3_1_1_1(ptr addrspace(1) inreg %ptr) { 2625; GFX900-LABEL: v_shuffle_v4i64_v3i64__3_1_1_1: 2626; GFX900: ; %bb.0: 2627; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2628; GFX900-NEXT: ;;#ASMSTART 2629; GFX900-NEXT: ; def v[0:5] 2630; GFX900-NEXT: ;;#ASMEND 2631; GFX900-NEXT: v_mov_b32_e32 v6, 0 2632; GFX900-NEXT: v_mov_b32_e32 v4, v2 2633; GFX900-NEXT: v_mov_b32_e32 v5, v3 2634; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16 2635; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] 2636; GFX900-NEXT: s_waitcnt vmcnt(0) 2637; GFX900-NEXT: s_setpc_b64 s[30:31] 2638; 2639; GFX90A-LABEL: v_shuffle_v4i64_v3i64__3_1_1_1: 2640; 
GFX90A: ; %bb.0: 2641; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2642; GFX90A-NEXT: ;;#ASMSTART 2643; GFX90A-NEXT: ; def v[0:5] 2644; GFX90A-NEXT: ;;#ASMEND 2645; GFX90A-NEXT: v_mov_b32_e32 v6, 0 2646; GFX90A-NEXT: v_mov_b32_e32 v4, v2 2647; GFX90A-NEXT: v_mov_b32_e32 v5, v3 2648; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16 2649; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] 2650; GFX90A-NEXT: s_waitcnt vmcnt(0) 2651; GFX90A-NEXT: s_setpc_b64 s[30:31] 2652; 2653; GFX940-LABEL: v_shuffle_v4i64_v3i64__3_1_1_1: 2654; GFX940: ; %bb.0: 2655; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2656; GFX940-NEXT: ;;#ASMSTART 2657; GFX940-NEXT: ; def v[0:5] 2658; GFX940-NEXT: ;;#ASMEND 2659; GFX940-NEXT: v_mov_b32_e32 v6, 0 2660; GFX940-NEXT: v_mov_b32_e32 v4, v2 2661; GFX940-NEXT: v_mov_b32_e32 v5, v3 2662; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1 2663; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 2664; GFX940-NEXT: s_waitcnt vmcnt(0) 2665; GFX940-NEXT: s_setpc_b64 s[30:31] 2666 %vec0 = call <3 x i64> asm "; def $0", "=v"() 2667 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 3, i32 1, i32 1, i32 1> 2668 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 2669 ret void 2670} 2671 2672define void @v_shuffle_v4i64_v3i64__4_1_1_1(ptr addrspace(1) inreg %ptr) { 2673; GFX900-LABEL: v_shuffle_v4i64_v3i64__4_1_1_1: 2674; GFX900: ; %bb.0: 2675; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2676; GFX900-NEXT: ;;#ASMSTART 2677; GFX900-NEXT: ; def v[0:5] 2678; GFX900-NEXT: ;;#ASMEND 2679; GFX900-NEXT: ;;#ASMSTART 2680; GFX900-NEXT: ; def v[4:9] 2681; GFX900-NEXT: ;;#ASMEND 2682; GFX900-NEXT: v_mov_b32_e32 v10, 0 2683; GFX900-NEXT: v_mov_b32_e32 v4, v2 2684; GFX900-NEXT: v_mov_b32_e32 v5, v3 2685; GFX900-NEXT: v_mov_b32_e32 v8, v2 2686; GFX900-NEXT: v_mov_b32_e32 v9, v3 2687; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16 2688; GFX900-NEXT: 
global_store_dwordx4 v10, v[6:9], s[16:17] 2689; GFX900-NEXT: s_waitcnt vmcnt(0) 2690; GFX900-NEXT: s_setpc_b64 s[30:31] 2691; 2692; GFX90A-LABEL: v_shuffle_v4i64_v3i64__4_1_1_1: 2693; GFX90A: ; %bb.0: 2694; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2695; GFX90A-NEXT: ;;#ASMSTART 2696; GFX90A-NEXT: ; def v[0:5] 2697; GFX90A-NEXT: ;;#ASMEND 2698; GFX90A-NEXT: ;;#ASMSTART 2699; GFX90A-NEXT: ; def v[4:9] 2700; GFX90A-NEXT: ;;#ASMEND 2701; GFX90A-NEXT: v_mov_b32_e32 v10, 0 2702; GFX90A-NEXT: v_mov_b32_e32 v4, v2 2703; GFX90A-NEXT: v_mov_b32_e32 v5, v3 2704; GFX90A-NEXT: v_mov_b32_e32 v8, v2 2705; GFX90A-NEXT: v_mov_b32_e32 v9, v3 2706; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16 2707; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] 2708; GFX90A-NEXT: s_waitcnt vmcnt(0) 2709; GFX90A-NEXT: s_setpc_b64 s[30:31] 2710; 2711; GFX940-LABEL: v_shuffle_v4i64_v3i64__4_1_1_1: 2712; GFX940: ; %bb.0: 2713; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2714; GFX940-NEXT: ;;#ASMSTART 2715; GFX940-NEXT: ; def v[0:5] 2716; GFX940-NEXT: ;;#ASMEND 2717; GFX940-NEXT: v_mov_b32_e32 v10, 0 2718; GFX940-NEXT: ;;#ASMSTART 2719; GFX940-NEXT: ; def v[4:9] 2720; GFX940-NEXT: ;;#ASMEND 2721; GFX940-NEXT: s_nop 0 2722; GFX940-NEXT: v_mov_b32_e32 v4, v2 2723; GFX940-NEXT: v_mov_b32_e32 v5, v3 2724; GFX940-NEXT: v_mov_b32_e32 v8, v2 2725; GFX940-NEXT: v_mov_b32_e32 v9, v3 2726; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16 sc0 sc1 2727; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] sc0 sc1 2728; GFX940-NEXT: s_waitcnt vmcnt(0) 2729; GFX940-NEXT: s_setpc_b64 s[30:31] 2730 %vec0 = call <3 x i64> asm "; def $0", "=v"() 2731 %vec1 = call <3 x i64> asm "; def $0", "=v"() 2732 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 4, i32 1, i32 1, i32 1> 2733 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 2734 ret void 2735} 2736 2737define void @v_shuffle_v4i64_v3i64__5_1_1_1(ptr addrspace(1) inreg %ptr) { 
2738; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_1_1_1: 2739; GFX900: ; %bb.0: 2740; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2741; GFX900-NEXT: ;;#ASMSTART 2742; GFX900-NEXT: ; def v[0:5] 2743; GFX900-NEXT: ;;#ASMEND 2744; GFX900-NEXT: ;;#ASMSTART 2745; GFX900-NEXT: ; def v[4:9] 2746; GFX900-NEXT: ;;#ASMEND 2747; GFX900-NEXT: v_mov_b32_e32 v10, 0 2748; GFX900-NEXT: v_mov_b32_e32 v4, v2 2749; GFX900-NEXT: v_mov_b32_e32 v5, v3 2750; GFX900-NEXT: v_mov_b32_e32 v0, v8 2751; GFX900-NEXT: v_mov_b32_e32 v1, v9 2752; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16 2753; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] 2754; GFX900-NEXT: s_waitcnt vmcnt(0) 2755; GFX900-NEXT: s_setpc_b64 s[30:31] 2756; 2757; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_1_1_1: 2758; GFX90A: ; %bb.0: 2759; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2760; GFX90A-NEXT: ;;#ASMSTART 2761; GFX90A-NEXT: ; def v[0:5] 2762; GFX90A-NEXT: ;;#ASMEND 2763; GFX90A-NEXT: ;;#ASMSTART 2764; GFX90A-NEXT: ; def v[4:9] 2765; GFX90A-NEXT: ;;#ASMEND 2766; GFX90A-NEXT: v_mov_b32_e32 v10, 0 2767; GFX90A-NEXT: v_mov_b32_e32 v4, v2 2768; GFX90A-NEXT: v_mov_b32_e32 v5, v3 2769; GFX90A-NEXT: v_mov_b32_e32 v0, v8 2770; GFX90A-NEXT: v_mov_b32_e32 v1, v9 2771; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16 2772; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] 2773; GFX90A-NEXT: s_waitcnt vmcnt(0) 2774; GFX90A-NEXT: s_setpc_b64 s[30:31] 2775; 2776; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_1_1_1: 2777; GFX940: ; %bb.0: 2778; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2779; GFX940-NEXT: ;;#ASMSTART 2780; GFX940-NEXT: ; def v[0:5] 2781; GFX940-NEXT: ;;#ASMEND 2782; GFX940-NEXT: v_mov_b32_e32 v10, 0 2783; GFX940-NEXT: ;;#ASMSTART 2784; GFX940-NEXT: ; def v[4:9] 2785; GFX940-NEXT: ;;#ASMEND 2786; GFX940-NEXT: s_nop 0 2787; GFX940-NEXT: v_mov_b32_e32 v4, v2 2788; GFX940-NEXT: v_mov_b32_e32 v5, v3 2789; GFX940-NEXT: v_mov_b32_e32 v0, v8 2790; GFX940-NEXT: 
v_mov_b32_e32 v1, v9 2791; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16 sc0 sc1 2792; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] sc0 sc1 2793; GFX940-NEXT: s_waitcnt vmcnt(0) 2794; GFX940-NEXT: s_setpc_b64 s[30:31] 2795 %vec0 = call <3 x i64> asm "; def $0", "=v"() 2796 %vec1 = call <3 x i64> asm "; def $0", "=v"() 2797 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 1, i32 1, i32 1> 2798 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 2799 ret void 2800} 2801 2802define void @v_shuffle_v4i64_v3i64__5_u_1_1(ptr addrspace(1) inreg %ptr) { 2803; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_u_1_1: 2804; GFX900: ; %bb.0: 2805; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2806; GFX900-NEXT: ;;#ASMSTART 2807; GFX900-NEXT: ; def v[0:5] 2808; GFX900-NEXT: ;;#ASMEND 2809; GFX900-NEXT: ;;#ASMSTART 2810; GFX900-NEXT: ; def v[4:9] 2811; GFX900-NEXT: ;;#ASMEND 2812; GFX900-NEXT: v_mov_b32_e32 v10, 0 2813; GFX900-NEXT: v_mov_b32_e32 v4, v2 2814; GFX900-NEXT: v_mov_b32_e32 v5, v3 2815; GFX900-NEXT: v_mov_b32_e32 v0, v8 2816; GFX900-NEXT: v_mov_b32_e32 v1, v9 2817; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16 2818; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] 2819; GFX900-NEXT: s_waitcnt vmcnt(0) 2820; GFX900-NEXT: s_setpc_b64 s[30:31] 2821; 2822; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_u_1_1: 2823; GFX90A: ; %bb.0: 2824; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2825; GFX90A-NEXT: ;;#ASMSTART 2826; GFX90A-NEXT: ; def v[0:5] 2827; GFX90A-NEXT: ;;#ASMEND 2828; GFX90A-NEXT: ;;#ASMSTART 2829; GFX90A-NEXT: ; def v[4:9] 2830; GFX90A-NEXT: ;;#ASMEND 2831; GFX90A-NEXT: v_mov_b32_e32 v10, 0 2832; GFX90A-NEXT: v_mov_b32_e32 v4, v2 2833; GFX90A-NEXT: v_mov_b32_e32 v5, v3 2834; GFX90A-NEXT: v_mov_b32_e32 v0, v8 2835; GFX90A-NEXT: v_mov_b32_e32 v1, v9 2836; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16 2837; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] 
2838; GFX90A-NEXT: s_waitcnt vmcnt(0) 2839; GFX90A-NEXT: s_setpc_b64 s[30:31] 2840; 2841; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_u_1_1: 2842; GFX940: ; %bb.0: 2843; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2844; GFX940-NEXT: ;;#ASMSTART 2845; GFX940-NEXT: ; def v[0:5] 2846; GFX940-NEXT: ;;#ASMEND 2847; GFX940-NEXT: v_mov_b32_e32 v10, 0 2848; GFX940-NEXT: ;;#ASMSTART 2849; GFX940-NEXT: ; def v[4:9] 2850; GFX940-NEXT: ;;#ASMEND 2851; GFX940-NEXT: s_nop 0 2852; GFX940-NEXT: v_mov_b32_e32 v4, v2 2853; GFX940-NEXT: v_mov_b32_e32 v5, v3 2854; GFX940-NEXT: v_mov_b32_e32 v0, v8 2855; GFX940-NEXT: v_mov_b32_e32 v1, v9 2856; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16 sc0 sc1 2857; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] sc0 sc1 2858; GFX940-NEXT: s_waitcnt vmcnt(0) 2859; GFX940-NEXT: s_setpc_b64 s[30:31] 2860 %vec0 = call <3 x i64> asm "; def $0", "=v"() 2861 %vec1 = call <3 x i64> asm "; def $0", "=v"() 2862 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 poison, i32 1, i32 1> 2863 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 2864 ret void 2865} 2866 2867define void @v_shuffle_v4i64_v3i64__5_0_1_1(ptr addrspace(1) inreg %ptr) { 2868; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_0_1_1: 2869; GFX900: ; %bb.0: 2870; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2871; GFX900-NEXT: ;;#ASMSTART 2872; GFX900-NEXT: ; def v[0:5] 2873; GFX900-NEXT: ;;#ASMEND 2874; GFX900-NEXT: ;;#ASMSTART 2875; GFX900-NEXT: ; def v[4:9] 2876; GFX900-NEXT: ;;#ASMEND 2877; GFX900-NEXT: v_mov_b32_e32 v10, 0 2878; GFX900-NEXT: v_mov_b32_e32 v4, v2 2879; GFX900-NEXT: v_mov_b32_e32 v5, v3 2880; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16 2881; GFX900-NEXT: s_nop 0 2882; GFX900-NEXT: v_mov_b32_e32 v2, v8 2883; GFX900-NEXT: v_mov_b32_e32 v3, v9 2884; GFX900-NEXT: v_mov_b32_e32 v4, v0 2885; GFX900-NEXT: v_mov_b32_e32 v5, v1 2886; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] 2887; 
GFX900-NEXT: s_waitcnt vmcnt(0) 2888; GFX900-NEXT: s_setpc_b64 s[30:31] 2889; 2890; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_0_1_1: 2891; GFX90A: ; %bb.0: 2892; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2893; GFX90A-NEXT: ;;#ASMSTART 2894; GFX90A-NEXT: ; def v[0:5] 2895; GFX90A-NEXT: ;;#ASMEND 2896; GFX90A-NEXT: ;;#ASMSTART 2897; GFX90A-NEXT: ; def v[4:9] 2898; GFX90A-NEXT: ;;#ASMEND 2899; GFX90A-NEXT: v_mov_b32_e32 v10, 0 2900; GFX90A-NEXT: v_mov_b32_e32 v4, v2 2901; GFX90A-NEXT: v_mov_b32_e32 v5, v3 2902; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16 2903; GFX90A-NEXT: s_nop 0 2904; GFX90A-NEXT: v_mov_b32_e32 v2, v8 2905; GFX90A-NEXT: v_mov_b32_e32 v3, v9 2906; GFX90A-NEXT: v_mov_b32_e32 v4, v0 2907; GFX90A-NEXT: v_mov_b32_e32 v5, v1 2908; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] 2909; GFX90A-NEXT: s_waitcnt vmcnt(0) 2910; GFX90A-NEXT: s_setpc_b64 s[30:31] 2911; 2912; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_0_1_1: 2913; GFX940: ; %bb.0: 2914; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2915; GFX940-NEXT: ;;#ASMSTART 2916; GFX940-NEXT: ; def v[0:5] 2917; GFX940-NEXT: ;;#ASMEND 2918; GFX940-NEXT: v_mov_b32_e32 v10, 0 2919; GFX940-NEXT: ;;#ASMSTART 2920; GFX940-NEXT: ; def v[4:9] 2921; GFX940-NEXT: ;;#ASMEND 2922; GFX940-NEXT: s_nop 0 2923; GFX940-NEXT: v_mov_b32_e32 v4, v2 2924; GFX940-NEXT: v_mov_b32_e32 v5, v3 2925; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16 sc0 sc1 2926; GFX940-NEXT: s_nop 1 2927; GFX940-NEXT: v_mov_b32_e32 v2, v8 2928; GFX940-NEXT: v_mov_b32_e32 v3, v9 2929; GFX940-NEXT: v_mov_b32_e32 v4, v0 2930; GFX940-NEXT: v_mov_b32_e32 v5, v1 2931; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] sc0 sc1 2932; GFX940-NEXT: s_waitcnt vmcnt(0) 2933; GFX940-NEXT: s_setpc_b64 s[30:31] 2934 %vec0 = call <3 x i64> asm "; def $0", "=v"() 2935 %vec1 = call <3 x i64> asm "; def $0", "=v"() 2936 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 0, i32 1, i32 1> 
2937 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 2938 ret void 2939} 2940 2941define void @v_shuffle_v4i64_v3i64__5_2_1_1(ptr addrspace(1) inreg %ptr) { 2942; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_2_1_1: 2943; GFX900: ; %bb.0: 2944; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2945; GFX900-NEXT: ;;#ASMSTART 2946; GFX900-NEXT: ; def v[0:5] 2947; GFX900-NEXT: ;;#ASMEND 2948; GFX900-NEXT: v_mov_b32_e32 v12, 0 2949; GFX900-NEXT: v_mov_b32_e32 v0, v2 2950; GFX900-NEXT: v_mov_b32_e32 v1, v3 2951; GFX900-NEXT: ;;#ASMSTART 2952; GFX900-NEXT: ; def v[6:11] 2953; GFX900-NEXT: ;;#ASMEND 2954; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] offset:16 2955; GFX900-NEXT: s_nop 0 2956; GFX900-NEXT: v_mov_b32_e32 v2, v10 2957; GFX900-NEXT: v_mov_b32_e32 v3, v11 2958; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] 2959; GFX900-NEXT: s_waitcnt vmcnt(0) 2960; GFX900-NEXT: s_setpc_b64 s[30:31] 2961; 2962; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_2_1_1: 2963; GFX90A: ; %bb.0: 2964; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2965; GFX90A-NEXT: ;;#ASMSTART 2966; GFX90A-NEXT: ; def v[0:5] 2967; GFX90A-NEXT: ;;#ASMEND 2968; GFX90A-NEXT: v_mov_b32_e32 v12, 0 2969; GFX90A-NEXT: v_mov_b32_e32 v0, v2 2970; GFX90A-NEXT: v_mov_b32_e32 v1, v3 2971; GFX90A-NEXT: ;;#ASMSTART 2972; GFX90A-NEXT: ; def v[6:11] 2973; GFX90A-NEXT: ;;#ASMEND 2974; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] offset:16 2975; GFX90A-NEXT: s_nop 0 2976; GFX90A-NEXT: v_mov_b32_e32 v2, v10 2977; GFX90A-NEXT: v_mov_b32_e32 v3, v11 2978; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] 2979; GFX90A-NEXT: s_waitcnt vmcnt(0) 2980; GFX90A-NEXT: s_setpc_b64 s[30:31] 2981; 2982; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_2_1_1: 2983; GFX940: ; %bb.0: 2984; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2985; GFX940-NEXT: ;;#ASMSTART 2986; GFX940-NEXT: ; def v[0:5] 2987; GFX940-NEXT: ;;#ASMEND 2988; GFX940-NEXT: v_mov_b32_e32 v12, 0 2989; GFX940-NEXT: v_mov_b32_e32 v0, 
v2 2990; GFX940-NEXT: v_mov_b32_e32 v1, v3 2991; GFX940-NEXT: ;;#ASMSTART 2992; GFX940-NEXT: ; def v[6:11] 2993; GFX940-NEXT: ;;#ASMEND 2994; GFX940-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] offset:16 sc0 sc1 2995; GFX940-NEXT: s_nop 1 2996; GFX940-NEXT: v_mov_b32_e32 v2, v10 2997; GFX940-NEXT: v_mov_b32_e32 v3, v11 2998; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] sc0 sc1 2999; GFX940-NEXT: s_waitcnt vmcnt(0) 3000; GFX940-NEXT: s_setpc_b64 s[30:31] 3001 %vec0 = call <3 x i64> asm "; def $0", "=v"() 3002 %vec1 = call <3 x i64> asm "; def $0", "=v"() 3003 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 2, i32 1, i32 1> 3004 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 3005 ret void 3006} 3007 3008define void @v_shuffle_v4i64_v3i64__5_3_1_1(ptr addrspace(1) inreg %ptr) { 3009; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_3_1_1: 3010; GFX900: ; %bb.0: 3011; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3012; GFX900-NEXT: ;;#ASMSTART 3013; GFX900-NEXT: ; def v[0:5] 3014; GFX900-NEXT: ;;#ASMEND 3015; GFX900-NEXT: v_mov_b32_e32 v12, 0 3016; GFX900-NEXT: v_mov_b32_e32 v4, v2 3017; GFX900-NEXT: v_mov_b32_e32 v5, v3 3018; GFX900-NEXT: ;;#ASMSTART 3019; GFX900-NEXT: ; def v[6:11] 3020; GFX900-NEXT: ;;#ASMEND 3021; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16 3022; GFX900-NEXT: v_mov_b32_e32 v0, v10 3023; GFX900-NEXT: v_mov_b32_e32 v1, v11 3024; GFX900-NEXT: v_mov_b32_e32 v2, v6 3025; GFX900-NEXT: v_mov_b32_e32 v3, v7 3026; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] 3027; GFX900-NEXT: s_waitcnt vmcnt(0) 3028; GFX900-NEXT: s_setpc_b64 s[30:31] 3029; 3030; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_3_1_1: 3031; GFX90A: ; %bb.0: 3032; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3033; GFX90A-NEXT: ;;#ASMSTART 3034; GFX90A-NEXT: ; def v[0:5] 3035; GFX90A-NEXT: ;;#ASMEND 3036; GFX90A-NEXT: v_mov_b32_e32 v12, 0 3037; GFX90A-NEXT: v_mov_b32_e32 v4, v2 3038; GFX90A-NEXT: v_mov_b32_e32 v5, v3 
3039; GFX90A-NEXT: ;;#ASMSTART 3040; GFX90A-NEXT: ; def v[6:11] 3041; GFX90A-NEXT: ;;#ASMEND 3042; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16 3043; GFX90A-NEXT: v_mov_b32_e32 v0, v10 3044; GFX90A-NEXT: v_mov_b32_e32 v1, v11 3045; GFX90A-NEXT: v_mov_b32_e32 v2, v6 3046; GFX90A-NEXT: v_mov_b32_e32 v3, v7 3047; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] 3048; GFX90A-NEXT: s_waitcnt vmcnt(0) 3049; GFX90A-NEXT: s_setpc_b64 s[30:31] 3050; 3051; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_3_1_1: 3052; GFX940: ; %bb.0: 3053; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3054; GFX940-NEXT: ;;#ASMSTART 3055; GFX940-NEXT: ; def v[0:5] 3056; GFX940-NEXT: ;;#ASMEND 3057; GFX940-NEXT: v_mov_b32_e32 v12, 0 3058; GFX940-NEXT: v_mov_b32_e32 v4, v2 3059; GFX940-NEXT: v_mov_b32_e32 v5, v3 3060; GFX940-NEXT: ;;#ASMSTART 3061; GFX940-NEXT: ; def v[6:11] 3062; GFX940-NEXT: ;;#ASMEND 3063; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 sc0 sc1 3064; GFX940-NEXT: v_mov_b32_e32 v0, v10 3065; GFX940-NEXT: v_mov_b32_e32 v1, v11 3066; GFX940-NEXT: v_mov_b32_e32 v2, v6 3067; GFX940-NEXT: v_mov_b32_e32 v3, v7 3068; GFX940-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] sc0 sc1 3069; GFX940-NEXT: s_waitcnt vmcnt(0) 3070; GFX940-NEXT: s_setpc_b64 s[30:31] 3071 %vec0 = call <3 x i64> asm "; def $0", "=v"() 3072 %vec1 = call <3 x i64> asm "; def $0", "=v"() 3073 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 3, i32 1, i32 1> 3074 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 3075 ret void 3076} 3077 3078define void @v_shuffle_v4i64_v3i64__5_4_1_1(ptr addrspace(1) inreg %ptr) { 3079; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_4_1_1: 3080; GFX900: ; %bb.0: 3081; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3082; GFX900-NEXT: ;;#ASMSTART 3083; GFX900-NEXT: ; def v[0:5] 3084; GFX900-NEXT: ;;#ASMEND 3085; GFX900-NEXT: ;;#ASMSTART 3086; GFX900-NEXT: ; def v[4:9] 3087; GFX900-NEXT: ;;#ASMEND 3088; 
GFX900-NEXT: v_mov_b32_e32 v10, 0 3089; GFX900-NEXT: v_mov_b32_e32 v4, v2 3090; GFX900-NEXT: v_mov_b32_e32 v5, v3 3091; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16 3092; GFX900-NEXT: s_nop 0 3093; GFX900-NEXT: v_mov_b32_e32 v4, v8 3094; GFX900-NEXT: v_mov_b32_e32 v5, v9 3095; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17] 3096; GFX900-NEXT: s_waitcnt vmcnt(0) 3097; GFX900-NEXT: s_setpc_b64 s[30:31] 3098; 3099; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_4_1_1: 3100; GFX90A: ; %bb.0: 3101; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3102; GFX90A-NEXT: ;;#ASMSTART 3103; GFX90A-NEXT: ; def v[0:5] 3104; GFX90A-NEXT: ;;#ASMEND 3105; GFX90A-NEXT: ;;#ASMSTART 3106; GFX90A-NEXT: ; def v[4:9] 3107; GFX90A-NEXT: ;;#ASMEND 3108; GFX90A-NEXT: v_mov_b32_e32 v10, 0 3109; GFX90A-NEXT: v_mov_b32_e32 v4, v2 3110; GFX90A-NEXT: v_mov_b32_e32 v5, v3 3111; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16 3112; GFX90A-NEXT: s_nop 0 3113; GFX90A-NEXT: v_mov_b32_e32 v4, v8 3114; GFX90A-NEXT: v_mov_b32_e32 v5, v9 3115; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17] 3116; GFX90A-NEXT: s_waitcnt vmcnt(0) 3117; GFX90A-NEXT: s_setpc_b64 s[30:31] 3118; 3119; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_4_1_1: 3120; GFX940: ; %bb.0: 3121; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3122; GFX940-NEXT: ;;#ASMSTART 3123; GFX940-NEXT: ; def v[0:5] 3124; GFX940-NEXT: ;;#ASMEND 3125; GFX940-NEXT: v_mov_b32_e32 v10, 0 3126; GFX940-NEXT: ;;#ASMSTART 3127; GFX940-NEXT: ; def v[4:9] 3128; GFX940-NEXT: ;;#ASMEND 3129; GFX940-NEXT: s_nop 0 3130; GFX940-NEXT: v_mov_b32_e32 v4, v2 3131; GFX940-NEXT: v_mov_b32_e32 v5, v3 3132; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16 sc0 sc1 3133; GFX940-NEXT: s_nop 1 3134; GFX940-NEXT: v_mov_b32_e32 v4, v8 3135; GFX940-NEXT: v_mov_b32_e32 v5, v9 3136; GFX940-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1] sc0 sc1 3137; GFX940-NEXT: s_waitcnt vmcnt(0) 3138; GFX940-NEXT: s_setpc_b64 s[30:31] 
3139 %vec0 = call <3 x i64> asm "; def $0", "=v"() 3140 %vec1 = call <3 x i64> asm "; def $0", "=v"() 3141 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 4, i32 1, i32 1> 3142 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 3143 ret void 3144} 3145 3146define void @v_shuffle_v4i64_v3i64__5_5_1_1(ptr addrspace(1) inreg %ptr) { 3147; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_1_1: 3148; GFX900: ; %bb.0: 3149; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3150; GFX900-NEXT: ;;#ASMSTART 3151; GFX900-NEXT: ; def v[0:5] 3152; GFX900-NEXT: ;;#ASMEND 3153; GFX900-NEXT: ;;#ASMSTART 3154; GFX900-NEXT: ; def v[4:9] 3155; GFX900-NEXT: ;;#ASMEND 3156; GFX900-NEXT: v_mov_b32_e32 v10, 0 3157; GFX900-NEXT: v_mov_b32_e32 v4, v2 3158; GFX900-NEXT: v_mov_b32_e32 v5, v3 3159; GFX900-NEXT: v_mov_b32_e32 v6, v8 3160; GFX900-NEXT: v_mov_b32_e32 v7, v9 3161; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16 3162; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] 3163; GFX900-NEXT: s_waitcnt vmcnt(0) 3164; GFX900-NEXT: s_setpc_b64 s[30:31] 3165; 3166; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_1_1: 3167; GFX90A: ; %bb.0: 3168; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3169; GFX90A-NEXT: ;;#ASMSTART 3170; GFX90A-NEXT: ; def v[0:5] 3171; GFX90A-NEXT: ;;#ASMEND 3172; GFX90A-NEXT: ;;#ASMSTART 3173; GFX90A-NEXT: ; def v[4:9] 3174; GFX90A-NEXT: ;;#ASMEND 3175; GFX90A-NEXT: v_mov_b32_e32 v10, 0 3176; GFX90A-NEXT: v_mov_b32_e32 v4, v2 3177; GFX90A-NEXT: v_mov_b32_e32 v5, v3 3178; GFX90A-NEXT: v_mov_b32_e32 v6, v8 3179; GFX90A-NEXT: v_mov_b32_e32 v7, v9 3180; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16 3181; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] 3182; GFX90A-NEXT: s_waitcnt vmcnt(0) 3183; GFX90A-NEXT: s_setpc_b64 s[30:31] 3184; 3185; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_1_1: 3186; GFX940: ; %bb.0: 3187; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3188; GFX940-NEXT: 
;;#ASMSTART 3189; GFX940-NEXT: ; def v[0:5] 3190; GFX940-NEXT: ;;#ASMEND 3191; GFX940-NEXT: v_mov_b32_e32 v10, 0 3192; GFX940-NEXT: ;;#ASMSTART 3193; GFX940-NEXT: ; def v[4:9] 3194; GFX940-NEXT: ;;#ASMEND 3195; GFX940-NEXT: s_nop 0 3196; GFX940-NEXT: v_mov_b32_e32 v4, v2 3197; GFX940-NEXT: v_mov_b32_e32 v5, v3 3198; GFX940-NEXT: v_mov_b32_e32 v6, v8 3199; GFX940-NEXT: v_mov_b32_e32 v7, v9 3200; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16 sc0 sc1 3201; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] sc0 sc1 3202; GFX940-NEXT: s_waitcnt vmcnt(0) 3203; GFX940-NEXT: s_setpc_b64 s[30:31] 3204 %vec0 = call <3 x i64> asm "; def $0", "=v"() 3205 %vec1 = call <3 x i64> asm "; def $0", "=v"() 3206 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 1, i32 1> 3207 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 3208 ret void 3209} 3210 3211define void @v_shuffle_v4i64_v3i64__5_5_u_1(ptr addrspace(1) inreg %ptr) { 3212; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_u_1: 3213; GFX900: ; %bb.0: 3214; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3215; GFX900-NEXT: ;;#ASMSTART 3216; GFX900-NEXT: ; def v[0:5] 3217; GFX900-NEXT: ;;#ASMEND 3218; GFX900-NEXT: ;;#ASMSTART 3219; GFX900-NEXT: ; def v[4:9] 3220; GFX900-NEXT: ;;#ASMEND 3221; GFX900-NEXT: v_mov_b32_e32 v10, 0 3222; GFX900-NEXT: v_mov_b32_e32 v6, v8 3223; GFX900-NEXT: v_mov_b32_e32 v7, v9 3224; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16 3225; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] 3226; GFX900-NEXT: s_waitcnt vmcnt(0) 3227; GFX900-NEXT: s_setpc_b64 s[30:31] 3228; 3229; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_u_1: 3230; GFX90A: ; %bb.0: 3231; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3232; GFX90A-NEXT: ;;#ASMSTART 3233; GFX90A-NEXT: ; def v[0:5] 3234; GFX90A-NEXT: ;;#ASMEND 3235; GFX90A-NEXT: ;;#ASMSTART 3236; GFX90A-NEXT: ; def v[4:9] 3237; GFX90A-NEXT: ;;#ASMEND 3238; GFX90A-NEXT: v_mov_b32_e32 v10, 0 
3239; GFX90A-NEXT: v_mov_b32_e32 v6, v8 3240; GFX90A-NEXT: v_mov_b32_e32 v7, v9 3241; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16 3242; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] 3243; GFX90A-NEXT: s_waitcnt vmcnt(0) 3244; GFX90A-NEXT: s_setpc_b64 s[30:31] 3245; 3246; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_u_1: 3247; GFX940: ; %bb.0: 3248; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3249; GFX940-NEXT: ;;#ASMSTART 3250; GFX940-NEXT: ; def v[0:5] 3251; GFX940-NEXT: ;;#ASMEND 3252; GFX940-NEXT: v_mov_b32_e32 v10, 0 3253; GFX940-NEXT: ;;#ASMSTART 3254; GFX940-NEXT: ; def v[4:9] 3255; GFX940-NEXT: ;;#ASMEND 3256; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16 sc0 sc1 3257; GFX940-NEXT: v_mov_b32_e32 v6, v8 3258; GFX940-NEXT: v_mov_b32_e32 v7, v9 3259; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] sc0 sc1 3260; GFX940-NEXT: s_waitcnt vmcnt(0) 3261; GFX940-NEXT: s_setpc_b64 s[30:31] 3262 %vec0 = call <3 x i64> asm "; def $0", "=v"() 3263 %vec1 = call <3 x i64> asm "; def $0", "=v"() 3264 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 poison, i32 1> 3265 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 3266 ret void 3267} 3268 3269define void @v_shuffle_v4i64_v3i64__5_5_0_1(ptr addrspace(1) inreg %ptr) { 3270; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_0_1: 3271; GFX900: ; %bb.0: 3272; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3273; GFX900-NEXT: ;;#ASMSTART 3274; GFX900-NEXT: ; def v[0:5] 3275; GFX900-NEXT: ;;#ASMEND 3276; GFX900-NEXT: ;;#ASMSTART 3277; GFX900-NEXT: ; def v[4:9] 3278; GFX900-NEXT: ;;#ASMEND 3279; GFX900-NEXT: v_mov_b32_e32 v10, 0 3280; GFX900-NEXT: v_mov_b32_e32 v6, v8 3281; GFX900-NEXT: v_mov_b32_e32 v7, v9 3282; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16 3283; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] 3284; GFX900-NEXT: s_waitcnt vmcnt(0) 3285; GFX900-NEXT: s_setpc_b64 s[30:31] 3286; 3287; 
GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_0_1: 3288; GFX90A: ; %bb.0: 3289; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3290; GFX90A-NEXT: ;;#ASMSTART 3291; GFX90A-NEXT: ; def v[0:5] 3292; GFX90A-NEXT: ;;#ASMEND 3293; GFX90A-NEXT: ;;#ASMSTART 3294; GFX90A-NEXT: ; def v[4:9] 3295; GFX90A-NEXT: ;;#ASMEND 3296; GFX90A-NEXT: v_mov_b32_e32 v10, 0 3297; GFX90A-NEXT: v_mov_b32_e32 v6, v8 3298; GFX90A-NEXT: v_mov_b32_e32 v7, v9 3299; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16 3300; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] 3301; GFX90A-NEXT: s_waitcnt vmcnt(0) 3302; GFX90A-NEXT: s_setpc_b64 s[30:31] 3303; 3304; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_0_1: 3305; GFX940: ; %bb.0: 3306; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3307; GFX940-NEXT: ;;#ASMSTART 3308; GFX940-NEXT: ; def v[0:5] 3309; GFX940-NEXT: ;;#ASMEND 3310; GFX940-NEXT: v_mov_b32_e32 v10, 0 3311; GFX940-NEXT: ;;#ASMSTART 3312; GFX940-NEXT: ; def v[4:9] 3313; GFX940-NEXT: ;;#ASMEND 3314; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16 sc0 sc1 3315; GFX940-NEXT: v_mov_b32_e32 v6, v8 3316; GFX940-NEXT: v_mov_b32_e32 v7, v9 3317; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] sc0 sc1 3318; GFX940-NEXT: s_waitcnt vmcnt(0) 3319; GFX940-NEXT: s_setpc_b64 s[30:31] 3320 %vec0 = call <3 x i64> asm "; def $0", "=v"() 3321 %vec1 = call <3 x i64> asm "; def $0", "=v"() 3322 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 0, i32 1> 3323 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 3324 ret void 3325} 3326 3327define void @v_shuffle_v4i64_v3i64__5_5_2_1(ptr addrspace(1) inreg %ptr) { 3328; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_2_1: 3329; GFX900: ; %bb.0: 3330; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3331; GFX900-NEXT: ;;#ASMSTART 3332; GFX900-NEXT: ; def v[0:5] 3333; GFX900-NEXT: ;;#ASMEND 3334; GFX900-NEXT: ;;#ASMSTART 3335; GFX900-NEXT: ; def v[6:11] 3336; GFX900-NEXT: ;;#ASMEND 
3337; GFX900-NEXT: v_mov_b32_e32 v12, 0 3338; GFX900-NEXT: v_mov_b32_e32 v0, v4 3339; GFX900-NEXT: v_mov_b32_e32 v1, v5 3340; GFX900-NEXT: v_mov_b32_e32 v8, v10 3341; GFX900-NEXT: v_mov_b32_e32 v9, v11 3342; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] offset:16 3343; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] 3344; GFX900-NEXT: s_waitcnt vmcnt(0) 3345; GFX900-NEXT: s_setpc_b64 s[30:31] 3346; 3347; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_2_1: 3348; GFX90A: ; %bb.0: 3349; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3350; GFX90A-NEXT: ;;#ASMSTART 3351; GFX90A-NEXT: ; def v[0:5] 3352; GFX90A-NEXT: ;;#ASMEND 3353; GFX90A-NEXT: ;;#ASMSTART 3354; GFX90A-NEXT: ; def v[6:11] 3355; GFX90A-NEXT: ;;#ASMEND 3356; GFX90A-NEXT: v_mov_b32_e32 v12, 0 3357; GFX90A-NEXT: v_mov_b32_e32 v0, v4 3358; GFX90A-NEXT: v_mov_b32_e32 v1, v5 3359; GFX90A-NEXT: v_mov_b32_e32 v8, v10 3360; GFX90A-NEXT: v_mov_b32_e32 v9, v11 3361; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] offset:16 3362; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] 3363; GFX90A-NEXT: s_waitcnt vmcnt(0) 3364; GFX90A-NEXT: s_setpc_b64 s[30:31] 3365; 3366; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_2_1: 3367; GFX940: ; %bb.0: 3368; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3369; GFX940-NEXT: ;;#ASMSTART 3370; GFX940-NEXT: ; def v[0:5] 3371; GFX940-NEXT: ;;#ASMEND 3372; GFX940-NEXT: ;;#ASMSTART 3373; GFX940-NEXT: ; def v[6:11] 3374; GFX940-NEXT: ;;#ASMEND 3375; GFX940-NEXT: v_mov_b32_e32 v12, 0 3376; GFX940-NEXT: v_mov_b32_e32 v0, v4 3377; GFX940-NEXT: v_mov_b32_e32 v1, v5 3378; GFX940-NEXT: v_mov_b32_e32 v8, v10 3379; GFX940-NEXT: v_mov_b32_e32 v9, v11 3380; GFX940-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] offset:16 sc0 sc1 3381; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1 3382; GFX940-NEXT: s_waitcnt vmcnt(0) 3383; GFX940-NEXT: s_setpc_b64 s[30:31] 3384 %vec0 = call <3 x i64> asm "; def $0", "=v"() 3385 %vec1 = call <3 x i64> asm "; def 
$0", "=v"() 3386 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 2, i32 1> 3387 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 3388 ret void 3389} 3390 3391define void @v_shuffle_v4i64_v3i64__5_5_3_1(ptr addrspace(1) inreg %ptr) { 3392; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_3_1: 3393; GFX900: ; %bb.0: 3394; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3395; GFX900-NEXT: ;;#ASMSTART 3396; GFX900-NEXT: ; def v[0:5] 3397; GFX900-NEXT: ;;#ASMEND 3398; GFX900-NEXT: ;;#ASMSTART 3399; GFX900-NEXT: ; def v[4:9] 3400; GFX900-NEXT: ;;#ASMEND 3401; GFX900-NEXT: v_mov_b32_e32 v10, 0 3402; GFX900-NEXT: v_mov_b32_e32 v6, v2 3403; GFX900-NEXT: v_mov_b32_e32 v7, v3 3404; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17] offset:16 3405; GFX900-NEXT: s_nop 0 3406; GFX900-NEXT: v_mov_b32_e32 v6, v8 3407; GFX900-NEXT: v_mov_b32_e32 v7, v9 3408; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] 3409; GFX900-NEXT: s_waitcnt vmcnt(0) 3410; GFX900-NEXT: s_setpc_b64 s[30:31] 3411; 3412; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_3_1: 3413; GFX90A: ; %bb.0: 3414; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3415; GFX90A-NEXT: ;;#ASMSTART 3416; GFX90A-NEXT: ; def v[0:5] 3417; GFX90A-NEXT: ;;#ASMEND 3418; GFX90A-NEXT: ;;#ASMSTART 3419; GFX90A-NEXT: ; def v[4:9] 3420; GFX90A-NEXT: ;;#ASMEND 3421; GFX90A-NEXT: v_mov_b32_e32 v10, 0 3422; GFX90A-NEXT: v_mov_b32_e32 v6, v2 3423; GFX90A-NEXT: v_mov_b32_e32 v7, v3 3424; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17] offset:16 3425; GFX90A-NEXT: s_nop 0 3426; GFX90A-NEXT: v_mov_b32_e32 v6, v8 3427; GFX90A-NEXT: v_mov_b32_e32 v7, v9 3428; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] 3429; GFX90A-NEXT: s_waitcnt vmcnt(0) 3430; GFX90A-NEXT: s_setpc_b64 s[30:31] 3431; 3432; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_3_1: 3433; GFX940: ; %bb.0: 3434; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3435; GFX940-NEXT: ;;#ASMSTART 3436; GFX940-NEXT: ; def v[0:5] 
3437; GFX940-NEXT: ;;#ASMEND 3438; GFX940-NEXT: v_mov_b32_e32 v10, 0 3439; GFX940-NEXT: ;;#ASMSTART 3440; GFX940-NEXT: ; def v[4:9] 3441; GFX940-NEXT: ;;#ASMEND 3442; GFX940-NEXT: s_nop 0 3443; GFX940-NEXT: v_mov_b32_e32 v6, v2 3444; GFX940-NEXT: v_mov_b32_e32 v7, v3 3445; GFX940-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1] offset:16 sc0 sc1 3446; GFX940-NEXT: s_nop 1 3447; GFX940-NEXT: v_mov_b32_e32 v6, v8 3448; GFX940-NEXT: v_mov_b32_e32 v7, v9 3449; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] sc0 sc1 3450; GFX940-NEXT: s_waitcnt vmcnt(0) 3451; GFX940-NEXT: s_setpc_b64 s[30:31] 3452 %vec0 = call <3 x i64> asm "; def $0", "=v"() 3453 %vec1 = call <3 x i64> asm "; def $0", "=v"() 3454 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 3, i32 1> 3455 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 3456 ret void 3457} 3458 3459define void @v_shuffle_v4i64_v3i64__5_5_4_1(ptr addrspace(1) inreg %ptr) { 3460; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_4_1: 3461; GFX900: ; %bb.0: 3462; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3463; GFX900-NEXT: ;;#ASMSTART 3464; GFX900-NEXT: ; def v[0:5] 3465; GFX900-NEXT: ;;#ASMEND 3466; GFX900-NEXT: ;;#ASMSTART 3467; GFX900-NEXT: ; def v[4:9] 3468; GFX900-NEXT: ;;#ASMEND 3469; GFX900-NEXT: v_mov_b32_e32 v10, 0 3470; GFX900-NEXT: v_mov_b32_e32 v0, v6 3471; GFX900-NEXT: v_mov_b32_e32 v1, v7 3472; GFX900-NEXT: v_mov_b32_e32 v6, v8 3473; GFX900-NEXT: v_mov_b32_e32 v7, v9 3474; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16 3475; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] 3476; GFX900-NEXT: s_waitcnt vmcnt(0) 3477; GFX900-NEXT: s_setpc_b64 s[30:31] 3478; 3479; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_4_1: 3480; GFX90A: ; %bb.0: 3481; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3482; GFX90A-NEXT: ;;#ASMSTART 3483; GFX90A-NEXT: ; def v[0:5] 3484; GFX90A-NEXT: ;;#ASMEND 3485; GFX90A-NEXT: ;;#ASMSTART 3486; GFX90A-NEXT: ; def v[4:9] 3487; 
GFX90A-NEXT: ;;#ASMEND 3488; GFX90A-NEXT: v_mov_b32_e32 v10, 0 3489; GFX90A-NEXT: v_mov_b32_e32 v0, v6 3490; GFX90A-NEXT: v_mov_b32_e32 v1, v7 3491; GFX90A-NEXT: v_mov_b32_e32 v6, v8 3492; GFX90A-NEXT: v_mov_b32_e32 v7, v9 3493; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16 3494; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] 3495; GFX90A-NEXT: s_waitcnt vmcnt(0) 3496; GFX90A-NEXT: s_setpc_b64 s[30:31] 3497; 3498; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_4_1: 3499; GFX940: ; %bb.0: 3500; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3501; GFX940-NEXT: ;;#ASMSTART 3502; GFX940-NEXT: ; def v[0:5] 3503; GFX940-NEXT: ;;#ASMEND 3504; GFX940-NEXT: v_mov_b32_e32 v10, 0 3505; GFX940-NEXT: ;;#ASMSTART 3506; GFX940-NEXT: ; def v[4:9] 3507; GFX940-NEXT: ;;#ASMEND 3508; GFX940-NEXT: s_nop 0 3509; GFX940-NEXT: v_mov_b32_e32 v0, v6 3510; GFX940-NEXT: v_mov_b32_e32 v1, v7 3511; GFX940-NEXT: v_mov_b32_e32 v6, v8 3512; GFX940-NEXT: v_mov_b32_e32 v7, v9 3513; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16 sc0 sc1 3514; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] sc0 sc1 3515; GFX940-NEXT: s_waitcnt vmcnt(0) 3516; GFX940-NEXT: s_setpc_b64 s[30:31] 3517 %vec0 = call <3 x i64> asm "; def $0", "=v"() 3518 %vec1 = call <3 x i64> asm "; def $0", "=v"() 3519 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 4, i32 1> 3520 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 3521 ret void 3522} 3523 3524define void @v_shuffle_v4i64_v3i64__u_2_2_2(ptr addrspace(1) inreg %ptr) { 3525; GFX900-LABEL: v_shuffle_v4i64_v3i64__u_2_2_2: 3526; GFX900: ; %bb.0: 3527; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3528; GFX900-NEXT: ;;#ASMSTART 3529; GFX900-NEXT: ; def v[0:5] 3530; GFX900-NEXT: ;;#ASMEND 3531; GFX900-NEXT: v_mov_b32_e32 v6, 0 3532; GFX900-NEXT: v_mov_b32_e32 v2, v4 3533; GFX900-NEXT: v_mov_b32_e32 v3, v5 3534; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16 3535; 
GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] 3536; GFX900-NEXT: s_waitcnt vmcnt(0) 3537; GFX900-NEXT: s_setpc_b64 s[30:31] 3538; 3539; GFX90A-LABEL: v_shuffle_v4i64_v3i64__u_2_2_2: 3540; GFX90A: ; %bb.0: 3541; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3542; GFX90A-NEXT: ;;#ASMSTART 3543; GFX90A-NEXT: ; def v[0:5] 3544; GFX90A-NEXT: ;;#ASMEND 3545; GFX90A-NEXT: v_mov_b32_e32 v6, 0 3546; GFX90A-NEXT: v_mov_b32_e32 v2, v4 3547; GFX90A-NEXT: v_mov_b32_e32 v3, v5 3548; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16 3549; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] 3550; GFX90A-NEXT: s_waitcnt vmcnt(0) 3551; GFX90A-NEXT: s_setpc_b64 s[30:31] 3552; 3553; GFX940-LABEL: v_shuffle_v4i64_v3i64__u_2_2_2: 3554; GFX940: ; %bb.0: 3555; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3556; GFX940-NEXT: ;;#ASMSTART 3557; GFX940-NEXT: ; def v[0:5] 3558; GFX940-NEXT: ;;#ASMEND 3559; GFX940-NEXT: v_mov_b32_e32 v6, 0 3560; GFX940-NEXT: v_mov_b32_e32 v2, v4 3561; GFX940-NEXT: v_mov_b32_e32 v3, v5 3562; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1 3563; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 3564; GFX940-NEXT: s_waitcnt vmcnt(0) 3565; GFX940-NEXT: s_setpc_b64 s[30:31] 3566 %vec0 = call <3 x i64> asm "; def $0", "=v"() 3567 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 poison, i32 2, i32 2, i32 2> 3568 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 3569 ret void 3570} 3571 3572define void @v_shuffle_v4i64_v3i64__0_2_2_2(ptr addrspace(1) inreg %ptr) { 3573; GFX900-LABEL: v_shuffle_v4i64_v3i64__0_2_2_2: 3574; GFX900: ; %bb.0: 3575; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3576; GFX900-NEXT: ;;#ASMSTART 3577; GFX900-NEXT: ; def v[0:5] 3578; GFX900-NEXT: ;;#ASMEND 3579; GFX900-NEXT: v_mov_b32_e32 v6, 0 3580; GFX900-NEXT: v_mov_b32_e32 v2, v4 3581; GFX900-NEXT: v_mov_b32_e32 v3, v5 3582; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] 
offset:16 3583; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] 3584; GFX900-NEXT: s_waitcnt vmcnt(0) 3585; GFX900-NEXT: s_setpc_b64 s[30:31] 3586; 3587; GFX90A-LABEL: v_shuffle_v4i64_v3i64__0_2_2_2: 3588; GFX90A: ; %bb.0: 3589; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3590; GFX90A-NEXT: ;;#ASMSTART 3591; GFX90A-NEXT: ; def v[0:5] 3592; GFX90A-NEXT: ;;#ASMEND 3593; GFX90A-NEXT: v_mov_b32_e32 v6, 0 3594; GFX90A-NEXT: v_mov_b32_e32 v2, v4 3595; GFX90A-NEXT: v_mov_b32_e32 v3, v5 3596; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16 3597; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] 3598; GFX90A-NEXT: s_waitcnt vmcnt(0) 3599; GFX90A-NEXT: s_setpc_b64 s[30:31] 3600; 3601; GFX940-LABEL: v_shuffle_v4i64_v3i64__0_2_2_2: 3602; GFX940: ; %bb.0: 3603; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3604; GFX940-NEXT: ;;#ASMSTART 3605; GFX940-NEXT: ; def v[0:5] 3606; GFX940-NEXT: ;;#ASMEND 3607; GFX940-NEXT: v_mov_b32_e32 v6, 0 3608; GFX940-NEXT: v_mov_b32_e32 v2, v4 3609; GFX940-NEXT: v_mov_b32_e32 v3, v5 3610; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1 3611; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 3612; GFX940-NEXT: s_waitcnt vmcnt(0) 3613; GFX940-NEXT: s_setpc_b64 s[30:31] 3614 %vec0 = call <3 x i64> asm "; def $0", "=v"() 3615 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 0, i32 2, i32 2, i32 2> 3616 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 3617 ret void 3618} 3619 3620define void @v_shuffle_v4i64_v3i64__1_2_2_2(ptr addrspace(1) inreg %ptr) { 3621; GFX900-LABEL: v_shuffle_v4i64_v3i64__1_2_2_2: 3622; GFX900: ; %bb.0: 3623; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3624; GFX900-NEXT: v_mov_b32_e32 v10, 0 3625; GFX900-NEXT: ;;#ASMSTART 3626; GFX900-NEXT: ; def v[0:5] 3627; GFX900-NEXT: ;;#ASMEND 3628; GFX900-NEXT: v_mov_b32_e32 v6, v4 3629; GFX900-NEXT: v_mov_b32_e32 v7, v5 3630; GFX900-NEXT: v_mov_b32_e32 v8, v4 3631; 
GFX900-NEXT: v_mov_b32_e32 v9, v5 3632; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16 3633; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] 3634; GFX900-NEXT: s_waitcnt vmcnt(0) 3635; GFX900-NEXT: s_setpc_b64 s[30:31] 3636; 3637; GFX90A-LABEL: v_shuffle_v4i64_v3i64__1_2_2_2: 3638; GFX90A: ; %bb.0: 3639; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3640; GFX90A-NEXT: v_mov_b32_e32 v10, 0 3641; GFX90A-NEXT: ;;#ASMSTART 3642; GFX90A-NEXT: ; def v[0:5] 3643; GFX90A-NEXT: ;;#ASMEND 3644; GFX90A-NEXT: v_mov_b32_e32 v6, v4 3645; GFX90A-NEXT: v_mov_b32_e32 v7, v5 3646; GFX90A-NEXT: v_mov_b32_e32 v8, v4 3647; GFX90A-NEXT: v_mov_b32_e32 v9, v5 3648; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16 3649; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] 3650; GFX90A-NEXT: s_waitcnt vmcnt(0) 3651; GFX90A-NEXT: s_setpc_b64 s[30:31] 3652; 3653; GFX940-LABEL: v_shuffle_v4i64_v3i64__1_2_2_2: 3654; GFX940: ; %bb.0: 3655; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3656; GFX940-NEXT: v_mov_b32_e32 v10, 0 3657; GFX940-NEXT: ;;#ASMSTART 3658; GFX940-NEXT: ; def v[0:5] 3659; GFX940-NEXT: ;;#ASMEND 3660; GFX940-NEXT: s_nop 0 3661; GFX940-NEXT: v_mov_b32_e32 v6, v4 3662; GFX940-NEXT: v_mov_b32_e32 v7, v5 3663; GFX940-NEXT: v_mov_b32_e32 v8, v4 3664; GFX940-NEXT: v_mov_b32_e32 v9, v5 3665; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16 sc0 sc1 3666; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] sc0 sc1 3667; GFX940-NEXT: s_waitcnt vmcnt(0) 3668; GFX940-NEXT: s_setpc_b64 s[30:31] 3669 %vec0 = call <3 x i64> asm "; def $0", "=v"() 3670 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 1, i32 2, i32 2, i32 2> 3671 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 3672 ret void 3673} 3674 3675define void @v_shuffle_v4i64_v3i64__2_2_2_2(ptr addrspace(1) inreg %ptr) { 3676; GFX900-LABEL: v_shuffle_v4i64_v3i64__2_2_2_2: 3677; GFX900: ; %bb.0: 3678; GFX900-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) 3679; GFX900-NEXT: ;;#ASMSTART 3680; GFX900-NEXT: ; def v[0:5] 3681; GFX900-NEXT: ;;#ASMEND 3682; GFX900-NEXT: v_mov_b32_e32 v6, 0 3683; GFX900-NEXT: v_mov_b32_e32 v2, v4 3684; GFX900-NEXT: v_mov_b32_e32 v3, v5 3685; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16 3686; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] 3687; GFX900-NEXT: s_waitcnt vmcnt(0) 3688; GFX900-NEXT: s_setpc_b64 s[30:31] 3689; 3690; GFX90A-LABEL: v_shuffle_v4i64_v3i64__2_2_2_2: 3691; GFX90A: ; %bb.0: 3692; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3693; GFX90A-NEXT: ;;#ASMSTART 3694; GFX90A-NEXT: ; def v[0:5] 3695; GFX90A-NEXT: ;;#ASMEND 3696; GFX90A-NEXT: v_mov_b32_e32 v6, 0 3697; GFX90A-NEXT: v_mov_b32_e32 v2, v4 3698; GFX90A-NEXT: v_mov_b32_e32 v3, v5 3699; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16 3700; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] 3701; GFX90A-NEXT: s_waitcnt vmcnt(0) 3702; GFX90A-NEXT: s_setpc_b64 s[30:31] 3703; 3704; GFX940-LABEL: v_shuffle_v4i64_v3i64__2_2_2_2: 3705; GFX940: ; %bb.0: 3706; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3707; GFX940-NEXT: ;;#ASMSTART 3708; GFX940-NEXT: ; def v[0:5] 3709; GFX940-NEXT: ;;#ASMEND 3710; GFX940-NEXT: v_mov_b32_e32 v6, 0 3711; GFX940-NEXT: v_mov_b32_e32 v2, v4 3712; GFX940-NEXT: v_mov_b32_e32 v3, v5 3713; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1 3714; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 3715; GFX940-NEXT: s_waitcnt vmcnt(0) 3716; GFX940-NEXT: s_setpc_b64 s[30:31] 3717 %vec0 = call <3 x i64> asm "; def $0", "=v"() 3718 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2> 3719 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 3720 ret void 3721} 3722 3723define void @v_shuffle_v4i64_v3i64__3_2_2_2(ptr addrspace(1) inreg %ptr) { 3724; GFX900-LABEL: v_shuffle_v4i64_v3i64__3_2_2_2: 3725; GFX900: ; %bb.0: 3726; GFX900-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3727; GFX900-NEXT: ;;#ASMSTART 3728; GFX900-NEXT: ; def v[0:5] 3729; GFX900-NEXT: ;;#ASMEND 3730; GFX900-NEXT: v_mov_b32_e32 v6, 0 3731; GFX900-NEXT: v_mov_b32_e32 v2, v4 3732; GFX900-NEXT: v_mov_b32_e32 v3, v5 3733; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16 3734; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] 3735; GFX900-NEXT: s_waitcnt vmcnt(0) 3736; GFX900-NEXT: s_setpc_b64 s[30:31] 3737; 3738; GFX90A-LABEL: v_shuffle_v4i64_v3i64__3_2_2_2: 3739; GFX90A: ; %bb.0: 3740; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3741; GFX90A-NEXT: ;;#ASMSTART 3742; GFX90A-NEXT: ; def v[0:5] 3743; GFX90A-NEXT: ;;#ASMEND 3744; GFX90A-NEXT: v_mov_b32_e32 v6, 0 3745; GFX90A-NEXT: v_mov_b32_e32 v2, v4 3746; GFX90A-NEXT: v_mov_b32_e32 v3, v5 3747; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16 3748; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] 3749; GFX90A-NEXT: s_waitcnt vmcnt(0) 3750; GFX90A-NEXT: s_setpc_b64 s[30:31] 3751; 3752; GFX940-LABEL: v_shuffle_v4i64_v3i64__3_2_2_2: 3753; GFX940: ; %bb.0: 3754; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3755; GFX940-NEXT: ;;#ASMSTART 3756; GFX940-NEXT: ; def v[0:5] 3757; GFX940-NEXT: ;;#ASMEND 3758; GFX940-NEXT: v_mov_b32_e32 v6, 0 3759; GFX940-NEXT: v_mov_b32_e32 v2, v4 3760; GFX940-NEXT: v_mov_b32_e32 v3, v5 3761; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1 3762; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 3763; GFX940-NEXT: s_waitcnt vmcnt(0) 3764; GFX940-NEXT: s_setpc_b64 s[30:31] 3765 %vec0 = call <3 x i64> asm "; def $0", "=v"() 3766 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 3, i32 2, i32 2, i32 2> 3767 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 3768 ret void 3769} 3770 3771define void @v_shuffle_v4i64_v3i64__4_2_2_2(ptr addrspace(1) inreg %ptr) { 3772; GFX900-LABEL: v_shuffle_v4i64_v3i64__4_2_2_2: 3773; GFX900: ; %bb.0: 3774; 
GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3775; GFX900-NEXT: ;;#ASMSTART 3776; GFX900-NEXT: ; def v[0:5] 3777; GFX900-NEXT: ;;#ASMEND 3778; GFX900-NEXT: ;;#ASMSTART 3779; GFX900-NEXT: ; def v[6:11] 3780; GFX900-NEXT: ;;#ASMEND 3781; GFX900-NEXT: v_mov_b32_e32 v12, 0 3782; GFX900-NEXT: v_mov_b32_e32 v2, v4 3783; GFX900-NEXT: v_mov_b32_e32 v3, v5 3784; GFX900-NEXT: v_mov_b32_e32 v10, v4 3785; GFX900-NEXT: v_mov_b32_e32 v11, v5 3786; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16 3787; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] 3788; GFX900-NEXT: s_waitcnt vmcnt(0) 3789; GFX900-NEXT: s_setpc_b64 s[30:31] 3790; 3791; GFX90A-LABEL: v_shuffle_v4i64_v3i64__4_2_2_2: 3792; GFX90A: ; %bb.0: 3793; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3794; GFX90A-NEXT: ;;#ASMSTART 3795; GFX90A-NEXT: ; def v[0:5] 3796; GFX90A-NEXT: ;;#ASMEND 3797; GFX90A-NEXT: ;;#ASMSTART 3798; GFX90A-NEXT: ; def v[6:11] 3799; GFX90A-NEXT: ;;#ASMEND 3800; GFX90A-NEXT: v_mov_b32_e32 v12, 0 3801; GFX90A-NEXT: v_mov_b32_e32 v2, v4 3802; GFX90A-NEXT: v_mov_b32_e32 v3, v5 3803; GFX90A-NEXT: v_mov_b32_e32 v10, v4 3804; GFX90A-NEXT: v_mov_b32_e32 v11, v5 3805; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16 3806; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] 3807; GFX90A-NEXT: s_waitcnt vmcnt(0) 3808; GFX90A-NEXT: s_setpc_b64 s[30:31] 3809; 3810; GFX940-LABEL: v_shuffle_v4i64_v3i64__4_2_2_2: 3811; GFX940: ; %bb.0: 3812; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3813; GFX940-NEXT: ;;#ASMSTART 3814; GFX940-NEXT: ; def v[0:5] 3815; GFX940-NEXT: ;;#ASMEND 3816; GFX940-NEXT: ;;#ASMSTART 3817; GFX940-NEXT: ; def v[6:11] 3818; GFX940-NEXT: ;;#ASMEND 3819; GFX940-NEXT: v_mov_b32_e32 v12, 0 3820; GFX940-NEXT: v_mov_b32_e32 v2, v4 3821; GFX940-NEXT: v_mov_b32_e32 v3, v5 3822; GFX940-NEXT: v_mov_b32_e32 v10, v4 3823; GFX940-NEXT: v_mov_b32_e32 v11, v5 3824; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 sc0 sc1 
3825; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1 3826; GFX940-NEXT: s_waitcnt vmcnt(0) 3827; GFX940-NEXT: s_setpc_b64 s[30:31] 3828 %vec0 = call <3 x i64> asm "; def $0", "=v"() 3829 %vec1 = call <3 x i64> asm "; def $0", "=v"() 3830 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 4, i32 2, i32 2, i32 2> 3831 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 3832 ret void 3833} 3834 3835define void @v_shuffle_v4i64_v3i64__5_2_2_2(ptr addrspace(1) inreg %ptr) { 3836; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_2_2_2: 3837; GFX900: ; %bb.0: 3838; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3839; GFX900-NEXT: ;;#ASMSTART 3840; GFX900-NEXT: ; def v[0:5] 3841; GFX900-NEXT: ;;#ASMEND 3842; GFX900-NEXT: v_mov_b32_e32 v12, 0 3843; GFX900-NEXT: v_mov_b32_e32 v2, v4 3844; GFX900-NEXT: v_mov_b32_e32 v3, v5 3845; GFX900-NEXT: ;;#ASMSTART 3846; GFX900-NEXT: ; def v[6:11] 3847; GFX900-NEXT: ;;#ASMEND 3848; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16 3849; GFX900-NEXT: s_nop 0 3850; GFX900-NEXT: v_mov_b32_e32 v2, v10 3851; GFX900-NEXT: v_mov_b32_e32 v3, v11 3852; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] 3853; GFX900-NEXT: s_waitcnt vmcnt(0) 3854; GFX900-NEXT: s_setpc_b64 s[30:31] 3855; 3856; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_2_2_2: 3857; GFX90A: ; %bb.0: 3858; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3859; GFX90A-NEXT: ;;#ASMSTART 3860; GFX90A-NEXT: ; def v[0:5] 3861; GFX90A-NEXT: ;;#ASMEND 3862; GFX90A-NEXT: v_mov_b32_e32 v12, 0 3863; GFX90A-NEXT: v_mov_b32_e32 v2, v4 3864; GFX90A-NEXT: v_mov_b32_e32 v3, v5 3865; GFX90A-NEXT: ;;#ASMSTART 3866; GFX90A-NEXT: ; def v[6:11] 3867; GFX90A-NEXT: ;;#ASMEND 3868; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16 3869; GFX90A-NEXT: s_nop 0 3870; GFX90A-NEXT: v_mov_b32_e32 v2, v10 3871; GFX90A-NEXT: v_mov_b32_e32 v3, v11 3872; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] 3873; GFX90A-NEXT: s_waitcnt vmcnt(0) 
3874; GFX90A-NEXT: s_setpc_b64 s[30:31] 3875; 3876; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_2_2_2: 3877; GFX940: ; %bb.0: 3878; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3879; GFX940-NEXT: ;;#ASMSTART 3880; GFX940-NEXT: ; def v[0:5] 3881; GFX940-NEXT: ;;#ASMEND 3882; GFX940-NEXT: v_mov_b32_e32 v12, 0 3883; GFX940-NEXT: v_mov_b32_e32 v2, v4 3884; GFX940-NEXT: v_mov_b32_e32 v3, v5 3885; GFX940-NEXT: ;;#ASMSTART 3886; GFX940-NEXT: ; def v[6:11] 3887; GFX940-NEXT: ;;#ASMEND 3888; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 sc0 sc1 3889; GFX940-NEXT: s_nop 1 3890; GFX940-NEXT: v_mov_b32_e32 v2, v10 3891; GFX940-NEXT: v_mov_b32_e32 v3, v11 3892; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] sc0 sc1 3893; GFX940-NEXT: s_waitcnt vmcnt(0) 3894; GFX940-NEXT: s_setpc_b64 s[30:31] 3895 %vec0 = call <3 x i64> asm "; def $0", "=v"() 3896 %vec1 = call <3 x i64> asm "; def $0", "=v"() 3897 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 2, i32 2, i32 2> 3898 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 3899 ret void 3900} 3901 3902define void @v_shuffle_v4i64_v3i64__5_u_2_2(ptr addrspace(1) inreg %ptr) { 3903; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_u_2_2: 3904; GFX900: ; %bb.0: 3905; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3906; GFX900-NEXT: ;;#ASMSTART 3907; GFX900-NEXT: ; def v[0:5] 3908; GFX900-NEXT: ;;#ASMEND 3909; GFX900-NEXT: v_mov_b32_e32 v12, 0 3910; GFX900-NEXT: ;;#ASMSTART 3911; GFX900-NEXT: ; def v[6:11] 3912; GFX900-NEXT: ;;#ASMEND 3913; GFX900-NEXT: v_mov_b32_e32 v2, v4 3914; GFX900-NEXT: v_mov_b32_e32 v3, v5 3915; GFX900-NEXT: v_mov_b32_e32 v0, v10 3916; GFX900-NEXT: v_mov_b32_e32 v1, v11 3917; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16 3918; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] 3919; GFX900-NEXT: s_waitcnt vmcnt(0) 3920; GFX900-NEXT: s_setpc_b64 s[30:31] 3921; 3922; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_u_2_2: 3923; GFX90A: ; %bb.0: 
3924; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3925; GFX90A-NEXT: ;;#ASMSTART 3926; GFX90A-NEXT: ; def v[0:5] 3927; GFX90A-NEXT: ;;#ASMEND 3928; GFX90A-NEXT: v_mov_b32_e32 v12, 0 3929; GFX90A-NEXT: ;;#ASMSTART 3930; GFX90A-NEXT: ; def v[6:11] 3931; GFX90A-NEXT: ;;#ASMEND 3932; GFX90A-NEXT: v_mov_b32_e32 v2, v4 3933; GFX90A-NEXT: v_mov_b32_e32 v3, v5 3934; GFX90A-NEXT: v_mov_b32_e32 v0, v10 3935; GFX90A-NEXT: v_mov_b32_e32 v1, v11 3936; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16 3937; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] 3938; GFX90A-NEXT: s_waitcnt vmcnt(0) 3939; GFX90A-NEXT: s_setpc_b64 s[30:31] 3940; 3941; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_u_2_2: 3942; GFX940: ; %bb.0: 3943; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3944; GFX940-NEXT: ;;#ASMSTART 3945; GFX940-NEXT: ; def v[0:5] 3946; GFX940-NEXT: ;;#ASMEND 3947; GFX940-NEXT: v_mov_b32_e32 v12, 0 3948; GFX940-NEXT: ;;#ASMSTART 3949; GFX940-NEXT: ; def v[6:11] 3950; GFX940-NEXT: ;;#ASMEND 3951; GFX940-NEXT: v_mov_b32_e32 v2, v4 3952; GFX940-NEXT: v_mov_b32_e32 v3, v5 3953; GFX940-NEXT: v_mov_b32_e32 v0, v10 3954; GFX940-NEXT: v_mov_b32_e32 v1, v11 3955; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 sc0 sc1 3956; GFX940-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] sc0 sc1 3957; GFX940-NEXT: s_waitcnt vmcnt(0) 3958; GFX940-NEXT: s_setpc_b64 s[30:31] 3959 %vec0 = call <3 x i64> asm "; def $0", "=v"() 3960 %vec1 = call <3 x i64> asm "; def $0", "=v"() 3961 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 poison, i32 2, i32 2> 3962 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 3963 ret void 3964} 3965 3966define void @v_shuffle_v4i64_v3i64__5_0_2_2(ptr addrspace(1) inreg %ptr) { 3967; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_0_2_2: 3968; GFX900: ; %bb.0: 3969; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3970; GFX900-NEXT: ;;#ASMSTART 3971; GFX900-NEXT: ; def v[0:5] 3972; GFX900-NEXT: 
;;#ASMEND 3973; GFX900-NEXT: v_mov_b32_e32 v12, 0 3974; GFX900-NEXT: v_mov_b32_e32 v2, v4 3975; GFX900-NEXT: v_mov_b32_e32 v3, v5 3976; GFX900-NEXT: ;;#ASMSTART 3977; GFX900-NEXT: ; def v[6:11] 3978; GFX900-NEXT: ;;#ASMEND 3979; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16 3980; GFX900-NEXT: s_nop 0 3981; GFX900-NEXT: v_mov_b32_e32 v2, v10 3982; GFX900-NEXT: v_mov_b32_e32 v3, v11 3983; GFX900-NEXT: v_mov_b32_e32 v4, v0 3984; GFX900-NEXT: v_mov_b32_e32 v5, v1 3985; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] 3986; GFX900-NEXT: s_waitcnt vmcnt(0) 3987; GFX900-NEXT: s_setpc_b64 s[30:31] 3988; 3989; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_0_2_2: 3990; GFX90A: ; %bb.0: 3991; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3992; GFX90A-NEXT: ;;#ASMSTART 3993; GFX90A-NEXT: ; def v[0:5] 3994; GFX90A-NEXT: ;;#ASMEND 3995; GFX90A-NEXT: v_mov_b32_e32 v12, 0 3996; GFX90A-NEXT: v_mov_b32_e32 v2, v4 3997; GFX90A-NEXT: v_mov_b32_e32 v3, v5 3998; GFX90A-NEXT: ;;#ASMSTART 3999; GFX90A-NEXT: ; def v[6:11] 4000; GFX90A-NEXT: ;;#ASMEND 4001; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16 4002; GFX90A-NEXT: s_nop 0 4003; GFX90A-NEXT: v_mov_b32_e32 v2, v10 4004; GFX90A-NEXT: v_mov_b32_e32 v3, v11 4005; GFX90A-NEXT: v_mov_b32_e32 v4, v0 4006; GFX90A-NEXT: v_mov_b32_e32 v5, v1 4007; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] 4008; GFX90A-NEXT: s_waitcnt vmcnt(0) 4009; GFX90A-NEXT: s_setpc_b64 s[30:31] 4010; 4011; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_0_2_2: 4012; GFX940: ; %bb.0: 4013; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4014; GFX940-NEXT: ;;#ASMSTART 4015; GFX940-NEXT: ; def v[0:5] 4016; GFX940-NEXT: ;;#ASMEND 4017; GFX940-NEXT: v_mov_b32_e32 v12, 0 4018; GFX940-NEXT: v_mov_b32_e32 v2, v4 4019; GFX940-NEXT: v_mov_b32_e32 v3, v5 4020; GFX940-NEXT: ;;#ASMSTART 4021; GFX940-NEXT: ; def v[6:11] 4022; GFX940-NEXT: ;;#ASMEND 4023; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 sc0 sc1 4024; 
GFX940-NEXT: s_nop 1 4025; GFX940-NEXT: v_mov_b32_e32 v2, v10 4026; GFX940-NEXT: v_mov_b32_e32 v3, v11 4027; GFX940-NEXT: v_mov_b32_e32 v4, v0 4028; GFX940-NEXT: v_mov_b32_e32 v5, v1 4029; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] sc0 sc1 4030; GFX940-NEXT: s_waitcnt vmcnt(0) 4031; GFX940-NEXT: s_setpc_b64 s[30:31] 4032 %vec0 = call <3 x i64> asm "; def $0", "=v"() 4033 %vec1 = call <3 x i64> asm "; def $0", "=v"() 4034 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 0, i32 2, i32 2> 4035 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 4036 ret void 4037} 4038 4039define void @v_shuffle_v4i64_v3i64__5_1_2_2(ptr addrspace(1) inreg %ptr) { 4040; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_1_2_2: 4041; GFX900: ; %bb.0: 4042; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4043; GFX900-NEXT: ;;#ASMSTART 4044; GFX900-NEXT: ; def v[0:5] 4045; GFX900-NEXT: ;;#ASMEND 4046; GFX900-NEXT: ;;#ASMSTART 4047; GFX900-NEXT: ; def v[6:11] 4048; GFX900-NEXT: ;;#ASMEND 4049; GFX900-NEXT: v_mov_b32_e32 v12, 0 4050; GFX900-NEXT: v_mov_b32_e32 v6, v4 4051; GFX900-NEXT: v_mov_b32_e32 v7, v5 4052; GFX900-NEXT: v_mov_b32_e32 v8, v4 4053; GFX900-NEXT: v_mov_b32_e32 v9, v5 4054; GFX900-NEXT: v_mov_b32_e32 v0, v10 4055; GFX900-NEXT: v_mov_b32_e32 v1, v11 4056; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16 4057; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] 4058; GFX900-NEXT: s_waitcnt vmcnt(0) 4059; GFX900-NEXT: s_setpc_b64 s[30:31] 4060; 4061; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_1_2_2: 4062; GFX90A: ; %bb.0: 4063; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4064; GFX90A-NEXT: ;;#ASMSTART 4065; GFX90A-NEXT: ; def v[0:5] 4066; GFX90A-NEXT: ;;#ASMEND 4067; GFX90A-NEXT: ;;#ASMSTART 4068; GFX90A-NEXT: ; def v[6:11] 4069; GFX90A-NEXT: ;;#ASMEND 4070; GFX90A-NEXT: v_mov_b32_e32 v12, 0 4071; GFX90A-NEXT: v_mov_b32_e32 v6, v4 4072; GFX90A-NEXT: v_mov_b32_e32 v7, v5 4073; GFX90A-NEXT: v_mov_b32_e32 v8, v4 4074; 
GFX90A-NEXT: v_mov_b32_e32 v9, v5 4075; GFX90A-NEXT: v_mov_b32_e32 v0, v10 4076; GFX90A-NEXT: v_mov_b32_e32 v1, v11 4077; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16 4078; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] 4079; GFX90A-NEXT: s_waitcnt vmcnt(0) 4080; GFX90A-NEXT: s_setpc_b64 s[30:31] 4081; 4082; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_1_2_2: 4083; GFX940: ; %bb.0: 4084; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4085; GFX940-NEXT: ;;#ASMSTART 4086; GFX940-NEXT: ; def v[0:5] 4087; GFX940-NEXT: ;;#ASMEND 4088; GFX940-NEXT: ;;#ASMSTART 4089; GFX940-NEXT: ; def v[6:11] 4090; GFX940-NEXT: ;;#ASMEND 4091; GFX940-NEXT: v_mov_b32_e32 v12, 0 4092; GFX940-NEXT: v_mov_b32_e32 v6, v4 4093; GFX940-NEXT: v_mov_b32_e32 v7, v5 4094; GFX940-NEXT: v_mov_b32_e32 v8, v4 4095; GFX940-NEXT: v_mov_b32_e32 v9, v5 4096; GFX940-NEXT: v_mov_b32_e32 v0, v10 4097; GFX940-NEXT: v_mov_b32_e32 v1, v11 4098; GFX940-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] offset:16 sc0 sc1 4099; GFX940-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] sc0 sc1 4100; GFX940-NEXT: s_waitcnt vmcnt(0) 4101; GFX940-NEXT: s_setpc_b64 s[30:31] 4102 %vec0 = call <3 x i64> asm "; def $0", "=v"() 4103 %vec1 = call <3 x i64> asm "; def $0", "=v"() 4104 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 1, i32 2, i32 2> 4105 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 4106 ret void 4107} 4108 4109define void @v_shuffle_v4i64_v3i64__5_3_2_2(ptr addrspace(1) inreg %ptr) { 4110; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_3_2_2: 4111; GFX900: ; %bb.0: 4112; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4113; GFX900-NEXT: ;;#ASMSTART 4114; GFX900-NEXT: ; def v[0:5] 4115; GFX900-NEXT: ;;#ASMEND 4116; GFX900-NEXT: v_mov_b32_e32 v12, 0 4117; GFX900-NEXT: v_mov_b32_e32 v2, v4 4118; GFX900-NEXT: v_mov_b32_e32 v3, v5 4119; GFX900-NEXT: ;;#ASMSTART 4120; GFX900-NEXT: ; def v[6:11] 4121; GFX900-NEXT: ;;#ASMEND 4122; GFX900-NEXT: 
global_store_dwordx4 v12, v[2:5], s[16:17] offset:16 4123; GFX900-NEXT: v_mov_b32_e32 v0, v10 4124; GFX900-NEXT: v_mov_b32_e32 v1, v11 4125; GFX900-NEXT: v_mov_b32_e32 v2, v6 4126; GFX900-NEXT: v_mov_b32_e32 v3, v7 4127; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] 4128; GFX900-NEXT: s_waitcnt vmcnt(0) 4129; GFX900-NEXT: s_setpc_b64 s[30:31] 4130; 4131; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_3_2_2: 4132; GFX90A: ; %bb.0: 4133; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4134; GFX90A-NEXT: ;;#ASMSTART 4135; GFX90A-NEXT: ; def v[0:5] 4136; GFX90A-NEXT: ;;#ASMEND 4137; GFX90A-NEXT: v_mov_b32_e32 v12, 0 4138; GFX90A-NEXT: v_mov_b32_e32 v2, v4 4139; GFX90A-NEXT: v_mov_b32_e32 v3, v5 4140; GFX90A-NEXT: ;;#ASMSTART 4141; GFX90A-NEXT: ; def v[6:11] 4142; GFX90A-NEXT: ;;#ASMEND 4143; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16 4144; GFX90A-NEXT: v_mov_b32_e32 v0, v10 4145; GFX90A-NEXT: v_mov_b32_e32 v1, v11 4146; GFX90A-NEXT: v_mov_b32_e32 v2, v6 4147; GFX90A-NEXT: v_mov_b32_e32 v3, v7 4148; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] 4149; GFX90A-NEXT: s_waitcnt vmcnt(0) 4150; GFX90A-NEXT: s_setpc_b64 s[30:31] 4151; 4152; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_3_2_2: 4153; GFX940: ; %bb.0: 4154; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4155; GFX940-NEXT: ;;#ASMSTART 4156; GFX940-NEXT: ; def v[0:5] 4157; GFX940-NEXT: ;;#ASMEND 4158; GFX940-NEXT: v_mov_b32_e32 v12, 0 4159; GFX940-NEXT: v_mov_b32_e32 v2, v4 4160; GFX940-NEXT: v_mov_b32_e32 v3, v5 4161; GFX940-NEXT: ;;#ASMSTART 4162; GFX940-NEXT: ; def v[6:11] 4163; GFX940-NEXT: ;;#ASMEND 4164; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 sc0 sc1 4165; GFX940-NEXT: v_mov_b32_e32 v0, v10 4166; GFX940-NEXT: v_mov_b32_e32 v1, v11 4167; GFX940-NEXT: v_mov_b32_e32 v2, v6 4168; GFX940-NEXT: v_mov_b32_e32 v3, v7 4169; GFX940-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] sc0 sc1 4170; GFX940-NEXT: s_waitcnt vmcnt(0) 4171; GFX940-NEXT: s_setpc_b64 
s[30:31] 4172 %vec0 = call <3 x i64> asm "; def $0", "=v"() 4173 %vec1 = call <3 x i64> asm "; def $0", "=v"() 4174 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 3, i32 2, i32 2> 4175 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 4176 ret void 4177} 4178 4179define void @v_shuffle_v4i64_v3i64__5_4_2_2(ptr addrspace(1) inreg %ptr) { 4180; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_4_2_2: 4181; GFX900: ; %bb.0: 4182; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4183; GFX900-NEXT: ;;#ASMSTART 4184; GFX900-NEXT: ; def v[0:5] 4185; GFX900-NEXT: ;;#ASMEND 4186; GFX900-NEXT: ;;#ASMSTART 4187; GFX900-NEXT: ; def v[6:11] 4188; GFX900-NEXT: ;;#ASMEND 4189; GFX900-NEXT: v_mov_b32_e32 v12, 0 4190; GFX900-NEXT: v_mov_b32_e32 v2, v4 4191; GFX900-NEXT: v_mov_b32_e32 v3, v5 4192; GFX900-NEXT: v_mov_b32_e32 v6, v10 4193; GFX900-NEXT: v_mov_b32_e32 v7, v11 4194; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16 4195; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] 4196; GFX900-NEXT: s_waitcnt vmcnt(0) 4197; GFX900-NEXT: s_setpc_b64 s[30:31] 4198; 4199; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_4_2_2: 4200; GFX90A: ; %bb.0: 4201; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4202; GFX90A-NEXT: ;;#ASMSTART 4203; GFX90A-NEXT: ; def v[0:5] 4204; GFX90A-NEXT: ;;#ASMEND 4205; GFX90A-NEXT: ;;#ASMSTART 4206; GFX90A-NEXT: ; def v[6:11] 4207; GFX90A-NEXT: ;;#ASMEND 4208; GFX90A-NEXT: v_mov_b32_e32 v12, 0 4209; GFX90A-NEXT: v_mov_b32_e32 v2, v4 4210; GFX90A-NEXT: v_mov_b32_e32 v3, v5 4211; GFX90A-NEXT: v_mov_b32_e32 v6, v10 4212; GFX90A-NEXT: v_mov_b32_e32 v7, v11 4213; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16 4214; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] 4215; GFX90A-NEXT: s_waitcnt vmcnt(0) 4216; GFX90A-NEXT: s_setpc_b64 s[30:31] 4217; 4218; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_4_2_2: 4219; GFX940: ; %bb.0: 4220; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4221; 
GFX940-NEXT: ;;#ASMSTART 4222; GFX940-NEXT: ; def v[0:5] 4223; GFX940-NEXT: ;;#ASMEND 4224; GFX940-NEXT: ;;#ASMSTART 4225; GFX940-NEXT: ; def v[6:11] 4226; GFX940-NEXT: ;;#ASMEND 4227; GFX940-NEXT: v_mov_b32_e32 v12, 0 4228; GFX940-NEXT: v_mov_b32_e32 v2, v4 4229; GFX940-NEXT: v_mov_b32_e32 v3, v5 4230; GFX940-NEXT: v_mov_b32_e32 v6, v10 4231; GFX940-NEXT: v_mov_b32_e32 v7, v11 4232; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 sc0 sc1 4233; GFX940-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] sc0 sc1 4234; GFX940-NEXT: s_waitcnt vmcnt(0) 4235; GFX940-NEXT: s_setpc_b64 s[30:31] 4236 %vec0 = call <3 x i64> asm "; def $0", "=v"() 4237 %vec1 = call <3 x i64> asm "; def $0", "=v"() 4238 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 4, i32 2, i32 2> 4239 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 4240 ret void 4241} 4242 4243define void @v_shuffle_v4i64_v3i64__5_5_2_2(ptr addrspace(1) inreg %ptr) { 4244; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_2_2: 4245; GFX900: ; %bb.0: 4246; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4247; GFX900-NEXT: ;;#ASMSTART 4248; GFX900-NEXT: ; def v[0:5] 4249; GFX900-NEXT: ;;#ASMEND 4250; GFX900-NEXT: ;;#ASMSTART 4251; GFX900-NEXT: ; def v[6:11] 4252; GFX900-NEXT: ;;#ASMEND 4253; GFX900-NEXT: v_mov_b32_e32 v12, 0 4254; GFX900-NEXT: v_mov_b32_e32 v2, v4 4255; GFX900-NEXT: v_mov_b32_e32 v3, v5 4256; GFX900-NEXT: v_mov_b32_e32 v8, v10 4257; GFX900-NEXT: v_mov_b32_e32 v9, v11 4258; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16 4259; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] 4260; GFX900-NEXT: s_waitcnt vmcnt(0) 4261; GFX900-NEXT: s_setpc_b64 s[30:31] 4262; 4263; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_2_2: 4264; GFX90A: ; %bb.0: 4265; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4266; GFX90A-NEXT: ;;#ASMSTART 4267; GFX90A-NEXT: ; def v[0:5] 4268; GFX90A-NEXT: ;;#ASMEND 4269; GFX90A-NEXT: ;;#ASMSTART 4270; GFX90A-NEXT: ; def 
v[6:11] 4271; GFX90A-NEXT: ;;#ASMEND 4272; GFX90A-NEXT: v_mov_b32_e32 v12, 0 4273; GFX90A-NEXT: v_mov_b32_e32 v2, v4 4274; GFX90A-NEXT: v_mov_b32_e32 v3, v5 4275; GFX90A-NEXT: v_mov_b32_e32 v8, v10 4276; GFX90A-NEXT: v_mov_b32_e32 v9, v11 4277; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16 4278; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] 4279; GFX90A-NEXT: s_waitcnt vmcnt(0) 4280; GFX90A-NEXT: s_setpc_b64 s[30:31] 4281; 4282; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_2_2: 4283; GFX940: ; %bb.0: 4284; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4285; GFX940-NEXT: ;;#ASMSTART 4286; GFX940-NEXT: ; def v[0:5] 4287; GFX940-NEXT: ;;#ASMEND 4288; GFX940-NEXT: ;;#ASMSTART 4289; GFX940-NEXT: ; def v[6:11] 4290; GFX940-NEXT: ;;#ASMEND 4291; GFX940-NEXT: v_mov_b32_e32 v12, 0 4292; GFX940-NEXT: v_mov_b32_e32 v2, v4 4293; GFX940-NEXT: v_mov_b32_e32 v3, v5 4294; GFX940-NEXT: v_mov_b32_e32 v8, v10 4295; GFX940-NEXT: v_mov_b32_e32 v9, v11 4296; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 sc0 sc1 4297; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1 4298; GFX940-NEXT: s_waitcnt vmcnt(0) 4299; GFX940-NEXT: s_setpc_b64 s[30:31] 4300 %vec0 = call <3 x i64> asm "; def $0", "=v"() 4301 %vec1 = call <3 x i64> asm "; def $0", "=v"() 4302 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 2, i32 2> 4303 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 4304 ret void 4305} 4306 4307define void @v_shuffle_v4i64_v3i64__5_5_u_2(ptr addrspace(1) inreg %ptr) { 4308; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_u_2: 4309; GFX900: ; %bb.0: 4310; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4311; GFX900-NEXT: ;;#ASMSTART 4312; GFX900-NEXT: ; def v[6:11] 4313; GFX900-NEXT: ;;#ASMEND 4314; GFX900-NEXT: v_mov_b32_e32 v12, 0 4315; GFX900-NEXT: v_mov_b32_e32 v8, v10 4316; GFX900-NEXT: v_mov_b32_e32 v9, v11 4317; GFX900-NEXT: ;;#ASMSTART 4318; GFX900-NEXT: ; def v[0:5] 4319; 
GFX900-NEXT: ;;#ASMEND 4320; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16 4321; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] 4322; GFX900-NEXT: s_waitcnt vmcnt(0) 4323; GFX900-NEXT: s_setpc_b64 s[30:31] 4324; 4325; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_u_2: 4326; GFX90A: ; %bb.0: 4327; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4328; GFX90A-NEXT: ;;#ASMSTART 4329; GFX90A-NEXT: ; def v[6:11] 4330; GFX90A-NEXT: ;;#ASMEND 4331; GFX90A-NEXT: v_mov_b32_e32 v12, 0 4332; GFX90A-NEXT: v_mov_b32_e32 v8, v10 4333; GFX90A-NEXT: v_mov_b32_e32 v9, v11 4334; GFX90A-NEXT: ;;#ASMSTART 4335; GFX90A-NEXT: ; def v[0:5] 4336; GFX90A-NEXT: ;;#ASMEND 4337; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16 4338; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] 4339; GFX90A-NEXT: s_waitcnt vmcnt(0) 4340; GFX90A-NEXT: s_setpc_b64 s[30:31] 4341; 4342; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_u_2: 4343; GFX940: ; %bb.0: 4344; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4345; GFX940-NEXT: ;;#ASMSTART 4346; GFX940-NEXT: ; def v[6:11] 4347; GFX940-NEXT: ;;#ASMEND 4348; GFX940-NEXT: v_mov_b32_e32 v12, 0 4349; GFX940-NEXT: v_mov_b32_e32 v8, v10 4350; GFX940-NEXT: v_mov_b32_e32 v9, v11 4351; GFX940-NEXT: ;;#ASMSTART 4352; GFX940-NEXT: ; def v[0:5] 4353; GFX940-NEXT: ;;#ASMEND 4354; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 sc0 sc1 4355; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1 4356; GFX940-NEXT: s_waitcnt vmcnt(0) 4357; GFX940-NEXT: s_setpc_b64 s[30:31] 4358 %vec0 = call <3 x i64> asm "; def $0", "=v"() 4359 %vec1 = call <3 x i64> asm "; def $0", "=v"() 4360 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 poison, i32 2> 4361 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 4362 ret void 4363} 4364 4365define void @v_shuffle_v4i64_v3i64__5_5_0_2(ptr addrspace(1) inreg %ptr) { 4366; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_0_2: 4367; 
GFX900: ; %bb.0: 4368; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4369; GFX900-NEXT: ;;#ASMSTART 4370; GFX900-NEXT: ; def v[0:5] 4371; GFX900-NEXT: ;;#ASMEND 4372; GFX900-NEXT: ;;#ASMSTART 4373; GFX900-NEXT: ; def v[6:11] 4374; GFX900-NEXT: ;;#ASMEND 4375; GFX900-NEXT: v_mov_b32_e32 v12, 0 4376; GFX900-NEXT: v_mov_b32_e32 v2, v4 4377; GFX900-NEXT: v_mov_b32_e32 v3, v5 4378; GFX900-NEXT: v_mov_b32_e32 v8, v10 4379; GFX900-NEXT: v_mov_b32_e32 v9, v11 4380; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] offset:16 4381; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] 4382; GFX900-NEXT: s_waitcnt vmcnt(0) 4383; GFX900-NEXT: s_setpc_b64 s[30:31] 4384; 4385; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_0_2: 4386; GFX90A: ; %bb.0: 4387; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4388; GFX90A-NEXT: ;;#ASMSTART 4389; GFX90A-NEXT: ; def v[0:5] 4390; GFX90A-NEXT: ;;#ASMEND 4391; GFX90A-NEXT: ;;#ASMSTART 4392; GFX90A-NEXT: ; def v[6:11] 4393; GFX90A-NEXT: ;;#ASMEND 4394; GFX90A-NEXT: v_mov_b32_e32 v12, 0 4395; GFX90A-NEXT: v_mov_b32_e32 v2, v4 4396; GFX90A-NEXT: v_mov_b32_e32 v3, v5 4397; GFX90A-NEXT: v_mov_b32_e32 v8, v10 4398; GFX90A-NEXT: v_mov_b32_e32 v9, v11 4399; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] offset:16 4400; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] 4401; GFX90A-NEXT: s_waitcnt vmcnt(0) 4402; GFX90A-NEXT: s_setpc_b64 s[30:31] 4403; 4404; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_0_2: 4405; GFX940: ; %bb.0: 4406; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4407; GFX940-NEXT: ;;#ASMSTART 4408; GFX940-NEXT: ; def v[0:5] 4409; GFX940-NEXT: ;;#ASMEND 4410; GFX940-NEXT: ;;#ASMSTART 4411; GFX940-NEXT: ; def v[6:11] 4412; GFX940-NEXT: ;;#ASMEND 4413; GFX940-NEXT: v_mov_b32_e32 v12, 0 4414; GFX940-NEXT: v_mov_b32_e32 v2, v4 4415; GFX940-NEXT: v_mov_b32_e32 v3, v5 4416; GFX940-NEXT: v_mov_b32_e32 v8, v10 4417; GFX940-NEXT: v_mov_b32_e32 v9, v11 4418; GFX940-NEXT: global_store_dwordx4 v12, v[0:3], 
s[0:1] offset:16 sc0 sc1 4419; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1 4420; GFX940-NEXT: s_waitcnt vmcnt(0) 4421; GFX940-NEXT: s_setpc_b64 s[30:31] 4422 %vec0 = call <3 x i64> asm "; def $0", "=v"() 4423 %vec1 = call <3 x i64> asm "; def $0", "=v"() 4424 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 0, i32 2> 4425 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 4426 ret void 4427} 4428 4429define void @v_shuffle_v4i64_v3i64__5_5_1_2(ptr addrspace(1) inreg %ptr) { 4430; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_1_2: 4431; GFX900: ; %bb.0: 4432; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4433; GFX900-NEXT: ;;#ASMSTART 4434; GFX900-NEXT: ; def v[6:11] 4435; GFX900-NEXT: ;;#ASMEND 4436; GFX900-NEXT: v_mov_b32_e32 v12, 0 4437; GFX900-NEXT: v_mov_b32_e32 v8, v10 4438; GFX900-NEXT: v_mov_b32_e32 v9, v11 4439; GFX900-NEXT: ;;#ASMSTART 4440; GFX900-NEXT: ; def v[0:5] 4441; GFX900-NEXT: ;;#ASMEND 4442; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16 4443; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] 4444; GFX900-NEXT: s_waitcnt vmcnt(0) 4445; GFX900-NEXT: s_setpc_b64 s[30:31] 4446; 4447; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_1_2: 4448; GFX90A: ; %bb.0: 4449; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4450; GFX90A-NEXT: ;;#ASMSTART 4451; GFX90A-NEXT: ; def v[6:11] 4452; GFX90A-NEXT: ;;#ASMEND 4453; GFX90A-NEXT: v_mov_b32_e32 v12, 0 4454; GFX90A-NEXT: v_mov_b32_e32 v8, v10 4455; GFX90A-NEXT: v_mov_b32_e32 v9, v11 4456; GFX90A-NEXT: ;;#ASMSTART 4457; GFX90A-NEXT: ; def v[0:5] 4458; GFX90A-NEXT: ;;#ASMEND 4459; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16 4460; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] 4461; GFX90A-NEXT: s_waitcnt vmcnt(0) 4462; GFX90A-NEXT: s_setpc_b64 s[30:31] 4463; 4464; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_1_2: 4465; GFX940: ; %bb.0: 4466; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4467; 
GFX940-NEXT: ;;#ASMSTART 4468; GFX940-NEXT: ; def v[6:11] 4469; GFX940-NEXT: ;;#ASMEND 4470; GFX940-NEXT: v_mov_b32_e32 v12, 0 4471; GFX940-NEXT: v_mov_b32_e32 v8, v10 4472; GFX940-NEXT: v_mov_b32_e32 v9, v11 4473; GFX940-NEXT: ;;#ASMSTART 4474; GFX940-NEXT: ; def v[0:5] 4475; GFX940-NEXT: ;;#ASMEND 4476; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 sc0 sc1 4477; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1 4478; GFX940-NEXT: s_waitcnt vmcnt(0) 4479; GFX940-NEXT: s_setpc_b64 s[30:31] 4480 %vec0 = call <3 x i64> asm "; def $0", "=v"() 4481 %vec1 = call <3 x i64> asm "; def $0", "=v"() 4482 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 1, i32 2> 4483 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 4484 ret void 4485} 4486 4487define void @v_shuffle_v4i64_v3i64__5_5_3_2(ptr addrspace(1) inreg %ptr) { 4488; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_3_2: 4489; GFX900: ; %bb.0: 4490; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4491; GFX900-NEXT: ;;#ASMSTART 4492; GFX900-NEXT: ; def v[6:11] 4493; GFX900-NEXT: ;;#ASMEND 4494; GFX900-NEXT: v_mov_b32_e32 v12, 0 4495; GFX900-NEXT: ;;#ASMSTART 4496; GFX900-NEXT: ; def v[0:5] 4497; GFX900-NEXT: ;;#ASMEND 4498; GFX900-NEXT: v_mov_b32_e32 v8, v4 4499; GFX900-NEXT: v_mov_b32_e32 v9, v5 4500; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16 4501; GFX900-NEXT: s_nop 0 4502; GFX900-NEXT: v_mov_b32_e32 v8, v10 4503; GFX900-NEXT: v_mov_b32_e32 v9, v11 4504; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] 4505; GFX900-NEXT: s_waitcnt vmcnt(0) 4506; GFX900-NEXT: s_setpc_b64 s[30:31] 4507; 4508; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_3_2: 4509; GFX90A: ; %bb.0: 4510; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4511; GFX90A-NEXT: ;;#ASMSTART 4512; GFX90A-NEXT: ; def v[6:11] 4513; GFX90A-NEXT: ;;#ASMEND 4514; GFX90A-NEXT: v_mov_b32_e32 v12, 0 4515; GFX90A-NEXT: ;;#ASMSTART 4516; GFX90A-NEXT: ; def v[0:5] 4517; 
GFX90A-NEXT: ;;#ASMEND 4518; GFX90A-NEXT: v_mov_b32_e32 v8, v4 4519; GFX90A-NEXT: v_mov_b32_e32 v9, v5 4520; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16 4521; GFX90A-NEXT: s_nop 0 4522; GFX90A-NEXT: v_mov_b32_e32 v8, v10 4523; GFX90A-NEXT: v_mov_b32_e32 v9, v11 4524; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] 4525; GFX90A-NEXT: s_waitcnt vmcnt(0) 4526; GFX90A-NEXT: s_setpc_b64 s[30:31] 4527; 4528; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_3_2: 4529; GFX940: ; %bb.0: 4530; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4531; GFX940-NEXT: ;;#ASMSTART 4532; GFX940-NEXT: ; def v[6:11] 4533; GFX940-NEXT: ;;#ASMEND 4534; GFX940-NEXT: v_mov_b32_e32 v12, 0 4535; GFX940-NEXT: ;;#ASMSTART 4536; GFX940-NEXT: ; def v[0:5] 4537; GFX940-NEXT: ;;#ASMEND 4538; GFX940-NEXT: s_nop 0 4539; GFX940-NEXT: v_mov_b32_e32 v8, v4 4540; GFX940-NEXT: v_mov_b32_e32 v9, v5 4541; GFX940-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] offset:16 sc0 sc1 4542; GFX940-NEXT: s_nop 1 4543; GFX940-NEXT: v_mov_b32_e32 v8, v10 4544; GFX940-NEXT: v_mov_b32_e32 v9, v11 4545; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1 4546; GFX940-NEXT: s_waitcnt vmcnt(0) 4547; GFX940-NEXT: s_setpc_b64 s[30:31] 4548 %vec0 = call <3 x i64> asm "; def $0", "=v"() 4549 %vec1 = call <3 x i64> asm "; def $0", "=v"() 4550 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 3, i32 2> 4551 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 4552 ret void 4553} 4554 4555define void @v_shuffle_v4i64_v3i64__5_5_4_2(ptr addrspace(1) inreg %ptr) { 4556; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_4_2: 4557; GFX900: ; %bb.0: 4558; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4559; GFX900-NEXT: ;;#ASMSTART 4560; GFX900-NEXT: ; def v[0:5] 4561; GFX900-NEXT: ;;#ASMEND 4562; GFX900-NEXT: ;;#ASMSTART 4563; GFX900-NEXT: ; def v[6:11] 4564; GFX900-NEXT: ;;#ASMEND 4565; GFX900-NEXT: v_mov_b32_e32 v12, 0 4566; GFX900-NEXT: v_mov_b32_e32 v2, v8 
4567; GFX900-NEXT: v_mov_b32_e32 v3, v9 4568; GFX900-NEXT: v_mov_b32_e32 v8, v10 4569; GFX900-NEXT: v_mov_b32_e32 v9, v11 4570; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16 4571; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] 4572; GFX900-NEXT: s_waitcnt vmcnt(0) 4573; GFX900-NEXT: s_setpc_b64 s[30:31] 4574; 4575; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_4_2: 4576; GFX90A: ; %bb.0: 4577; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4578; GFX90A-NEXT: ;;#ASMSTART 4579; GFX90A-NEXT: ; def v[0:5] 4580; GFX90A-NEXT: ;;#ASMEND 4581; GFX90A-NEXT: ;;#ASMSTART 4582; GFX90A-NEXT: ; def v[6:11] 4583; GFX90A-NEXT: ;;#ASMEND 4584; GFX90A-NEXT: v_mov_b32_e32 v12, 0 4585; GFX90A-NEXT: v_mov_b32_e32 v2, v8 4586; GFX90A-NEXT: v_mov_b32_e32 v3, v9 4587; GFX90A-NEXT: v_mov_b32_e32 v8, v10 4588; GFX90A-NEXT: v_mov_b32_e32 v9, v11 4589; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16 4590; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] 4591; GFX90A-NEXT: s_waitcnt vmcnt(0) 4592; GFX90A-NEXT: s_setpc_b64 s[30:31] 4593; 4594; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_4_2: 4595; GFX940: ; %bb.0: 4596; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4597; GFX940-NEXT: ;;#ASMSTART 4598; GFX940-NEXT: ; def v[0:5] 4599; GFX940-NEXT: ;;#ASMEND 4600; GFX940-NEXT: ;;#ASMSTART 4601; GFX940-NEXT: ; def v[6:11] 4602; GFX940-NEXT: ;;#ASMEND 4603; GFX940-NEXT: v_mov_b32_e32 v12, 0 4604; GFX940-NEXT: v_mov_b32_e32 v2, v8 4605; GFX940-NEXT: v_mov_b32_e32 v3, v9 4606; GFX940-NEXT: v_mov_b32_e32 v8, v10 4607; GFX940-NEXT: v_mov_b32_e32 v9, v11 4608; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 sc0 sc1 4609; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1 4610; GFX940-NEXT: s_waitcnt vmcnt(0) 4611; GFX940-NEXT: s_setpc_b64 s[30:31] 4612 %vec0 = call <3 x i64> asm "; def $0", "=v"() 4613 %vec1 = call <3 x i64> asm "; def $0", "=v"() 4614 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> 
<i32 5, i32 5, i32 4, i32 2> 4615 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 4616 ret void 4617} 4618 4619define void @v_shuffle_v4i64_v3i64__u_3_3_3(ptr addrspace(1) inreg %ptr) { 4620; GFX9-LABEL: v_shuffle_v4i64_v3i64__u_3_3_3: 4621; GFX9: ; %bb.0: 4622; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4623; GFX9-NEXT: s_setpc_b64 s[30:31] 4624 %vec0 = call <3 x i64> asm "; def $0", "=v"() 4625 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 poison, i32 3, i32 3, i32 3> 4626 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 4627 ret void 4628} 4629 4630define void @v_shuffle_v4i64_v3i64__0_3_3_3(ptr addrspace(1) inreg %ptr) { 4631; GFX900-LABEL: v_shuffle_v4i64_v3i64__0_3_3_3: 4632; GFX900: ; %bb.0: 4633; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4634; GFX900-NEXT: v_mov_b32_e32 v6, 0 4635; GFX900-NEXT: ;;#ASMSTART 4636; GFX900-NEXT: ; def v[0:5] 4637; GFX900-NEXT: ;;#ASMEND 4638; GFX900-NEXT: global_store_dwordx4 v6, v[4:7], s[16:17] offset:16 4639; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] 4640; GFX900-NEXT: s_waitcnt vmcnt(0) 4641; GFX900-NEXT: s_setpc_b64 s[30:31] 4642; 4643; GFX90A-LABEL: v_shuffle_v4i64_v3i64__0_3_3_3: 4644; GFX90A: ; %bb.0: 4645; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4646; GFX90A-NEXT: v_mov_b32_e32 v6, 0 4647; GFX90A-NEXT: ;;#ASMSTART 4648; GFX90A-NEXT: ; def v[0:5] 4649; GFX90A-NEXT: ;;#ASMEND 4650; GFX90A-NEXT: global_store_dwordx4 v6, v[4:7], s[16:17] offset:16 4651; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] 4652; GFX90A-NEXT: s_waitcnt vmcnt(0) 4653; GFX90A-NEXT: s_setpc_b64 s[30:31] 4654; 4655; GFX940-LABEL: v_shuffle_v4i64_v3i64__0_3_3_3: 4656; GFX940: ; %bb.0: 4657; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4658; GFX940-NEXT: v_mov_b32_e32 v6, 0 4659; GFX940-NEXT: ;;#ASMSTART 4660; GFX940-NEXT: ; def v[0:5] 4661; GFX940-NEXT: ;;#ASMEND 4662; GFX940-NEXT: global_store_dwordx4 v6, v[4:7], s[0:1] offset:16 sc0 sc1 4663; 
GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 4664; GFX940-NEXT: s_waitcnt vmcnt(0) 4665; GFX940-NEXT: s_setpc_b64 s[30:31] 4666 %vec0 = call <3 x i64> asm "; def $0", "=v"() 4667 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 0, i32 3, i32 3, i32 3> 4668 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 4669 ret void 4670} 4671 4672define void @v_shuffle_v4i64_v3i64__1_3_3_3(ptr addrspace(1) inreg %ptr) { 4673; GFX900-LABEL: v_shuffle_v4i64_v3i64__1_3_3_3: 4674; GFX900: ; %bb.0: 4675; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4676; GFX900-NEXT: v_mov_b32_e32 v6, 0 4677; GFX900-NEXT: ;;#ASMSTART 4678; GFX900-NEXT: ; def v[0:5] 4679; GFX900-NEXT: ;;#ASMEND 4680; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] 4681; GFX900-NEXT: s_waitcnt vmcnt(0) 4682; GFX900-NEXT: s_setpc_b64 s[30:31] 4683; 4684; GFX90A-LABEL: v_shuffle_v4i64_v3i64__1_3_3_3: 4685; GFX90A: ; %bb.0: 4686; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4687; GFX90A-NEXT: v_mov_b32_e32 v6, 0 4688; GFX90A-NEXT: ;;#ASMSTART 4689; GFX90A-NEXT: ; def v[0:5] 4690; GFX90A-NEXT: ;;#ASMEND 4691; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] 4692; GFX90A-NEXT: s_waitcnt vmcnt(0) 4693; GFX90A-NEXT: s_setpc_b64 s[30:31] 4694; 4695; GFX940-LABEL: v_shuffle_v4i64_v3i64__1_3_3_3: 4696; GFX940: ; %bb.0: 4697; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4698; GFX940-NEXT: v_mov_b32_e32 v6, 0 4699; GFX940-NEXT: ;;#ASMSTART 4700; GFX940-NEXT: ; def v[0:5] 4701; GFX940-NEXT: ;;#ASMEND 4702; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 4703; GFX940-NEXT: s_waitcnt vmcnt(0) 4704; GFX940-NEXT: s_setpc_b64 s[30:31] 4705 %vec0 = call <3 x i64> asm "; def $0", "=v"() 4706 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 1, i32 3, i32 3, i32 3> 4707 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 4708 ret void 4709} 4710 4711define void @v_shuffle_v4i64_v3i64__2_3_3_3(ptr addrspace(1) inreg %ptr) 
{ 4712; GFX900-LABEL: v_shuffle_v4i64_v3i64__2_3_3_3: 4713; GFX900: ; %bb.0: 4714; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4715; GFX900-NEXT: ;;#ASMSTART 4716; GFX900-NEXT: ; def v[0:5] 4717; GFX900-NEXT: ;;#ASMEND 4718; GFX900-NEXT: v_mov_b32_e32 v6, 0 4719; GFX900-NEXT: v_mov_b32_e32 v0, v4 4720; GFX900-NEXT: v_mov_b32_e32 v1, v5 4721; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] 4722; GFX900-NEXT: s_waitcnt vmcnt(0) 4723; GFX900-NEXT: s_setpc_b64 s[30:31] 4724; 4725; GFX90A-LABEL: v_shuffle_v4i64_v3i64__2_3_3_3: 4726; GFX90A: ; %bb.0: 4727; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4728; GFX90A-NEXT: ;;#ASMSTART 4729; GFX90A-NEXT: ; def v[0:5] 4730; GFX90A-NEXT: ;;#ASMEND 4731; GFX90A-NEXT: v_mov_b32_e32 v6, 0 4732; GFX90A-NEXT: v_mov_b32_e32 v0, v4 4733; GFX90A-NEXT: v_mov_b32_e32 v1, v5 4734; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] 4735; GFX90A-NEXT: s_waitcnt vmcnt(0) 4736; GFX90A-NEXT: s_setpc_b64 s[30:31] 4737; 4738; GFX940-LABEL: v_shuffle_v4i64_v3i64__2_3_3_3: 4739; GFX940: ; %bb.0: 4740; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4741; GFX940-NEXT: ;;#ASMSTART 4742; GFX940-NEXT: ; def v[0:5] 4743; GFX940-NEXT: ;;#ASMEND 4744; GFX940-NEXT: v_mov_b32_e32 v6, 0 4745; GFX940-NEXT: v_mov_b32_e32 v0, v4 4746; GFX940-NEXT: v_mov_b32_e32 v1, v5 4747; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 4748; GFX940-NEXT: s_waitcnt vmcnt(0) 4749; GFX940-NEXT: s_setpc_b64 s[30:31] 4750 %vec0 = call <3 x i64> asm "; def $0", "=v"() 4751 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 2, i32 3, i32 3, i32 3> 4752 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 4753 ret void 4754} 4755 4756define void @v_shuffle_v4i64_v3i64__3_3_3_3(ptr addrspace(1) inreg %ptr) { 4757; GFX9-LABEL: v_shuffle_v4i64_v3i64__3_3_3_3: 4758; GFX9: ; %bb.0: 4759; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4760; GFX9-NEXT: s_setpc_b64 s[30:31] 4761 %vec0 = call <3 x i64> asm "; def $0", 
"=v"() 4762 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 4763 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 4764 ret void 4765} 4766 4767define void @v_shuffle_v4i64_v3i64__4_3_3_3(ptr addrspace(1) inreg %ptr) { 4768; GFX900-LABEL: v_shuffle_v4i64_v3i64__4_3_3_3: 4769; GFX900: ; %bb.0: 4770; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4771; GFX900-NEXT: ;;#ASMSTART 4772; GFX900-NEXT: ; def v[0:5] 4773; GFX900-NEXT: ;;#ASMEND 4774; GFX900-NEXT: v_mov_b32_e32 v8, 0 4775; GFX900-NEXT: v_mov_b32_e32 v4, v0 4776; GFX900-NEXT: v_mov_b32_e32 v5, v1 4777; GFX900-NEXT: v_mov_b32_e32 v6, v0 4778; GFX900-NEXT: v_mov_b32_e32 v7, v1 4779; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16 4780; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] 4781; GFX900-NEXT: s_waitcnt vmcnt(0) 4782; GFX900-NEXT: s_setpc_b64 s[30:31] 4783; 4784; GFX90A-LABEL: v_shuffle_v4i64_v3i64__4_3_3_3: 4785; GFX90A: ; %bb.0: 4786; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4787; GFX90A-NEXT: ;;#ASMSTART 4788; GFX90A-NEXT: ; def v[0:5] 4789; GFX90A-NEXT: ;;#ASMEND 4790; GFX90A-NEXT: v_mov_b32_e32 v8, 0 4791; GFX90A-NEXT: v_mov_b32_e32 v4, v0 4792; GFX90A-NEXT: v_mov_b32_e32 v5, v1 4793; GFX90A-NEXT: v_mov_b32_e32 v6, v0 4794; GFX90A-NEXT: v_mov_b32_e32 v7, v1 4795; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16 4796; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] 4797; GFX90A-NEXT: s_waitcnt vmcnt(0) 4798; GFX90A-NEXT: s_setpc_b64 s[30:31] 4799; 4800; GFX940-LABEL: v_shuffle_v4i64_v3i64__4_3_3_3: 4801; GFX940: ; %bb.0: 4802; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4803; GFX940-NEXT: ;;#ASMSTART 4804; GFX940-NEXT: ; def v[0:5] 4805; GFX940-NEXT: ;;#ASMEND 4806; GFX940-NEXT: v_mov_b32_e32 v8, 0 4807; GFX940-NEXT: v_mov_b32_e32 v4, v0 4808; GFX940-NEXT: v_mov_b32_e32 v5, v1 4809; GFX940-NEXT: v_mov_b32_e32 v6, v0 4810; GFX940-NEXT: v_mov_b32_e32 v7, v1 4811; GFX940-NEXT: 
global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 sc0 sc1 4812; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 4813; GFX940-NEXT: s_waitcnt vmcnt(0) 4814; GFX940-NEXT: s_setpc_b64 s[30:31] 4815 %vec0 = call <3 x i64> asm "; def $0", "=v"() 4816 %vec1 = call <3 x i64> asm "; def $0", "=v"() 4817 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 4, i32 3, i32 3, i32 3> 4818 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 4819 ret void 4820} 4821 4822define void @v_shuffle_v4i64_v3i64__5_3_3_3(ptr addrspace(1) inreg %ptr) { 4823; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_3_3_3: 4824; GFX900: ; %bb.0: 4825; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4826; GFX900-NEXT: ;;#ASMSTART 4827; GFX900-NEXT: ; def v[0:5] 4828; GFX900-NEXT: ;;#ASMEND 4829; GFX900-NEXT: v_mov_b32_e32 v6, 0 4830; GFX900-NEXT: v_mov_b32_e32 v2, v0 4831; GFX900-NEXT: v_mov_b32_e32 v3, v1 4832; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16 4833; GFX900-NEXT: s_nop 0 4834; GFX900-NEXT: v_mov_b32_e32 v2, v4 4835; GFX900-NEXT: v_mov_b32_e32 v3, v5 4836; GFX900-NEXT: v_mov_b32_e32 v4, v0 4837; GFX900-NEXT: v_mov_b32_e32 v5, v1 4838; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] 4839; GFX900-NEXT: s_waitcnt vmcnt(0) 4840; GFX900-NEXT: s_setpc_b64 s[30:31] 4841; 4842; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_3_3_3: 4843; GFX90A: ; %bb.0: 4844; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4845; GFX90A-NEXT: ;;#ASMSTART 4846; GFX90A-NEXT: ; def v[0:5] 4847; GFX90A-NEXT: ;;#ASMEND 4848; GFX90A-NEXT: v_mov_b32_e32 v6, 0 4849; GFX90A-NEXT: v_mov_b32_e32 v2, v0 4850; GFX90A-NEXT: v_mov_b32_e32 v3, v1 4851; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16 4852; GFX90A-NEXT: s_nop 0 4853; GFX90A-NEXT: v_mov_b32_e32 v2, v4 4854; GFX90A-NEXT: v_mov_b32_e32 v3, v5 4855; GFX90A-NEXT: v_mov_b32_e32 v4, v0 4856; GFX90A-NEXT: v_mov_b32_e32 v5, v1 4857; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] 4858; GFX90A-NEXT: 
s_waitcnt vmcnt(0) 4859; GFX90A-NEXT: s_setpc_b64 s[30:31] 4860; 4861; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_3_3_3: 4862; GFX940: ; %bb.0: 4863; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4864; GFX940-NEXT: ;;#ASMSTART 4865; GFX940-NEXT: ; def v[0:5] 4866; GFX940-NEXT: ;;#ASMEND 4867; GFX940-NEXT: v_mov_b32_e32 v6, 0 4868; GFX940-NEXT: v_mov_b32_e32 v2, v0 4869; GFX940-NEXT: v_mov_b32_e32 v3, v1 4870; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 sc0 sc1 4871; GFX940-NEXT: s_nop 1 4872; GFX940-NEXT: v_mov_b32_e32 v2, v4 4873; GFX940-NEXT: v_mov_b32_e32 v3, v5 4874; GFX940-NEXT: v_mov_b32_e32 v4, v0 4875; GFX940-NEXT: v_mov_b32_e32 v5, v1 4876; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 4877; GFX940-NEXT: s_waitcnt vmcnt(0) 4878; GFX940-NEXT: s_setpc_b64 s[30:31] 4879 %vec0 = call <3 x i64> asm "; def $0", "=v"() 4880 %vec1 = call <3 x i64> asm "; def $0", "=v"() 4881 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 3, i32 3, i32 3> 4882 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 4883 ret void 4884} 4885 4886define void @v_shuffle_v4i64_v3i64__5_u_3_3(ptr addrspace(1) inreg %ptr) { 4887; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_u_3_3: 4888; GFX900: ; %bb.0: 4889; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4890; GFX900-NEXT: ;;#ASMSTART 4891; GFX900-NEXT: ; def v[0:5] 4892; GFX900-NEXT: ;;#ASMEND 4893; GFX900-NEXT: v_mov_b32_e32 v6, 0 4894; GFX900-NEXT: v_mov_b32_e32 v2, v0 4895; GFX900-NEXT: v_mov_b32_e32 v3, v1 4896; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16 4897; GFX900-NEXT: s_nop 0 4898; GFX900-NEXT: v_mov_b32_e32 v0, v4 4899; GFX900-NEXT: v_mov_b32_e32 v1, v5 4900; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] 4901; GFX900-NEXT: s_waitcnt vmcnt(0) 4902; GFX900-NEXT: s_setpc_b64 s[30:31] 4903; 4904; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_u_3_3: 4905; GFX90A: ; %bb.0: 4906; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4907; 
GFX90A-NEXT: ;;#ASMSTART 4908; GFX90A-NEXT: ; def v[0:5] 4909; GFX90A-NEXT: ;;#ASMEND 4910; GFX90A-NEXT: v_mov_b32_e32 v6, 0 4911; GFX90A-NEXT: v_mov_b32_e32 v2, v0 4912; GFX90A-NEXT: v_mov_b32_e32 v3, v1 4913; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16 4914; GFX90A-NEXT: s_nop 0 4915; GFX90A-NEXT: v_mov_b32_e32 v0, v4 4916; GFX90A-NEXT: v_mov_b32_e32 v1, v5 4917; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] 4918; GFX90A-NEXT: s_waitcnt vmcnt(0) 4919; GFX90A-NEXT: s_setpc_b64 s[30:31] 4920; 4921; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_u_3_3: 4922; GFX940: ; %bb.0: 4923; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4924; GFX940-NEXT: ;;#ASMSTART 4925; GFX940-NEXT: ; def v[0:5] 4926; GFX940-NEXT: ;;#ASMEND 4927; GFX940-NEXT: v_mov_b32_e32 v6, 0 4928; GFX940-NEXT: v_mov_b32_e32 v2, v0 4929; GFX940-NEXT: v_mov_b32_e32 v3, v1 4930; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 sc0 sc1 4931; GFX940-NEXT: s_nop 1 4932; GFX940-NEXT: v_mov_b32_e32 v0, v4 4933; GFX940-NEXT: v_mov_b32_e32 v1, v5 4934; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 4935; GFX940-NEXT: s_waitcnt vmcnt(0) 4936; GFX940-NEXT: s_setpc_b64 s[30:31] 4937 %vec0 = call <3 x i64> asm "; def $0", "=v"() 4938 %vec1 = call <3 x i64> asm "; def $0", "=v"() 4939 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 poison, i32 3, i32 3> 4940 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 4941 ret void 4942} 4943 4944define void @v_shuffle_v4i64_v3i64__5_0_3_3(ptr addrspace(1) inreg %ptr) { 4945; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_0_3_3: 4946; GFX900: ; %bb.0: 4947; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4948; GFX900-NEXT: ;;#ASMSTART 4949; GFX900-NEXT: ; def v[0:5] 4950; GFX900-NEXT: ;;#ASMEND 4951; GFX900-NEXT: ;;#ASMSTART 4952; GFX900-NEXT: ; def v[2:7] 4953; GFX900-NEXT: ;;#ASMEND 4954; GFX900-NEXT: v_mov_b32_e32 v8, 0 4955; GFX900-NEXT: v_mov_b32_e32 v4, v2 4956; GFX900-NEXT: 
v_mov_b32_e32 v5, v3 4957; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16 4958; GFX900-NEXT: s_nop 0 4959; GFX900-NEXT: v_mov_b32_e32 v2, v6 4960; GFX900-NEXT: v_mov_b32_e32 v3, v7 4961; GFX900-NEXT: v_mov_b32_e32 v4, v0 4962; GFX900-NEXT: v_mov_b32_e32 v5, v1 4963; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] 4964; GFX900-NEXT: s_waitcnt vmcnt(0) 4965; GFX900-NEXT: s_setpc_b64 s[30:31] 4966; 4967; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_0_3_3: 4968; GFX90A: ; %bb.0: 4969; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4970; GFX90A-NEXT: ;;#ASMSTART 4971; GFX90A-NEXT: ; def v[0:5] 4972; GFX90A-NEXT: ;;#ASMEND 4973; GFX90A-NEXT: ;;#ASMSTART 4974; GFX90A-NEXT: ; def v[2:7] 4975; GFX90A-NEXT: ;;#ASMEND 4976; GFX90A-NEXT: v_mov_b32_e32 v8, 0 4977; GFX90A-NEXT: v_mov_b32_e32 v4, v2 4978; GFX90A-NEXT: v_mov_b32_e32 v5, v3 4979; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16 4980; GFX90A-NEXT: s_nop 0 4981; GFX90A-NEXT: v_mov_b32_e32 v2, v6 4982; GFX90A-NEXT: v_mov_b32_e32 v3, v7 4983; GFX90A-NEXT: v_mov_b32_e32 v4, v0 4984; GFX90A-NEXT: v_mov_b32_e32 v5, v1 4985; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] 4986; GFX90A-NEXT: s_waitcnt vmcnt(0) 4987; GFX90A-NEXT: s_setpc_b64 s[30:31] 4988; 4989; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_0_3_3: 4990; GFX940: ; %bb.0: 4991; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4992; GFX940-NEXT: ;;#ASMSTART 4993; GFX940-NEXT: ; def v[0:5] 4994; GFX940-NEXT: ;;#ASMEND 4995; GFX940-NEXT: v_mov_b32_e32 v8, 0 4996; GFX940-NEXT: ;;#ASMSTART 4997; GFX940-NEXT: ; def v[2:7] 4998; GFX940-NEXT: ;;#ASMEND 4999; GFX940-NEXT: s_nop 0 5000; GFX940-NEXT: v_mov_b32_e32 v4, v2 5001; GFX940-NEXT: v_mov_b32_e32 v5, v3 5002; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 sc0 sc1 5003; GFX940-NEXT: s_nop 1 5004; GFX940-NEXT: v_mov_b32_e32 v2, v6 5005; GFX940-NEXT: v_mov_b32_e32 v3, v7 5006; GFX940-NEXT: v_mov_b32_e32 v4, v0 5007; GFX940-NEXT: v_mov_b32_e32 v5, v1 5008; 
GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 5009; GFX940-NEXT: s_waitcnt vmcnt(0) 5010; GFX940-NEXT: s_setpc_b64 s[30:31] 5011 %vec0 = call <3 x i64> asm "; def $0", "=v"() 5012 %vec1 = call <3 x i64> asm "; def $0", "=v"() 5013 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 0, i32 3, i32 3> 5014 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 5015 ret void 5016} 5017 5018define void @v_shuffle_v4i64_v3i64__5_1_3_3(ptr addrspace(1) inreg %ptr) { 5019; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_1_3_3: 5020; GFX900: ; %bb.0: 5021; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5022; GFX900-NEXT: ;;#ASMSTART 5023; GFX900-NEXT: ; def v[0:5] 5024; GFX900-NEXT: ;;#ASMEND 5025; GFX900-NEXT: ;;#ASMSTART 5026; GFX900-NEXT: ; def v[4:9] 5027; GFX900-NEXT: ;;#ASMEND 5028; GFX900-NEXT: v_mov_b32_e32 v10, 0 5029; GFX900-NEXT: v_mov_b32_e32 v6, v4 5030; GFX900-NEXT: v_mov_b32_e32 v7, v5 5031; GFX900-NEXT: v_mov_b32_e32 v0, v8 5032; GFX900-NEXT: v_mov_b32_e32 v1, v9 5033; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17] offset:16 5034; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] 5035; GFX900-NEXT: s_waitcnt vmcnt(0) 5036; GFX900-NEXT: s_setpc_b64 s[30:31] 5037; 5038; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_1_3_3: 5039; GFX90A: ; %bb.0: 5040; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5041; GFX90A-NEXT: ;;#ASMSTART 5042; GFX90A-NEXT: ; def v[0:5] 5043; GFX90A-NEXT: ;;#ASMEND 5044; GFX90A-NEXT: ;;#ASMSTART 5045; GFX90A-NEXT: ; def v[4:9] 5046; GFX90A-NEXT: ;;#ASMEND 5047; GFX90A-NEXT: v_mov_b32_e32 v10, 0 5048; GFX90A-NEXT: v_mov_b32_e32 v6, v4 5049; GFX90A-NEXT: v_mov_b32_e32 v7, v5 5050; GFX90A-NEXT: v_mov_b32_e32 v0, v8 5051; GFX90A-NEXT: v_mov_b32_e32 v1, v9 5052; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17] offset:16 5053; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] 5054; GFX90A-NEXT: s_waitcnt vmcnt(0) 5055; GFX90A-NEXT: s_setpc_b64 s[30:31] 5056; 5057; GFX940-LABEL: 
v_shuffle_v4i64_v3i64__5_1_3_3: 5058; GFX940: ; %bb.0: 5059; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5060; GFX940-NEXT: ;;#ASMSTART 5061; GFX940-NEXT: ; def v[0:5] 5062; GFX940-NEXT: ;;#ASMEND 5063; GFX940-NEXT: v_mov_b32_e32 v10, 0 5064; GFX940-NEXT: ;;#ASMSTART 5065; GFX940-NEXT: ; def v[4:9] 5066; GFX940-NEXT: ;;#ASMEND 5067; GFX940-NEXT: s_nop 0 5068; GFX940-NEXT: v_mov_b32_e32 v6, v4 5069; GFX940-NEXT: v_mov_b32_e32 v7, v5 5070; GFX940-NEXT: v_mov_b32_e32 v0, v8 5071; GFX940-NEXT: v_mov_b32_e32 v1, v9 5072; GFX940-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1] offset:16 sc0 sc1 5073; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] sc0 sc1 5074; GFX940-NEXT: s_waitcnt vmcnt(0) 5075; GFX940-NEXT: s_setpc_b64 s[30:31] 5076 %vec0 = call <3 x i64> asm "; def $0", "=v"() 5077 %vec1 = call <3 x i64> asm "; def $0", "=v"() 5078 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 1, i32 3, i32 3> 5079 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 5080 ret void 5081} 5082 5083define void @v_shuffle_v4i64_v3i64__5_2_3_3(ptr addrspace(1) inreg %ptr) { 5084; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_2_3_3: 5085; GFX900: ; %bb.0: 5086; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5087; GFX900-NEXT: ;;#ASMSTART 5088; GFX900-NEXT: ; def v[0:5] 5089; GFX900-NEXT: ;;#ASMEND 5090; GFX900-NEXT: ;;#ASMSTART 5091; GFX900-NEXT: ; def v[6:11] 5092; GFX900-NEXT: ;;#ASMEND 5093; GFX900-NEXT: v_mov_b32_e32 v12, 0 5094; GFX900-NEXT: v_mov_b32_e32 v8, v6 5095; GFX900-NEXT: v_mov_b32_e32 v9, v7 5096; GFX900-NEXT: v_mov_b32_e32 v2, v10 5097; GFX900-NEXT: v_mov_b32_e32 v3, v11 5098; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16 5099; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] 5100; GFX900-NEXT: s_waitcnt vmcnt(0) 5101; GFX900-NEXT: s_setpc_b64 s[30:31] 5102; 5103; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_2_3_3: 5104; GFX90A: ; %bb.0: 5105; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5106; 
GFX90A-NEXT: ;;#ASMSTART 5107; GFX90A-NEXT: ; def v[0:5] 5108; GFX90A-NEXT: ;;#ASMEND 5109; GFX90A-NEXT: ;;#ASMSTART 5110; GFX90A-NEXT: ; def v[6:11] 5111; GFX90A-NEXT: ;;#ASMEND 5112; GFX90A-NEXT: v_mov_b32_e32 v12, 0 5113; GFX90A-NEXT: v_mov_b32_e32 v8, v6 5114; GFX90A-NEXT: v_mov_b32_e32 v9, v7 5115; GFX90A-NEXT: v_mov_b32_e32 v2, v10 5116; GFX90A-NEXT: v_mov_b32_e32 v3, v11 5117; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16 5118; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] 5119; GFX90A-NEXT: s_waitcnt vmcnt(0) 5120; GFX90A-NEXT: s_setpc_b64 s[30:31] 5121; 5122; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_2_3_3: 5123; GFX940: ; %bb.0: 5124; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5125; GFX940-NEXT: ;;#ASMSTART 5126; GFX940-NEXT: ; def v[0:5] 5127; GFX940-NEXT: ;;#ASMEND 5128; GFX940-NEXT: ;;#ASMSTART 5129; GFX940-NEXT: ; def v[6:11] 5130; GFX940-NEXT: ;;#ASMEND 5131; GFX940-NEXT: v_mov_b32_e32 v12, 0 5132; GFX940-NEXT: v_mov_b32_e32 v8, v6 5133; GFX940-NEXT: v_mov_b32_e32 v9, v7 5134; GFX940-NEXT: v_mov_b32_e32 v2, v10 5135; GFX940-NEXT: v_mov_b32_e32 v3, v11 5136; GFX940-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] offset:16 sc0 sc1 5137; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] sc0 sc1 5138; GFX940-NEXT: s_waitcnt vmcnt(0) 5139; GFX940-NEXT: s_setpc_b64 s[30:31] 5140 %vec0 = call <3 x i64> asm "; def $0", "=v"() 5141 %vec1 = call <3 x i64> asm "; def $0", "=v"() 5142 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 2, i32 3, i32 3> 5143 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 5144 ret void 5145} 5146 5147define void @v_shuffle_v4i64_v3i64__5_4_3_3(ptr addrspace(1) inreg %ptr) { 5148; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_4_3_3: 5149; GFX900: ; %bb.0: 5150; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5151; GFX900-NEXT: ;;#ASMSTART 5152; GFX900-NEXT: ; def v[0:5] 5153; GFX900-NEXT: ;;#ASMEND 5154; GFX900-NEXT: v_mov_b32_e32 v10, 0 5155; GFX900-NEXT: 
v_mov_b32_e32 v6, v0 5156; GFX900-NEXT: v_mov_b32_e32 v7, v1 5157; GFX900-NEXT: v_mov_b32_e32 v8, v0 5158; GFX900-NEXT: v_mov_b32_e32 v9, v1 5159; GFX900-NEXT: v_mov_b32_e32 v0, v4 5160; GFX900-NEXT: v_mov_b32_e32 v1, v5 5161; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16 5162; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] 5163; GFX900-NEXT: s_waitcnt vmcnt(0) 5164; GFX900-NEXT: s_setpc_b64 s[30:31] 5165; 5166; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_4_3_3: 5167; GFX90A: ; %bb.0: 5168; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5169; GFX90A-NEXT: ;;#ASMSTART 5170; GFX90A-NEXT: ; def v[0:5] 5171; GFX90A-NEXT: ;;#ASMEND 5172; GFX90A-NEXT: v_mov_b32_e32 v10, 0 5173; GFX90A-NEXT: v_mov_b32_e32 v6, v0 5174; GFX90A-NEXT: v_mov_b32_e32 v7, v1 5175; GFX90A-NEXT: v_mov_b32_e32 v8, v0 5176; GFX90A-NEXT: v_mov_b32_e32 v9, v1 5177; GFX90A-NEXT: v_mov_b32_e32 v0, v4 5178; GFX90A-NEXT: v_mov_b32_e32 v1, v5 5179; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16 5180; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] 5181; GFX90A-NEXT: s_waitcnt vmcnt(0) 5182; GFX90A-NEXT: s_setpc_b64 s[30:31] 5183; 5184; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_4_3_3: 5185; GFX940: ; %bb.0: 5186; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5187; GFX940-NEXT: ;;#ASMSTART 5188; GFX940-NEXT: ; def v[0:5] 5189; GFX940-NEXT: ;;#ASMEND 5190; GFX940-NEXT: v_mov_b32_e32 v10, 0 5191; GFX940-NEXT: v_mov_b32_e32 v6, v0 5192; GFX940-NEXT: v_mov_b32_e32 v7, v1 5193; GFX940-NEXT: v_mov_b32_e32 v8, v0 5194; GFX940-NEXT: v_mov_b32_e32 v9, v1 5195; GFX940-NEXT: v_mov_b32_e32 v0, v4 5196; GFX940-NEXT: v_mov_b32_e32 v1, v5 5197; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16 sc0 sc1 5198; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] sc0 sc1 5199; GFX940-NEXT: s_waitcnt vmcnt(0) 5200; GFX940-NEXT: s_setpc_b64 s[30:31] 5201 %vec0 = call <3 x i64> asm "; def $0", "=v"() 5202 %vec1 = call <3 x i64> asm "; def $0", "=v"() 5203 
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 4, i32 3, i32 3> 5204 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 5205 ret void 5206} 5207 5208define void @v_shuffle_v4i64_v3i64__5_5_3_3(ptr addrspace(1) inreg %ptr) { 5209; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_3_3: 5210; GFX900: ; %bb.0: 5211; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5212; GFX900-NEXT: ;;#ASMSTART 5213; GFX900-NEXT: ; def v[0:5] 5214; GFX900-NEXT: ;;#ASMEND 5215; GFX900-NEXT: v_mov_b32_e32 v6, 0 5216; GFX900-NEXT: v_mov_b32_e32 v2, v0 5217; GFX900-NEXT: v_mov_b32_e32 v3, v1 5218; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16 5219; GFX900-NEXT: s_nop 0 5220; GFX900-NEXT: v_mov_b32_e32 v2, v4 5221; GFX900-NEXT: v_mov_b32_e32 v3, v5 5222; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] 5223; GFX900-NEXT: s_waitcnt vmcnt(0) 5224; GFX900-NEXT: s_setpc_b64 s[30:31] 5225; 5226; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_3_3: 5227; GFX90A: ; %bb.0: 5228; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5229; GFX90A-NEXT: ;;#ASMSTART 5230; GFX90A-NEXT: ; def v[0:5] 5231; GFX90A-NEXT: ;;#ASMEND 5232; GFX90A-NEXT: v_mov_b32_e32 v6, 0 5233; GFX90A-NEXT: v_mov_b32_e32 v2, v0 5234; GFX90A-NEXT: v_mov_b32_e32 v3, v1 5235; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16 5236; GFX90A-NEXT: s_nop 0 5237; GFX90A-NEXT: v_mov_b32_e32 v2, v4 5238; GFX90A-NEXT: v_mov_b32_e32 v3, v5 5239; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] 5240; GFX90A-NEXT: s_waitcnt vmcnt(0) 5241; GFX90A-NEXT: s_setpc_b64 s[30:31] 5242; 5243; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_3_3: 5244; GFX940: ; %bb.0: 5245; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5246; GFX940-NEXT: ;;#ASMSTART 5247; GFX940-NEXT: ; def v[0:5] 5248; GFX940-NEXT: ;;#ASMEND 5249; GFX940-NEXT: v_mov_b32_e32 v6, 0 5250; GFX940-NEXT: v_mov_b32_e32 v2, v0 5251; GFX940-NEXT: v_mov_b32_e32 v3, v1 5252; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] 
offset:16 sc0 sc1 5253; GFX940-NEXT: s_nop 1 5254; GFX940-NEXT: v_mov_b32_e32 v2, v4 5255; GFX940-NEXT: v_mov_b32_e32 v3, v5 5256; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 5257; GFX940-NEXT: s_waitcnt vmcnt(0) 5258; GFX940-NEXT: s_setpc_b64 s[30:31] 5259 %vec0 = call <3 x i64> asm "; def $0", "=v"() 5260 %vec1 = call <3 x i64> asm "; def $0", "=v"() 5261 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 3, i32 3> 5262 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 5263 ret void 5264} 5265 5266define void @v_shuffle_v4i64_v3i64__5_5_u_3(ptr addrspace(1) inreg %ptr) { 5267; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_u_3: 5268; GFX900: ; %bb.0: 5269; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5270; GFX900-NEXT: ;;#ASMSTART 5271; GFX900-NEXT: ; def v[0:5] 5272; GFX900-NEXT: ;;#ASMEND 5273; GFX900-NEXT: v_mov_b32_e32 v6, 0 5274; GFX900-NEXT: v_mov_b32_e32 v2, v0 5275; GFX900-NEXT: v_mov_b32_e32 v3, v1 5276; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16 5277; GFX900-NEXT: s_nop 0 5278; GFX900-NEXT: v_mov_b32_e32 v2, v4 5279; GFX900-NEXT: v_mov_b32_e32 v3, v5 5280; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] 5281; GFX900-NEXT: s_waitcnt vmcnt(0) 5282; GFX900-NEXT: s_setpc_b64 s[30:31] 5283; 5284; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_u_3: 5285; GFX90A: ; %bb.0: 5286; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5287; GFX90A-NEXT: ;;#ASMSTART 5288; GFX90A-NEXT: ; def v[0:5] 5289; GFX90A-NEXT: ;;#ASMEND 5290; GFX90A-NEXT: v_mov_b32_e32 v6, 0 5291; GFX90A-NEXT: v_mov_b32_e32 v2, v0 5292; GFX90A-NEXT: v_mov_b32_e32 v3, v1 5293; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16 5294; GFX90A-NEXT: s_nop 0 5295; GFX90A-NEXT: v_mov_b32_e32 v2, v4 5296; GFX90A-NEXT: v_mov_b32_e32 v3, v5 5297; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] 5298; GFX90A-NEXT: s_waitcnt vmcnt(0) 5299; GFX90A-NEXT: s_setpc_b64 s[30:31] 5300; 5301; GFX940-LABEL: 
v_shuffle_v4i64_v3i64__5_5_u_3: 5302; GFX940: ; %bb.0: 5303; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5304; GFX940-NEXT: ;;#ASMSTART 5305; GFX940-NEXT: ; def v[0:5] 5306; GFX940-NEXT: ;;#ASMEND 5307; GFX940-NEXT: v_mov_b32_e32 v6, 0 5308; GFX940-NEXT: v_mov_b32_e32 v2, v0 5309; GFX940-NEXT: v_mov_b32_e32 v3, v1 5310; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 sc0 sc1 5311; GFX940-NEXT: s_nop 1 5312; GFX940-NEXT: v_mov_b32_e32 v2, v4 5313; GFX940-NEXT: v_mov_b32_e32 v3, v5 5314; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 5315; GFX940-NEXT: s_waitcnt vmcnt(0) 5316; GFX940-NEXT: s_setpc_b64 s[30:31] 5317 %vec0 = call <3 x i64> asm "; def $0", "=v"() 5318 %vec1 = call <3 x i64> asm "; def $0", "=v"() 5319 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 poison, i32 3> 5320 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 5321 ret void 5322} 5323 5324define void @v_shuffle_v4i64_v3i64__5_5_0_3(ptr addrspace(1) inreg %ptr) { 5325; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_0_3: 5326; GFX900: ; %bb.0: 5327; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5328; GFX900-NEXT: ;;#ASMSTART 5329; GFX900-NEXT: ; def v[0:5] 5330; GFX900-NEXT: ;;#ASMEND 5331; GFX900-NEXT: ;;#ASMSTART 5332; GFX900-NEXT: ; def v[2:7] 5333; GFX900-NEXT: ;;#ASMEND 5334; GFX900-NEXT: v_mov_b32_e32 v8, 0 5335; GFX900-NEXT: v_mov_b32_e32 v4, v6 5336; GFX900-NEXT: v_mov_b32_e32 v5, v7 5337; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16 5338; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] 5339; GFX900-NEXT: s_waitcnt vmcnt(0) 5340; GFX900-NEXT: s_setpc_b64 s[30:31] 5341; 5342; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_0_3: 5343; GFX90A: ; %bb.0: 5344; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5345; GFX90A-NEXT: ;;#ASMSTART 5346; GFX90A-NEXT: ; def v[0:5] 5347; GFX90A-NEXT: ;;#ASMEND 5348; GFX90A-NEXT: ;;#ASMSTART 5349; GFX90A-NEXT: ; def v[2:7] 5350; GFX90A-NEXT: ;;#ASMEND 
5351; GFX90A-NEXT: v_mov_b32_e32 v8, 0 5352; GFX90A-NEXT: v_mov_b32_e32 v4, v6 5353; GFX90A-NEXT: v_mov_b32_e32 v5, v7 5354; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16 5355; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] 5356; GFX90A-NEXT: s_waitcnt vmcnt(0) 5357; GFX90A-NEXT: s_setpc_b64 s[30:31] 5358; 5359; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_0_3: 5360; GFX940: ; %bb.0: 5361; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5362; GFX940-NEXT: ;;#ASMSTART 5363; GFX940-NEXT: ; def v[0:5] 5364; GFX940-NEXT: ;;#ASMEND 5365; GFX940-NEXT: v_mov_b32_e32 v8, 0 5366; GFX940-NEXT: ;;#ASMSTART 5367; GFX940-NEXT: ; def v[2:7] 5368; GFX940-NEXT: ;;#ASMEND 5369; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1 5370; GFX940-NEXT: v_mov_b32_e32 v4, v6 5371; GFX940-NEXT: v_mov_b32_e32 v5, v7 5372; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 5373; GFX940-NEXT: s_waitcnt vmcnt(0) 5374; GFX940-NEXT: s_setpc_b64 s[30:31] 5375 %vec0 = call <3 x i64> asm "; def $0", "=v"() 5376 %vec1 = call <3 x i64> asm "; def $0", "=v"() 5377 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 0, i32 3> 5378 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 5379 ret void 5380} 5381 5382define void @v_shuffle_v4i64_v3i64__5_5_1_3(ptr addrspace(1) inreg %ptr) { 5383; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_1_3: 5384; GFX900: ; %bb.0: 5385; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5386; GFX900-NEXT: ;;#ASMSTART 5387; GFX900-NEXT: ; def v[0:5] 5388; GFX900-NEXT: ;;#ASMEND 5389; GFX900-NEXT: ;;#ASMSTART 5390; GFX900-NEXT: ; def v[4:9] 5391; GFX900-NEXT: ;;#ASMEND 5392; GFX900-NEXT: v_mov_b32_e32 v10, 0 5393; GFX900-NEXT: v_mov_b32_e32 v6, v8 5394; GFX900-NEXT: v_mov_b32_e32 v7, v9 5395; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16 5396; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] 5397; GFX900-NEXT: s_waitcnt vmcnt(0) 5398; GFX900-NEXT: 
s_setpc_b64 s[30:31] 5399; 5400; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_1_3: 5401; GFX90A: ; %bb.0: 5402; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5403; GFX90A-NEXT: ;;#ASMSTART 5404; GFX90A-NEXT: ; def v[0:5] 5405; GFX90A-NEXT: ;;#ASMEND 5406; GFX90A-NEXT: ;;#ASMSTART 5407; GFX90A-NEXT: ; def v[4:9] 5408; GFX90A-NEXT: ;;#ASMEND 5409; GFX90A-NEXT: v_mov_b32_e32 v10, 0 5410; GFX90A-NEXT: v_mov_b32_e32 v6, v8 5411; GFX90A-NEXT: v_mov_b32_e32 v7, v9 5412; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16 5413; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] 5414; GFX90A-NEXT: s_waitcnt vmcnt(0) 5415; GFX90A-NEXT: s_setpc_b64 s[30:31] 5416; 5417; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_1_3: 5418; GFX940: ; %bb.0: 5419; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5420; GFX940-NEXT: ;;#ASMSTART 5421; GFX940-NEXT: ; def v[0:5] 5422; GFX940-NEXT: ;;#ASMEND 5423; GFX940-NEXT: v_mov_b32_e32 v10, 0 5424; GFX940-NEXT: ;;#ASMSTART 5425; GFX940-NEXT: ; def v[4:9] 5426; GFX940-NEXT: ;;#ASMEND 5427; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16 sc0 sc1 5428; GFX940-NEXT: v_mov_b32_e32 v6, v8 5429; GFX940-NEXT: v_mov_b32_e32 v7, v9 5430; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] sc0 sc1 5431; GFX940-NEXT: s_waitcnt vmcnt(0) 5432; GFX940-NEXT: s_setpc_b64 s[30:31] 5433 %vec0 = call <3 x i64> asm "; def $0", "=v"() 5434 %vec1 = call <3 x i64> asm "; def $0", "=v"() 5435 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 1, i32 3> 5436 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 5437 ret void 5438} 5439 5440define void @v_shuffle_v4i64_v3i64__5_5_2_3(ptr addrspace(1) inreg %ptr) { 5441; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_2_3: 5442; GFX900: ; %bb.0: 5443; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5444; GFX900-NEXT: ;;#ASMSTART 5445; GFX900-NEXT: ; def v[0:5] 5446; GFX900-NEXT: ;;#ASMEND 5447; GFX900-NEXT: ;;#ASMSTART 5448; GFX900-NEXT: ; def 
v[6:11] 5449; GFX900-NEXT: ;;#ASMEND 5450; GFX900-NEXT: v_mov_b32_e32 v12, 0 5451; GFX900-NEXT: v_mov_b32_e32 v0, v4 5452; GFX900-NEXT: v_mov_b32_e32 v1, v5 5453; GFX900-NEXT: v_mov_b32_e32 v2, v6 5454; GFX900-NEXT: v_mov_b32_e32 v3, v7 5455; GFX900-NEXT: v_mov_b32_e32 v8, v10 5456; GFX900-NEXT: v_mov_b32_e32 v9, v11 5457; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] offset:16 5458; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] 5459; GFX900-NEXT: s_waitcnt vmcnt(0) 5460; GFX900-NEXT: s_setpc_b64 s[30:31] 5461; 5462; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_2_3: 5463; GFX90A: ; %bb.0: 5464; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5465; GFX90A-NEXT: ;;#ASMSTART 5466; GFX90A-NEXT: ; def v[0:5] 5467; GFX90A-NEXT: ;;#ASMEND 5468; GFX90A-NEXT: ;;#ASMSTART 5469; GFX90A-NEXT: ; def v[6:11] 5470; GFX90A-NEXT: ;;#ASMEND 5471; GFX90A-NEXT: v_mov_b32_e32 v12, 0 5472; GFX90A-NEXT: v_mov_b32_e32 v0, v4 5473; GFX90A-NEXT: v_mov_b32_e32 v1, v5 5474; GFX90A-NEXT: v_mov_b32_e32 v2, v6 5475; GFX90A-NEXT: v_mov_b32_e32 v3, v7 5476; GFX90A-NEXT: v_mov_b32_e32 v8, v10 5477; GFX90A-NEXT: v_mov_b32_e32 v9, v11 5478; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] offset:16 5479; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] 5480; GFX90A-NEXT: s_waitcnt vmcnt(0) 5481; GFX90A-NEXT: s_setpc_b64 s[30:31] 5482; 5483; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_2_3: 5484; GFX940: ; %bb.0: 5485; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5486; GFX940-NEXT: ;;#ASMSTART 5487; GFX940-NEXT: ; def v[0:5] 5488; GFX940-NEXT: ;;#ASMEND 5489; GFX940-NEXT: ;;#ASMSTART 5490; GFX940-NEXT: ; def v[6:11] 5491; GFX940-NEXT: ;;#ASMEND 5492; GFX940-NEXT: v_mov_b32_e32 v12, 0 5493; GFX940-NEXT: v_mov_b32_e32 v0, v4 5494; GFX940-NEXT: v_mov_b32_e32 v1, v5 5495; GFX940-NEXT: v_mov_b32_e32 v2, v6 5496; GFX940-NEXT: v_mov_b32_e32 v3, v7 5497; GFX940-NEXT: v_mov_b32_e32 v8, v10 5498; GFX940-NEXT: v_mov_b32_e32 v9, v11 5499; GFX940-NEXT: global_store_dwordx4 
v12, v[0:3], s[0:1] offset:16 sc0 sc1 5500; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1 5501; GFX940-NEXT: s_waitcnt vmcnt(0) 5502; GFX940-NEXT: s_setpc_b64 s[30:31] 5503 %vec0 = call <3 x i64> asm "; def $0", "=v"() 5504 %vec1 = call <3 x i64> asm "; def $0", "=v"() 5505 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 2, i32 3> 5506 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 5507 ret void 5508} 5509 5510define void @v_shuffle_v4i64_v3i64__5_5_4_3(ptr addrspace(1) inreg %ptr) { 5511; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_4_3: 5512; GFX900: ; %bb.0: 5513; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5514; GFX900-NEXT: ;;#ASMSTART 5515; GFX900-NEXT: ; def v[0:5] 5516; GFX900-NEXT: ;;#ASMEND 5517; GFX900-NEXT: v_mov_b32_e32 v10, 0 5518; GFX900-NEXT: v_mov_b32_e32 v6, v2 5519; GFX900-NEXT: v_mov_b32_e32 v7, v3 5520; GFX900-NEXT: v_mov_b32_e32 v8, v0 5521; GFX900-NEXT: v_mov_b32_e32 v9, v1 5522; GFX900-NEXT: v_mov_b32_e32 v2, v4 5523; GFX900-NEXT: v_mov_b32_e32 v3, v5 5524; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16 5525; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] 5526; GFX900-NEXT: s_waitcnt vmcnt(0) 5527; GFX900-NEXT: s_setpc_b64 s[30:31] 5528; 5529; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_4_3: 5530; GFX90A: ; %bb.0: 5531; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5532; GFX90A-NEXT: ;;#ASMSTART 5533; GFX90A-NEXT: ; def v[0:5] 5534; GFX90A-NEXT: ;;#ASMEND 5535; GFX90A-NEXT: v_mov_b32_e32 v10, 0 5536; GFX90A-NEXT: v_mov_b32_e32 v6, v2 5537; GFX90A-NEXT: v_mov_b32_e32 v7, v3 5538; GFX90A-NEXT: v_mov_b32_e32 v8, v0 5539; GFX90A-NEXT: v_mov_b32_e32 v9, v1 5540; GFX90A-NEXT: v_mov_b32_e32 v2, v4 5541; GFX90A-NEXT: v_mov_b32_e32 v3, v5 5542; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16 5543; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] 5544; GFX90A-NEXT: s_waitcnt vmcnt(0) 5545; GFX90A-NEXT: s_setpc_b64 s[30:31] 5546; 
5547; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_4_3: 5548; GFX940: ; %bb.0: 5549; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5550; GFX940-NEXT: ;;#ASMSTART 5551; GFX940-NEXT: ; def v[0:5] 5552; GFX940-NEXT: ;;#ASMEND 5553; GFX940-NEXT: v_mov_b32_e32 v10, 0 5554; GFX940-NEXT: v_mov_b32_e32 v6, v2 5555; GFX940-NEXT: v_mov_b32_e32 v7, v3 5556; GFX940-NEXT: v_mov_b32_e32 v8, v0 5557; GFX940-NEXT: v_mov_b32_e32 v9, v1 5558; GFX940-NEXT: v_mov_b32_e32 v2, v4 5559; GFX940-NEXT: v_mov_b32_e32 v3, v5 5560; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16 sc0 sc1 5561; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] sc0 sc1 5562; GFX940-NEXT: s_waitcnt vmcnt(0) 5563; GFX940-NEXT: s_setpc_b64 s[30:31] 5564 %vec0 = call <3 x i64> asm "; def $0", "=v"() 5565 %vec1 = call <3 x i64> asm "; def $0", "=v"() 5566 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 4, i32 3> 5567 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 5568 ret void 5569} 5570 5571define void @v_shuffle_v4i64_v3i64__u_4_4_4(ptr addrspace(1) inreg %ptr) { 5572; GFX900-LABEL: v_shuffle_v4i64_v3i64__u_4_4_4: 5573; GFX900: ; %bb.0: 5574; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5575; GFX900-NEXT: ;;#ASMSTART 5576; GFX900-NEXT: ; def v[0:5] 5577; GFX900-NEXT: ;;#ASMEND 5578; GFX900-NEXT: v_mov_b32_e32 v6, 0 5579; GFX900-NEXT: v_mov_b32_e32 v4, v2 5580; GFX900-NEXT: v_mov_b32_e32 v5, v3 5581; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16 5582; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] 5583; GFX900-NEXT: s_waitcnt vmcnt(0) 5584; GFX900-NEXT: s_setpc_b64 s[30:31] 5585; 5586; GFX90A-LABEL: v_shuffle_v4i64_v3i64__u_4_4_4: 5587; GFX90A: ; %bb.0: 5588; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5589; GFX90A-NEXT: ;;#ASMSTART 5590; GFX90A-NEXT: ; def v[0:5] 5591; GFX90A-NEXT: ;;#ASMEND 5592; GFX90A-NEXT: v_mov_b32_e32 v6, 0 5593; GFX90A-NEXT: v_mov_b32_e32 v4, v2 5594; GFX90A-NEXT: v_mov_b32_e32 v5, 
v3 5595; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16 5596; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] 5597; GFX90A-NEXT: s_waitcnt vmcnt(0) 5598; GFX90A-NEXT: s_setpc_b64 s[30:31] 5599; 5600; GFX940-LABEL: v_shuffle_v4i64_v3i64__u_4_4_4: 5601; GFX940: ; %bb.0: 5602; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5603; GFX940-NEXT: ;;#ASMSTART 5604; GFX940-NEXT: ; def v[0:5] 5605; GFX940-NEXT: ;;#ASMEND 5606; GFX940-NEXT: v_mov_b32_e32 v6, 0 5607; GFX940-NEXT: v_mov_b32_e32 v4, v2 5608; GFX940-NEXT: v_mov_b32_e32 v5, v3 5609; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1 5610; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 5611; GFX940-NEXT: s_waitcnt vmcnt(0) 5612; GFX940-NEXT: s_setpc_b64 s[30:31] 5613 %vec0 = call <3 x i64> asm "; def $0", "=v"() 5614 %vec1 = call <3 x i64> asm "; def $0", "=v"() 5615 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 poison, i32 4, i32 4, i32 4> 5616 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 5617 ret void 5618} 5619 5620define void @v_shuffle_v4i64_v3i64__0_4_4_4(ptr addrspace(1) inreg %ptr) { 5621; GFX900-LABEL: v_shuffle_v4i64_v3i64__0_4_4_4: 5622; GFX900: ; %bb.0: 5623; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5624; GFX900-NEXT: ;;#ASMSTART 5625; GFX900-NEXT: ; def v[0:5] 5626; GFX900-NEXT: ;;#ASMEND 5627; GFX900-NEXT: ;;#ASMSTART 5628; GFX900-NEXT: ; def v[2:7] 5629; GFX900-NEXT: ;;#ASMEND 5630; GFX900-NEXT: v_mov_b32_e32 v8, 0 5631; GFX900-NEXT: v_mov_b32_e32 v6, v4 5632; GFX900-NEXT: v_mov_b32_e32 v7, v5 5633; GFX900-NEXT: v_mov_b32_e32 v2, v4 5634; GFX900-NEXT: v_mov_b32_e32 v3, v5 5635; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16 5636; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] 5637; GFX900-NEXT: s_waitcnt vmcnt(0) 5638; GFX900-NEXT: s_setpc_b64 s[30:31] 5639; 5640; GFX90A-LABEL: v_shuffle_v4i64_v3i64__0_4_4_4: 5641; GFX90A: ; %bb.0: 5642; GFX90A-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) 5643; GFX90A-NEXT: ;;#ASMSTART 5644; GFX90A-NEXT: ; def v[0:5] 5645; GFX90A-NEXT: ;;#ASMEND 5646; GFX90A-NEXT: ;;#ASMSTART 5647; GFX90A-NEXT: ; def v[2:7] 5648; GFX90A-NEXT: ;;#ASMEND 5649; GFX90A-NEXT: v_mov_b32_e32 v8, 0 5650; GFX90A-NEXT: v_mov_b32_e32 v6, v4 5651; GFX90A-NEXT: v_mov_b32_e32 v7, v5 5652; GFX90A-NEXT: v_mov_b32_e32 v2, v4 5653; GFX90A-NEXT: v_mov_b32_e32 v3, v5 5654; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16 5655; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] 5656; GFX90A-NEXT: s_waitcnt vmcnt(0) 5657; GFX90A-NEXT: s_setpc_b64 s[30:31] 5658; 5659; GFX940-LABEL: v_shuffle_v4i64_v3i64__0_4_4_4: 5660; GFX940: ; %bb.0: 5661; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5662; GFX940-NEXT: ;;#ASMSTART 5663; GFX940-NEXT: ; def v[0:5] 5664; GFX940-NEXT: ;;#ASMEND 5665; GFX940-NEXT: v_mov_b32_e32 v8, 0 5666; GFX940-NEXT: ;;#ASMSTART 5667; GFX940-NEXT: ; def v[2:7] 5668; GFX940-NEXT: ;;#ASMEND 5669; GFX940-NEXT: s_nop 0 5670; GFX940-NEXT: v_mov_b32_e32 v6, v4 5671; GFX940-NEXT: v_mov_b32_e32 v7, v5 5672; GFX940-NEXT: v_mov_b32_e32 v2, v4 5673; GFX940-NEXT: v_mov_b32_e32 v3, v5 5674; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 sc0 sc1 5675; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 5676; GFX940-NEXT: s_waitcnt vmcnt(0) 5677; GFX940-NEXT: s_setpc_b64 s[30:31] 5678 %vec0 = call <3 x i64> asm "; def $0", "=v"() 5679 %vec1 = call <3 x i64> asm "; def $0", "=v"() 5680 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 0, i32 4, i32 4, i32 4> 5681 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 5682 ret void 5683} 5684 5685define void @v_shuffle_v4i64_v3i64__1_4_4_4(ptr addrspace(1) inreg %ptr) { 5686; GFX900-LABEL: v_shuffle_v4i64_v3i64__1_4_4_4: 5687; GFX900: ; %bb.0: 5688; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5689; GFX900-NEXT: ;;#ASMSTART 5690; GFX900-NEXT: ; def v[0:5] 5691; GFX900-NEXT: ;;#ASMEND 5692; 
GFX900-NEXT: ;;#ASMSTART 5693; GFX900-NEXT: ; def v[4:9] 5694; GFX900-NEXT: ;;#ASMEND 5695; GFX900-NEXT: v_mov_b32_e32 v10, 0 5696; GFX900-NEXT: v_mov_b32_e32 v8, v6 5697; GFX900-NEXT: v_mov_b32_e32 v9, v7 5698; GFX900-NEXT: v_mov_b32_e32 v4, v6 5699; GFX900-NEXT: v_mov_b32_e32 v5, v7 5700; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16 5701; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] 5702; GFX900-NEXT: s_waitcnt vmcnt(0) 5703; GFX900-NEXT: s_setpc_b64 s[30:31] 5704; 5705; GFX90A-LABEL: v_shuffle_v4i64_v3i64__1_4_4_4: 5706; GFX90A: ; %bb.0: 5707; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5708; GFX90A-NEXT: ;;#ASMSTART 5709; GFX90A-NEXT: ; def v[0:5] 5710; GFX90A-NEXT: ;;#ASMEND 5711; GFX90A-NEXT: ;;#ASMSTART 5712; GFX90A-NEXT: ; def v[4:9] 5713; GFX90A-NEXT: ;;#ASMEND 5714; GFX90A-NEXT: v_mov_b32_e32 v10, 0 5715; GFX90A-NEXT: v_mov_b32_e32 v8, v6 5716; GFX90A-NEXT: v_mov_b32_e32 v9, v7 5717; GFX90A-NEXT: v_mov_b32_e32 v4, v6 5718; GFX90A-NEXT: v_mov_b32_e32 v5, v7 5719; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16 5720; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] 5721; GFX90A-NEXT: s_waitcnt vmcnt(0) 5722; GFX90A-NEXT: s_setpc_b64 s[30:31] 5723; 5724; GFX940-LABEL: v_shuffle_v4i64_v3i64__1_4_4_4: 5725; GFX940: ; %bb.0: 5726; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5727; GFX940-NEXT: ;;#ASMSTART 5728; GFX940-NEXT: ; def v[0:5] 5729; GFX940-NEXT: ;;#ASMEND 5730; GFX940-NEXT: v_mov_b32_e32 v10, 0 5731; GFX940-NEXT: ;;#ASMSTART 5732; GFX940-NEXT: ; def v[4:9] 5733; GFX940-NEXT: ;;#ASMEND 5734; GFX940-NEXT: s_nop 0 5735; GFX940-NEXT: v_mov_b32_e32 v8, v6 5736; GFX940-NEXT: v_mov_b32_e32 v9, v7 5737; GFX940-NEXT: v_mov_b32_e32 v4, v6 5738; GFX940-NEXT: v_mov_b32_e32 v5, v7 5739; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16 sc0 sc1 5740; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] sc0 sc1 5741; GFX940-NEXT: s_waitcnt vmcnt(0) 5742; GFX940-NEXT: s_setpc_b64 
s[30:31] 5743 %vec0 = call <3 x i64> asm "; def $0", "=v"() 5744 %vec1 = call <3 x i64> asm "; def $0", "=v"() 5745 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 1, i32 4, i32 4, i32 4> 5746 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 5747 ret void 5748} 5749 5750define void @v_shuffle_v4i64_v3i64__2_4_4_4(ptr addrspace(1) inreg %ptr) { 5751; GFX900-LABEL: v_shuffle_v4i64_v3i64__2_4_4_4: 5752; GFX900: ; %bb.0: 5753; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5754; GFX900-NEXT: ;;#ASMSTART 5755; GFX900-NEXT: ; def v[6:11] 5756; GFX900-NEXT: ;;#ASMEND 5757; GFX900-NEXT: v_mov_b32_e32 v12, 0 5758; GFX900-NEXT: ;;#ASMSTART 5759; GFX900-NEXT: ; def v[0:5] 5760; GFX900-NEXT: ;;#ASMEND 5761; GFX900-NEXT: v_mov_b32_e32 v10, v8 5762; GFX900-NEXT: v_mov_b32_e32 v11, v9 5763; GFX900-NEXT: v_mov_b32_e32 v6, v4 5764; GFX900-NEXT: v_mov_b32_e32 v7, v5 5765; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16 5766; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] 5767; GFX900-NEXT: s_waitcnt vmcnt(0) 5768; GFX900-NEXT: s_setpc_b64 s[30:31] 5769; 5770; GFX90A-LABEL: v_shuffle_v4i64_v3i64__2_4_4_4: 5771; GFX90A: ; %bb.0: 5772; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5773; GFX90A-NEXT: ;;#ASMSTART 5774; GFX90A-NEXT: ; def v[6:11] 5775; GFX90A-NEXT: ;;#ASMEND 5776; GFX90A-NEXT: v_mov_b32_e32 v12, 0 5777; GFX90A-NEXT: ;;#ASMSTART 5778; GFX90A-NEXT: ; def v[0:5] 5779; GFX90A-NEXT: ;;#ASMEND 5780; GFX90A-NEXT: v_mov_b32_e32 v10, v8 5781; GFX90A-NEXT: v_mov_b32_e32 v11, v9 5782; GFX90A-NEXT: v_mov_b32_e32 v6, v4 5783; GFX90A-NEXT: v_mov_b32_e32 v7, v5 5784; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16 5785; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] 5786; GFX90A-NEXT: s_waitcnt vmcnt(0) 5787; GFX90A-NEXT: s_setpc_b64 s[30:31] 5788; 5789; GFX940-LABEL: v_shuffle_v4i64_v3i64__2_4_4_4: 5790; GFX940: ; %bb.0: 5791; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5792; 
GFX940-NEXT: ;;#ASMSTART 5793; GFX940-NEXT: ; def v[6:11] 5794; GFX940-NEXT: ;;#ASMEND 5795; GFX940-NEXT: v_mov_b32_e32 v12, 0 5796; GFX940-NEXT: ;;#ASMSTART 5797; GFX940-NEXT: ; def v[0:5] 5798; GFX940-NEXT: ;;#ASMEND 5799; GFX940-NEXT: v_mov_b32_e32 v10, v8 5800; GFX940-NEXT: v_mov_b32_e32 v11, v9 5801; GFX940-NEXT: v_mov_b32_e32 v6, v4 5802; GFX940-NEXT: v_mov_b32_e32 v7, v5 5803; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] offset:16 sc0 sc1 5804; GFX940-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] sc0 sc1 5805; GFX940-NEXT: s_waitcnt vmcnt(0) 5806; GFX940-NEXT: s_setpc_b64 s[30:31] 5807 %vec0 = call <3 x i64> asm "; def $0", "=v"() 5808 %vec1 = call <3 x i64> asm "; def $0", "=v"() 5809 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 2, i32 4, i32 4, i32 4> 5810 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 5811 ret void 5812} 5813 5814define void @v_shuffle_v4i64_v3i64__3_4_4_4(ptr addrspace(1) inreg %ptr) { 5815; GFX900-LABEL: v_shuffle_v4i64_v3i64__3_4_4_4: 5816; GFX900: ; %bb.0: 5817; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5818; GFX900-NEXT: ;;#ASMSTART 5819; GFX900-NEXT: ; def v[0:5] 5820; GFX900-NEXT: ;;#ASMEND 5821; GFX900-NEXT: v_mov_b32_e32 v6, 0 5822; GFX900-NEXT: v_mov_b32_e32 v4, v2 5823; GFX900-NEXT: v_mov_b32_e32 v5, v3 5824; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16 5825; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] 5826; GFX900-NEXT: s_waitcnt vmcnt(0) 5827; GFX900-NEXT: s_setpc_b64 s[30:31] 5828; 5829; GFX90A-LABEL: v_shuffle_v4i64_v3i64__3_4_4_4: 5830; GFX90A: ; %bb.0: 5831; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5832; GFX90A-NEXT: ;;#ASMSTART 5833; GFX90A-NEXT: ; def v[0:5] 5834; GFX90A-NEXT: ;;#ASMEND 5835; GFX90A-NEXT: v_mov_b32_e32 v6, 0 5836; GFX90A-NEXT: v_mov_b32_e32 v4, v2 5837; GFX90A-NEXT: v_mov_b32_e32 v5, v3 5838; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16 5839; GFX90A-NEXT: global_store_dwordx4 v6, 
v[0:3], s[16:17] 5840; GFX90A-NEXT: s_waitcnt vmcnt(0) 5841; GFX90A-NEXT: s_setpc_b64 s[30:31] 5842; 5843; GFX940-LABEL: v_shuffle_v4i64_v3i64__3_4_4_4: 5844; GFX940: ; %bb.0: 5845; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5846; GFX940-NEXT: ;;#ASMSTART 5847; GFX940-NEXT: ; def v[0:5] 5848; GFX940-NEXT: ;;#ASMEND 5849; GFX940-NEXT: v_mov_b32_e32 v6, 0 5850; GFX940-NEXT: v_mov_b32_e32 v4, v2 5851; GFX940-NEXT: v_mov_b32_e32 v5, v3 5852; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1 5853; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 5854; GFX940-NEXT: s_waitcnt vmcnt(0) 5855; GFX940-NEXT: s_setpc_b64 s[30:31] 5856 %vec0 = call <3 x i64> asm "; def $0", "=v"() 5857 %vec1 = call <3 x i64> asm "; def $0", "=v"() 5858 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 3, i32 4, i32 4, i32 4> 5859 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 5860 ret void 5861} 5862 5863define void @v_shuffle_v4i64_v3i64__4_4_4_4(ptr addrspace(1) inreg %ptr) { 5864; GFX900-LABEL: v_shuffle_v4i64_v3i64__4_4_4_4: 5865; GFX900: ; %bb.0: 5866; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5867; GFX900-NEXT: ;;#ASMSTART 5868; GFX900-NEXT: ; def v[0:5] 5869; GFX900-NEXT: ;;#ASMEND 5870; GFX900-NEXT: v_mov_b32_e32 v6, 0 5871; GFX900-NEXT: v_mov_b32_e32 v4, v2 5872; GFX900-NEXT: v_mov_b32_e32 v5, v3 5873; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16 5874; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] 5875; GFX900-NEXT: s_waitcnt vmcnt(0) 5876; GFX900-NEXT: s_setpc_b64 s[30:31] 5877; 5878; GFX90A-LABEL: v_shuffle_v4i64_v3i64__4_4_4_4: 5879; GFX90A: ; %bb.0: 5880; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5881; GFX90A-NEXT: ;;#ASMSTART 5882; GFX90A-NEXT: ; def v[0:5] 5883; GFX90A-NEXT: ;;#ASMEND 5884; GFX90A-NEXT: v_mov_b32_e32 v6, 0 5885; GFX90A-NEXT: v_mov_b32_e32 v4, v2 5886; GFX90A-NEXT: v_mov_b32_e32 v5, v3 5887; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], 
s[16:17] offset:16 5888; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] 5889; GFX90A-NEXT: s_waitcnt vmcnt(0) 5890; GFX90A-NEXT: s_setpc_b64 s[30:31] 5891; 5892; GFX940-LABEL: v_shuffle_v4i64_v3i64__4_4_4_4: 5893; GFX940: ; %bb.0: 5894; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5895; GFX940-NEXT: ;;#ASMSTART 5896; GFX940-NEXT: ; def v[0:5] 5897; GFX940-NEXT: ;;#ASMEND 5898; GFX940-NEXT: v_mov_b32_e32 v6, 0 5899; GFX940-NEXT: v_mov_b32_e32 v4, v2 5900; GFX940-NEXT: v_mov_b32_e32 v5, v3 5901; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1 5902; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 5903; GFX940-NEXT: s_waitcnt vmcnt(0) 5904; GFX940-NEXT: s_setpc_b64 s[30:31] 5905 %vec0 = call <3 x i64> asm "; def $0", "=v"() 5906 %vec1 = call <3 x i64> asm "; def $0", "=v"() 5907 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 4, i32 4, i32 4, i32 4> 5908 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 5909 ret void 5910} 5911 5912define void @v_shuffle_v4i64_v3i64__5_4_4_4(ptr addrspace(1) inreg %ptr) { 5913; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_4_4_4: 5914; GFX900: ; %bb.0: 5915; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5916; GFX900-NEXT: ;;#ASMSTART 5917; GFX900-NEXT: ; def v[0:5] 5918; GFX900-NEXT: ;;#ASMEND 5919; GFX900-NEXT: v_mov_b32_e32 v6, 0 5920; GFX900-NEXT: v_mov_b32_e32 v0, v2 5921; GFX900-NEXT: v_mov_b32_e32 v1, v3 5922; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16 5923; GFX900-NEXT: s_nop 0 5924; GFX900-NEXT: v_mov_b32_e32 v0, v4 5925; GFX900-NEXT: v_mov_b32_e32 v1, v5 5926; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] 5927; GFX900-NEXT: s_waitcnt vmcnt(0) 5928; GFX900-NEXT: s_setpc_b64 s[30:31] 5929; 5930; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_4_4_4: 5931; GFX90A: ; %bb.0: 5932; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5933; GFX90A-NEXT: ;;#ASMSTART 5934; GFX90A-NEXT: ; def v[0:5] 5935; GFX90A-NEXT: ;;#ASMEND 5936; 
GFX90A-NEXT: v_mov_b32_e32 v6, 0 5937; GFX90A-NEXT: v_mov_b32_e32 v0, v2 5938; GFX90A-NEXT: v_mov_b32_e32 v1, v3 5939; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16 5940; GFX90A-NEXT: s_nop 0 5941; GFX90A-NEXT: v_mov_b32_e32 v0, v4 5942; GFX90A-NEXT: v_mov_b32_e32 v1, v5 5943; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] 5944; GFX90A-NEXT: s_waitcnt vmcnt(0) 5945; GFX90A-NEXT: s_setpc_b64 s[30:31] 5946; 5947; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_4_4_4: 5948; GFX940: ; %bb.0: 5949; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5950; GFX940-NEXT: ;;#ASMSTART 5951; GFX940-NEXT: ; def v[0:5] 5952; GFX940-NEXT: ;;#ASMEND 5953; GFX940-NEXT: v_mov_b32_e32 v6, 0 5954; GFX940-NEXT: v_mov_b32_e32 v0, v2 5955; GFX940-NEXT: v_mov_b32_e32 v1, v3 5956; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 sc0 sc1 5957; GFX940-NEXT: s_nop 1 5958; GFX940-NEXT: v_mov_b32_e32 v0, v4 5959; GFX940-NEXT: v_mov_b32_e32 v1, v5 5960; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 5961; GFX940-NEXT: s_waitcnt vmcnt(0) 5962; GFX940-NEXT: s_setpc_b64 s[30:31] 5963 %vec0 = call <3 x i64> asm "; def $0", "=v"() 5964 %vec1 = call <3 x i64> asm "; def $0", "=v"() 5965 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 4, i32 4, i32 4> 5966 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 5967 ret void 5968} 5969 5970define void @v_shuffle_v4i64_v3i64__5_u_4_4(ptr addrspace(1) inreg %ptr) { 5971; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_u_4_4: 5972; GFX900: ; %bb.0: 5973; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5974; GFX900-NEXT: ;;#ASMSTART 5975; GFX900-NEXT: ; def v[0:5] 5976; GFX900-NEXT: ;;#ASMEND 5977; GFX900-NEXT: v_mov_b32_e32 v6, 0 5978; GFX900-NEXT: v_mov_b32_e32 v0, v2 5979; GFX900-NEXT: v_mov_b32_e32 v1, v3 5980; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16 5981; GFX900-NEXT: s_nop 0 5982; GFX900-NEXT: v_mov_b32_e32 v0, v4 5983; GFX900-NEXT: v_mov_b32_e32 v1, v5 
5984; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] 5985; GFX900-NEXT: s_waitcnt vmcnt(0) 5986; GFX900-NEXT: s_setpc_b64 s[30:31] 5987; 5988; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_u_4_4: 5989; GFX90A: ; %bb.0: 5990; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5991; GFX90A-NEXT: ;;#ASMSTART 5992; GFX90A-NEXT: ; def v[0:5] 5993; GFX90A-NEXT: ;;#ASMEND 5994; GFX90A-NEXT: v_mov_b32_e32 v6, 0 5995; GFX90A-NEXT: v_mov_b32_e32 v0, v2 5996; GFX90A-NEXT: v_mov_b32_e32 v1, v3 5997; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16 5998; GFX90A-NEXT: s_nop 0 5999; GFX90A-NEXT: v_mov_b32_e32 v0, v4 6000; GFX90A-NEXT: v_mov_b32_e32 v1, v5 6001; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] 6002; GFX90A-NEXT: s_waitcnt vmcnt(0) 6003; GFX90A-NEXT: s_setpc_b64 s[30:31] 6004; 6005; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_u_4_4: 6006; GFX940: ; %bb.0: 6007; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6008; GFX940-NEXT: ;;#ASMSTART 6009; GFX940-NEXT: ; def v[0:5] 6010; GFX940-NEXT: ;;#ASMEND 6011; GFX940-NEXT: v_mov_b32_e32 v6, 0 6012; GFX940-NEXT: v_mov_b32_e32 v0, v2 6013; GFX940-NEXT: v_mov_b32_e32 v1, v3 6014; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 sc0 sc1 6015; GFX940-NEXT: s_nop 1 6016; GFX940-NEXT: v_mov_b32_e32 v0, v4 6017; GFX940-NEXT: v_mov_b32_e32 v1, v5 6018; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 6019; GFX940-NEXT: s_waitcnt vmcnt(0) 6020; GFX940-NEXT: s_setpc_b64 s[30:31] 6021 %vec0 = call <3 x i64> asm "; def $0", "=v"() 6022 %vec1 = call <3 x i64> asm "; def $0", "=v"() 6023 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 poison, i32 4, i32 4> 6024 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 6025 ret void 6026} 6027 6028define void @v_shuffle_v4i64_v3i64__5_0_4_4(ptr addrspace(1) inreg %ptr) { 6029; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_0_4_4: 6030; GFX900: ; %bb.0: 6031; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6032; 
GFX900-NEXT: ;;#ASMSTART 6033; GFX900-NEXT: ; def v[0:5] 6034; GFX900-NEXT: ;;#ASMEND 6035; GFX900-NEXT: ;;#ASMSTART 6036; GFX900-NEXT: ; def v[2:7] 6037; GFX900-NEXT: ;;#ASMEND 6038; GFX900-NEXT: v_mov_b32_e32 v8, 0 6039; GFX900-NEXT: v_mov_b32_e32 v2, v4 6040; GFX900-NEXT: v_mov_b32_e32 v3, v5 6041; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16 6042; GFX900-NEXT: s_nop 0 6043; GFX900-NEXT: v_mov_b32_e32 v2, v6 6044; GFX900-NEXT: v_mov_b32_e32 v3, v7 6045; GFX900-NEXT: v_mov_b32_e32 v4, v0 6046; GFX900-NEXT: v_mov_b32_e32 v5, v1 6047; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] 6048; GFX900-NEXT: s_waitcnt vmcnt(0) 6049; GFX900-NEXT: s_setpc_b64 s[30:31] 6050; 6051; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_0_4_4: 6052; GFX90A: ; %bb.0: 6053; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6054; GFX90A-NEXT: ;;#ASMSTART 6055; GFX90A-NEXT: ; def v[0:5] 6056; GFX90A-NEXT: ;;#ASMEND 6057; GFX90A-NEXT: ;;#ASMSTART 6058; GFX90A-NEXT: ; def v[2:7] 6059; GFX90A-NEXT: ;;#ASMEND 6060; GFX90A-NEXT: v_mov_b32_e32 v8, 0 6061; GFX90A-NEXT: v_mov_b32_e32 v2, v4 6062; GFX90A-NEXT: v_mov_b32_e32 v3, v5 6063; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16 6064; GFX90A-NEXT: s_nop 0 6065; GFX90A-NEXT: v_mov_b32_e32 v2, v6 6066; GFX90A-NEXT: v_mov_b32_e32 v3, v7 6067; GFX90A-NEXT: v_mov_b32_e32 v4, v0 6068; GFX90A-NEXT: v_mov_b32_e32 v5, v1 6069; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] 6070; GFX90A-NEXT: s_waitcnt vmcnt(0) 6071; GFX90A-NEXT: s_setpc_b64 s[30:31] 6072; 6073; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_0_4_4: 6074; GFX940: ; %bb.0: 6075; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6076; GFX940-NEXT: ;;#ASMSTART 6077; GFX940-NEXT: ; def v[0:5] 6078; GFX940-NEXT: ;;#ASMEND 6079; GFX940-NEXT: v_mov_b32_e32 v8, 0 6080; GFX940-NEXT: ;;#ASMSTART 6081; GFX940-NEXT: ; def v[2:7] 6082; GFX940-NEXT: ;;#ASMEND 6083; GFX940-NEXT: s_nop 0 6084; GFX940-NEXT: v_mov_b32_e32 v2, v4 6085; GFX940-NEXT: v_mov_b32_e32 
v3, v5 6086; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 sc0 sc1 6087; GFX940-NEXT: s_nop 1 6088; GFX940-NEXT: v_mov_b32_e32 v2, v6 6089; GFX940-NEXT: v_mov_b32_e32 v3, v7 6090; GFX940-NEXT: v_mov_b32_e32 v4, v0 6091; GFX940-NEXT: v_mov_b32_e32 v5, v1 6092; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 6093; GFX940-NEXT: s_waitcnt vmcnt(0) 6094; GFX940-NEXT: s_setpc_b64 s[30:31] 6095 %vec0 = call <3 x i64> asm "; def $0", "=v"() 6096 %vec1 = call <3 x i64> asm "; def $0", "=v"() 6097 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 0, i32 4, i32 4> 6098 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 6099 ret void 6100} 6101 6102define void @v_shuffle_v4i64_v3i64__5_1_4_4(ptr addrspace(1) inreg %ptr) { 6103; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_1_4_4: 6104; GFX900: ; %bb.0: 6105; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6106; GFX900-NEXT: ;;#ASMSTART 6107; GFX900-NEXT: ; def v[0:5] 6108; GFX900-NEXT: ;;#ASMEND 6109; GFX900-NEXT: ;;#ASMSTART 6110; GFX900-NEXT: ; def v[4:9] 6111; GFX900-NEXT: ;;#ASMEND 6112; GFX900-NEXT: v_mov_b32_e32 v10, 0 6113; GFX900-NEXT: v_mov_b32_e32 v4, v6 6114; GFX900-NEXT: v_mov_b32_e32 v5, v7 6115; GFX900-NEXT: v_mov_b32_e32 v0, v8 6116; GFX900-NEXT: v_mov_b32_e32 v1, v9 6117; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17] offset:16 6118; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] 6119; GFX900-NEXT: s_waitcnt vmcnt(0) 6120; GFX900-NEXT: s_setpc_b64 s[30:31] 6121; 6122; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_1_4_4: 6123; GFX90A: ; %bb.0: 6124; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6125; GFX90A-NEXT: ;;#ASMSTART 6126; GFX90A-NEXT: ; def v[0:5] 6127; GFX90A-NEXT: ;;#ASMEND 6128; GFX90A-NEXT: ;;#ASMSTART 6129; GFX90A-NEXT: ; def v[4:9] 6130; GFX90A-NEXT: ;;#ASMEND 6131; GFX90A-NEXT: v_mov_b32_e32 v10, 0 6132; GFX90A-NEXT: v_mov_b32_e32 v4, v6 6133; GFX90A-NEXT: v_mov_b32_e32 v5, v7 6134; GFX90A-NEXT: v_mov_b32_e32 v0, v8 
6135; GFX90A-NEXT: v_mov_b32_e32 v1, v9 6136; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17] offset:16 6137; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] 6138; GFX90A-NEXT: s_waitcnt vmcnt(0) 6139; GFX90A-NEXT: s_setpc_b64 s[30:31] 6140; 6141; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_1_4_4: 6142; GFX940: ; %bb.0: 6143; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6144; GFX940-NEXT: ;;#ASMSTART 6145; GFX940-NEXT: ; def v[0:5] 6146; GFX940-NEXT: ;;#ASMEND 6147; GFX940-NEXT: v_mov_b32_e32 v10, 0 6148; GFX940-NEXT: ;;#ASMSTART 6149; GFX940-NEXT: ; def v[4:9] 6150; GFX940-NEXT: ;;#ASMEND 6151; GFX940-NEXT: s_nop 0 6152; GFX940-NEXT: v_mov_b32_e32 v4, v6 6153; GFX940-NEXT: v_mov_b32_e32 v5, v7 6154; GFX940-NEXT: v_mov_b32_e32 v0, v8 6155; GFX940-NEXT: v_mov_b32_e32 v1, v9 6156; GFX940-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1] offset:16 sc0 sc1 6157; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] sc0 sc1 6158; GFX940-NEXT: s_waitcnt vmcnt(0) 6159; GFX940-NEXT: s_setpc_b64 s[30:31] 6160 %vec0 = call <3 x i64> asm "; def $0", "=v"() 6161 %vec1 = call <3 x i64> asm "; def $0", "=v"() 6162 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 1, i32 4, i32 4> 6163 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 6164 ret void 6165} 6166 6167define void @v_shuffle_v4i64_v3i64__5_2_4_4(ptr addrspace(1) inreg %ptr) { 6168; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_2_4_4: 6169; GFX900: ; %bb.0: 6170; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6171; GFX900-NEXT: ;;#ASMSTART 6172; GFX900-NEXT: ; def v[0:5] 6173; GFX900-NEXT: ;;#ASMEND 6174; GFX900-NEXT: ;;#ASMSTART 6175; GFX900-NEXT: ; def v[6:11] 6176; GFX900-NEXT: ;;#ASMEND 6177; GFX900-NEXT: v_mov_b32_e32 v12, 0 6178; GFX900-NEXT: v_mov_b32_e32 v6, v8 6179; GFX900-NEXT: v_mov_b32_e32 v7, v9 6180; GFX900-NEXT: v_mov_b32_e32 v2, v10 6181; GFX900-NEXT: v_mov_b32_e32 v3, v11 6182; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16 6183; 
GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] 6184; GFX900-NEXT: s_waitcnt vmcnt(0) 6185; GFX900-NEXT: s_setpc_b64 s[30:31] 6186; 6187; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_2_4_4: 6188; GFX90A: ; %bb.0: 6189; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6190; GFX90A-NEXT: ;;#ASMSTART 6191; GFX90A-NEXT: ; def v[0:5] 6192; GFX90A-NEXT: ;;#ASMEND 6193; GFX90A-NEXT: ;;#ASMSTART 6194; GFX90A-NEXT: ; def v[6:11] 6195; GFX90A-NEXT: ;;#ASMEND 6196; GFX90A-NEXT: v_mov_b32_e32 v12, 0 6197; GFX90A-NEXT: v_mov_b32_e32 v6, v8 6198; GFX90A-NEXT: v_mov_b32_e32 v7, v9 6199; GFX90A-NEXT: v_mov_b32_e32 v2, v10 6200; GFX90A-NEXT: v_mov_b32_e32 v3, v11 6201; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16 6202; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] 6203; GFX90A-NEXT: s_waitcnt vmcnt(0) 6204; GFX90A-NEXT: s_setpc_b64 s[30:31] 6205; 6206; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_2_4_4: 6207; GFX940: ; %bb.0: 6208; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6209; GFX940-NEXT: ;;#ASMSTART 6210; GFX940-NEXT: ; def v[0:5] 6211; GFX940-NEXT: ;;#ASMEND 6212; GFX940-NEXT: ;;#ASMSTART 6213; GFX940-NEXT: ; def v[6:11] 6214; GFX940-NEXT: ;;#ASMEND 6215; GFX940-NEXT: v_mov_b32_e32 v12, 0 6216; GFX940-NEXT: v_mov_b32_e32 v6, v8 6217; GFX940-NEXT: v_mov_b32_e32 v7, v9 6218; GFX940-NEXT: v_mov_b32_e32 v2, v10 6219; GFX940-NEXT: v_mov_b32_e32 v3, v11 6220; GFX940-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] offset:16 sc0 sc1 6221; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] sc0 sc1 6222; GFX940-NEXT: s_waitcnt vmcnt(0) 6223; GFX940-NEXT: s_setpc_b64 s[30:31] 6224 %vec0 = call <3 x i64> asm "; def $0", "=v"() 6225 %vec1 = call <3 x i64> asm "; def $0", "=v"() 6226 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 2, i32 4, i32 4> 6227 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 6228 ret void 6229} 6230 6231define void @v_shuffle_v4i64_v3i64__5_3_4_4(ptr addrspace(1) inreg %ptr) { 6232; 
GFX900-LABEL: v_shuffle_v4i64_v3i64__5_3_4_4: 6233; GFX900: ; %bb.0: 6234; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6235; GFX900-NEXT: ;;#ASMSTART 6236; GFX900-NEXT: ; def v[0:5] 6237; GFX900-NEXT: ;;#ASMEND 6238; GFX900-NEXT: v_mov_b32_e32 v10, 0 6239; GFX900-NEXT: v_mov_b32_e32 v6, v2 6240; GFX900-NEXT: v_mov_b32_e32 v7, v3 6241; GFX900-NEXT: v_mov_b32_e32 v8, v2 6242; GFX900-NEXT: v_mov_b32_e32 v9, v3 6243; GFX900-NEXT: v_mov_b32_e32 v2, v4 6244; GFX900-NEXT: v_mov_b32_e32 v3, v5 6245; GFX900-NEXT: v_mov_b32_e32 v4, v0 6246; GFX900-NEXT: v_mov_b32_e32 v5, v1 6247; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16 6248; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] 6249; GFX900-NEXT: s_waitcnt vmcnt(0) 6250; GFX900-NEXT: s_setpc_b64 s[30:31] 6251; 6252; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_3_4_4: 6253; GFX90A: ; %bb.0: 6254; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6255; GFX90A-NEXT: ;;#ASMSTART 6256; GFX90A-NEXT: ; def v[0:5] 6257; GFX90A-NEXT: ;;#ASMEND 6258; GFX90A-NEXT: v_mov_b32_e32 v10, 0 6259; GFX90A-NEXT: v_mov_b32_e32 v6, v2 6260; GFX90A-NEXT: v_mov_b32_e32 v7, v3 6261; GFX90A-NEXT: v_mov_b32_e32 v8, v2 6262; GFX90A-NEXT: v_mov_b32_e32 v9, v3 6263; GFX90A-NEXT: v_mov_b32_e32 v2, v4 6264; GFX90A-NEXT: v_mov_b32_e32 v3, v5 6265; GFX90A-NEXT: v_mov_b32_e32 v4, v0 6266; GFX90A-NEXT: v_mov_b32_e32 v5, v1 6267; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16 6268; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] 6269; GFX90A-NEXT: s_waitcnt vmcnt(0) 6270; GFX90A-NEXT: s_setpc_b64 s[30:31] 6271; 6272; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_3_4_4: 6273; GFX940: ; %bb.0: 6274; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6275; GFX940-NEXT: ;;#ASMSTART 6276; GFX940-NEXT: ; def v[0:5] 6277; GFX940-NEXT: ;;#ASMEND 6278; GFX940-NEXT: v_mov_b32_e32 v10, 0 6279; GFX940-NEXT: v_mov_b32_e32 v6, v2 6280; GFX940-NEXT: v_mov_b32_e32 v7, v3 6281; GFX940-NEXT: v_mov_b32_e32 v8, v2 6282; 
GFX940-NEXT: v_mov_b32_e32 v9, v3 6283; GFX940-NEXT: v_mov_b32_e32 v2, v4 6284; GFX940-NEXT: v_mov_b32_e32 v3, v5 6285; GFX940-NEXT: v_mov_b32_e32 v4, v0 6286; GFX940-NEXT: v_mov_b32_e32 v5, v1 6287; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16 sc0 sc1 6288; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] sc0 sc1 6289; GFX940-NEXT: s_waitcnt vmcnt(0) 6290; GFX940-NEXT: s_setpc_b64 s[30:31] 6291 %vec0 = call <3 x i64> asm "; def $0", "=v"() 6292 %vec1 = call <3 x i64> asm "; def $0", "=v"() 6293 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 3, i32 4, i32 4> 6294 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 6295 ret void 6296} 6297 6298define void @v_shuffle_v4i64_v3i64__5_5_4_4(ptr addrspace(1) inreg %ptr) { 6299; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_4_4: 6300; GFX900: ; %bb.0: 6301; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6302; GFX900-NEXT: ;;#ASMSTART 6303; GFX900-NEXT: ; def v[0:5] 6304; GFX900-NEXT: ;;#ASMEND 6305; GFX900-NEXT: v_mov_b32_e32 v6, 0 6306; GFX900-NEXT: v_mov_b32_e32 v0, v2 6307; GFX900-NEXT: v_mov_b32_e32 v1, v3 6308; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16 6309; GFX900-NEXT: s_nop 0 6310; GFX900-NEXT: v_mov_b32_e32 v2, v4 6311; GFX900-NEXT: v_mov_b32_e32 v3, v5 6312; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] 6313; GFX900-NEXT: s_waitcnt vmcnt(0) 6314; GFX900-NEXT: s_setpc_b64 s[30:31] 6315; 6316; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_4_4: 6317; GFX90A: ; %bb.0: 6318; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6319; GFX90A-NEXT: ;;#ASMSTART 6320; GFX90A-NEXT: ; def v[0:5] 6321; GFX90A-NEXT: ;;#ASMEND 6322; GFX90A-NEXT: v_mov_b32_e32 v6, 0 6323; GFX90A-NEXT: v_mov_b32_e32 v0, v2 6324; GFX90A-NEXT: v_mov_b32_e32 v1, v3 6325; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16 6326; GFX90A-NEXT: s_nop 0 6327; GFX90A-NEXT: v_mov_b32_e32 v2, v4 6328; GFX90A-NEXT: v_mov_b32_e32 v3, v5 6329; GFX90A-NEXT: 
global_store_dwordx4 v6, v[2:5], s[16:17] 6330; GFX90A-NEXT: s_waitcnt vmcnt(0) 6331; GFX90A-NEXT: s_setpc_b64 s[30:31] 6332; 6333; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_4_4: 6334; GFX940: ; %bb.0: 6335; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6336; GFX940-NEXT: ;;#ASMSTART 6337; GFX940-NEXT: ; def v[0:5] 6338; GFX940-NEXT: ;;#ASMEND 6339; GFX940-NEXT: v_mov_b32_e32 v6, 0 6340; GFX940-NEXT: v_mov_b32_e32 v0, v2 6341; GFX940-NEXT: v_mov_b32_e32 v1, v3 6342; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 sc0 sc1 6343; GFX940-NEXT: s_nop 1 6344; GFX940-NEXT: v_mov_b32_e32 v2, v4 6345; GFX940-NEXT: v_mov_b32_e32 v3, v5 6346; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 6347; GFX940-NEXT: s_waitcnt vmcnt(0) 6348; GFX940-NEXT: s_setpc_b64 s[30:31] 6349 %vec0 = call <3 x i64> asm "; def $0", "=v"() 6350 %vec1 = call <3 x i64> asm "; def $0", "=v"() 6351 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 4, i32 4> 6352 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 6353 ret void 6354} 6355 6356define void @v_shuffle_v4i64_v3i64__5_5_u_4(ptr addrspace(1) inreg %ptr) { 6357; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_u_4: 6358; GFX900: ; %bb.0: 6359; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6360; GFX900-NEXT: v_mov_b32_e32 v6, 0 6361; GFX900-NEXT: ;;#ASMSTART 6362; GFX900-NEXT: ; def v[0:5] 6363; GFX900-NEXT: ;;#ASMEND 6364; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16 6365; GFX900-NEXT: s_nop 0 6366; GFX900-NEXT: v_mov_b32_e32 v2, v4 6367; GFX900-NEXT: v_mov_b32_e32 v3, v5 6368; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] 6369; GFX900-NEXT: s_waitcnt vmcnt(0) 6370; GFX900-NEXT: s_setpc_b64 s[30:31] 6371; 6372; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_u_4: 6373; GFX90A: ; %bb.0: 6374; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6375; GFX90A-NEXT: v_mov_b32_e32 v6, 0 6376; GFX90A-NEXT: ;;#ASMSTART 6377; GFX90A-NEXT: ; def v[0:5] 6378; 
GFX90A-NEXT: ;;#ASMEND 6379; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16 6380; GFX90A-NEXT: s_nop 0 6381; GFX90A-NEXT: v_mov_b32_e32 v2, v4 6382; GFX90A-NEXT: v_mov_b32_e32 v3, v5 6383; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] 6384; GFX90A-NEXT: s_waitcnt vmcnt(0) 6385; GFX90A-NEXT: s_setpc_b64 s[30:31] 6386; 6387; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_u_4: 6388; GFX940: ; %bb.0: 6389; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6390; GFX940-NEXT: v_mov_b32_e32 v6, 0 6391; GFX940-NEXT: ;;#ASMSTART 6392; GFX940-NEXT: ; def v[0:5] 6393; GFX940-NEXT: ;;#ASMEND 6394; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 sc0 sc1 6395; GFX940-NEXT: s_nop 1 6396; GFX940-NEXT: v_mov_b32_e32 v2, v4 6397; GFX940-NEXT: v_mov_b32_e32 v3, v5 6398; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 6399; GFX940-NEXT: s_waitcnt vmcnt(0) 6400; GFX940-NEXT: s_setpc_b64 s[30:31] 6401 %vec0 = call <3 x i64> asm "; def $0", "=v"() 6402 %vec1 = call <3 x i64> asm "; def $0", "=v"() 6403 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 poison, i32 4> 6404 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 6405 ret void 6406} 6407 6408define void @v_shuffle_v4i64_v3i64__5_5_0_4(ptr addrspace(1) inreg %ptr) { 6409; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_0_4: 6410; GFX900: ; %bb.0: 6411; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6412; GFX900-NEXT: ;;#ASMSTART 6413; GFX900-NEXT: ; def v[0:5] 6414; GFX900-NEXT: ;;#ASMEND 6415; GFX900-NEXT: ;;#ASMSTART 6416; GFX900-NEXT: ; def v[2:7] 6417; GFX900-NEXT: ;;#ASMEND 6418; GFX900-NEXT: v_mov_b32_e32 v8, 0 6419; GFX900-NEXT: v_mov_b32_e32 v2, v4 6420; GFX900-NEXT: v_mov_b32_e32 v3, v5 6421; GFX900-NEXT: v_mov_b32_e32 v4, v6 6422; GFX900-NEXT: v_mov_b32_e32 v5, v7 6423; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16 6424; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] 6425; GFX900-NEXT: s_waitcnt vmcnt(0) 
6426; GFX900-NEXT: s_setpc_b64 s[30:31] 6427; 6428; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_0_4: 6429; GFX90A: ; %bb.0: 6430; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6431; GFX90A-NEXT: ;;#ASMSTART 6432; GFX90A-NEXT: ; def v[0:5] 6433; GFX90A-NEXT: ;;#ASMEND 6434; GFX90A-NEXT: ;;#ASMSTART 6435; GFX90A-NEXT: ; def v[2:7] 6436; GFX90A-NEXT: ;;#ASMEND 6437; GFX90A-NEXT: v_mov_b32_e32 v8, 0 6438; GFX90A-NEXT: v_mov_b32_e32 v2, v4 6439; GFX90A-NEXT: v_mov_b32_e32 v3, v5 6440; GFX90A-NEXT: v_mov_b32_e32 v4, v6 6441; GFX90A-NEXT: v_mov_b32_e32 v5, v7 6442; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16 6443; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] 6444; GFX90A-NEXT: s_waitcnt vmcnt(0) 6445; GFX90A-NEXT: s_setpc_b64 s[30:31] 6446; 6447; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_0_4: 6448; GFX940: ; %bb.0: 6449; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6450; GFX940-NEXT: ;;#ASMSTART 6451; GFX940-NEXT: ; def v[0:5] 6452; GFX940-NEXT: ;;#ASMEND 6453; GFX940-NEXT: v_mov_b32_e32 v8, 0 6454; GFX940-NEXT: ;;#ASMSTART 6455; GFX940-NEXT: ; def v[2:7] 6456; GFX940-NEXT: ;;#ASMEND 6457; GFX940-NEXT: s_nop 0 6458; GFX940-NEXT: v_mov_b32_e32 v2, v4 6459; GFX940-NEXT: v_mov_b32_e32 v3, v5 6460; GFX940-NEXT: v_mov_b32_e32 v4, v6 6461; GFX940-NEXT: v_mov_b32_e32 v5, v7 6462; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1 6463; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 6464; GFX940-NEXT: s_waitcnt vmcnt(0) 6465; GFX940-NEXT: s_setpc_b64 s[30:31] 6466 %vec0 = call <3 x i64> asm "; def $0", "=v"() 6467 %vec1 = call <3 x i64> asm "; def $0", "=v"() 6468 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 0, i32 4> 6469 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 6470 ret void 6471} 6472 6473define void @v_shuffle_v4i64_v3i64__5_5_1_4(ptr addrspace(1) inreg %ptr) { 6474; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_1_4: 6475; GFX900: ; %bb.0: 6476; 
GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6477; GFX900-NEXT: ;;#ASMSTART 6478; GFX900-NEXT: ; def v[0:5] 6479; GFX900-NEXT: ;;#ASMEND 6480; GFX900-NEXT: ;;#ASMSTART 6481; GFX900-NEXT: ; def v[4:9] 6482; GFX900-NEXT: ;;#ASMEND 6483; GFX900-NEXT: v_mov_b32_e32 v10, 0 6484; GFX900-NEXT: v_mov_b32_e32 v4, v6 6485; GFX900-NEXT: v_mov_b32_e32 v5, v7 6486; GFX900-NEXT: v_mov_b32_e32 v6, v8 6487; GFX900-NEXT: v_mov_b32_e32 v7, v9 6488; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16 6489; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] 6490; GFX900-NEXT: s_waitcnt vmcnt(0) 6491; GFX900-NEXT: s_setpc_b64 s[30:31] 6492; 6493; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_1_4: 6494; GFX90A: ; %bb.0: 6495; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6496; GFX90A-NEXT: ;;#ASMSTART 6497; GFX90A-NEXT: ; def v[0:5] 6498; GFX90A-NEXT: ;;#ASMEND 6499; GFX90A-NEXT: ;;#ASMSTART 6500; GFX90A-NEXT: ; def v[4:9] 6501; GFX90A-NEXT: ;;#ASMEND 6502; GFX90A-NEXT: v_mov_b32_e32 v10, 0 6503; GFX90A-NEXT: v_mov_b32_e32 v4, v6 6504; GFX90A-NEXT: v_mov_b32_e32 v5, v7 6505; GFX90A-NEXT: v_mov_b32_e32 v6, v8 6506; GFX90A-NEXT: v_mov_b32_e32 v7, v9 6507; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16 6508; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] 6509; GFX90A-NEXT: s_waitcnt vmcnt(0) 6510; GFX90A-NEXT: s_setpc_b64 s[30:31] 6511; 6512; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_1_4: 6513; GFX940: ; %bb.0: 6514; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6515; GFX940-NEXT: ;;#ASMSTART 6516; GFX940-NEXT: ; def v[0:5] 6517; GFX940-NEXT: ;;#ASMEND 6518; GFX940-NEXT: v_mov_b32_e32 v10, 0 6519; GFX940-NEXT: ;;#ASMSTART 6520; GFX940-NEXT: ; def v[4:9] 6521; GFX940-NEXT: ;;#ASMEND 6522; GFX940-NEXT: s_nop 0 6523; GFX940-NEXT: v_mov_b32_e32 v4, v6 6524; GFX940-NEXT: v_mov_b32_e32 v5, v7 6525; GFX940-NEXT: v_mov_b32_e32 v6, v8 6526; GFX940-NEXT: v_mov_b32_e32 v7, v9 6527; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] 
offset:16 sc0 sc1 6528; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] sc0 sc1 6529; GFX940-NEXT: s_waitcnt vmcnt(0) 6530; GFX940-NEXT: s_setpc_b64 s[30:31] 6531 %vec0 = call <3 x i64> asm "; def $0", "=v"() 6532 %vec1 = call <3 x i64> asm "; def $0", "=v"() 6533 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 1, i32 4> 6534 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 6535 ret void 6536} 6537 6538define void @v_shuffle_v4i64_v3i64__5_5_2_4(ptr addrspace(1) inreg %ptr) { 6539; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_2_4: 6540; GFX900: ; %bb.0: 6541; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6542; GFX900-NEXT: ;;#ASMSTART 6543; GFX900-NEXT: ; def v[6:11] 6544; GFX900-NEXT: ;;#ASMEND 6545; GFX900-NEXT: v_mov_b32_e32 v12, 0 6546; GFX900-NEXT: ;;#ASMSTART 6547; GFX900-NEXT: ; def v[0:5] 6548; GFX900-NEXT: ;;#ASMEND 6549; GFX900-NEXT: v_mov_b32_e32 v6, v4 6550; GFX900-NEXT: v_mov_b32_e32 v7, v5 6551; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16 6552; GFX900-NEXT: s_nop 0 6553; GFX900-NEXT: v_mov_b32_e32 v8, v10 6554; GFX900-NEXT: v_mov_b32_e32 v9, v11 6555; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] 6556; GFX900-NEXT: s_waitcnt vmcnt(0) 6557; GFX900-NEXT: s_setpc_b64 s[30:31] 6558; 6559; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_2_4: 6560; GFX90A: ; %bb.0: 6561; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6562; GFX90A-NEXT: ;;#ASMSTART 6563; GFX90A-NEXT: ; def v[6:11] 6564; GFX90A-NEXT: ;;#ASMEND 6565; GFX90A-NEXT: v_mov_b32_e32 v12, 0 6566; GFX90A-NEXT: ;;#ASMSTART 6567; GFX90A-NEXT: ; def v[0:5] 6568; GFX90A-NEXT: ;;#ASMEND 6569; GFX90A-NEXT: v_mov_b32_e32 v6, v4 6570; GFX90A-NEXT: v_mov_b32_e32 v7, v5 6571; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16 6572; GFX90A-NEXT: s_nop 0 6573; GFX90A-NEXT: v_mov_b32_e32 v8, v10 6574; GFX90A-NEXT: v_mov_b32_e32 v9, v11 6575; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] 6576; GFX90A-NEXT: 
s_waitcnt vmcnt(0) 6577; GFX90A-NEXT: s_setpc_b64 s[30:31] 6578; 6579; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_2_4: 6580; GFX940: ; %bb.0: 6581; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6582; GFX940-NEXT: ;;#ASMSTART 6583; GFX940-NEXT: ; def v[6:11] 6584; GFX940-NEXT: ;;#ASMEND 6585; GFX940-NEXT: v_mov_b32_e32 v12, 0 6586; GFX940-NEXT: ;;#ASMSTART 6587; GFX940-NEXT: ; def v[0:5] 6588; GFX940-NEXT: ;;#ASMEND 6589; GFX940-NEXT: s_nop 0 6590; GFX940-NEXT: v_mov_b32_e32 v6, v4 6591; GFX940-NEXT: v_mov_b32_e32 v7, v5 6592; GFX940-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] offset:16 sc0 sc1 6593; GFX940-NEXT: s_nop 1 6594; GFX940-NEXT: v_mov_b32_e32 v8, v10 6595; GFX940-NEXT: v_mov_b32_e32 v9, v11 6596; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1 6597; GFX940-NEXT: s_waitcnt vmcnt(0) 6598; GFX940-NEXT: s_setpc_b64 s[30:31] 6599 %vec0 = call <3 x i64> asm "; def $0", "=v"() 6600 %vec1 = call <3 x i64> asm "; def $0", "=v"() 6601 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 2, i32 4> 6602 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 6603 ret void 6604} 6605 6606define void @v_shuffle_v4i64_v3i64__5_5_3_4(ptr addrspace(1) inreg %ptr) { 6607; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_3_4: 6608; GFX900: ; %bb.0: 6609; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6610; GFX900-NEXT: v_mov_b32_e32 v6, 0 6611; GFX900-NEXT: ;;#ASMSTART 6612; GFX900-NEXT: ; def v[0:5] 6613; GFX900-NEXT: ;;#ASMEND 6614; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16 6615; GFX900-NEXT: s_nop 0 6616; GFX900-NEXT: v_mov_b32_e32 v2, v4 6617; GFX900-NEXT: v_mov_b32_e32 v3, v5 6618; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] 6619; GFX900-NEXT: s_waitcnt vmcnt(0) 6620; GFX900-NEXT: s_setpc_b64 s[30:31] 6621; 6622; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_3_4: 6623; GFX90A: ; %bb.0: 6624; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6625; GFX90A-NEXT: v_mov_b32_e32 v6, 0 6626; 
GFX90A-NEXT: ;;#ASMSTART 6627; GFX90A-NEXT: ; def v[0:5] 6628; GFX90A-NEXT: ;;#ASMEND 6629; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16 6630; GFX90A-NEXT: s_nop 0 6631; GFX90A-NEXT: v_mov_b32_e32 v2, v4 6632; GFX90A-NEXT: v_mov_b32_e32 v3, v5 6633; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] 6634; GFX90A-NEXT: s_waitcnt vmcnt(0) 6635; GFX90A-NEXT: s_setpc_b64 s[30:31] 6636; 6637; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_3_4: 6638; GFX940: ; %bb.0: 6639; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6640; GFX940-NEXT: v_mov_b32_e32 v6, 0 6641; GFX940-NEXT: ;;#ASMSTART 6642; GFX940-NEXT: ; def v[0:5] 6643; GFX940-NEXT: ;;#ASMEND 6644; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 sc0 sc1 6645; GFX940-NEXT: s_nop 1 6646; GFX940-NEXT: v_mov_b32_e32 v2, v4 6647; GFX940-NEXT: v_mov_b32_e32 v3, v5 6648; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 6649; GFX940-NEXT: s_waitcnt vmcnt(0) 6650; GFX940-NEXT: s_setpc_b64 s[30:31] 6651 %vec0 = call <3 x i64> asm "; def $0", "=v"() 6652 %vec1 = call <3 x i64> asm "; def $0", "=v"() 6653 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 3, i32 4> 6654 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 6655 ret void 6656} 6657 6658define void @v_shuffle_v4i64_v3i64__u_5_5_5(ptr addrspace(1) inreg %ptr) { 6659; GFX900-LABEL: v_shuffle_v4i64_v3i64__u_5_5_5: 6660; GFX900: ; %bb.0: 6661; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6662; GFX900-NEXT: ;;#ASMSTART 6663; GFX900-NEXT: ; def v[0:5] 6664; GFX900-NEXT: ;;#ASMEND 6665; GFX900-NEXT: v_mov_b32_e32 v6, 0 6666; GFX900-NEXT: v_mov_b32_e32 v2, v4 6667; GFX900-NEXT: v_mov_b32_e32 v3, v5 6668; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16 6669; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] 6670; GFX900-NEXT: s_waitcnt vmcnt(0) 6671; GFX900-NEXT: s_setpc_b64 s[30:31] 6672; 6673; GFX90A-LABEL: v_shuffle_v4i64_v3i64__u_5_5_5: 6674; GFX90A: ; 
%bb.0: 6675; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6676; GFX90A-NEXT: ;;#ASMSTART 6677; GFX90A-NEXT: ; def v[0:5] 6678; GFX90A-NEXT: ;;#ASMEND 6679; GFX90A-NEXT: v_mov_b32_e32 v6, 0 6680; GFX90A-NEXT: v_mov_b32_e32 v2, v4 6681; GFX90A-NEXT: v_mov_b32_e32 v3, v5 6682; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16 6683; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] 6684; GFX90A-NEXT: s_waitcnt vmcnt(0) 6685; GFX90A-NEXT: s_setpc_b64 s[30:31] 6686; 6687; GFX940-LABEL: v_shuffle_v4i64_v3i64__u_5_5_5: 6688; GFX940: ; %bb.0: 6689; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6690; GFX940-NEXT: ;;#ASMSTART 6691; GFX940-NEXT: ; def v[0:5] 6692; GFX940-NEXT: ;;#ASMEND 6693; GFX940-NEXT: v_mov_b32_e32 v6, 0 6694; GFX940-NEXT: v_mov_b32_e32 v2, v4 6695; GFX940-NEXT: v_mov_b32_e32 v3, v5 6696; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1 6697; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 6698; GFX940-NEXT: s_waitcnt vmcnt(0) 6699; GFX940-NEXT: s_setpc_b64 s[30:31] 6700 %vec0 = call <3 x i64> asm "; def $0", "=v"() 6701 %vec1 = call <3 x i64> asm "; def $0", "=v"() 6702 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 poison, i32 5, i32 5, i32 5> 6703 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 6704 ret void 6705} 6706 6707define void @v_shuffle_v4i64_v3i64__0_5_5_5(ptr addrspace(1) inreg %ptr) { 6708; GFX900-LABEL: v_shuffle_v4i64_v3i64__0_5_5_5: 6709; GFX900: ; %bb.0: 6710; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6711; GFX900-NEXT: ;;#ASMSTART 6712; GFX900-NEXT: ; def v[0:5] 6713; GFX900-NEXT: ;;#ASMEND 6714; GFX900-NEXT: ;;#ASMSTART 6715; GFX900-NEXT: ; def v[2:7] 6716; GFX900-NEXT: ;;#ASMEND 6717; GFX900-NEXT: v_mov_b32_e32 v8, 0 6718; GFX900-NEXT: v_mov_b32_e32 v4, v6 6719; GFX900-NEXT: v_mov_b32_e32 v5, v7 6720; GFX900-NEXT: v_mov_b32_e32 v2, v6 6721; GFX900-NEXT: v_mov_b32_e32 v3, v7 6722; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], 
s[16:17] offset:16 6723; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] 6724; GFX900-NEXT: s_waitcnt vmcnt(0) 6725; GFX900-NEXT: s_setpc_b64 s[30:31] 6726; 6727; GFX90A-LABEL: v_shuffle_v4i64_v3i64__0_5_5_5: 6728; GFX90A: ; %bb.0: 6729; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6730; GFX90A-NEXT: ;;#ASMSTART 6731; GFX90A-NEXT: ; def v[0:5] 6732; GFX90A-NEXT: ;;#ASMEND 6733; GFX90A-NEXT: ;;#ASMSTART 6734; GFX90A-NEXT: ; def v[2:7] 6735; GFX90A-NEXT: ;;#ASMEND 6736; GFX90A-NEXT: v_mov_b32_e32 v8, 0 6737; GFX90A-NEXT: v_mov_b32_e32 v4, v6 6738; GFX90A-NEXT: v_mov_b32_e32 v5, v7 6739; GFX90A-NEXT: v_mov_b32_e32 v2, v6 6740; GFX90A-NEXT: v_mov_b32_e32 v3, v7 6741; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16 6742; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] 6743; GFX90A-NEXT: s_waitcnt vmcnt(0) 6744; GFX90A-NEXT: s_setpc_b64 s[30:31] 6745; 6746; GFX940-LABEL: v_shuffle_v4i64_v3i64__0_5_5_5: 6747; GFX940: ; %bb.0: 6748; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6749; GFX940-NEXT: ;;#ASMSTART 6750; GFX940-NEXT: ; def v[0:5] 6751; GFX940-NEXT: ;;#ASMEND 6752; GFX940-NEXT: v_mov_b32_e32 v8, 0 6753; GFX940-NEXT: ;;#ASMSTART 6754; GFX940-NEXT: ; def v[2:7] 6755; GFX940-NEXT: ;;#ASMEND 6756; GFX940-NEXT: s_nop 0 6757; GFX940-NEXT: v_mov_b32_e32 v4, v6 6758; GFX940-NEXT: v_mov_b32_e32 v5, v7 6759; GFX940-NEXT: v_mov_b32_e32 v2, v6 6760; GFX940-NEXT: v_mov_b32_e32 v3, v7 6761; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 sc0 sc1 6762; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 6763; GFX940-NEXT: s_waitcnt vmcnt(0) 6764; GFX940-NEXT: s_setpc_b64 s[30:31] 6765 %vec0 = call <3 x i64> asm "; def $0", "=v"() 6766 %vec1 = call <3 x i64> asm "; def $0", "=v"() 6767 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 0, i32 5, i32 5, i32 5> 6768 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 6769 ret void 6770} 6771 6772define void 
@v_shuffle_v4i64_v3i64__1_5_5_5(ptr addrspace(1) inreg %ptr) { 6773; GFX900-LABEL: v_shuffle_v4i64_v3i64__1_5_5_5: 6774; GFX900: ; %bb.0: 6775; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6776; GFX900-NEXT: ;;#ASMSTART 6777; GFX900-NEXT: ; def v[0:5] 6778; GFX900-NEXT: ;;#ASMEND 6779; GFX900-NEXT: ;;#ASMSTART 6780; GFX900-NEXT: ; def v[4:9] 6781; GFX900-NEXT: ;;#ASMEND 6782; GFX900-NEXT: v_mov_b32_e32 v10, 0 6783; GFX900-NEXT: v_mov_b32_e32 v6, v8 6784; GFX900-NEXT: v_mov_b32_e32 v7, v9 6785; GFX900-NEXT: v_mov_b32_e32 v4, v8 6786; GFX900-NEXT: v_mov_b32_e32 v5, v9 6787; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16 6788; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] 6789; GFX900-NEXT: s_waitcnt vmcnt(0) 6790; GFX900-NEXT: s_setpc_b64 s[30:31] 6791; 6792; GFX90A-LABEL: v_shuffle_v4i64_v3i64__1_5_5_5: 6793; GFX90A: ; %bb.0: 6794; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6795; GFX90A-NEXT: ;;#ASMSTART 6796; GFX90A-NEXT: ; def v[0:5] 6797; GFX90A-NEXT: ;;#ASMEND 6798; GFX90A-NEXT: ;;#ASMSTART 6799; GFX90A-NEXT: ; def v[4:9] 6800; GFX90A-NEXT: ;;#ASMEND 6801; GFX90A-NEXT: v_mov_b32_e32 v10, 0 6802; GFX90A-NEXT: v_mov_b32_e32 v6, v8 6803; GFX90A-NEXT: v_mov_b32_e32 v7, v9 6804; GFX90A-NEXT: v_mov_b32_e32 v4, v8 6805; GFX90A-NEXT: v_mov_b32_e32 v5, v9 6806; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16 6807; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] 6808; GFX90A-NEXT: s_waitcnt vmcnt(0) 6809; GFX90A-NEXT: s_setpc_b64 s[30:31] 6810; 6811; GFX940-LABEL: v_shuffle_v4i64_v3i64__1_5_5_5: 6812; GFX940: ; %bb.0: 6813; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6814; GFX940-NEXT: ;;#ASMSTART 6815; GFX940-NEXT: ; def v[0:5] 6816; GFX940-NEXT: ;;#ASMEND 6817; GFX940-NEXT: v_mov_b32_e32 v10, 0 6818; GFX940-NEXT: ;;#ASMSTART 6819; GFX940-NEXT: ; def v[4:9] 6820; GFX940-NEXT: ;;#ASMEND 6821; GFX940-NEXT: s_nop 0 6822; GFX940-NEXT: v_mov_b32_e32 v6, v8 6823; GFX940-NEXT: v_mov_b32_e32 v7, v9 
6824; GFX940-NEXT: v_mov_b32_e32 v4, v8 6825; GFX940-NEXT: v_mov_b32_e32 v5, v9 6826; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16 sc0 sc1 6827; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] sc0 sc1 6828; GFX940-NEXT: s_waitcnt vmcnt(0) 6829; GFX940-NEXT: s_setpc_b64 s[30:31] 6830 %vec0 = call <3 x i64> asm "; def $0", "=v"() 6831 %vec1 = call <3 x i64> asm "; def $0", "=v"() 6832 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 1, i32 5, i32 5, i32 5> 6833 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 6834 ret void 6835} 6836 6837define void @v_shuffle_v4i64_v3i64__2_5_5_5(ptr addrspace(1) inreg %ptr) { 6838; GFX900-LABEL: v_shuffle_v4i64_v3i64__2_5_5_5: 6839; GFX900: ; %bb.0: 6840; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6841; GFX900-NEXT: ;;#ASMSTART 6842; GFX900-NEXT: ; def v[6:11] 6843; GFX900-NEXT: ;;#ASMEND 6844; GFX900-NEXT: v_mov_b32_e32 v12, 0 6845; GFX900-NEXT: v_mov_b32_e32 v8, v10 6846; GFX900-NEXT: v_mov_b32_e32 v9, v11 6847; GFX900-NEXT: ;;#ASMSTART 6848; GFX900-NEXT: ; def v[0:5] 6849; GFX900-NEXT: ;;#ASMEND 6850; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16 6851; GFX900-NEXT: s_nop 0 6852; GFX900-NEXT: v_mov_b32_e32 v8, v4 6853; GFX900-NEXT: v_mov_b32_e32 v9, v5 6854; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] 6855; GFX900-NEXT: s_waitcnt vmcnt(0) 6856; GFX900-NEXT: s_setpc_b64 s[30:31] 6857; 6858; GFX90A-LABEL: v_shuffle_v4i64_v3i64__2_5_5_5: 6859; GFX90A: ; %bb.0: 6860; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6861; GFX90A-NEXT: ;;#ASMSTART 6862; GFX90A-NEXT: ; def v[6:11] 6863; GFX90A-NEXT: ;;#ASMEND 6864; GFX90A-NEXT: v_mov_b32_e32 v12, 0 6865; GFX90A-NEXT: v_mov_b32_e32 v8, v10 6866; GFX90A-NEXT: v_mov_b32_e32 v9, v11 6867; GFX90A-NEXT: ;;#ASMSTART 6868; GFX90A-NEXT: ; def v[0:5] 6869; GFX90A-NEXT: ;;#ASMEND 6870; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16 6871; GFX90A-NEXT: s_nop 0 6872; GFX90A-NEXT: 
v_mov_b32_e32 v8, v4 6873; GFX90A-NEXT: v_mov_b32_e32 v9, v5 6874; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] 6875; GFX90A-NEXT: s_waitcnt vmcnt(0) 6876; GFX90A-NEXT: s_setpc_b64 s[30:31] 6877; 6878; GFX940-LABEL: v_shuffle_v4i64_v3i64__2_5_5_5: 6879; GFX940: ; %bb.0: 6880; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6881; GFX940-NEXT: ;;#ASMSTART 6882; GFX940-NEXT: ; def v[6:11] 6883; GFX940-NEXT: ;;#ASMEND 6884; GFX940-NEXT: v_mov_b32_e32 v12, 0 6885; GFX940-NEXT: v_mov_b32_e32 v8, v10 6886; GFX940-NEXT: v_mov_b32_e32 v9, v11 6887; GFX940-NEXT: ;;#ASMSTART 6888; GFX940-NEXT: ; def v[0:5] 6889; GFX940-NEXT: ;;#ASMEND 6890; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] offset:16 sc0 sc1 6891; GFX940-NEXT: s_nop 1 6892; GFX940-NEXT: v_mov_b32_e32 v8, v4 6893; GFX940-NEXT: v_mov_b32_e32 v9, v5 6894; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1 6895; GFX940-NEXT: s_waitcnt vmcnt(0) 6896; GFX940-NEXT: s_setpc_b64 s[30:31] 6897 %vec0 = call <3 x i64> asm "; def $0", "=v"() 6898 %vec1 = call <3 x i64> asm "; def $0", "=v"() 6899 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 2, i32 5, i32 5, i32 5> 6900 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 6901 ret void 6902} 6903 6904define void @v_shuffle_v4i64_v3i64__3_5_5_5(ptr addrspace(1) inreg %ptr) { 6905; GFX900-LABEL: v_shuffle_v4i64_v3i64__3_5_5_5: 6906; GFX900: ; %bb.0: 6907; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6908; GFX900-NEXT: ;;#ASMSTART 6909; GFX900-NEXT: ; def v[0:5] 6910; GFX900-NEXT: ;;#ASMEND 6911; GFX900-NEXT: v_mov_b32_e32 v6, 0 6912; GFX900-NEXT: v_mov_b32_e32 v2, v4 6913; GFX900-NEXT: v_mov_b32_e32 v3, v5 6914; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16 6915; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] 6916; GFX900-NEXT: s_waitcnt vmcnt(0) 6917; GFX900-NEXT: s_setpc_b64 s[30:31] 6918; 6919; GFX90A-LABEL: v_shuffle_v4i64_v3i64__3_5_5_5: 6920; GFX90A: ; %bb.0: 6921; 
GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6922; GFX90A-NEXT: ;;#ASMSTART 6923; GFX90A-NEXT: ; def v[0:5] 6924; GFX90A-NEXT: ;;#ASMEND 6925; GFX90A-NEXT: v_mov_b32_e32 v6, 0 6926; GFX90A-NEXT: v_mov_b32_e32 v2, v4 6927; GFX90A-NEXT: v_mov_b32_e32 v3, v5 6928; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16 6929; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] 6930; GFX90A-NEXT: s_waitcnt vmcnt(0) 6931; GFX90A-NEXT: s_setpc_b64 s[30:31] 6932; 6933; GFX940-LABEL: v_shuffle_v4i64_v3i64__3_5_5_5: 6934; GFX940: ; %bb.0: 6935; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6936; GFX940-NEXT: ;;#ASMSTART 6937; GFX940-NEXT: ; def v[0:5] 6938; GFX940-NEXT: ;;#ASMEND 6939; GFX940-NEXT: v_mov_b32_e32 v6, 0 6940; GFX940-NEXT: v_mov_b32_e32 v2, v4 6941; GFX940-NEXT: v_mov_b32_e32 v3, v5 6942; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1 6943; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 6944; GFX940-NEXT: s_waitcnt vmcnt(0) 6945; GFX940-NEXT: s_setpc_b64 s[30:31] 6946 %vec0 = call <3 x i64> asm "; def $0", "=v"() 6947 %vec1 = call <3 x i64> asm "; def $0", "=v"() 6948 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 3, i32 5, i32 5, i32 5> 6949 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 6950 ret void 6951} 6952 6953define void @v_shuffle_v4i64_v3i64__4_5_5_5(ptr addrspace(1) inreg %ptr) { 6954; GFX900-LABEL: v_shuffle_v4i64_v3i64__4_5_5_5: 6955; GFX900: ; %bb.0: 6956; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6957; GFX900-NEXT: v_mov_b32_e32 v10, 0 6958; GFX900-NEXT: ;;#ASMSTART 6959; GFX900-NEXT: ; def v[0:5] 6960; GFX900-NEXT: ;;#ASMEND 6961; GFX900-NEXT: v_mov_b32_e32 v6, v4 6962; GFX900-NEXT: v_mov_b32_e32 v7, v5 6963; GFX900-NEXT: v_mov_b32_e32 v8, v4 6964; GFX900-NEXT: v_mov_b32_e32 v9, v5 6965; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16 6966; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] 6967; GFX900-NEXT: 
s_waitcnt vmcnt(0) 6968; GFX900-NEXT: s_setpc_b64 s[30:31] 6969; 6970; GFX90A-LABEL: v_shuffle_v4i64_v3i64__4_5_5_5: 6971; GFX90A: ; %bb.0: 6972; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6973; GFX90A-NEXT: v_mov_b32_e32 v10, 0 6974; GFX90A-NEXT: ;;#ASMSTART 6975; GFX90A-NEXT: ; def v[0:5] 6976; GFX90A-NEXT: ;;#ASMEND 6977; GFX90A-NEXT: v_mov_b32_e32 v6, v4 6978; GFX90A-NEXT: v_mov_b32_e32 v7, v5 6979; GFX90A-NEXT: v_mov_b32_e32 v8, v4 6980; GFX90A-NEXT: v_mov_b32_e32 v9, v5 6981; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16 6982; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] 6983; GFX90A-NEXT: s_waitcnt vmcnt(0) 6984; GFX90A-NEXT: s_setpc_b64 s[30:31] 6985; 6986; GFX940-LABEL: v_shuffle_v4i64_v3i64__4_5_5_5: 6987; GFX940: ; %bb.0: 6988; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6989; GFX940-NEXT: v_mov_b32_e32 v10, 0 6990; GFX940-NEXT: ;;#ASMSTART 6991; GFX940-NEXT: ; def v[0:5] 6992; GFX940-NEXT: ;;#ASMEND 6993; GFX940-NEXT: s_nop 0 6994; GFX940-NEXT: v_mov_b32_e32 v6, v4 6995; GFX940-NEXT: v_mov_b32_e32 v7, v5 6996; GFX940-NEXT: v_mov_b32_e32 v8, v4 6997; GFX940-NEXT: v_mov_b32_e32 v9, v5 6998; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16 sc0 sc1 6999; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] sc0 sc1 7000; GFX940-NEXT: s_waitcnt vmcnt(0) 7001; GFX940-NEXT: s_setpc_b64 s[30:31] 7002 %vec0 = call <3 x i64> asm "; def $0", "=v"() 7003 %vec1 = call <3 x i64> asm "; def $0", "=v"() 7004 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 4, i32 5, i32 5, i32 5> 7005 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 7006 ret void 7007} 7008 7009define void @v_shuffle_v4i64_v3i64__5_u_5_5(ptr addrspace(1) inreg %ptr) { 7010; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_u_5_5: 7011; GFX900: ; %bb.0: 7012; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7013; GFX900-NEXT: ;;#ASMSTART 7014; GFX900-NEXT: ; def v[0:5] 7015; GFX900-NEXT: ;;#ASMEND 7016; GFX900-NEXT: 
v_mov_b32_e32 v6, 0 7017; GFX900-NEXT: v_mov_b32_e32 v2, v4 7018; GFX900-NEXT: v_mov_b32_e32 v3, v5 7019; GFX900-NEXT: v_mov_b32_e32 v0, v4 7020; GFX900-NEXT: v_mov_b32_e32 v1, v5 7021; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16 7022; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] 7023; GFX900-NEXT: s_waitcnt vmcnt(0) 7024; GFX900-NEXT: s_setpc_b64 s[30:31] 7025; 7026; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_u_5_5: 7027; GFX90A: ; %bb.0: 7028; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7029; GFX90A-NEXT: ;;#ASMSTART 7030; GFX90A-NEXT: ; def v[0:5] 7031; GFX90A-NEXT: ;;#ASMEND 7032; GFX90A-NEXT: v_mov_b32_e32 v6, 0 7033; GFX90A-NEXT: v_mov_b32_e32 v2, v4 7034; GFX90A-NEXT: v_mov_b32_e32 v3, v5 7035; GFX90A-NEXT: v_mov_b32_e32 v0, v4 7036; GFX90A-NEXT: v_mov_b32_e32 v1, v5 7037; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16 7038; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] 7039; GFX90A-NEXT: s_waitcnt vmcnt(0) 7040; GFX90A-NEXT: s_setpc_b64 s[30:31] 7041; 7042; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_u_5_5: 7043; GFX940: ; %bb.0: 7044; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7045; GFX940-NEXT: ;;#ASMSTART 7046; GFX940-NEXT: ; def v[0:5] 7047; GFX940-NEXT: ;;#ASMEND 7048; GFX940-NEXT: v_mov_b32_e32 v6, 0 7049; GFX940-NEXT: v_mov_b32_e32 v2, v4 7050; GFX940-NEXT: v_mov_b32_e32 v3, v5 7051; GFX940-NEXT: v_mov_b32_e32 v0, v4 7052; GFX940-NEXT: v_mov_b32_e32 v1, v5 7053; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1 7054; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 7055; GFX940-NEXT: s_waitcnt vmcnt(0) 7056; GFX940-NEXT: s_setpc_b64 s[30:31] 7057 %vec0 = call <3 x i64> asm "; def $0", "=v"() 7058 %vec1 = call <3 x i64> asm "; def $0", "=v"() 7059 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 poison, i32 5, i32 5> 7060 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 7061 ret void 7062} 7063 7064define void 
@v_shuffle_v4i64_v3i64__5_0_5_5(ptr addrspace(1) inreg %ptr) { 7065; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_0_5_5: 7066; GFX900: ; %bb.0: 7067; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7068; GFX900-NEXT: ;;#ASMSTART 7069; GFX900-NEXT: ; def v[0:5] 7070; GFX900-NEXT: ;;#ASMEND 7071; GFX900-NEXT: ;;#ASMSTART 7072; GFX900-NEXT: ; def v[2:7] 7073; GFX900-NEXT: ;;#ASMEND 7074; GFX900-NEXT: v_mov_b32_e32 v8, 0 7075; GFX900-NEXT: v_mov_b32_e32 v4, v6 7076; GFX900-NEXT: v_mov_b32_e32 v5, v7 7077; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16 7078; GFX900-NEXT: v_mov_b32_e32 v2, v6 7079; GFX900-NEXT: v_mov_b32_e32 v3, v7 7080; GFX900-NEXT: v_mov_b32_e32 v4, v0 7081; GFX900-NEXT: v_mov_b32_e32 v5, v1 7082; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] 7083; GFX900-NEXT: s_waitcnt vmcnt(0) 7084; GFX900-NEXT: s_setpc_b64 s[30:31] 7085; 7086; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_0_5_5: 7087; GFX90A: ; %bb.0: 7088; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7089; GFX90A-NEXT: ;;#ASMSTART 7090; GFX90A-NEXT: ; def v[0:5] 7091; GFX90A-NEXT: ;;#ASMEND 7092; GFX90A-NEXT: ;;#ASMSTART 7093; GFX90A-NEXT: ; def v[2:7] 7094; GFX90A-NEXT: ;;#ASMEND 7095; GFX90A-NEXT: v_mov_b32_e32 v8, 0 7096; GFX90A-NEXT: v_mov_b32_e32 v4, v6 7097; GFX90A-NEXT: v_mov_b32_e32 v5, v7 7098; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16 7099; GFX90A-NEXT: v_mov_b32_e32 v2, v6 7100; GFX90A-NEXT: v_mov_b32_e32 v3, v7 7101; GFX90A-NEXT: v_mov_b32_e32 v4, v0 7102; GFX90A-NEXT: v_mov_b32_e32 v5, v1 7103; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] 7104; GFX90A-NEXT: s_waitcnt vmcnt(0) 7105; GFX90A-NEXT: s_setpc_b64 s[30:31] 7106; 7107; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_0_5_5: 7108; GFX940: ; %bb.0: 7109; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7110; GFX940-NEXT: ;;#ASMSTART 7111; GFX940-NEXT: ; def v[0:5] 7112; GFX940-NEXT: ;;#ASMEND 7113; GFX940-NEXT: v_mov_b32_e32 v8, 0 7114; GFX940-NEXT: ;;#ASMSTART 7115; 
GFX940-NEXT: ; def v[2:7] 7116; GFX940-NEXT: ;;#ASMEND 7117; GFX940-NEXT: s_nop 0 7118; GFX940-NEXT: v_mov_b32_e32 v4, v6 7119; GFX940-NEXT: v_mov_b32_e32 v5, v7 7120; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 sc0 sc1 7121; GFX940-NEXT: v_mov_b32_e32 v2, v6 7122; GFX940-NEXT: v_mov_b32_e32 v3, v7 7123; GFX940-NEXT: v_mov_b32_e32 v4, v0 7124; GFX940-NEXT: v_mov_b32_e32 v5, v1 7125; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 7126; GFX940-NEXT: s_waitcnt vmcnt(0) 7127; GFX940-NEXT: s_setpc_b64 s[30:31] 7128 %vec0 = call <3 x i64> asm "; def $0", "=v"() 7129 %vec1 = call <3 x i64> asm "; def $0", "=v"() 7130 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 0, i32 5, i32 5> 7131 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 7132 ret void 7133} 7134 7135define void @v_shuffle_v4i64_v3i64__5_1_5_5(ptr addrspace(1) inreg %ptr) { 7136; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_1_5_5: 7137; GFX900: ; %bb.0: 7138; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7139; GFX900-NEXT: ;;#ASMSTART 7140; GFX900-NEXT: ; def v[0:5] 7141; GFX900-NEXT: ;;#ASMEND 7142; GFX900-NEXT: ;;#ASMSTART 7143; GFX900-NEXT: ; def v[4:9] 7144; GFX900-NEXT: ;;#ASMEND 7145; GFX900-NEXT: v_mov_b32_e32 v10, 0 7146; GFX900-NEXT: v_mov_b32_e32 v6, v8 7147; GFX900-NEXT: v_mov_b32_e32 v7, v9 7148; GFX900-NEXT: v_mov_b32_e32 v0, v8 7149; GFX900-NEXT: v_mov_b32_e32 v1, v9 7150; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16 7151; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] 7152; GFX900-NEXT: s_waitcnt vmcnt(0) 7153; GFX900-NEXT: s_setpc_b64 s[30:31] 7154; 7155; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_1_5_5: 7156; GFX90A: ; %bb.0: 7157; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7158; GFX90A-NEXT: ;;#ASMSTART 7159; GFX90A-NEXT: ; def v[0:5] 7160; GFX90A-NEXT: ;;#ASMEND 7161; GFX90A-NEXT: ;;#ASMSTART 7162; GFX90A-NEXT: ; def v[4:9] 7163; GFX90A-NEXT: ;;#ASMEND 7164; GFX90A-NEXT: v_mov_b32_e32 
v10, 0 7165; GFX90A-NEXT: v_mov_b32_e32 v6, v8 7166; GFX90A-NEXT: v_mov_b32_e32 v7, v9 7167; GFX90A-NEXT: v_mov_b32_e32 v0, v8 7168; GFX90A-NEXT: v_mov_b32_e32 v1, v9 7169; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16 7170; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] 7171; GFX90A-NEXT: s_waitcnt vmcnt(0) 7172; GFX90A-NEXT: s_setpc_b64 s[30:31] 7173; 7174; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_1_5_5: 7175; GFX940: ; %bb.0: 7176; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7177; GFX940-NEXT: ;;#ASMSTART 7178; GFX940-NEXT: ; def v[0:5] 7179; GFX940-NEXT: ;;#ASMEND 7180; GFX940-NEXT: v_mov_b32_e32 v10, 0 7181; GFX940-NEXT: ;;#ASMSTART 7182; GFX940-NEXT: ; def v[4:9] 7183; GFX940-NEXT: ;;#ASMEND 7184; GFX940-NEXT: s_nop 0 7185; GFX940-NEXT: v_mov_b32_e32 v6, v8 7186; GFX940-NEXT: v_mov_b32_e32 v7, v9 7187; GFX940-NEXT: v_mov_b32_e32 v0, v8 7188; GFX940-NEXT: v_mov_b32_e32 v1, v9 7189; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16 sc0 sc1 7190; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] sc0 sc1 7191; GFX940-NEXT: s_waitcnt vmcnt(0) 7192; GFX940-NEXT: s_setpc_b64 s[30:31] 7193 %vec0 = call <3 x i64> asm "; def $0", "=v"() 7194 %vec1 = call <3 x i64> asm "; def $0", "=v"() 7195 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 1, i32 5, i32 5> 7196 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 7197 ret void 7198} 7199 7200define void @v_shuffle_v4i64_v3i64__5_2_5_5(ptr addrspace(1) inreg %ptr) { 7201; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_2_5_5: 7202; GFX900: ; %bb.0: 7203; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7204; GFX900-NEXT: ;;#ASMSTART 7205; GFX900-NEXT: ; def v[0:5] 7206; GFX900-NEXT: ;;#ASMEND 7207; GFX900-NEXT: ;;#ASMSTART 7208; GFX900-NEXT: ; def v[6:11] 7209; GFX900-NEXT: ;;#ASMEND 7210; GFX900-NEXT: v_mov_b32_e32 v12, 0 7211; GFX900-NEXT: v_mov_b32_e32 v8, v10 7212; GFX900-NEXT: v_mov_b32_e32 v9, v11 7213; GFX900-NEXT: v_mov_b32_e32 v2, 
v10 7214; GFX900-NEXT: v_mov_b32_e32 v3, v11 7215; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16 7216; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] 7217; GFX900-NEXT: s_waitcnt vmcnt(0) 7218; GFX900-NEXT: s_setpc_b64 s[30:31] 7219; 7220; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_2_5_5: 7221; GFX90A: ; %bb.0: 7222; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7223; GFX90A-NEXT: ;;#ASMSTART 7224; GFX90A-NEXT: ; def v[0:5] 7225; GFX90A-NEXT: ;;#ASMEND 7226; GFX90A-NEXT: ;;#ASMSTART 7227; GFX90A-NEXT: ; def v[6:11] 7228; GFX90A-NEXT: ;;#ASMEND 7229; GFX90A-NEXT: v_mov_b32_e32 v12, 0 7230; GFX90A-NEXT: v_mov_b32_e32 v8, v10 7231; GFX90A-NEXT: v_mov_b32_e32 v9, v11 7232; GFX90A-NEXT: v_mov_b32_e32 v2, v10 7233; GFX90A-NEXT: v_mov_b32_e32 v3, v11 7234; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16 7235; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] 7236; GFX90A-NEXT: s_waitcnt vmcnt(0) 7237; GFX90A-NEXT: s_setpc_b64 s[30:31] 7238; 7239; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_2_5_5: 7240; GFX940: ; %bb.0: 7241; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7242; GFX940-NEXT: ;;#ASMSTART 7243; GFX940-NEXT: ; def v[0:5] 7244; GFX940-NEXT: ;;#ASMEND 7245; GFX940-NEXT: ;;#ASMSTART 7246; GFX940-NEXT: ; def v[6:11] 7247; GFX940-NEXT: ;;#ASMEND 7248; GFX940-NEXT: v_mov_b32_e32 v12, 0 7249; GFX940-NEXT: v_mov_b32_e32 v8, v10 7250; GFX940-NEXT: v_mov_b32_e32 v9, v11 7251; GFX940-NEXT: v_mov_b32_e32 v2, v10 7252; GFX940-NEXT: v_mov_b32_e32 v3, v11 7253; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] offset:16 sc0 sc1 7254; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] sc0 sc1 7255; GFX940-NEXT: s_waitcnt vmcnt(0) 7256; GFX940-NEXT: s_setpc_b64 s[30:31] 7257 %vec0 = call <3 x i64> asm "; def $0", "=v"() 7258 %vec1 = call <3 x i64> asm "; def $0", "=v"() 7259 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 2, i32 5, i32 5> 7260 store <4 x i64> %shuf, ptr 
addrspace(1) %ptr, align 32 7261 ret void 7262} 7263 7264define void @v_shuffle_v4i64_v3i64__5_3_5_5(ptr addrspace(1) inreg %ptr) { 7265; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_3_5_5: 7266; GFX900: ; %bb.0: 7267; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7268; GFX900-NEXT: ;;#ASMSTART 7269; GFX900-NEXT: ; def v[0:5] 7270; GFX900-NEXT: ;;#ASMEND 7271; GFX900-NEXT: v_mov_b32_e32 v6, 0 7272; GFX900-NEXT: v_mov_b32_e32 v2, v4 7273; GFX900-NEXT: v_mov_b32_e32 v3, v5 7274; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16 7275; GFX900-NEXT: s_nop 0 7276; GFX900-NEXT: v_mov_b32_e32 v4, v0 7277; GFX900-NEXT: v_mov_b32_e32 v5, v1 7278; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] 7279; GFX900-NEXT: s_waitcnt vmcnt(0) 7280; GFX900-NEXT: s_setpc_b64 s[30:31] 7281; 7282; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_3_5_5: 7283; GFX90A: ; %bb.0: 7284; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7285; GFX90A-NEXT: ;;#ASMSTART 7286; GFX90A-NEXT: ; def v[0:5] 7287; GFX90A-NEXT: ;;#ASMEND 7288; GFX90A-NEXT: v_mov_b32_e32 v6, 0 7289; GFX90A-NEXT: v_mov_b32_e32 v2, v4 7290; GFX90A-NEXT: v_mov_b32_e32 v3, v5 7291; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16 7292; GFX90A-NEXT: s_nop 0 7293; GFX90A-NEXT: v_mov_b32_e32 v4, v0 7294; GFX90A-NEXT: v_mov_b32_e32 v5, v1 7295; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] 7296; GFX90A-NEXT: s_waitcnt vmcnt(0) 7297; GFX90A-NEXT: s_setpc_b64 s[30:31] 7298; 7299; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_3_5_5: 7300; GFX940: ; %bb.0: 7301; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7302; GFX940-NEXT: ;;#ASMSTART 7303; GFX940-NEXT: ; def v[0:5] 7304; GFX940-NEXT: ;;#ASMEND 7305; GFX940-NEXT: v_mov_b32_e32 v6, 0 7306; GFX940-NEXT: v_mov_b32_e32 v2, v4 7307; GFX940-NEXT: v_mov_b32_e32 v3, v5 7308; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1 7309; GFX940-NEXT: s_nop 1 7310; GFX940-NEXT: v_mov_b32_e32 v4, v0 7311; GFX940-NEXT: v_mov_b32_e32 v5, v1 
7312; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 7313; GFX940-NEXT: s_waitcnt vmcnt(0) 7314; GFX940-NEXT: s_setpc_b64 s[30:31] 7315 %vec0 = call <3 x i64> asm "; def $0", "=v"() 7316 %vec1 = call <3 x i64> asm "; def $0", "=v"() 7317 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 3, i32 5, i32 5> 7318 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 7319 ret void 7320} 7321 7322define void @v_shuffle_v4i64_v3i64__5_4_5_5(ptr addrspace(1) inreg %ptr) { 7323; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_4_5_5: 7324; GFX900: ; %bb.0: 7325; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7326; GFX900-NEXT: ;;#ASMSTART 7327; GFX900-NEXT: ; def v[0:5] 7328; GFX900-NEXT: ;;#ASMEND 7329; GFX900-NEXT: v_mov_b32_e32 v10, 0 7330; GFX900-NEXT: v_mov_b32_e32 v6, v4 7331; GFX900-NEXT: v_mov_b32_e32 v7, v5 7332; GFX900-NEXT: v_mov_b32_e32 v8, v4 7333; GFX900-NEXT: v_mov_b32_e32 v9, v5 7334; GFX900-NEXT: v_mov_b32_e32 v0, v4 7335; GFX900-NEXT: v_mov_b32_e32 v1, v5 7336; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16 7337; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] 7338; GFX900-NEXT: s_waitcnt vmcnt(0) 7339; GFX900-NEXT: s_setpc_b64 s[30:31] 7340; 7341; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_4_5_5: 7342; GFX90A: ; %bb.0: 7343; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7344; GFX90A-NEXT: ;;#ASMSTART 7345; GFX90A-NEXT: ; def v[0:5] 7346; GFX90A-NEXT: ;;#ASMEND 7347; GFX90A-NEXT: v_mov_b32_e32 v10, 0 7348; GFX90A-NEXT: v_mov_b32_e32 v6, v4 7349; GFX90A-NEXT: v_mov_b32_e32 v7, v5 7350; GFX90A-NEXT: v_mov_b32_e32 v8, v4 7351; GFX90A-NEXT: v_mov_b32_e32 v9, v5 7352; GFX90A-NEXT: v_mov_b32_e32 v0, v4 7353; GFX90A-NEXT: v_mov_b32_e32 v1, v5 7354; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16 7355; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] 7356; GFX90A-NEXT: s_waitcnt vmcnt(0) 7357; GFX90A-NEXT: s_setpc_b64 s[30:31] 7358; 7359; GFX940-LABEL: 
v_shuffle_v4i64_v3i64__5_4_5_5: 7360; GFX940: ; %bb.0: 7361; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7362; GFX940-NEXT: ;;#ASMSTART 7363; GFX940-NEXT: ; def v[0:5] 7364; GFX940-NEXT: ;;#ASMEND 7365; GFX940-NEXT: v_mov_b32_e32 v10, 0 7366; GFX940-NEXT: v_mov_b32_e32 v6, v4 7367; GFX940-NEXT: v_mov_b32_e32 v7, v5 7368; GFX940-NEXT: v_mov_b32_e32 v8, v4 7369; GFX940-NEXT: v_mov_b32_e32 v9, v5 7370; GFX940-NEXT: v_mov_b32_e32 v0, v4 7371; GFX940-NEXT: v_mov_b32_e32 v1, v5 7372; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16 sc0 sc1 7373; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] sc0 sc1 7374; GFX940-NEXT: s_waitcnt vmcnt(0) 7375; GFX940-NEXT: s_setpc_b64 s[30:31] 7376 %vec0 = call <3 x i64> asm "; def $0", "=v"() 7377 %vec1 = call <3 x i64> asm "; def $0", "=v"() 7378 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 4, i32 5, i32 5> 7379 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 7380 ret void 7381} 7382 7383define void @v_shuffle_v4i64_v3i64__5_5_u_5(ptr addrspace(1) inreg %ptr) { 7384; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_u_5: 7385; GFX900: ; %bb.0: 7386; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7387; GFX900-NEXT: v_mov_b32_e32 v6, 0 7388; GFX900-NEXT: ;;#ASMSTART 7389; GFX900-NEXT: ; def v[0:5] 7390; GFX900-NEXT: ;;#ASMEND 7391; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16 7392; GFX900-NEXT: s_nop 0 7393; GFX900-NEXT: v_mov_b32_e32 v2, v4 7394; GFX900-NEXT: v_mov_b32_e32 v3, v5 7395; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] 7396; GFX900-NEXT: s_waitcnt vmcnt(0) 7397; GFX900-NEXT: s_setpc_b64 s[30:31] 7398; 7399; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_u_5: 7400; GFX90A: ; %bb.0: 7401; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7402; GFX90A-NEXT: v_mov_b32_e32 v6, 0 7403; GFX90A-NEXT: ;;#ASMSTART 7404; GFX90A-NEXT: ; def v[0:5] 7405; GFX90A-NEXT: ;;#ASMEND 7406; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16 
7407; GFX90A-NEXT: s_nop 0 7408; GFX90A-NEXT: v_mov_b32_e32 v2, v4 7409; GFX90A-NEXT: v_mov_b32_e32 v3, v5 7410; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] 7411; GFX90A-NEXT: s_waitcnt vmcnt(0) 7412; GFX90A-NEXT: s_setpc_b64 s[30:31] 7413; 7414; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_u_5: 7415; GFX940: ; %bb.0: 7416; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7417; GFX940-NEXT: v_mov_b32_e32 v6, 0 7418; GFX940-NEXT: ;;#ASMSTART 7419; GFX940-NEXT: ; def v[0:5] 7420; GFX940-NEXT: ;;#ASMEND 7421; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1 7422; GFX940-NEXT: s_nop 1 7423; GFX940-NEXT: v_mov_b32_e32 v2, v4 7424; GFX940-NEXT: v_mov_b32_e32 v3, v5 7425; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 7426; GFX940-NEXT: s_waitcnt vmcnt(0) 7427; GFX940-NEXT: s_setpc_b64 s[30:31] 7428 %vec0 = call <3 x i64> asm "; def $0", "=v"() 7429 %vec1 = call <3 x i64> asm "; def $0", "=v"() 7430 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 poison, i32 5> 7431 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 7432 ret void 7433} 7434 7435define void @v_shuffle_v4i64_v3i64__5_5_0_5(ptr addrspace(1) inreg %ptr) { 7436; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_0_5: 7437; GFX900: ; %bb.0: 7438; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7439; GFX900-NEXT: ;;#ASMSTART 7440; GFX900-NEXT: ; def v[0:5] 7441; GFX900-NEXT: ;;#ASMEND 7442; GFX900-NEXT: ;;#ASMSTART 7443; GFX900-NEXT: ; def v[2:7] 7444; GFX900-NEXT: ;;#ASMEND 7445; GFX900-NEXT: v_mov_b32_e32 v8, 0 7446; GFX900-NEXT: v_mov_b32_e32 v2, v6 7447; GFX900-NEXT: v_mov_b32_e32 v3, v7 7448; GFX900-NEXT: v_mov_b32_e32 v4, v6 7449; GFX900-NEXT: v_mov_b32_e32 v5, v7 7450; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16 7451; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] 7452; GFX900-NEXT: s_waitcnt vmcnt(0) 7453; GFX900-NEXT: s_setpc_b64 s[30:31] 7454; 7455; GFX90A-LABEL: 
v_shuffle_v4i64_v3i64__5_5_0_5: 7456; GFX90A: ; %bb.0: 7457; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7458; GFX90A-NEXT: ;;#ASMSTART 7459; GFX90A-NEXT: ; def v[0:5] 7460; GFX90A-NEXT: ;;#ASMEND 7461; GFX90A-NEXT: ;;#ASMSTART 7462; GFX90A-NEXT: ; def v[2:7] 7463; GFX90A-NEXT: ;;#ASMEND 7464; GFX90A-NEXT: v_mov_b32_e32 v8, 0 7465; GFX90A-NEXT: v_mov_b32_e32 v2, v6 7466; GFX90A-NEXT: v_mov_b32_e32 v3, v7 7467; GFX90A-NEXT: v_mov_b32_e32 v4, v6 7468; GFX90A-NEXT: v_mov_b32_e32 v5, v7 7469; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16 7470; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] 7471; GFX90A-NEXT: s_waitcnt vmcnt(0) 7472; GFX90A-NEXT: s_setpc_b64 s[30:31] 7473; 7474; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_0_5: 7475; GFX940: ; %bb.0: 7476; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7477; GFX940-NEXT: ;;#ASMSTART 7478; GFX940-NEXT: ; def v[0:5] 7479; GFX940-NEXT: ;;#ASMEND 7480; GFX940-NEXT: v_mov_b32_e32 v8, 0 7481; GFX940-NEXT: ;;#ASMSTART 7482; GFX940-NEXT: ; def v[2:7] 7483; GFX940-NEXT: ;;#ASMEND 7484; GFX940-NEXT: s_nop 0 7485; GFX940-NEXT: v_mov_b32_e32 v2, v6 7486; GFX940-NEXT: v_mov_b32_e32 v3, v7 7487; GFX940-NEXT: v_mov_b32_e32 v4, v6 7488; GFX940-NEXT: v_mov_b32_e32 v5, v7 7489; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1 7490; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 7491; GFX940-NEXT: s_waitcnt vmcnt(0) 7492; GFX940-NEXT: s_setpc_b64 s[30:31] 7493 %vec0 = call <3 x i64> asm "; def $0", "=v"() 7494 %vec1 = call <3 x i64> asm "; def $0", "=v"() 7495 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 0, i32 5> 7496 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 7497 ret void 7498} 7499 7500define void @v_shuffle_v4i64_v3i64__5_5_1_5(ptr addrspace(1) inreg %ptr) { 7501; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_1_5: 7502; GFX900: ; %bb.0: 7503; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7504; GFX900-NEXT: 
;;#ASMSTART 7505; GFX900-NEXT: ; def v[0:5] 7506; GFX900-NEXT: ;;#ASMEND 7507; GFX900-NEXT: ;;#ASMSTART 7508; GFX900-NEXT: ; def v[4:9] 7509; GFX900-NEXT: ;;#ASMEND 7510; GFX900-NEXT: v_mov_b32_e32 v10, 0 7511; GFX900-NEXT: v_mov_b32_e32 v4, v8 7512; GFX900-NEXT: v_mov_b32_e32 v5, v9 7513; GFX900-NEXT: v_mov_b32_e32 v6, v8 7514; GFX900-NEXT: v_mov_b32_e32 v7, v9 7515; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16 7516; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] 7517; GFX900-NEXT: s_waitcnt vmcnt(0) 7518; GFX900-NEXT: s_setpc_b64 s[30:31] 7519; 7520; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_1_5: 7521; GFX90A: ; %bb.0: 7522; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7523; GFX90A-NEXT: ;;#ASMSTART 7524; GFX90A-NEXT: ; def v[0:5] 7525; GFX90A-NEXT: ;;#ASMEND 7526; GFX90A-NEXT: ;;#ASMSTART 7527; GFX90A-NEXT: ; def v[4:9] 7528; GFX90A-NEXT: ;;#ASMEND 7529; GFX90A-NEXT: v_mov_b32_e32 v10, 0 7530; GFX90A-NEXT: v_mov_b32_e32 v4, v8 7531; GFX90A-NEXT: v_mov_b32_e32 v5, v9 7532; GFX90A-NEXT: v_mov_b32_e32 v6, v8 7533; GFX90A-NEXT: v_mov_b32_e32 v7, v9 7534; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16 7535; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] 7536; GFX90A-NEXT: s_waitcnt vmcnt(0) 7537; GFX90A-NEXT: s_setpc_b64 s[30:31] 7538; 7539; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_1_5: 7540; GFX940: ; %bb.0: 7541; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7542; GFX940-NEXT: ;;#ASMSTART 7543; GFX940-NEXT: ; def v[0:5] 7544; GFX940-NEXT: ;;#ASMEND 7545; GFX940-NEXT: v_mov_b32_e32 v10, 0 7546; GFX940-NEXT: ;;#ASMSTART 7547; GFX940-NEXT: ; def v[4:9] 7548; GFX940-NEXT: ;;#ASMEND 7549; GFX940-NEXT: s_nop 0 7550; GFX940-NEXT: v_mov_b32_e32 v4, v8 7551; GFX940-NEXT: v_mov_b32_e32 v5, v9 7552; GFX940-NEXT: v_mov_b32_e32 v6, v8 7553; GFX940-NEXT: v_mov_b32_e32 v7, v9 7554; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16 sc0 sc1 7555; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], 
s[0:1] sc0 sc1 7556; GFX940-NEXT: s_waitcnt vmcnt(0) 7557; GFX940-NEXT: s_setpc_b64 s[30:31] 7558 %vec0 = call <3 x i64> asm "; def $0", "=v"() 7559 %vec1 = call <3 x i64> asm "; def $0", "=v"() 7560 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 1, i32 5> 7561 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 7562 ret void 7563} 7564 7565define void @v_shuffle_v4i64_v3i64__5_5_2_5(ptr addrspace(1) inreg %ptr) { 7566; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_2_5: 7567; GFX900: ; %bb.0: 7568; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7569; GFX900-NEXT: ;;#ASMSTART 7570; GFX900-NEXT: ; def v[6:11] 7571; GFX900-NEXT: ;;#ASMEND 7572; GFX900-NEXT: v_mov_b32_e32 v12, 0 7573; GFX900-NEXT: ;;#ASMSTART 7574; GFX900-NEXT: ; def v[0:5] 7575; GFX900-NEXT: ;;#ASMEND 7576; GFX900-NEXT: v_mov_b32_e32 v8, v4 7577; GFX900-NEXT: v_mov_b32_e32 v9, v5 7578; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16 7579; GFX900-NEXT: s_nop 0 7580; GFX900-NEXT: v_mov_b32_e32 v8, v10 7581; GFX900-NEXT: v_mov_b32_e32 v9, v11 7582; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] 7583; GFX900-NEXT: s_waitcnt vmcnt(0) 7584; GFX900-NEXT: s_setpc_b64 s[30:31] 7585; 7586; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_2_5: 7587; GFX90A: ; %bb.0: 7588; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7589; GFX90A-NEXT: ;;#ASMSTART 7590; GFX90A-NEXT: ; def v[6:11] 7591; GFX90A-NEXT: ;;#ASMEND 7592; GFX90A-NEXT: v_mov_b32_e32 v12, 0 7593; GFX90A-NEXT: ;;#ASMSTART 7594; GFX90A-NEXT: ; def v[0:5] 7595; GFX90A-NEXT: ;;#ASMEND 7596; GFX90A-NEXT: v_mov_b32_e32 v8, v4 7597; GFX90A-NEXT: v_mov_b32_e32 v9, v5 7598; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16 7599; GFX90A-NEXT: s_nop 0 7600; GFX90A-NEXT: v_mov_b32_e32 v8, v10 7601; GFX90A-NEXT: v_mov_b32_e32 v9, v11 7602; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] 7603; GFX90A-NEXT: s_waitcnt vmcnt(0) 7604; GFX90A-NEXT: s_setpc_b64 s[30:31] 7605; 
7606; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_2_5: 7607; GFX940: ; %bb.0: 7608; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7609; GFX940-NEXT: ;;#ASMSTART 7610; GFX940-NEXT: ; def v[6:11] 7611; GFX940-NEXT: ;;#ASMEND 7612; GFX940-NEXT: v_mov_b32_e32 v12, 0 7613; GFX940-NEXT: ;;#ASMSTART 7614; GFX940-NEXT: ; def v[0:5] 7615; GFX940-NEXT: ;;#ASMEND 7616; GFX940-NEXT: s_nop 0 7617; GFX940-NEXT: v_mov_b32_e32 v8, v4 7618; GFX940-NEXT: v_mov_b32_e32 v9, v5 7619; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] offset:16 sc0 sc1 7620; GFX940-NEXT: s_nop 1 7621; GFX940-NEXT: v_mov_b32_e32 v8, v10 7622; GFX940-NEXT: v_mov_b32_e32 v9, v11 7623; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1 7624; GFX940-NEXT: s_waitcnt vmcnt(0) 7625; GFX940-NEXT: s_setpc_b64 s[30:31] 7626 %vec0 = call <3 x i64> asm "; def $0", "=v"() 7627 %vec1 = call <3 x i64> asm "; def $0", "=v"() 7628 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 2, i32 5> 7629 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 7630 ret void 7631} 7632 7633define void @v_shuffle_v4i64_v3i64__5_5_3_5(ptr addrspace(1) inreg %ptr) { 7634; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_3_5: 7635; GFX900: ; %bb.0: 7636; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7637; GFX900-NEXT: ;;#ASMSTART 7638; GFX900-NEXT: ; def v[0:5] 7639; GFX900-NEXT: ;;#ASMEND 7640; GFX900-NEXT: v_mov_b32_e32 v6, 0 7641; GFX900-NEXT: v_mov_b32_e32 v2, v4 7642; GFX900-NEXT: v_mov_b32_e32 v3, v5 7643; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16 7644; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] 7645; GFX900-NEXT: s_waitcnt vmcnt(0) 7646; GFX900-NEXT: s_setpc_b64 s[30:31] 7647; 7648; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_3_5: 7649; GFX90A: ; %bb.0: 7650; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7651; GFX90A-NEXT: ;;#ASMSTART 7652; GFX90A-NEXT: ; def v[0:5] 7653; GFX90A-NEXT: ;;#ASMEND 7654; GFX90A-NEXT: v_mov_b32_e32 v6, 0 7655; 
GFX90A-NEXT: v_mov_b32_e32 v2, v4 7656; GFX90A-NEXT: v_mov_b32_e32 v3, v5 7657; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16 7658; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] 7659; GFX90A-NEXT: s_waitcnt vmcnt(0) 7660; GFX90A-NEXT: s_setpc_b64 s[30:31] 7661; 7662; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_3_5: 7663; GFX940: ; %bb.0: 7664; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7665; GFX940-NEXT: ;;#ASMSTART 7666; GFX940-NEXT: ; def v[0:5] 7667; GFX940-NEXT: ;;#ASMEND 7668; GFX940-NEXT: v_mov_b32_e32 v6, 0 7669; GFX940-NEXT: v_mov_b32_e32 v2, v4 7670; GFX940-NEXT: v_mov_b32_e32 v3, v5 7671; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 sc0 sc1 7672; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 7673; GFX940-NEXT: s_waitcnt vmcnt(0) 7674; GFX940-NEXT: s_setpc_b64 s[30:31] 7675 %vec0 = call <3 x i64> asm "; def $0", "=v"() 7676 %vec1 = call <3 x i64> asm "; def $0", "=v"() 7677 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 3, i32 5> 7678 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 7679 ret void 7680} 7681 7682define void @v_shuffle_v4i64_v3i64__5_5_4_5(ptr addrspace(1) inreg %ptr) { 7683; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_4_5: 7684; GFX900: ; %bb.0: 7685; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7686; GFX900-NEXT: v_mov_b32_e32 v6, 0 7687; GFX900-NEXT: ;;#ASMSTART 7688; GFX900-NEXT: ; def v[0:5] 7689; GFX900-NEXT: ;;#ASMEND 7690; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16 7691; GFX900-NEXT: s_nop 0 7692; GFX900-NEXT: v_mov_b32_e32 v2, v4 7693; GFX900-NEXT: v_mov_b32_e32 v3, v5 7694; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] 7695; GFX900-NEXT: s_waitcnt vmcnt(0) 7696; GFX900-NEXT: s_setpc_b64 s[30:31] 7697; 7698; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_4_5: 7699; GFX90A: ; %bb.0: 7700; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7701; GFX90A-NEXT: v_mov_b32_e32 v6, 0 7702; 
GFX90A-NEXT: ;;#ASMSTART 7703; GFX90A-NEXT: ; def v[0:5] 7704; GFX90A-NEXT: ;;#ASMEND 7705; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16 7706; GFX90A-NEXT: s_nop 0 7707; GFX90A-NEXT: v_mov_b32_e32 v2, v4 7708; GFX90A-NEXT: v_mov_b32_e32 v3, v5 7709; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] 7710; GFX90A-NEXT: s_waitcnt vmcnt(0) 7711; GFX90A-NEXT: s_setpc_b64 s[30:31] 7712; 7713; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_4_5: 7714; GFX940: ; %bb.0: 7715; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7716; GFX940-NEXT: v_mov_b32_e32 v6, 0 7717; GFX940-NEXT: ;;#ASMSTART 7718; GFX940-NEXT: ; def v[0:5] 7719; GFX940-NEXT: ;;#ASMEND 7720; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1 7721; GFX940-NEXT: s_nop 1 7722; GFX940-NEXT: v_mov_b32_e32 v2, v4 7723; GFX940-NEXT: v_mov_b32_e32 v3, v5 7724; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 7725; GFX940-NEXT: s_waitcnt vmcnt(0) 7726; GFX940-NEXT: s_setpc_b64 s[30:31] 7727 %vec0 = call <3 x i64> asm "; def $0", "=v"() 7728 %vec1 = call <3 x i64> asm "; def $0", "=v"() 7729 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 4, i32 5> 7730 store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32 7731 ret void 7732} 7733 7734define void @s_shuffle_v4i64_v3i64__u_u_u_u() { 7735; GFX9-LABEL: s_shuffle_v4i64_v3i64__u_u_u_u: 7736; GFX9: ; %bb.0: 7737; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7738; GFX9-NEXT: ;;#ASMSTART 7739; GFX9-NEXT: ; use s[8:15] 7740; GFX9-NEXT: ;;#ASMEND 7741; GFX9-NEXT: s_setpc_b64 s[30:31] 7742 %vec0 = call <3 x i64> asm "; def $0", "=s"() 7743 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> poison 7744 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 7745 ret void 7746} 7747 7748define void @s_shuffle_v4i64_v3i64__0_u_u_u() { 7749; GFX900-LABEL: s_shuffle_v4i64_v3i64__0_u_u_u: 7750; GFX900: ; %bb.0: 7751; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) 7752; GFX900-NEXT: ;;#ASMSTART 7753; GFX900-NEXT: ; def s[8:13] 7754; GFX900-NEXT: ;;#ASMEND 7755; GFX900-NEXT: ;;#ASMSTART 7756; GFX900-NEXT: ; use s[8:15] 7757; GFX900-NEXT: ;;#ASMEND 7758; GFX900-NEXT: s_setpc_b64 s[30:31] 7759; 7760; GFX90A-LABEL: s_shuffle_v4i64_v3i64__0_u_u_u: 7761; GFX90A: ; %bb.0: 7762; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7763; GFX90A-NEXT: ;;#ASMSTART 7764; GFX90A-NEXT: ; def s[8:13] 7765; GFX90A-NEXT: ;;#ASMEND 7766; GFX90A-NEXT: ;;#ASMSTART 7767; GFX90A-NEXT: ; use s[8:15] 7768; GFX90A-NEXT: ;;#ASMEND 7769; GFX90A-NEXT: s_setpc_b64 s[30:31] 7770; 7771; GFX940-LABEL: s_shuffle_v4i64_v3i64__0_u_u_u: 7772; GFX940: ; %bb.0: 7773; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7774; GFX940-NEXT: ;;#ASMSTART 7775; GFX940-NEXT: ; def s[8:13] 7776; GFX940-NEXT: ;;#ASMEND 7777; GFX940-NEXT: s_nop 0 7778; GFX940-NEXT: ;;#ASMSTART 7779; GFX940-NEXT: ; use s[8:15] 7780; GFX940-NEXT: ;;#ASMEND 7781; GFX940-NEXT: s_setpc_b64 s[30:31] 7782 %vec0 = call <3 x i64> asm "; def $0", "=s"() 7783 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison> 7784 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 7785 ret void 7786} 7787 7788define void @s_shuffle_v4i64_v3i64__1_u_u_u() { 7789; GFX900-LABEL: s_shuffle_v4i64_v3i64__1_u_u_u: 7790; GFX900: ; %bb.0: 7791; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7792; GFX900-NEXT: ;;#ASMSTART 7793; GFX900-NEXT: ; def s[4:9] 7794; GFX900-NEXT: ;;#ASMEND 7795; GFX900-NEXT: s_mov_b32 s8, s6 7796; GFX900-NEXT: s_mov_b32 s9, s7 7797; GFX900-NEXT: ;;#ASMSTART 7798; GFX900-NEXT: ; use s[8:15] 7799; GFX900-NEXT: ;;#ASMEND 7800; GFX900-NEXT: s_setpc_b64 s[30:31] 7801; 7802; GFX90A-LABEL: s_shuffle_v4i64_v3i64__1_u_u_u: 7803; GFX90A: ; %bb.0: 7804; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7805; GFX90A-NEXT: ;;#ASMSTART 7806; GFX90A-NEXT: ; def s[4:9] 7807; GFX90A-NEXT: ;;#ASMEND 7808; GFX90A-NEXT: 
s_mov_b32 s8, s6 7809; GFX90A-NEXT: s_mov_b32 s9, s7 7810; GFX90A-NEXT: ;;#ASMSTART 7811; GFX90A-NEXT: ; use s[8:15] 7812; GFX90A-NEXT: ;;#ASMEND 7813; GFX90A-NEXT: s_setpc_b64 s[30:31] 7814; 7815; GFX940-LABEL: s_shuffle_v4i64_v3i64__1_u_u_u: 7816; GFX940: ; %bb.0: 7817; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7818; GFX940-NEXT: ;;#ASMSTART 7819; GFX940-NEXT: ; def s[0:5] 7820; GFX940-NEXT: ;;#ASMEND 7821; GFX940-NEXT: s_mov_b32 s8, s2 7822; GFX940-NEXT: s_mov_b32 s9, s3 7823; GFX940-NEXT: ;;#ASMSTART 7824; GFX940-NEXT: ; use s[8:15] 7825; GFX940-NEXT: ;;#ASMEND 7826; GFX940-NEXT: s_setpc_b64 s[30:31] 7827 %vec0 = call <3 x i64> asm "; def $0", "=s"() 7828 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison> 7829 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 7830 ret void 7831} 7832 7833define void @s_shuffle_v4i64_v3i64__2_u_u_u() { 7834; GFX900-LABEL: s_shuffle_v4i64_v3i64__2_u_u_u: 7835; GFX900: ; %bb.0: 7836; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7837; GFX900-NEXT: ;;#ASMSTART 7838; GFX900-NEXT: ; def s[4:9] 7839; GFX900-NEXT: ;;#ASMEND 7840; GFX900-NEXT: ;;#ASMSTART 7841; GFX900-NEXT: ; use s[8:15] 7842; GFX900-NEXT: ;;#ASMEND 7843; GFX900-NEXT: s_setpc_b64 s[30:31] 7844; 7845; GFX90A-LABEL: s_shuffle_v4i64_v3i64__2_u_u_u: 7846; GFX90A: ; %bb.0: 7847; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7848; GFX90A-NEXT: ;;#ASMSTART 7849; GFX90A-NEXT: ; def s[4:9] 7850; GFX90A-NEXT: ;;#ASMEND 7851; GFX90A-NEXT: ;;#ASMSTART 7852; GFX90A-NEXT: ; use s[8:15] 7853; GFX90A-NEXT: ;;#ASMEND 7854; GFX90A-NEXT: s_setpc_b64 s[30:31] 7855; 7856; GFX940-LABEL: s_shuffle_v4i64_v3i64__2_u_u_u: 7857; GFX940: ; %bb.0: 7858; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7859; GFX940-NEXT: ;;#ASMSTART 7860; GFX940-NEXT: ; def s[0:5] 7861; GFX940-NEXT: ;;#ASMEND 7862; GFX940-NEXT: s_mov_b32 s8, s4 7863; GFX940-NEXT: s_mov_b32 s9, s5 7864; GFX940-NEXT: ;;#ASMSTART 
7865; GFX940-NEXT: ; use s[8:15] 7866; GFX940-NEXT: ;;#ASMEND 7867; GFX940-NEXT: s_setpc_b64 s[30:31] 7868 %vec0 = call <3 x i64> asm "; def $0", "=s"() 7869 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 2, i32 poison, i32 poison, i32 poison> 7870 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 7871 ret void 7872} 7873 7874define void @s_shuffle_v4i64_v3i64__3_u_u_u() { 7875; GFX9-LABEL: s_shuffle_v4i64_v3i64__3_u_u_u: 7876; GFX9: ; %bb.0: 7877; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7878; GFX9-NEXT: ;;#ASMSTART 7879; GFX9-NEXT: ; use s[8:15] 7880; GFX9-NEXT: ;;#ASMEND 7881; GFX9-NEXT: s_setpc_b64 s[30:31] 7882 %vec0 = call <3 x i64> asm "; def $0", "=s"() 7883 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 3, i32 poison, i32 poison, i32 poison> 7884 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 7885 ret void 7886} 7887 7888define void @s_shuffle_v4i64_v3i64__4_u_u_u() { 7889; GFX900-LABEL: s_shuffle_v4i64_v3i64__4_u_u_u: 7890; GFX900: ; %bb.0: 7891; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7892; GFX900-NEXT: ;;#ASMSTART 7893; GFX900-NEXT: ; def s[4:9] 7894; GFX900-NEXT: ;;#ASMEND 7895; GFX900-NEXT: s_mov_b32 s8, s6 7896; GFX900-NEXT: s_mov_b32 s9, s7 7897; GFX900-NEXT: ;;#ASMSTART 7898; GFX900-NEXT: ; use s[8:15] 7899; GFX900-NEXT: ;;#ASMEND 7900; GFX900-NEXT: s_setpc_b64 s[30:31] 7901; 7902; GFX90A-LABEL: s_shuffle_v4i64_v3i64__4_u_u_u: 7903; GFX90A: ; %bb.0: 7904; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7905; GFX90A-NEXT: ;;#ASMSTART 7906; GFX90A-NEXT: ; def s[4:9] 7907; GFX90A-NEXT: ;;#ASMEND 7908; GFX90A-NEXT: s_mov_b32 s8, s6 7909; GFX90A-NEXT: s_mov_b32 s9, s7 7910; GFX90A-NEXT: ;;#ASMSTART 7911; GFX90A-NEXT: ; use s[8:15] 7912; GFX90A-NEXT: ;;#ASMEND 7913; GFX90A-NEXT: s_setpc_b64 s[30:31] 7914; 7915; GFX940-LABEL: s_shuffle_v4i64_v3i64__4_u_u_u: 7916; GFX940: ; %bb.0: 7917; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7918; 
GFX940-NEXT: ;;#ASMSTART 7919; GFX940-NEXT: ; def s[0:5] 7920; GFX940-NEXT: ;;#ASMEND 7921; GFX940-NEXT: s_mov_b32 s8, s2 7922; GFX940-NEXT: s_mov_b32 s9, s3 7923; GFX940-NEXT: ;;#ASMSTART 7924; GFX940-NEXT: ; use s[8:15] 7925; GFX940-NEXT: ;;#ASMEND 7926; GFX940-NEXT: s_setpc_b64 s[30:31] 7927 %vec0 = call <3 x i64> asm "; def $0", "=s"() 7928 %vec1 = call <3 x i64> asm "; def $0", "=s"() 7929 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 4, i32 poison, i32 poison, i32 poison> 7930 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 7931 ret void 7932} 7933 7934define void @s_shuffle_v4i64_v3i64__5_u_u_u() { 7935; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_u_u_u: 7936; GFX900: ; %bb.0: 7937; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7938; GFX900-NEXT: ;;#ASMSTART 7939; GFX900-NEXT: ; def s[4:9] 7940; GFX900-NEXT: ;;#ASMEND 7941; GFX900-NEXT: ;;#ASMSTART 7942; GFX900-NEXT: ; use s[8:15] 7943; GFX900-NEXT: ;;#ASMEND 7944; GFX900-NEXT: s_setpc_b64 s[30:31] 7945; 7946; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_u_u_u: 7947; GFX90A: ; %bb.0: 7948; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7949; GFX90A-NEXT: ;;#ASMSTART 7950; GFX90A-NEXT: ; def s[4:9] 7951; GFX90A-NEXT: ;;#ASMEND 7952; GFX90A-NEXT: ;;#ASMSTART 7953; GFX90A-NEXT: ; use s[8:15] 7954; GFX90A-NEXT: ;;#ASMEND 7955; GFX90A-NEXT: s_setpc_b64 s[30:31] 7956; 7957; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_u_u_u: 7958; GFX940: ; %bb.0: 7959; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7960; GFX940-NEXT: ;;#ASMSTART 7961; GFX940-NEXT: ; def s[0:5] 7962; GFX940-NEXT: ;;#ASMEND 7963; GFX940-NEXT: s_mov_b32 s8, s4 7964; GFX940-NEXT: s_mov_b32 s9, s5 7965; GFX940-NEXT: ;;#ASMSTART 7966; GFX940-NEXT: ; use s[8:15] 7967; GFX940-NEXT: ;;#ASMEND 7968; GFX940-NEXT: s_setpc_b64 s[30:31] 7969 %vec0 = call <3 x i64> asm "; def $0", "=s"() 7970 %vec1 = call <3 x i64> asm "; def $0", "=s"() 7971 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, 
i32 poison, i32 poison, i32 poison> 7972 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 7973 ret void 7974} 7975 7976define void @s_shuffle_v4i64_v3i64__5_0_u_u() { 7977; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_0_u_u: 7978; GFX900: ; %bb.0: 7979; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7980; GFX900-NEXT: ;;#ASMSTART 7981; GFX900-NEXT: ; def s[4:9] 7982; GFX900-NEXT: ;;#ASMEND 7983; GFX900-NEXT: ;;#ASMSTART 7984; GFX900-NEXT: ; def s[8:13] 7985; GFX900-NEXT: ;;#ASMEND 7986; GFX900-NEXT: s_mov_b32 s8, s12 7987; GFX900-NEXT: s_mov_b32 s9, s13 7988; GFX900-NEXT: s_mov_b32 s10, s4 7989; GFX900-NEXT: s_mov_b32 s11, s5 7990; GFX900-NEXT: ;;#ASMSTART 7991; GFX900-NEXT: ; use s[8:15] 7992; GFX900-NEXT: ;;#ASMEND 7993; GFX900-NEXT: s_setpc_b64 s[30:31] 7994; 7995; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_0_u_u: 7996; GFX90A: ; %bb.0: 7997; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7998; GFX90A-NEXT: ;;#ASMSTART 7999; GFX90A-NEXT: ; def s[4:9] 8000; GFX90A-NEXT: ;;#ASMEND 8001; GFX90A-NEXT: ;;#ASMSTART 8002; GFX90A-NEXT: ; def s[8:13] 8003; GFX90A-NEXT: ;;#ASMEND 8004; GFX90A-NEXT: s_mov_b32 s8, s12 8005; GFX90A-NEXT: s_mov_b32 s9, s13 8006; GFX90A-NEXT: s_mov_b32 s10, s4 8007; GFX90A-NEXT: s_mov_b32 s11, s5 8008; GFX90A-NEXT: ;;#ASMSTART 8009; GFX90A-NEXT: ; use s[8:15] 8010; GFX90A-NEXT: ;;#ASMEND 8011; GFX90A-NEXT: s_setpc_b64 s[30:31] 8012; 8013; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_0_u_u: 8014; GFX940: ; %bb.0: 8015; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8016; GFX940-NEXT: ;;#ASMSTART 8017; GFX940-NEXT: ; def s[0:5] 8018; GFX940-NEXT: ;;#ASMEND 8019; GFX940-NEXT: s_mov_b32 s10, s0 8020; GFX940-NEXT: ;;#ASMSTART 8021; GFX940-NEXT: ; def s[4:9] 8022; GFX940-NEXT: ;;#ASMEND 8023; GFX940-NEXT: s_mov_b32 s11, s1 8024; GFX940-NEXT: ;;#ASMSTART 8025; GFX940-NEXT: ; use s[8:15] 8026; GFX940-NEXT: ;;#ASMEND 8027; GFX940-NEXT: s_setpc_b64 s[30:31] 8028 %vec0 = call <3 x i64> asm "; def $0", "=s"() 8029 %vec1 = call <3 x i64> 
asm "; def $0", "=s"() 8030 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 0, i32 poison, i32 poison> 8031 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 8032 ret void 8033} 8034 8035define void @s_shuffle_v4i64_v3i64__5_1_u_u() { 8036; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_1_u_u: 8037; GFX900: ; %bb.0: 8038; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8039; GFX900-NEXT: ;;#ASMSTART 8040; GFX900-NEXT: ; def s[8:13] 8041; GFX900-NEXT: ;;#ASMEND 8042; GFX900-NEXT: ;;#ASMSTART 8043; GFX900-NEXT: ; def s[4:9] 8044; GFX900-NEXT: ;;#ASMEND 8045; GFX900-NEXT: ;;#ASMSTART 8046; GFX900-NEXT: ; use s[8:15] 8047; GFX900-NEXT: ;;#ASMEND 8048; GFX900-NEXT: s_setpc_b64 s[30:31] 8049; 8050; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_1_u_u: 8051; GFX90A: ; %bb.0: 8052; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8053; GFX90A-NEXT: ;;#ASMSTART 8054; GFX90A-NEXT: ; def s[8:13] 8055; GFX90A-NEXT: ;;#ASMEND 8056; GFX90A-NEXT: ;;#ASMSTART 8057; GFX90A-NEXT: ; def s[4:9] 8058; GFX90A-NEXT: ;;#ASMEND 8059; GFX90A-NEXT: ;;#ASMSTART 8060; GFX90A-NEXT: ; use s[8:15] 8061; GFX90A-NEXT: ;;#ASMEND 8062; GFX90A-NEXT: s_setpc_b64 s[30:31] 8063; 8064; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_1_u_u: 8065; GFX940: ; %bb.0: 8066; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8067; GFX940-NEXT: ;;#ASMSTART 8068; GFX940-NEXT: ; def s[8:13] 8069; GFX940-NEXT: ;;#ASMEND 8070; GFX940-NEXT: ;;#ASMSTART 8071; GFX940-NEXT: ; def s[0:5] 8072; GFX940-NEXT: ;;#ASMEND 8073; GFX940-NEXT: s_mov_b32 s8, s4 8074; GFX940-NEXT: s_mov_b32 s9, s5 8075; GFX940-NEXT: ;;#ASMSTART 8076; GFX940-NEXT: ; use s[8:15] 8077; GFX940-NEXT: ;;#ASMEND 8078; GFX940-NEXT: s_setpc_b64 s[30:31] 8079 %vec0 = call <3 x i64> asm "; def $0", "=s"() 8080 %vec1 = call <3 x i64> asm "; def $0", "=s"() 8081 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 1, i32 poison, i32 poison> 8082 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> 
%shuf) 8083 ret void 8084} 8085 8086define void @s_shuffle_v4i64_v3i64__5_2_u_u() { 8087; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_2_u_u: 8088; GFX900: ; %bb.0: 8089; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8090; GFX900-NEXT: ;;#ASMSTART 8091; GFX900-NEXT: ; def s[8:13] 8092; GFX900-NEXT: ;;#ASMEND 8093; GFX900-NEXT: ;;#ASMSTART 8094; GFX900-NEXT: ; def s[4:9] 8095; GFX900-NEXT: ;;#ASMEND 8096; GFX900-NEXT: s_mov_b32 s10, s12 8097; GFX900-NEXT: s_mov_b32 s11, s13 8098; GFX900-NEXT: ;;#ASMSTART 8099; GFX900-NEXT: ; use s[8:15] 8100; GFX900-NEXT: ;;#ASMEND 8101; GFX900-NEXT: s_setpc_b64 s[30:31] 8102; 8103; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_2_u_u: 8104; GFX90A: ; %bb.0: 8105; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8106; GFX90A-NEXT: ;;#ASMSTART 8107; GFX90A-NEXT: ; def s[8:13] 8108; GFX90A-NEXT: ;;#ASMEND 8109; GFX90A-NEXT: ;;#ASMSTART 8110; GFX90A-NEXT: ; def s[4:9] 8111; GFX90A-NEXT: ;;#ASMEND 8112; GFX90A-NEXT: s_mov_b32 s10, s12 8113; GFX90A-NEXT: s_mov_b32 s11, s13 8114; GFX90A-NEXT: ;;#ASMSTART 8115; GFX90A-NEXT: ; use s[8:15] 8116; GFX90A-NEXT: ;;#ASMEND 8117; GFX90A-NEXT: s_setpc_b64 s[30:31] 8118; 8119; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_2_u_u: 8120; GFX940: ; %bb.0: 8121; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8122; GFX940-NEXT: ;;#ASMSTART 8123; GFX940-NEXT: ; def s[8:13] 8124; GFX940-NEXT: ;;#ASMEND 8125; GFX940-NEXT: ;;#ASMSTART 8126; GFX940-NEXT: ; def s[0:5] 8127; GFX940-NEXT: ;;#ASMEND 8128; GFX940-NEXT: s_mov_b32 s8, s12 8129; GFX940-NEXT: s_mov_b32 s9, s13 8130; GFX940-NEXT: s_mov_b32 s10, s4 8131; GFX940-NEXT: s_mov_b32 s11, s5 8132; GFX940-NEXT: ;;#ASMSTART 8133; GFX940-NEXT: ; use s[8:15] 8134; GFX940-NEXT: ;;#ASMEND 8135; GFX940-NEXT: s_setpc_b64 s[30:31] 8136 %vec0 = call <3 x i64> asm "; def $0", "=s"() 8137 %vec1 = call <3 x i64> asm "; def $0", "=s"() 8138 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 2, i32 poison, i32 poison> 8139 call void asm sideeffect "; 
use $0", "{s[8:15]}"(<4 x i64> %shuf) 8140 ret void 8141} 8142 8143define void @s_shuffle_v4i64_v3i64__5_3_u_u() { 8144; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_3_u_u: 8145; GFX900: ; %bb.0: 8146; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8147; GFX900-NEXT: ;;#ASMSTART 8148; GFX900-NEXT: ; def s[4:9] 8149; GFX900-NEXT: ;;#ASMEND 8150; GFX900-NEXT: s_mov_b32 s10, s4 8151; GFX900-NEXT: s_mov_b32 s11, s5 8152; GFX900-NEXT: ;;#ASMSTART 8153; GFX900-NEXT: ; use s[8:15] 8154; GFX900-NEXT: ;;#ASMEND 8155; GFX900-NEXT: s_setpc_b64 s[30:31] 8156; 8157; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_3_u_u: 8158; GFX90A: ; %bb.0: 8159; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8160; GFX90A-NEXT: ;;#ASMSTART 8161; GFX90A-NEXT: ; def s[4:9] 8162; GFX90A-NEXT: ;;#ASMEND 8163; GFX90A-NEXT: s_mov_b32 s10, s4 8164; GFX90A-NEXT: s_mov_b32 s11, s5 8165; GFX90A-NEXT: ;;#ASMSTART 8166; GFX90A-NEXT: ; use s[8:15] 8167; GFX90A-NEXT: ;;#ASMEND 8168; GFX90A-NEXT: s_setpc_b64 s[30:31] 8169; 8170; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_3_u_u: 8171; GFX940: ; %bb.0: 8172; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8173; GFX940-NEXT: ;;#ASMSTART 8174; GFX940-NEXT: ; def s[0:5] 8175; GFX940-NEXT: ;;#ASMEND 8176; GFX940-NEXT: s_mov_b32 s8, s4 8177; GFX940-NEXT: s_mov_b32 s9, s5 8178; GFX940-NEXT: s_mov_b32 s10, s0 8179; GFX940-NEXT: s_mov_b32 s11, s1 8180; GFX940-NEXT: ;;#ASMSTART 8181; GFX940-NEXT: ; use s[8:15] 8182; GFX940-NEXT: ;;#ASMEND 8183; GFX940-NEXT: s_setpc_b64 s[30:31] 8184 %vec0 = call <3 x i64> asm "; def $0", "=s"() 8185 %vec1 = call <3 x i64> asm "; def $0", "=s"() 8186 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 3, i32 poison, i32 poison> 8187 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 8188 ret void 8189} 8190 8191define void @s_shuffle_v4i64_v3i64__5_4_u_u() { 8192; GFX9-LABEL: s_shuffle_v4i64_v3i64__5_4_u_u: 8193; GFX9: ; %bb.0: 8194; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8195; 
GFX9-NEXT: ;;#ASMSTART 8196; GFX9-NEXT: ; def s[8:13] 8197; GFX9-NEXT: ;;#ASMEND 8198; GFX9-NEXT: s_mov_b32 s8, s12 8199; GFX9-NEXT: s_mov_b32 s9, s13 8200; GFX9-NEXT: ;;#ASMSTART 8201; GFX9-NEXT: ; use s[8:15] 8202; GFX9-NEXT: ;;#ASMEND 8203; GFX9-NEXT: s_setpc_b64 s[30:31] 8204 %vec0 = call <3 x i64> asm "; def $0", "=s"() 8205 %vec1 = call <3 x i64> asm "; def $0", "=s"() 8206 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 4, i32 poison, i32 poison> 8207 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 8208 ret void 8209} 8210 8211define void @s_shuffle_v4i64_v3i64__5_5_u_u() { 8212; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_u_u: 8213; GFX900: ; %bb.0: 8214; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8215; GFX900-NEXT: ;;#ASMSTART 8216; GFX900-NEXT: ; def s[8:13] 8217; GFX900-NEXT: ;;#ASMEND 8218; GFX900-NEXT: s_mov_b32 s8, s12 8219; GFX900-NEXT: s_mov_b32 s9, s13 8220; GFX900-NEXT: s_mov_b32 s10, s12 8221; GFX900-NEXT: s_mov_b32 s11, s13 8222; GFX900-NEXT: ;;#ASMSTART 8223; GFX900-NEXT: ; use s[8:15] 8224; GFX900-NEXT: ;;#ASMEND 8225; GFX900-NEXT: s_setpc_b64 s[30:31] 8226; 8227; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_u_u: 8228; GFX90A: ; %bb.0: 8229; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8230; GFX90A-NEXT: ;;#ASMSTART 8231; GFX90A-NEXT: ; def s[8:13] 8232; GFX90A-NEXT: ;;#ASMEND 8233; GFX90A-NEXT: s_mov_b32 s8, s12 8234; GFX90A-NEXT: s_mov_b32 s9, s13 8235; GFX90A-NEXT: s_mov_b32 s10, s12 8236; GFX90A-NEXT: s_mov_b32 s11, s13 8237; GFX90A-NEXT: ;;#ASMSTART 8238; GFX90A-NEXT: ; use s[8:15] 8239; GFX90A-NEXT: ;;#ASMEND 8240; GFX90A-NEXT: s_setpc_b64 s[30:31] 8241; 8242; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_u_u: 8243; GFX940: ; %bb.0: 8244; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8245; GFX940-NEXT: ;;#ASMSTART 8246; GFX940-NEXT: ; def s[0:5] 8247; GFX940-NEXT: ;;#ASMEND 8248; GFX940-NEXT: s_mov_b32 s8, s4 8249; GFX940-NEXT: s_mov_b32 s9, s5 8250; GFX940-NEXT: s_mov_b32 s10, 
s4 8251; GFX940-NEXT: s_mov_b32 s11, s5 8252; GFX940-NEXT: ;;#ASMSTART 8253; GFX940-NEXT: ; use s[8:15] 8254; GFX940-NEXT: ;;#ASMEND 8255; GFX940-NEXT: s_setpc_b64 s[30:31] 8256 %vec0 = call <3 x i64> asm "; def $0", "=s"() 8257 %vec1 = call <3 x i64> asm "; def $0", "=s"() 8258 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 poison, i32 poison> 8259 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 8260 ret void 8261} 8262 8263define void @s_shuffle_v4i64_v3i64__5_5_0_u() { 8264; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_0_u: 8265; GFX900: ; %bb.0: 8266; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8267; GFX900-NEXT: ;;#ASMSTART 8268; GFX900-NEXT: ; def s[4:9] 8269; GFX900-NEXT: ;;#ASMEND 8270; GFX900-NEXT: ;;#ASMSTART 8271; GFX900-NEXT: ; def s[8:13] 8272; GFX900-NEXT: ;;#ASMEND 8273; GFX900-NEXT: s_mov_b32 s8, s12 8274; GFX900-NEXT: s_mov_b32 s9, s13 8275; GFX900-NEXT: s_mov_b32 s10, s12 8276; GFX900-NEXT: s_mov_b32 s11, s13 8277; GFX900-NEXT: s_mov_b32 s12, s4 8278; GFX900-NEXT: s_mov_b32 s13, s5 8279; GFX900-NEXT: ;;#ASMSTART 8280; GFX900-NEXT: ; use s[8:15] 8281; GFX900-NEXT: ;;#ASMEND 8282; GFX900-NEXT: s_setpc_b64 s[30:31] 8283; 8284; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_0_u: 8285; GFX90A: ; %bb.0: 8286; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8287; GFX90A-NEXT: ;;#ASMSTART 8288; GFX90A-NEXT: ; def s[4:9] 8289; GFX90A-NEXT: ;;#ASMEND 8290; GFX90A-NEXT: ;;#ASMSTART 8291; GFX90A-NEXT: ; def s[8:13] 8292; GFX90A-NEXT: ;;#ASMEND 8293; GFX90A-NEXT: s_mov_b32 s8, s12 8294; GFX90A-NEXT: s_mov_b32 s9, s13 8295; GFX90A-NEXT: s_mov_b32 s10, s12 8296; GFX90A-NEXT: s_mov_b32 s11, s13 8297; GFX90A-NEXT: s_mov_b32 s12, s4 8298; GFX90A-NEXT: s_mov_b32 s13, s5 8299; GFX90A-NEXT: ;;#ASMSTART 8300; GFX90A-NEXT: ; use s[8:15] 8301; GFX90A-NEXT: ;;#ASMEND 8302; GFX90A-NEXT: s_setpc_b64 s[30:31] 8303; 8304; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_0_u: 8305; GFX940: ; %bb.0: 8306; GFX940-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8307; GFX940-NEXT: ;;#ASMSTART 8308; GFX940-NEXT: ; def s[8:13] 8309; GFX940-NEXT: ;;#ASMEND 8310; GFX940-NEXT: ;;#ASMSTART 8311; GFX940-NEXT: ; def s[0:5] 8312; GFX940-NEXT: ;;#ASMEND 8313; GFX940-NEXT: s_mov_b32 s8, s12 8314; GFX940-NEXT: s_mov_b32 s9, s13 8315; GFX940-NEXT: s_mov_b32 s10, s12 8316; GFX940-NEXT: s_mov_b32 s11, s13 8317; GFX940-NEXT: s_mov_b32 s12, s0 8318; GFX940-NEXT: s_mov_b32 s13, s1 8319; GFX940-NEXT: ;;#ASMSTART 8320; GFX940-NEXT: ; use s[8:15] 8321; GFX940-NEXT: ;;#ASMEND 8322; GFX940-NEXT: s_setpc_b64 s[30:31] 8323 %vec0 = call <3 x i64> asm "; def $0", "=s"() 8324 %vec1 = call <3 x i64> asm "; def $0", "=s"() 8325 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 0, i32 poison> 8326 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 8327 ret void 8328} 8329 8330define void @s_shuffle_v4i64_v3i64__5_5_1_u() { 8331; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_1_u: 8332; GFX900: ; %bb.0: 8333; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8334; GFX900-NEXT: ;;#ASMSTART 8335; GFX900-NEXT: ; def s[4:9] 8336; GFX900-NEXT: ;;#ASMEND 8337; GFX900-NEXT: ;;#ASMSTART 8338; GFX900-NEXT: ; def s[8:13] 8339; GFX900-NEXT: ;;#ASMEND 8340; GFX900-NEXT: s_mov_b32 s8, s12 8341; GFX900-NEXT: s_mov_b32 s9, s13 8342; GFX900-NEXT: s_mov_b32 s10, s12 8343; GFX900-NEXT: s_mov_b32 s11, s13 8344; GFX900-NEXT: s_mov_b32 s12, s6 8345; GFX900-NEXT: s_mov_b32 s13, s7 8346; GFX900-NEXT: ;;#ASMSTART 8347; GFX900-NEXT: ; use s[8:15] 8348; GFX900-NEXT: ;;#ASMEND 8349; GFX900-NEXT: s_setpc_b64 s[30:31] 8350; 8351; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_1_u: 8352; GFX90A: ; %bb.0: 8353; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8354; GFX90A-NEXT: ;;#ASMSTART 8355; GFX90A-NEXT: ; def s[4:9] 8356; GFX90A-NEXT: ;;#ASMEND 8357; GFX90A-NEXT: ;;#ASMSTART 8358; GFX90A-NEXT: ; def s[8:13] 8359; GFX90A-NEXT: ;;#ASMEND 8360; GFX90A-NEXT: s_mov_b32 s8, s12 8361; GFX90A-NEXT: 
s_mov_b32 s9, s13 8362; GFX90A-NEXT: s_mov_b32 s10, s12 8363; GFX90A-NEXT: s_mov_b32 s11, s13 8364; GFX90A-NEXT: s_mov_b32 s12, s6 8365; GFX90A-NEXT: s_mov_b32 s13, s7 8366; GFX90A-NEXT: ;;#ASMSTART 8367; GFX90A-NEXT: ; use s[8:15] 8368; GFX90A-NEXT: ;;#ASMEND 8369; GFX90A-NEXT: s_setpc_b64 s[30:31] 8370; 8371; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_1_u: 8372; GFX940: ; %bb.0: 8373; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8374; GFX940-NEXT: ;;#ASMSTART 8375; GFX940-NEXT: ; def s[8:13] 8376; GFX940-NEXT: ;;#ASMEND 8377; GFX940-NEXT: ;;#ASMSTART 8378; GFX940-NEXT: ; def s[0:5] 8379; GFX940-NEXT: ;;#ASMEND 8380; GFX940-NEXT: s_mov_b32 s8, s12 8381; GFX940-NEXT: s_mov_b32 s9, s13 8382; GFX940-NEXT: s_mov_b32 s10, s12 8383; GFX940-NEXT: s_mov_b32 s11, s13 8384; GFX940-NEXT: s_mov_b32 s12, s2 8385; GFX940-NEXT: s_mov_b32 s13, s3 8386; GFX940-NEXT: ;;#ASMSTART 8387; GFX940-NEXT: ; use s[8:15] 8388; GFX940-NEXT: ;;#ASMEND 8389; GFX940-NEXT: s_setpc_b64 s[30:31] 8390 %vec0 = call <3 x i64> asm "; def $0", "=s"() 8391 %vec1 = call <3 x i64> asm "; def $0", "=s"() 8392 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 1, i32 poison> 8393 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 8394 ret void 8395} 8396 8397define void @s_shuffle_v4i64_v3i64__5_5_2_u() { 8398; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_2_u: 8399; GFX900: ; %bb.0: 8400; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8401; GFX900-NEXT: ;;#ASMSTART 8402; GFX900-NEXT: ; def s[8:13] 8403; GFX900-NEXT: ;;#ASMEND 8404; GFX900-NEXT: ;;#ASMSTART 8405; GFX900-NEXT: ; def s[16:21] 8406; GFX900-NEXT: ;;#ASMEND 8407; GFX900-NEXT: s_mov_b32 s8, s20 8408; GFX900-NEXT: s_mov_b32 s9, s21 8409; GFX900-NEXT: s_mov_b32 s10, s20 8410; GFX900-NEXT: s_mov_b32 s11, s21 8411; GFX900-NEXT: ;;#ASMSTART 8412; GFX900-NEXT: ; use s[8:15] 8413; GFX900-NEXT: ;;#ASMEND 8414; GFX900-NEXT: s_setpc_b64 s[30:31] 8415; 8416; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_2_u: 
8417; GFX90A: ; %bb.0: 8418; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8419; GFX90A-NEXT: ;;#ASMSTART 8420; GFX90A-NEXT: ; def s[8:13] 8421; GFX90A-NEXT: ;;#ASMEND 8422; GFX90A-NEXT: ;;#ASMSTART 8423; GFX90A-NEXT: ; def s[16:21] 8424; GFX90A-NEXT: ;;#ASMEND 8425; GFX90A-NEXT: s_mov_b32 s8, s20 8426; GFX90A-NEXT: s_mov_b32 s9, s21 8427; GFX90A-NEXT: s_mov_b32 s10, s20 8428; GFX90A-NEXT: s_mov_b32 s11, s21 8429; GFX90A-NEXT: ;;#ASMSTART 8430; GFX90A-NEXT: ; use s[8:15] 8431; GFX90A-NEXT: ;;#ASMEND 8432; GFX90A-NEXT: s_setpc_b64 s[30:31] 8433; 8434; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_2_u: 8435; GFX940: ; %bb.0: 8436; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8437; GFX940-NEXT: ;;#ASMSTART 8438; GFX940-NEXT: ; def s[8:13] 8439; GFX940-NEXT: ;;#ASMEND 8440; GFX940-NEXT: ;;#ASMSTART 8441; GFX940-NEXT: ; def s[0:5] 8442; GFX940-NEXT: ;;#ASMEND 8443; GFX940-NEXT: s_mov_b32 s8, s4 8444; GFX940-NEXT: s_mov_b32 s9, s5 8445; GFX940-NEXT: s_mov_b32 s10, s4 8446; GFX940-NEXT: s_mov_b32 s11, s5 8447; GFX940-NEXT: ;;#ASMSTART 8448; GFX940-NEXT: ; use s[8:15] 8449; GFX940-NEXT: ;;#ASMEND 8450; GFX940-NEXT: s_setpc_b64 s[30:31] 8451 %vec0 = call <3 x i64> asm "; def $0", "=s"() 8452 %vec1 = call <3 x i64> asm "; def $0", "=s"() 8453 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 2, i32 poison> 8454 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 8455 ret void 8456} 8457 8458define void @s_shuffle_v4i64_v3i64__5_5_3_u() { 8459; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_3_u: 8460; GFX900: ; %bb.0: 8461; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8462; GFX900-NEXT: ;;#ASMSTART 8463; GFX900-NEXT: ; def s[12:17] 8464; GFX900-NEXT: ;;#ASMEND 8465; GFX900-NEXT: s_mov_b32 s8, s16 8466; GFX900-NEXT: s_mov_b32 s9, s17 8467; GFX900-NEXT: s_mov_b32 s10, s16 8468; GFX900-NEXT: s_mov_b32 s11, s17 8469; GFX900-NEXT: ;;#ASMSTART 8470; GFX900-NEXT: ; use s[8:15] 8471; GFX900-NEXT: ;;#ASMEND 8472; 
GFX900-NEXT: s_setpc_b64 s[30:31] 8473; 8474; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_3_u: 8475; GFX90A: ; %bb.0: 8476; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8477; GFX90A-NEXT: ;;#ASMSTART 8478; GFX90A-NEXT: ; def s[12:17] 8479; GFX90A-NEXT: ;;#ASMEND 8480; GFX90A-NEXT: s_mov_b32 s8, s16 8481; GFX90A-NEXT: s_mov_b32 s9, s17 8482; GFX90A-NEXT: s_mov_b32 s10, s16 8483; GFX90A-NEXT: s_mov_b32 s11, s17 8484; GFX90A-NEXT: ;;#ASMSTART 8485; GFX90A-NEXT: ; use s[8:15] 8486; GFX90A-NEXT: ;;#ASMEND 8487; GFX90A-NEXT: s_setpc_b64 s[30:31] 8488; 8489; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_3_u: 8490; GFX940: ; %bb.0: 8491; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8492; GFX940-NEXT: ;;#ASMSTART 8493; GFX940-NEXT: ; def s[0:5] 8494; GFX940-NEXT: ;;#ASMEND 8495; GFX940-NEXT: s_mov_b32 s8, s4 8496; GFX940-NEXT: s_mov_b32 s9, s5 8497; GFX940-NEXT: s_mov_b32 s10, s4 8498; GFX940-NEXT: s_mov_b32 s11, s5 8499; GFX940-NEXT: s_mov_b32 s12, s0 8500; GFX940-NEXT: s_mov_b32 s13, s1 8501; GFX940-NEXT: ;;#ASMSTART 8502; GFX940-NEXT: ; use s[8:15] 8503; GFX940-NEXT: ;;#ASMEND 8504; GFX940-NEXT: s_setpc_b64 s[30:31] 8505 %vec0 = call <3 x i64> asm "; def $0", "=s"() 8506 %vec1 = call <3 x i64> asm "; def $0", "=s"() 8507 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 3, i32 poison> 8508 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 8509 ret void 8510} 8511 8512define void @s_shuffle_v4i64_v3i64__5_5_4_u() { 8513; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_4_u: 8514; GFX900: ; %bb.0: 8515; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8516; GFX900-NEXT: ;;#ASMSTART 8517; GFX900-NEXT: ; def s[12:17] 8518; GFX900-NEXT: ;;#ASMEND 8519; GFX900-NEXT: s_mov_b32 s8, s16 8520; GFX900-NEXT: s_mov_b32 s9, s17 8521; GFX900-NEXT: s_mov_b32 s10, s16 8522; GFX900-NEXT: s_mov_b32 s11, s17 8523; GFX900-NEXT: s_mov_b32 s12, s14 8524; GFX900-NEXT: s_mov_b32 s13, s15 8525; GFX900-NEXT: ;;#ASMSTART 8526; GFX900-NEXT: ; 
use s[8:15] 8527; GFX900-NEXT: ;;#ASMEND 8528; GFX900-NEXT: s_setpc_b64 s[30:31] 8529; 8530; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_4_u: 8531; GFX90A: ; %bb.0: 8532; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8533; GFX90A-NEXT: ;;#ASMSTART 8534; GFX90A-NEXT: ; def s[12:17] 8535; GFX90A-NEXT: ;;#ASMEND 8536; GFX90A-NEXT: s_mov_b32 s8, s16 8537; GFX90A-NEXT: s_mov_b32 s9, s17 8538; GFX90A-NEXT: s_mov_b32 s10, s16 8539; GFX90A-NEXT: s_mov_b32 s11, s17 8540; GFX90A-NEXT: s_mov_b32 s12, s14 8541; GFX90A-NEXT: s_mov_b32 s13, s15 8542; GFX90A-NEXT: ;;#ASMSTART 8543; GFX90A-NEXT: ; use s[8:15] 8544; GFX90A-NEXT: ;;#ASMEND 8545; GFX90A-NEXT: s_setpc_b64 s[30:31] 8546; 8547; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_4_u: 8548; GFX940: ; %bb.0: 8549; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8550; GFX940-NEXT: ;;#ASMSTART 8551; GFX940-NEXT: ; def s[0:5] 8552; GFX940-NEXT: ;;#ASMEND 8553; GFX940-NEXT: s_mov_b32 s8, s4 8554; GFX940-NEXT: s_mov_b32 s9, s5 8555; GFX940-NEXT: s_mov_b32 s10, s4 8556; GFX940-NEXT: s_mov_b32 s11, s5 8557; GFX940-NEXT: s_mov_b32 s12, s2 8558; GFX940-NEXT: s_mov_b32 s13, s3 8559; GFX940-NEXT: ;;#ASMSTART 8560; GFX940-NEXT: ; use s[8:15] 8561; GFX940-NEXT: ;;#ASMEND 8562; GFX940-NEXT: s_setpc_b64 s[30:31] 8563 %vec0 = call <3 x i64> asm "; def $0", "=s"() 8564 %vec1 = call <3 x i64> asm "; def $0", "=s"() 8565 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 4, i32 poison> 8566 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 8567 ret void 8568} 8569 8570define void @s_shuffle_v4i64_v3i64__5_5_5_u() { 8571; GFX9-LABEL: s_shuffle_v4i64_v3i64__5_5_5_u: 8572; GFX9: ; %bb.0: 8573; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8574; GFX9-NEXT: ;;#ASMSTART 8575; GFX9-NEXT: ; def s[8:13] 8576; GFX9-NEXT: ;;#ASMEND 8577; GFX9-NEXT: s_mov_b32 s8, s12 8578; GFX9-NEXT: s_mov_b32 s9, s13 8579; GFX9-NEXT: s_mov_b32 s10, s12 8580; GFX9-NEXT: s_mov_b32 s11, s13 8581; GFX9-NEXT: ;;#ASMSTART 
8582; GFX9-NEXT: ; use s[8:15] 8583; GFX9-NEXT: ;;#ASMEND 8584; GFX9-NEXT: s_setpc_b64 s[30:31] 8585 %vec0 = call <3 x i64> asm "; def $0", "=s"() 8586 %vec1 = call <3 x i64> asm "; def $0", "=s"() 8587 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 5, i32 poison> 8588 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 8589 ret void 8590} 8591 8592define void @s_shuffle_v4i64_v3i64__5_5_5_0() { 8593; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_5_0: 8594; GFX900: ; %bb.0: 8595; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8596; GFX900-NEXT: ;;#ASMSTART 8597; GFX900-NEXT: ; def s[4:9] 8598; GFX900-NEXT: ;;#ASMEND 8599; GFX900-NEXT: ;;#ASMSTART 8600; GFX900-NEXT: ; def s[8:13] 8601; GFX900-NEXT: ;;#ASMEND 8602; GFX900-NEXT: s_mov_b32 s8, s12 8603; GFX900-NEXT: s_mov_b32 s9, s13 8604; GFX900-NEXT: s_mov_b32 s10, s12 8605; GFX900-NEXT: s_mov_b32 s11, s13 8606; GFX900-NEXT: s_mov_b32 s14, s4 8607; GFX900-NEXT: s_mov_b32 s15, s5 8608; GFX900-NEXT: ;;#ASMSTART 8609; GFX900-NEXT: ; use s[8:15] 8610; GFX900-NEXT: ;;#ASMEND 8611; GFX900-NEXT: s_setpc_b64 s[30:31] 8612; 8613; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_5_0: 8614; GFX90A: ; %bb.0: 8615; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8616; GFX90A-NEXT: ;;#ASMSTART 8617; GFX90A-NEXT: ; def s[4:9] 8618; GFX90A-NEXT: ;;#ASMEND 8619; GFX90A-NEXT: ;;#ASMSTART 8620; GFX90A-NEXT: ; def s[8:13] 8621; GFX90A-NEXT: ;;#ASMEND 8622; GFX90A-NEXT: s_mov_b32 s8, s12 8623; GFX90A-NEXT: s_mov_b32 s9, s13 8624; GFX90A-NEXT: s_mov_b32 s10, s12 8625; GFX90A-NEXT: s_mov_b32 s11, s13 8626; GFX90A-NEXT: s_mov_b32 s14, s4 8627; GFX90A-NEXT: s_mov_b32 s15, s5 8628; GFX90A-NEXT: ;;#ASMSTART 8629; GFX90A-NEXT: ; use s[8:15] 8630; GFX90A-NEXT: ;;#ASMEND 8631; GFX90A-NEXT: s_setpc_b64 s[30:31] 8632; 8633; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_5_0: 8634; GFX940: ; %bb.0: 8635; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8636; GFX940-NEXT: ;;#ASMSTART 8637; GFX940-NEXT: 
; def s[8:13] 8638; GFX940-NEXT: ;;#ASMEND 8639; GFX940-NEXT: ;;#ASMSTART 8640; GFX940-NEXT: ; def s[0:5] 8641; GFX940-NEXT: ;;#ASMEND 8642; GFX940-NEXT: s_mov_b32 s8, s12 8643; GFX940-NEXT: s_mov_b32 s9, s13 8644; GFX940-NEXT: s_mov_b32 s10, s12 8645; GFX940-NEXT: s_mov_b32 s11, s13 8646; GFX940-NEXT: s_mov_b32 s14, s0 8647; GFX940-NEXT: s_mov_b32 s15, s1 8648; GFX940-NEXT: ;;#ASMSTART 8649; GFX940-NEXT: ; use s[8:15] 8650; GFX940-NEXT: ;;#ASMEND 8651; GFX940-NEXT: s_setpc_b64 s[30:31] 8652 %vec0 = call <3 x i64> asm "; def $0", "=s"() 8653 %vec1 = call <3 x i64> asm "; def $0", "=s"() 8654 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 5, i32 0> 8655 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 8656 ret void 8657} 8658 8659define void @s_shuffle_v4i64_v3i64__5_5_5_1() { 8660; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_5_1: 8661; GFX900: ; %bb.0: 8662; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8663; GFX900-NEXT: ;;#ASMSTART 8664; GFX900-NEXT: ; def s[4:9] 8665; GFX900-NEXT: ;;#ASMEND 8666; GFX900-NEXT: ;;#ASMSTART 8667; GFX900-NEXT: ; def s[8:13] 8668; GFX900-NEXT: ;;#ASMEND 8669; GFX900-NEXT: s_mov_b32 s8, s12 8670; GFX900-NEXT: s_mov_b32 s9, s13 8671; GFX900-NEXT: s_mov_b32 s10, s12 8672; GFX900-NEXT: s_mov_b32 s11, s13 8673; GFX900-NEXT: s_mov_b32 s14, s6 8674; GFX900-NEXT: s_mov_b32 s15, s7 8675; GFX900-NEXT: ;;#ASMSTART 8676; GFX900-NEXT: ; use s[8:15] 8677; GFX900-NEXT: ;;#ASMEND 8678; GFX900-NEXT: s_setpc_b64 s[30:31] 8679; 8680; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_5_1: 8681; GFX90A: ; %bb.0: 8682; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8683; GFX90A-NEXT: ;;#ASMSTART 8684; GFX90A-NEXT: ; def s[4:9] 8685; GFX90A-NEXT: ;;#ASMEND 8686; GFX90A-NEXT: ;;#ASMSTART 8687; GFX90A-NEXT: ; def s[8:13] 8688; GFX90A-NEXT: ;;#ASMEND 8689; GFX90A-NEXT: s_mov_b32 s8, s12 8690; GFX90A-NEXT: s_mov_b32 s9, s13 8691; GFX90A-NEXT: s_mov_b32 s10, s12 8692; GFX90A-NEXT: s_mov_b32 s11, s13 8693; 
GFX90A-NEXT: s_mov_b32 s14, s6 8694; GFX90A-NEXT: s_mov_b32 s15, s7 8695; GFX90A-NEXT: ;;#ASMSTART 8696; GFX90A-NEXT: ; use s[8:15] 8697; GFX90A-NEXT: ;;#ASMEND 8698; GFX90A-NEXT: s_setpc_b64 s[30:31] 8699; 8700; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_5_1: 8701; GFX940: ; %bb.0: 8702; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8703; GFX940-NEXT: ;;#ASMSTART 8704; GFX940-NEXT: ; def s[8:13] 8705; GFX940-NEXT: ;;#ASMEND 8706; GFX940-NEXT: ;;#ASMSTART 8707; GFX940-NEXT: ; def s[0:5] 8708; GFX940-NEXT: ;;#ASMEND 8709; GFX940-NEXT: s_mov_b32 s8, s12 8710; GFX940-NEXT: s_mov_b32 s9, s13 8711; GFX940-NEXT: s_mov_b32 s10, s12 8712; GFX940-NEXT: s_mov_b32 s11, s13 8713; GFX940-NEXT: s_mov_b32 s14, s2 8714; GFX940-NEXT: s_mov_b32 s15, s3 8715; GFX940-NEXT: ;;#ASMSTART 8716; GFX940-NEXT: ; use s[8:15] 8717; GFX940-NEXT: ;;#ASMEND 8718; GFX940-NEXT: s_setpc_b64 s[30:31] 8719 %vec0 = call <3 x i64> asm "; def $0", "=s"() 8720 %vec1 = call <3 x i64> asm "; def $0", "=s"() 8721 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 5, i32 1> 8722 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 8723 ret void 8724} 8725 8726define void @s_shuffle_v4i64_v3i64__5_5_5_2() { 8727; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_5_2: 8728; GFX900: ; %bb.0: 8729; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8730; GFX900-NEXT: ;;#ASMSTART 8731; GFX900-NEXT: ; def s[12:17] 8732; GFX900-NEXT: ;;#ASMEND 8733; GFX900-NEXT: ;;#ASMSTART 8734; GFX900-NEXT: ; def s[8:13] 8735; GFX900-NEXT: ;;#ASMEND 8736; GFX900-NEXT: s_mov_b32 s8, s12 8737; GFX900-NEXT: s_mov_b32 s9, s13 8738; GFX900-NEXT: s_mov_b32 s10, s12 8739; GFX900-NEXT: s_mov_b32 s11, s13 8740; GFX900-NEXT: s_mov_b32 s14, s16 8741; GFX900-NEXT: s_mov_b32 s15, s17 8742; GFX900-NEXT: ;;#ASMSTART 8743; GFX900-NEXT: ; use s[8:15] 8744; GFX900-NEXT: ;;#ASMEND 8745; GFX900-NEXT: s_setpc_b64 s[30:31] 8746; 8747; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_5_2: 8748; GFX90A: ; %bb.0: 8749; 
GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8750; GFX90A-NEXT: ;;#ASMSTART 8751; GFX90A-NEXT: ; def s[12:17] 8752; GFX90A-NEXT: ;;#ASMEND 8753; GFX90A-NEXT: ;;#ASMSTART 8754; GFX90A-NEXT: ; def s[8:13] 8755; GFX90A-NEXT: ;;#ASMEND 8756; GFX90A-NEXT: s_mov_b32 s8, s12 8757; GFX90A-NEXT: s_mov_b32 s9, s13 8758; GFX90A-NEXT: s_mov_b32 s10, s12 8759; GFX90A-NEXT: s_mov_b32 s11, s13 8760; GFX90A-NEXT: s_mov_b32 s14, s16 8761; GFX90A-NEXT: s_mov_b32 s15, s17 8762; GFX90A-NEXT: ;;#ASMSTART 8763; GFX90A-NEXT: ; use s[8:15] 8764; GFX90A-NEXT: ;;#ASMEND 8765; GFX90A-NEXT: s_setpc_b64 s[30:31] 8766; 8767; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_5_2: 8768; GFX940: ; %bb.0: 8769; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8770; GFX940-NEXT: ;;#ASMSTART 8771; GFX940-NEXT: ; def s[8:13] 8772; GFX940-NEXT: ;;#ASMEND 8773; GFX940-NEXT: ;;#ASMSTART 8774; GFX940-NEXT: ; def s[0:5] 8775; GFX940-NEXT: ;;#ASMEND 8776; GFX940-NEXT: s_mov_b32 s8, s12 8777; GFX940-NEXT: s_mov_b32 s9, s13 8778; GFX940-NEXT: s_mov_b32 s10, s12 8779; GFX940-NEXT: s_mov_b32 s11, s13 8780; GFX940-NEXT: s_mov_b32 s14, s4 8781; GFX940-NEXT: s_mov_b32 s15, s5 8782; GFX940-NEXT: ;;#ASMSTART 8783; GFX940-NEXT: ; use s[8:15] 8784; GFX940-NEXT: ;;#ASMEND 8785; GFX940-NEXT: s_setpc_b64 s[30:31] 8786 %vec0 = call <3 x i64> asm "; def $0", "=s"() 8787 %vec1 = call <3 x i64> asm "; def $0", "=s"() 8788 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 5, i32 2> 8789 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 8790 ret void 8791} 8792 8793define void @s_shuffle_v4i64_v3i64__5_5_5_3() { 8794; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_5_3: 8795; GFX900: ; %bb.0: 8796; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8797; GFX900-NEXT: ;;#ASMSTART 8798; GFX900-NEXT: ; def s[16:21] 8799; GFX900-NEXT: ;;#ASMEND 8800; GFX900-NEXT: s_mov_b32 s8, s20 8801; GFX900-NEXT: s_mov_b32 s9, s21 8802; GFX900-NEXT: s_mov_b32 s10, s20 8803; GFX900-NEXT: s_mov_b32 
s11, s21 8804; GFX900-NEXT: s_mov_b32 s12, s20 8805; GFX900-NEXT: s_mov_b32 s13, s21 8806; GFX900-NEXT: s_mov_b32 s14, s16 8807; GFX900-NEXT: s_mov_b32 s15, s17 8808; GFX900-NEXT: ;;#ASMSTART 8809; GFX900-NEXT: ; use s[8:15] 8810; GFX900-NEXT: ;;#ASMEND 8811; GFX900-NEXT: s_setpc_b64 s[30:31] 8812; 8813; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_5_3: 8814; GFX90A: ; %bb.0: 8815; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8816; GFX90A-NEXT: ;;#ASMSTART 8817; GFX90A-NEXT: ; def s[16:21] 8818; GFX90A-NEXT: ;;#ASMEND 8819; GFX90A-NEXT: s_mov_b32 s8, s20 8820; GFX90A-NEXT: s_mov_b32 s9, s21 8821; GFX90A-NEXT: s_mov_b32 s10, s20 8822; GFX90A-NEXT: s_mov_b32 s11, s21 8823; GFX90A-NEXT: s_mov_b32 s12, s20 8824; GFX90A-NEXT: s_mov_b32 s13, s21 8825; GFX90A-NEXT: s_mov_b32 s14, s16 8826; GFX90A-NEXT: s_mov_b32 s15, s17 8827; GFX90A-NEXT: ;;#ASMSTART 8828; GFX90A-NEXT: ; use s[8:15] 8829; GFX90A-NEXT: ;;#ASMEND 8830; GFX90A-NEXT: s_setpc_b64 s[30:31] 8831; 8832; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_5_3: 8833; GFX940: ; %bb.0: 8834; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8835; GFX940-NEXT: ;;#ASMSTART 8836; GFX940-NEXT: ; def s[0:5] 8837; GFX940-NEXT: ;;#ASMEND 8838; GFX940-NEXT: s_mov_b32 s8, s4 8839; GFX940-NEXT: s_mov_b32 s9, s5 8840; GFX940-NEXT: s_mov_b32 s10, s4 8841; GFX940-NEXT: s_mov_b32 s11, s5 8842; GFX940-NEXT: s_mov_b32 s12, s4 8843; GFX940-NEXT: s_mov_b32 s13, s5 8844; GFX940-NEXT: s_mov_b32 s14, s0 8845; GFX940-NEXT: s_mov_b32 s15, s1 8846; GFX940-NEXT: ;;#ASMSTART 8847; GFX940-NEXT: ; use s[8:15] 8848; GFX940-NEXT: ;;#ASMEND 8849; GFX940-NEXT: s_setpc_b64 s[30:31] 8850 %vec0 = call <3 x i64> asm "; def $0", "=s"() 8851 %vec1 = call <3 x i64> asm "; def $0", "=s"() 8852 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 5, i32 3> 8853 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 8854 ret void 8855} 8856 8857define void @s_shuffle_v4i64_v3i64__5_5_5_4() { 8858; GFX900-LABEL: 
s_shuffle_v4i64_v3i64__5_5_5_4: 8859; GFX900: ; %bb.0: 8860; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8861; GFX900-NEXT: ;;#ASMSTART 8862; GFX900-NEXT: ; def s[12:17] 8863; GFX900-NEXT: ;;#ASMEND 8864; GFX900-NEXT: s_mov_b32 s8, s16 8865; GFX900-NEXT: s_mov_b32 s9, s17 8866; GFX900-NEXT: s_mov_b32 s10, s16 8867; GFX900-NEXT: s_mov_b32 s11, s17 8868; GFX900-NEXT: s_mov_b32 s12, s16 8869; GFX900-NEXT: s_mov_b32 s13, s17 8870; GFX900-NEXT: ;;#ASMSTART 8871; GFX900-NEXT: ; use s[8:15] 8872; GFX900-NEXT: ;;#ASMEND 8873; GFX900-NEXT: s_setpc_b64 s[30:31] 8874; 8875; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_5_4: 8876; GFX90A: ; %bb.0: 8877; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8878; GFX90A-NEXT: ;;#ASMSTART 8879; GFX90A-NEXT: ; def s[12:17] 8880; GFX90A-NEXT: ;;#ASMEND 8881; GFX90A-NEXT: s_mov_b32 s8, s16 8882; GFX90A-NEXT: s_mov_b32 s9, s17 8883; GFX90A-NEXT: s_mov_b32 s10, s16 8884; GFX90A-NEXT: s_mov_b32 s11, s17 8885; GFX90A-NEXT: s_mov_b32 s12, s16 8886; GFX90A-NEXT: s_mov_b32 s13, s17 8887; GFX90A-NEXT: ;;#ASMSTART 8888; GFX90A-NEXT: ; use s[8:15] 8889; GFX90A-NEXT: ;;#ASMEND 8890; GFX90A-NEXT: s_setpc_b64 s[30:31] 8891; 8892; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_5_4: 8893; GFX940: ; %bb.0: 8894; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8895; GFX940-NEXT: ;;#ASMSTART 8896; GFX940-NEXT: ; def s[0:5] 8897; GFX940-NEXT: ;;#ASMEND 8898; GFX940-NEXT: s_mov_b32 s8, s4 8899; GFX940-NEXT: s_mov_b32 s9, s5 8900; GFX940-NEXT: s_mov_b32 s10, s4 8901; GFX940-NEXT: s_mov_b32 s11, s5 8902; GFX940-NEXT: s_mov_b32 s12, s4 8903; GFX940-NEXT: s_mov_b32 s13, s5 8904; GFX940-NEXT: s_mov_b32 s14, s2 8905; GFX940-NEXT: s_mov_b32 s15, s3 8906; GFX940-NEXT: ;;#ASMSTART 8907; GFX940-NEXT: ; use s[8:15] 8908; GFX940-NEXT: ;;#ASMEND 8909; GFX940-NEXT: s_setpc_b64 s[30:31] 8910 %vec0 = call <3 x i64> asm "; def $0", "=s"() 8911 %vec1 = call <3 x i64> asm "; def $0", "=s"() 8912 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, 
i32 5, i32 5, i32 4> 8913 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 8914 ret void 8915} 8916 8917define void @s_shuffle_v4i64_v3i64__5_5_5_5() { 8918; GFX9-LABEL: s_shuffle_v4i64_v3i64__5_5_5_5: 8919; GFX9: ; %bb.0: 8920; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8921; GFX9-NEXT: ;;#ASMSTART 8922; GFX9-NEXT: ; def s[8:13] 8923; GFX9-NEXT: ;;#ASMEND 8924; GFX9-NEXT: s_mov_b32 s8, s12 8925; GFX9-NEXT: s_mov_b32 s9, s13 8926; GFX9-NEXT: s_mov_b32 s10, s12 8927; GFX9-NEXT: s_mov_b32 s11, s13 8928; GFX9-NEXT: s_mov_b32 s14, s12 8929; GFX9-NEXT: s_mov_b32 s15, s13 8930; GFX9-NEXT: ;;#ASMSTART 8931; GFX9-NEXT: ; use s[8:15] 8932; GFX9-NEXT: ;;#ASMEND 8933; GFX9-NEXT: s_setpc_b64 s[30:31] 8934 %vec0 = call <3 x i64> asm "; def $0", "=s"() 8935 %vec1 = call <3 x i64> asm "; def $0", "=s"() 8936 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 5, i32 5> 8937 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 8938 ret void 8939} 8940 8941define void @s_shuffle_v4i64_v3i64__u_0_0_0() { 8942; GFX900-LABEL: s_shuffle_v4i64_v3i64__u_0_0_0: 8943; GFX900: ; %bb.0: 8944; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8945; GFX900-NEXT: ;;#ASMSTART 8946; GFX900-NEXT: ; def s[4:9] 8947; GFX900-NEXT: ;;#ASMEND 8948; GFX900-NEXT: s_mov_b32 s10, s4 8949; GFX900-NEXT: s_mov_b32 s11, s5 8950; GFX900-NEXT: s_mov_b32 s12, s4 8951; GFX900-NEXT: s_mov_b32 s13, s5 8952; GFX900-NEXT: s_mov_b32 s14, s4 8953; GFX900-NEXT: s_mov_b32 s15, s5 8954; GFX900-NEXT: ;;#ASMSTART 8955; GFX900-NEXT: ; use s[8:15] 8956; GFX900-NEXT: ;;#ASMEND 8957; GFX900-NEXT: s_setpc_b64 s[30:31] 8958; 8959; GFX90A-LABEL: s_shuffle_v4i64_v3i64__u_0_0_0: 8960; GFX90A: ; %bb.0: 8961; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8962; GFX90A-NEXT: ;;#ASMSTART 8963; GFX90A-NEXT: ; def s[4:9] 8964; GFX90A-NEXT: ;;#ASMEND 8965; GFX90A-NEXT: s_mov_b32 s10, s4 8966; GFX90A-NEXT: s_mov_b32 s11, s5 8967; GFX90A-NEXT: s_mov_b32 s12, s4 
8968; GFX90A-NEXT: s_mov_b32 s13, s5 8969; GFX90A-NEXT: s_mov_b32 s14, s4 8970; GFX90A-NEXT: s_mov_b32 s15, s5 8971; GFX90A-NEXT: ;;#ASMSTART 8972; GFX90A-NEXT: ; use s[8:15] 8973; GFX90A-NEXT: ;;#ASMEND 8974; GFX90A-NEXT: s_setpc_b64 s[30:31] 8975; 8976; GFX940-LABEL: s_shuffle_v4i64_v3i64__u_0_0_0: 8977; GFX940: ; %bb.0: 8978; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8979; GFX940-NEXT: ;;#ASMSTART 8980; GFX940-NEXT: ; def s[0:5] 8981; GFX940-NEXT: ;;#ASMEND 8982; GFX940-NEXT: s_mov_b32 s10, s0 8983; GFX940-NEXT: s_mov_b32 s11, s1 8984; GFX940-NEXT: s_mov_b32 s12, s0 8985; GFX940-NEXT: s_mov_b32 s13, s1 8986; GFX940-NEXT: s_mov_b32 s14, s0 8987; GFX940-NEXT: s_mov_b32 s15, s1 8988; GFX940-NEXT: ;;#ASMSTART 8989; GFX940-NEXT: ; use s[8:15] 8990; GFX940-NEXT: ;;#ASMEND 8991; GFX940-NEXT: s_setpc_b64 s[30:31] 8992 %vec0 = call <3 x i64> asm "; def $0", "=s"() 8993 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 poison, i32 0, i32 0, i32 0> 8994 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 8995 ret void 8996} 8997 8998define void @s_shuffle_v4i64_v3i64__0_0_0_0() { 8999; GFX9-LABEL: s_shuffle_v4i64_v3i64__0_0_0_0: 9000; GFX9: ; %bb.0: 9001; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9002; GFX9-NEXT: ;;#ASMSTART 9003; GFX9-NEXT: ; def s[8:13] 9004; GFX9-NEXT: ;;#ASMEND 9005; GFX9-NEXT: s_mov_b32 s10, s8 9006; GFX9-NEXT: s_mov_b32 s11, s9 9007; GFX9-NEXT: s_mov_b32 s12, s8 9008; GFX9-NEXT: s_mov_b32 s13, s9 9009; GFX9-NEXT: s_mov_b32 s14, s8 9010; GFX9-NEXT: s_mov_b32 s15, s9 9011; GFX9-NEXT: ;;#ASMSTART 9012; GFX9-NEXT: ; use s[8:15] 9013; GFX9-NEXT: ;;#ASMEND 9014; GFX9-NEXT: s_setpc_b64 s[30:31] 9015 %vec0 = call <3 x i64> asm "; def $0", "=s"() 9016 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> zeroinitializer 9017 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 9018 ret void 9019} 9020 9021define void @s_shuffle_v4i64_v3i64__1_0_0_0() { 9022; GFX900-LABEL: 
s_shuffle_v4i64_v3i64__1_0_0_0: 9023; GFX900: ; %bb.0: 9024; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9025; GFX900-NEXT: ;;#ASMSTART 9026; GFX900-NEXT: ; def s[4:9] 9027; GFX900-NEXT: ;;#ASMEND 9028; GFX900-NEXT: s_mov_b32 s8, s6 9029; GFX900-NEXT: s_mov_b32 s9, s7 9030; GFX900-NEXT: s_mov_b32 s10, s4 9031; GFX900-NEXT: s_mov_b32 s11, s5 9032; GFX900-NEXT: s_mov_b32 s12, s4 9033; GFX900-NEXT: s_mov_b32 s13, s5 9034; GFX900-NEXT: s_mov_b32 s14, s4 9035; GFX900-NEXT: s_mov_b32 s15, s5 9036; GFX900-NEXT: ;;#ASMSTART 9037; GFX900-NEXT: ; use s[8:15] 9038; GFX900-NEXT: ;;#ASMEND 9039; GFX900-NEXT: s_setpc_b64 s[30:31] 9040; 9041; GFX90A-LABEL: s_shuffle_v4i64_v3i64__1_0_0_0: 9042; GFX90A: ; %bb.0: 9043; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9044; GFX90A-NEXT: ;;#ASMSTART 9045; GFX90A-NEXT: ; def s[4:9] 9046; GFX90A-NEXT: ;;#ASMEND 9047; GFX90A-NEXT: s_mov_b32 s8, s6 9048; GFX90A-NEXT: s_mov_b32 s9, s7 9049; GFX90A-NEXT: s_mov_b32 s10, s4 9050; GFX90A-NEXT: s_mov_b32 s11, s5 9051; GFX90A-NEXT: s_mov_b32 s12, s4 9052; GFX90A-NEXT: s_mov_b32 s13, s5 9053; GFX90A-NEXT: s_mov_b32 s14, s4 9054; GFX90A-NEXT: s_mov_b32 s15, s5 9055; GFX90A-NEXT: ;;#ASMSTART 9056; GFX90A-NEXT: ; use s[8:15] 9057; GFX90A-NEXT: ;;#ASMEND 9058; GFX90A-NEXT: s_setpc_b64 s[30:31] 9059; 9060; GFX940-LABEL: s_shuffle_v4i64_v3i64__1_0_0_0: 9061; GFX940: ; %bb.0: 9062; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9063; GFX940-NEXT: ;;#ASMSTART 9064; GFX940-NEXT: ; def s[0:5] 9065; GFX940-NEXT: ;;#ASMEND 9066; GFX940-NEXT: s_mov_b32 s8, s2 9067; GFX940-NEXT: s_mov_b32 s9, s3 9068; GFX940-NEXT: s_mov_b32 s10, s0 9069; GFX940-NEXT: s_mov_b32 s11, s1 9070; GFX940-NEXT: s_mov_b32 s12, s0 9071; GFX940-NEXT: s_mov_b32 s13, s1 9072; GFX940-NEXT: s_mov_b32 s14, s0 9073; GFX940-NEXT: s_mov_b32 s15, s1 9074; GFX940-NEXT: ;;#ASMSTART 9075; GFX940-NEXT: ; use s[8:15] 9076; GFX940-NEXT: ;;#ASMEND 9077; GFX940-NEXT: s_setpc_b64 s[30:31] 9078 %vec0 = call <3 x i64> asm "; def $0", 
"=s"() 9079 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 1, i32 0, i32 0, i32 0> 9080 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 9081 ret void 9082} 9083 9084define void @s_shuffle_v4i64_v3i64__2_0_0_0() { 9085; GFX900-LABEL: s_shuffle_v4i64_v3i64__2_0_0_0: 9086; GFX900: ; %bb.0: 9087; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9088; GFX900-NEXT: ;;#ASMSTART 9089; GFX900-NEXT: ; def s[4:9] 9090; GFX900-NEXT: ;;#ASMEND 9091; GFX900-NEXT: s_mov_b32 s10, s4 9092; GFX900-NEXT: s_mov_b32 s11, s5 9093; GFX900-NEXT: s_mov_b32 s12, s4 9094; GFX900-NEXT: s_mov_b32 s13, s5 9095; GFX900-NEXT: s_mov_b32 s14, s4 9096; GFX900-NEXT: s_mov_b32 s15, s5 9097; GFX900-NEXT: ;;#ASMSTART 9098; GFX900-NEXT: ; use s[8:15] 9099; GFX900-NEXT: ;;#ASMEND 9100; GFX900-NEXT: s_setpc_b64 s[30:31] 9101; 9102; GFX90A-LABEL: s_shuffle_v4i64_v3i64__2_0_0_0: 9103; GFX90A: ; %bb.0: 9104; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9105; GFX90A-NEXT: ;;#ASMSTART 9106; GFX90A-NEXT: ; def s[4:9] 9107; GFX90A-NEXT: ;;#ASMEND 9108; GFX90A-NEXT: s_mov_b32 s10, s4 9109; GFX90A-NEXT: s_mov_b32 s11, s5 9110; GFX90A-NEXT: s_mov_b32 s12, s4 9111; GFX90A-NEXT: s_mov_b32 s13, s5 9112; GFX90A-NEXT: s_mov_b32 s14, s4 9113; GFX90A-NEXT: s_mov_b32 s15, s5 9114; GFX90A-NEXT: ;;#ASMSTART 9115; GFX90A-NEXT: ; use s[8:15] 9116; GFX90A-NEXT: ;;#ASMEND 9117; GFX90A-NEXT: s_setpc_b64 s[30:31] 9118; 9119; GFX940-LABEL: s_shuffle_v4i64_v3i64__2_0_0_0: 9120; GFX940: ; %bb.0: 9121; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9122; GFX940-NEXT: ;;#ASMSTART 9123; GFX940-NEXT: ; def s[0:5] 9124; GFX940-NEXT: ;;#ASMEND 9125; GFX940-NEXT: s_mov_b32 s8, s4 9126; GFX940-NEXT: s_mov_b32 s9, s5 9127; GFX940-NEXT: s_mov_b32 s10, s0 9128; GFX940-NEXT: s_mov_b32 s11, s1 9129; GFX940-NEXT: s_mov_b32 s12, s0 9130; GFX940-NEXT: s_mov_b32 s13, s1 9131; GFX940-NEXT: s_mov_b32 s14, s0 9132; GFX940-NEXT: s_mov_b32 s15, s1 9133; GFX940-NEXT: ;;#ASMSTART 9134; GFX940-NEXT: 
; use s[8:15] 9135; GFX940-NEXT: ;;#ASMEND 9136; GFX940-NEXT: s_setpc_b64 s[30:31] 9137 %vec0 = call <3 x i64> asm "; def $0", "=s"() 9138 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 2, i32 0, i32 0, i32 0> 9139 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 9140 ret void 9141} 9142 9143define void @s_shuffle_v4i64_v3i64__3_0_0_0() { 9144; GFX900-LABEL: s_shuffle_v4i64_v3i64__3_0_0_0: 9145; GFX900: ; %bb.0: 9146; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9147; GFX900-NEXT: ;;#ASMSTART 9148; GFX900-NEXT: ; def s[4:9] 9149; GFX900-NEXT: ;;#ASMEND 9150; GFX900-NEXT: s_mov_b32 s10, s4 9151; GFX900-NEXT: s_mov_b32 s11, s5 9152; GFX900-NEXT: s_mov_b32 s12, s4 9153; GFX900-NEXT: s_mov_b32 s13, s5 9154; GFX900-NEXT: s_mov_b32 s14, s4 9155; GFX900-NEXT: s_mov_b32 s15, s5 9156; GFX900-NEXT: ;;#ASMSTART 9157; GFX900-NEXT: ; use s[8:15] 9158; GFX900-NEXT: ;;#ASMEND 9159; GFX900-NEXT: s_setpc_b64 s[30:31] 9160; 9161; GFX90A-LABEL: s_shuffle_v4i64_v3i64__3_0_0_0: 9162; GFX90A: ; %bb.0: 9163; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9164; GFX90A-NEXT: ;;#ASMSTART 9165; GFX90A-NEXT: ; def s[4:9] 9166; GFX90A-NEXT: ;;#ASMEND 9167; GFX90A-NEXT: s_mov_b32 s10, s4 9168; GFX90A-NEXT: s_mov_b32 s11, s5 9169; GFX90A-NEXT: s_mov_b32 s12, s4 9170; GFX90A-NEXT: s_mov_b32 s13, s5 9171; GFX90A-NEXT: s_mov_b32 s14, s4 9172; GFX90A-NEXT: s_mov_b32 s15, s5 9173; GFX90A-NEXT: ;;#ASMSTART 9174; GFX90A-NEXT: ; use s[8:15] 9175; GFX90A-NEXT: ;;#ASMEND 9176; GFX90A-NEXT: s_setpc_b64 s[30:31] 9177; 9178; GFX940-LABEL: s_shuffle_v4i64_v3i64__3_0_0_0: 9179; GFX940: ; %bb.0: 9180; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9181; GFX940-NEXT: ;;#ASMSTART 9182; GFX940-NEXT: ; def s[0:5] 9183; GFX940-NEXT: ;;#ASMEND 9184; GFX940-NEXT: s_mov_b32 s10, s0 9185; GFX940-NEXT: s_mov_b32 s11, s1 9186; GFX940-NEXT: s_mov_b32 s12, s0 9187; GFX940-NEXT: s_mov_b32 s13, s1 9188; GFX940-NEXT: s_mov_b32 s14, s0 9189; GFX940-NEXT: s_mov_b32 
s15, s1 9190; GFX940-NEXT: ;;#ASMSTART 9191; GFX940-NEXT: ; use s[8:15] 9192; GFX940-NEXT: ;;#ASMEND 9193; GFX940-NEXT: s_setpc_b64 s[30:31] 9194 %vec0 = call <3 x i64> asm "; def $0", "=s"() 9195 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 3, i32 0, i32 0, i32 0> 9196 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 9197 ret void 9198} 9199 9200define void @s_shuffle_v4i64_v3i64__4_0_0_0() { 9201; GFX900-LABEL: s_shuffle_v4i64_v3i64__4_0_0_0: 9202; GFX900: ; %bb.0: 9203; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9204; GFX900-NEXT: ;;#ASMSTART 9205; GFX900-NEXT: ; def s[4:9] 9206; GFX900-NEXT: ;;#ASMEND 9207; GFX900-NEXT: ;;#ASMSTART 9208; GFX900-NEXT: ; def s[8:13] 9209; GFX900-NEXT: ;;#ASMEND 9210; GFX900-NEXT: s_mov_b32 s8, s10 9211; GFX900-NEXT: s_mov_b32 s9, s11 9212; GFX900-NEXT: s_mov_b32 s10, s4 9213; GFX900-NEXT: s_mov_b32 s11, s5 9214; GFX900-NEXT: s_mov_b32 s12, s4 9215; GFX900-NEXT: s_mov_b32 s13, s5 9216; GFX900-NEXT: s_mov_b32 s14, s4 9217; GFX900-NEXT: s_mov_b32 s15, s5 9218; GFX900-NEXT: ;;#ASMSTART 9219; GFX900-NEXT: ; use s[8:15] 9220; GFX900-NEXT: ;;#ASMEND 9221; GFX900-NEXT: s_setpc_b64 s[30:31] 9222; 9223; GFX90A-LABEL: s_shuffle_v4i64_v3i64__4_0_0_0: 9224; GFX90A: ; %bb.0: 9225; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9226; GFX90A-NEXT: ;;#ASMSTART 9227; GFX90A-NEXT: ; def s[4:9] 9228; GFX90A-NEXT: ;;#ASMEND 9229; GFX90A-NEXT: ;;#ASMSTART 9230; GFX90A-NEXT: ; def s[8:13] 9231; GFX90A-NEXT: ;;#ASMEND 9232; GFX90A-NEXT: s_mov_b32 s8, s10 9233; GFX90A-NEXT: s_mov_b32 s9, s11 9234; GFX90A-NEXT: s_mov_b32 s10, s4 9235; GFX90A-NEXT: s_mov_b32 s11, s5 9236; GFX90A-NEXT: s_mov_b32 s12, s4 9237; GFX90A-NEXT: s_mov_b32 s13, s5 9238; GFX90A-NEXT: s_mov_b32 s14, s4 9239; GFX90A-NEXT: s_mov_b32 s15, s5 9240; GFX90A-NEXT: ;;#ASMSTART 9241; GFX90A-NEXT: ; use s[8:15] 9242; GFX90A-NEXT: ;;#ASMEND 9243; GFX90A-NEXT: s_setpc_b64 s[30:31] 9244; 9245; GFX940-LABEL: 
s_shuffle_v4i64_v3i64__4_0_0_0: 9246; GFX940: ; %bb.0: 9247; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9248; GFX940-NEXT: ;;#ASMSTART 9249; GFX940-NEXT: ; def s[0:5] 9250; GFX940-NEXT: ;;#ASMEND 9251; GFX940-NEXT: s_mov_b32 s10, s0 9252; GFX940-NEXT: ;;#ASMSTART 9253; GFX940-NEXT: ; def s[4:9] 9254; GFX940-NEXT: ;;#ASMEND 9255; GFX940-NEXT: s_mov_b32 s8, s6 9256; GFX940-NEXT: s_mov_b32 s9, s7 9257; GFX940-NEXT: s_mov_b32 s11, s1 9258; GFX940-NEXT: s_mov_b32 s12, s0 9259; GFX940-NEXT: s_mov_b32 s13, s1 9260; GFX940-NEXT: s_mov_b32 s14, s0 9261; GFX940-NEXT: s_mov_b32 s15, s1 9262; GFX940-NEXT: ;;#ASMSTART 9263; GFX940-NEXT: ; use s[8:15] 9264; GFX940-NEXT: ;;#ASMEND 9265; GFX940-NEXT: s_setpc_b64 s[30:31] 9266 %vec0 = call <3 x i64> asm "; def $0", "=s"() 9267 %vec1 = call <3 x i64> asm "; def $0", "=s"() 9268 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 4, i32 0, i32 0, i32 0> 9269 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 9270 ret void 9271} 9272 9273define void @s_shuffle_v4i64_v3i64__5_0_0_0() { 9274; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_0_0_0: 9275; GFX900: ; %bb.0: 9276; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9277; GFX900-NEXT: ;;#ASMSTART 9278; GFX900-NEXT: ; def s[4:9] 9279; GFX900-NEXT: ;;#ASMEND 9280; GFX900-NEXT: ;;#ASMSTART 9281; GFX900-NEXT: ; def s[8:13] 9282; GFX900-NEXT: ;;#ASMEND 9283; GFX900-NEXT: s_mov_b32 s8, s12 9284; GFX900-NEXT: s_mov_b32 s9, s13 9285; GFX900-NEXT: s_mov_b32 s10, s4 9286; GFX900-NEXT: s_mov_b32 s11, s5 9287; GFX900-NEXT: s_mov_b32 s12, s4 9288; GFX900-NEXT: s_mov_b32 s13, s5 9289; GFX900-NEXT: s_mov_b32 s14, s4 9290; GFX900-NEXT: s_mov_b32 s15, s5 9291; GFX900-NEXT: ;;#ASMSTART 9292; GFX900-NEXT: ; use s[8:15] 9293; GFX900-NEXT: ;;#ASMEND 9294; GFX900-NEXT: s_setpc_b64 s[30:31] 9295; 9296; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_0_0_0: 9297; GFX90A: ; %bb.0: 9298; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9299; GFX90A-NEXT: ;;#ASMSTART 9300; 
GFX90A-NEXT: ; def s[4:9] 9301; GFX90A-NEXT: ;;#ASMEND 9302; GFX90A-NEXT: ;;#ASMSTART 9303; GFX90A-NEXT: ; def s[8:13] 9304; GFX90A-NEXT: ;;#ASMEND 9305; GFX90A-NEXT: s_mov_b32 s8, s12 9306; GFX90A-NEXT: s_mov_b32 s9, s13 9307; GFX90A-NEXT: s_mov_b32 s10, s4 9308; GFX90A-NEXT: s_mov_b32 s11, s5 9309; GFX90A-NEXT: s_mov_b32 s12, s4 9310; GFX90A-NEXT: s_mov_b32 s13, s5 9311; GFX90A-NEXT: s_mov_b32 s14, s4 9312; GFX90A-NEXT: s_mov_b32 s15, s5 9313; GFX90A-NEXT: ;;#ASMSTART 9314; GFX90A-NEXT: ; use s[8:15] 9315; GFX90A-NEXT: ;;#ASMEND 9316; GFX90A-NEXT: s_setpc_b64 s[30:31] 9317; 9318; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_0_0_0: 9319; GFX940: ; %bb.0: 9320; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9321; GFX940-NEXT: ;;#ASMSTART 9322; GFX940-NEXT: ; def s[0:5] 9323; GFX940-NEXT: ;;#ASMEND 9324; GFX940-NEXT: s_mov_b32 s10, s0 9325; GFX940-NEXT: ;;#ASMSTART 9326; GFX940-NEXT: ; def s[4:9] 9327; GFX940-NEXT: ;;#ASMEND 9328; GFX940-NEXT: s_mov_b32 s11, s1 9329; GFX940-NEXT: s_mov_b32 s12, s0 9330; GFX940-NEXT: s_mov_b32 s13, s1 9331; GFX940-NEXT: s_mov_b32 s14, s0 9332; GFX940-NEXT: s_mov_b32 s15, s1 9333; GFX940-NEXT: ;;#ASMSTART 9334; GFX940-NEXT: ; use s[8:15] 9335; GFX940-NEXT: ;;#ASMEND 9336; GFX940-NEXT: s_setpc_b64 s[30:31] 9337 %vec0 = call <3 x i64> asm "; def $0", "=s"() 9338 %vec1 = call <3 x i64> asm "; def $0", "=s"() 9339 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 0, i32 0, i32 0> 9340 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 9341 ret void 9342} 9343 9344define void @s_shuffle_v4i64_v3i64__5_u_0_0() { 9345; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_u_0_0: 9346; GFX900: ; %bb.0: 9347; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9348; GFX900-NEXT: ;;#ASMSTART 9349; GFX900-NEXT: ; def s[4:9] 9350; GFX900-NEXT: ;;#ASMEND 9351; GFX900-NEXT: ;;#ASMSTART 9352; GFX900-NEXT: ; def s[8:13] 9353; GFX900-NEXT: ;;#ASMEND 9354; GFX900-NEXT: s_mov_b32 s8, s12 9355; GFX900-NEXT: s_mov_b32 s9, s13 
9356; GFX900-NEXT: s_mov_b32 s12, s4 9357; GFX900-NEXT: s_mov_b32 s13, s5 9358; GFX900-NEXT: s_mov_b32 s14, s4 9359; GFX900-NEXT: s_mov_b32 s15, s5 9360; GFX900-NEXT: ;;#ASMSTART 9361; GFX900-NEXT: ; use s[8:15] 9362; GFX900-NEXT: ;;#ASMEND 9363; GFX900-NEXT: s_setpc_b64 s[30:31] 9364; 9365; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_u_0_0: 9366; GFX90A: ; %bb.0: 9367; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9368; GFX90A-NEXT: ;;#ASMSTART 9369; GFX90A-NEXT: ; def s[4:9] 9370; GFX90A-NEXT: ;;#ASMEND 9371; GFX90A-NEXT: ;;#ASMSTART 9372; GFX90A-NEXT: ; def s[8:13] 9373; GFX90A-NEXT: ;;#ASMEND 9374; GFX90A-NEXT: s_mov_b32 s8, s12 9375; GFX90A-NEXT: s_mov_b32 s9, s13 9376; GFX90A-NEXT: s_mov_b32 s12, s4 9377; GFX90A-NEXT: s_mov_b32 s13, s5 9378; GFX90A-NEXT: s_mov_b32 s14, s4 9379; GFX90A-NEXT: s_mov_b32 s15, s5 9380; GFX90A-NEXT: ;;#ASMSTART 9381; GFX90A-NEXT: ; use s[8:15] 9382; GFX90A-NEXT: ;;#ASMEND 9383; GFX90A-NEXT: s_setpc_b64 s[30:31] 9384; 9385; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_u_0_0: 9386; GFX940: ; %bb.0: 9387; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9388; GFX940-NEXT: ;;#ASMSTART 9389; GFX940-NEXT: ; def s[0:5] 9390; GFX940-NEXT: ;;#ASMEND 9391; GFX940-NEXT: s_mov_b32 s12, s0 9392; GFX940-NEXT: ;;#ASMSTART 9393; GFX940-NEXT: ; def s[4:9] 9394; GFX940-NEXT: ;;#ASMEND 9395; GFX940-NEXT: s_mov_b32 s13, s1 9396; GFX940-NEXT: s_mov_b32 s14, s0 9397; GFX940-NEXT: s_mov_b32 s15, s1 9398; GFX940-NEXT: ;;#ASMSTART 9399; GFX940-NEXT: ; use s[8:15] 9400; GFX940-NEXT: ;;#ASMEND 9401; GFX940-NEXT: s_setpc_b64 s[30:31] 9402 %vec0 = call <3 x i64> asm "; def $0", "=s"() 9403 %vec1 = call <3 x i64> asm "; def $0", "=s"() 9404 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 poison, i32 0, i32 0> 9405 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 9406 ret void 9407} 9408 9409define void @s_shuffle_v4i64_v3i64__5_1_0_0() { 9410; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_1_0_0: 9411; GFX900: ; %bb.0: 
9412; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9413; GFX900-NEXT: ;;#ASMSTART 9414; GFX900-NEXT: ; def s[4:9] 9415; GFX900-NEXT: ;;#ASMEND 9416; GFX900-NEXT: ;;#ASMSTART 9417; GFX900-NEXT: ; def s[8:13] 9418; GFX900-NEXT: ;;#ASMEND 9419; GFX900-NEXT: s_mov_b32 s8, s12 9420; GFX900-NEXT: s_mov_b32 s9, s13 9421; GFX900-NEXT: s_mov_b32 s10, s6 9422; GFX900-NEXT: s_mov_b32 s11, s7 9423; GFX900-NEXT: s_mov_b32 s12, s4 9424; GFX900-NEXT: s_mov_b32 s13, s5 9425; GFX900-NEXT: s_mov_b32 s14, s4 9426; GFX900-NEXT: s_mov_b32 s15, s5 9427; GFX900-NEXT: ;;#ASMSTART 9428; GFX900-NEXT: ; use s[8:15] 9429; GFX900-NEXT: ;;#ASMEND 9430; GFX900-NEXT: s_setpc_b64 s[30:31] 9431; 9432; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_1_0_0: 9433; GFX90A: ; %bb.0: 9434; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9435; GFX90A-NEXT: ;;#ASMSTART 9436; GFX90A-NEXT: ; def s[4:9] 9437; GFX90A-NEXT: ;;#ASMEND 9438; GFX90A-NEXT: ;;#ASMSTART 9439; GFX90A-NEXT: ; def s[8:13] 9440; GFX90A-NEXT: ;;#ASMEND 9441; GFX90A-NEXT: s_mov_b32 s8, s12 9442; GFX90A-NEXT: s_mov_b32 s9, s13 9443; GFX90A-NEXT: s_mov_b32 s10, s6 9444; GFX90A-NEXT: s_mov_b32 s11, s7 9445; GFX90A-NEXT: s_mov_b32 s12, s4 9446; GFX90A-NEXT: s_mov_b32 s13, s5 9447; GFX90A-NEXT: s_mov_b32 s14, s4 9448; GFX90A-NEXT: s_mov_b32 s15, s5 9449; GFX90A-NEXT: ;;#ASMSTART 9450; GFX90A-NEXT: ; use s[8:15] 9451; GFX90A-NEXT: ;;#ASMEND 9452; GFX90A-NEXT: s_setpc_b64 s[30:31] 9453; 9454; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_1_0_0: 9455; GFX940: ; %bb.0: 9456; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9457; GFX940-NEXT: ;;#ASMSTART 9458; GFX940-NEXT: ; def s[0:5] 9459; GFX940-NEXT: ;;#ASMEND 9460; GFX940-NEXT: s_mov_b32 s10, s2 9461; GFX940-NEXT: ;;#ASMSTART 9462; GFX940-NEXT: ; def s[4:9] 9463; GFX940-NEXT: ;;#ASMEND 9464; GFX940-NEXT: s_mov_b32 s11, s3 9465; GFX940-NEXT: s_mov_b32 s12, s0 9466; GFX940-NEXT: s_mov_b32 s13, s1 9467; GFX940-NEXT: s_mov_b32 s14, s0 9468; GFX940-NEXT: s_mov_b32 s15, s1 9469; GFX940-NEXT: 
;;#ASMSTART 9470; GFX940-NEXT: ; use s[8:15] 9471; GFX940-NEXT: ;;#ASMEND 9472; GFX940-NEXT: s_setpc_b64 s[30:31] 9473 %vec0 = call <3 x i64> asm "; def $0", "=s"() 9474 %vec1 = call <3 x i64> asm "; def $0", "=s"() 9475 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 1, i32 0, i32 0> 9476 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 9477 ret void 9478} 9479 9480define void @s_shuffle_v4i64_v3i64__5_2_0_0() { 9481; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_2_0_0: 9482; GFX900: ; %bb.0: 9483; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9484; GFX900-NEXT: ;;#ASMSTART 9485; GFX900-NEXT: ; def s[16:21] 9486; GFX900-NEXT: ;;#ASMEND 9487; GFX900-NEXT: ;;#ASMSTART 9488; GFX900-NEXT: ; def s[4:9] 9489; GFX900-NEXT: ;;#ASMEND 9490; GFX900-NEXT: s_mov_b32 s10, s20 9491; GFX900-NEXT: s_mov_b32 s11, s21 9492; GFX900-NEXT: s_mov_b32 s12, s16 9493; GFX900-NEXT: s_mov_b32 s13, s17 9494; GFX900-NEXT: s_mov_b32 s14, s16 9495; GFX900-NEXT: s_mov_b32 s15, s17 9496; GFX900-NEXT: ;;#ASMSTART 9497; GFX900-NEXT: ; use s[8:15] 9498; GFX900-NEXT: ;;#ASMEND 9499; GFX900-NEXT: s_setpc_b64 s[30:31] 9500; 9501; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_2_0_0: 9502; GFX90A: ; %bb.0: 9503; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9504; GFX90A-NEXT: ;;#ASMSTART 9505; GFX90A-NEXT: ; def s[16:21] 9506; GFX90A-NEXT: ;;#ASMEND 9507; GFX90A-NEXT: ;;#ASMSTART 9508; GFX90A-NEXT: ; def s[4:9] 9509; GFX90A-NEXT: ;;#ASMEND 9510; GFX90A-NEXT: s_mov_b32 s10, s20 9511; GFX90A-NEXT: s_mov_b32 s11, s21 9512; GFX90A-NEXT: s_mov_b32 s12, s16 9513; GFX90A-NEXT: s_mov_b32 s13, s17 9514; GFX90A-NEXT: s_mov_b32 s14, s16 9515; GFX90A-NEXT: s_mov_b32 s15, s17 9516; GFX90A-NEXT: ;;#ASMSTART 9517; GFX90A-NEXT: ; use s[8:15] 9518; GFX90A-NEXT: ;;#ASMEND 9519; GFX90A-NEXT: s_setpc_b64 s[30:31] 9520; 9521; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_2_0_0: 9522; GFX940: ; %bb.0: 9523; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9524; GFX940-NEXT: 
;;#ASMSTART 9525; GFX940-NEXT: ; def s[8:13] 9526; GFX940-NEXT: ;;#ASMEND 9527; GFX940-NEXT: ;;#ASMSTART 9528; GFX940-NEXT: ; def s[0:5] 9529; GFX940-NEXT: ;;#ASMEND 9530; GFX940-NEXT: s_mov_b32 s8, s12 9531; GFX940-NEXT: s_mov_b32 s9, s13 9532; GFX940-NEXT: s_mov_b32 s10, s4 9533; GFX940-NEXT: s_mov_b32 s11, s5 9534; GFX940-NEXT: s_mov_b32 s12, s0 9535; GFX940-NEXT: s_mov_b32 s13, s1 9536; GFX940-NEXT: s_mov_b32 s14, s0 9537; GFX940-NEXT: s_mov_b32 s15, s1 9538; GFX940-NEXT: ;;#ASMSTART 9539; GFX940-NEXT: ; use s[8:15] 9540; GFX940-NEXT: ;;#ASMEND 9541; GFX940-NEXT: s_setpc_b64 s[30:31] 9542 %vec0 = call <3 x i64> asm "; def $0", "=s"() 9543 %vec1 = call <3 x i64> asm "; def $0", "=s"() 9544 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 2, i32 0, i32 0> 9545 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 9546 ret void 9547} 9548 9549define void @s_shuffle_v4i64_v3i64__5_3_0_0() { 9550; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_3_0_0: 9551; GFX900: ; %bb.0: 9552; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9553; GFX900-NEXT: ;;#ASMSTART 9554; GFX900-NEXT: ; def s[4:9] 9555; GFX900-NEXT: ;;#ASMEND 9556; GFX900-NEXT: ;;#ASMSTART 9557; GFX900-NEXT: ; def s[12:17] 9558; GFX900-NEXT: ;;#ASMEND 9559; GFX900-NEXT: s_mov_b32 s8, s16 9560; GFX900-NEXT: s_mov_b32 s9, s17 9561; GFX900-NEXT: s_mov_b32 s10, s12 9562; GFX900-NEXT: s_mov_b32 s11, s13 9563; GFX900-NEXT: s_mov_b32 s12, s4 9564; GFX900-NEXT: s_mov_b32 s13, s5 9565; GFX900-NEXT: s_mov_b32 s14, s4 9566; GFX900-NEXT: s_mov_b32 s15, s5 9567; GFX900-NEXT: ;;#ASMSTART 9568; GFX900-NEXT: ; use s[8:15] 9569; GFX900-NEXT: ;;#ASMEND 9570; GFX900-NEXT: s_setpc_b64 s[30:31] 9571; 9572; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_3_0_0: 9573; GFX90A: ; %bb.0: 9574; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9575; GFX90A-NEXT: ;;#ASMSTART 9576; GFX90A-NEXT: ; def s[4:9] 9577; GFX90A-NEXT: ;;#ASMEND 9578; GFX90A-NEXT: ;;#ASMSTART 9579; GFX90A-NEXT: ; def s[12:17] 9580; 
GFX90A-NEXT: ;;#ASMEND 9581; GFX90A-NEXT: s_mov_b32 s8, s16 9582; GFX90A-NEXT: s_mov_b32 s9, s17 9583; GFX90A-NEXT: s_mov_b32 s10, s12 9584; GFX90A-NEXT: s_mov_b32 s11, s13 9585; GFX90A-NEXT: s_mov_b32 s12, s4 9586; GFX90A-NEXT: s_mov_b32 s13, s5 9587; GFX90A-NEXT: s_mov_b32 s14, s4 9588; GFX90A-NEXT: s_mov_b32 s15, s5 9589; GFX90A-NEXT: ;;#ASMSTART 9590; GFX90A-NEXT: ; use s[8:15] 9591; GFX90A-NEXT: ;;#ASMEND 9592; GFX90A-NEXT: s_setpc_b64 s[30:31] 9593; 9594; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_3_0_0: 9595; GFX940: ; %bb.0: 9596; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9597; GFX940-NEXT: ;;#ASMSTART 9598; GFX940-NEXT: ; def s[0:5] 9599; GFX940-NEXT: ;;#ASMEND 9600; GFX940-NEXT: s_mov_b32 s12, s0 9601; GFX940-NEXT: ;;#ASMSTART 9602; GFX940-NEXT: ; def s[4:9] 9603; GFX940-NEXT: ;;#ASMEND 9604; GFX940-NEXT: s_mov_b32 s10, s4 9605; GFX940-NEXT: s_mov_b32 s11, s5 9606; GFX940-NEXT: s_mov_b32 s13, s1 9607; GFX940-NEXT: s_mov_b32 s14, s0 9608; GFX940-NEXT: s_mov_b32 s15, s1 9609; GFX940-NEXT: ;;#ASMSTART 9610; GFX940-NEXT: ; use s[8:15] 9611; GFX940-NEXT: ;;#ASMEND 9612; GFX940-NEXT: s_setpc_b64 s[30:31] 9613 %vec0 = call <3 x i64> asm "; def $0", "=s"() 9614 %vec1 = call <3 x i64> asm "; def $0", "=s"() 9615 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 3, i32 0, i32 0> 9616 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 9617 ret void 9618} 9619 9620define void @s_shuffle_v4i64_v3i64__5_4_0_0() { 9621; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_4_0_0: 9622; GFX900: ; %bb.0: 9623; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9624; GFX900-NEXT: ;;#ASMSTART 9625; GFX900-NEXT: ; def s[4:9] 9626; GFX900-NEXT: ;;#ASMEND 9627; GFX900-NEXT: ;;#ASMSTART 9628; GFX900-NEXT: ; def s[8:13] 9629; GFX900-NEXT: ;;#ASMEND 9630; GFX900-NEXT: s_mov_b32 s8, s12 9631; GFX900-NEXT: s_mov_b32 s9, s13 9632; GFX900-NEXT: s_mov_b32 s12, s4 9633; GFX900-NEXT: s_mov_b32 s13, s5 9634; GFX900-NEXT: s_mov_b32 s14, s4 9635; 
GFX900-NEXT: s_mov_b32 s15, s5 9636; GFX900-NEXT: ;;#ASMSTART 9637; GFX900-NEXT: ; use s[8:15] 9638; GFX900-NEXT: ;;#ASMEND 9639; GFX900-NEXT: s_setpc_b64 s[30:31] 9640; 9641; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_4_0_0: 9642; GFX90A: ; %bb.0: 9643; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9644; GFX90A-NEXT: ;;#ASMSTART 9645; GFX90A-NEXT: ; def s[4:9] 9646; GFX90A-NEXT: ;;#ASMEND 9647; GFX90A-NEXT: ;;#ASMSTART 9648; GFX90A-NEXT: ; def s[8:13] 9649; GFX90A-NEXT: ;;#ASMEND 9650; GFX90A-NEXT: s_mov_b32 s8, s12 9651; GFX90A-NEXT: s_mov_b32 s9, s13 9652; GFX90A-NEXT: s_mov_b32 s12, s4 9653; GFX90A-NEXT: s_mov_b32 s13, s5 9654; GFX90A-NEXT: s_mov_b32 s14, s4 9655; GFX90A-NEXT: s_mov_b32 s15, s5 9656; GFX90A-NEXT: ;;#ASMSTART 9657; GFX90A-NEXT: ; use s[8:15] 9658; GFX90A-NEXT: ;;#ASMEND 9659; GFX90A-NEXT: s_setpc_b64 s[30:31] 9660; 9661; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_4_0_0: 9662; GFX940: ; %bb.0: 9663; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9664; GFX940-NEXT: ;;#ASMSTART 9665; GFX940-NEXT: ; def s[8:13] 9666; GFX940-NEXT: ;;#ASMEND 9667; GFX940-NEXT: ;;#ASMSTART 9668; GFX940-NEXT: ; def s[0:5] 9669; GFX940-NEXT: ;;#ASMEND 9670; GFX940-NEXT: s_mov_b32 s8, s12 9671; GFX940-NEXT: s_mov_b32 s9, s13 9672; GFX940-NEXT: s_mov_b32 s12, s0 9673; GFX940-NEXT: s_mov_b32 s13, s1 9674; GFX940-NEXT: s_mov_b32 s14, s0 9675; GFX940-NEXT: s_mov_b32 s15, s1 9676; GFX940-NEXT: ;;#ASMSTART 9677; GFX940-NEXT: ; use s[8:15] 9678; GFX940-NEXT: ;;#ASMEND 9679; GFX940-NEXT: s_setpc_b64 s[30:31] 9680 %vec0 = call <3 x i64> asm "; def $0", "=s"() 9681 %vec1 = call <3 x i64> asm "; def $0", "=s"() 9682 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 4, i32 0, i32 0> 9683 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 9684 ret void 9685} 9686 9687define void @s_shuffle_v4i64_v3i64__5_5_0_0() { 9688; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_0_0: 9689; GFX900: ; %bb.0: 9690; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) 9691; GFX900-NEXT: ;;#ASMSTART 9692; GFX900-NEXT: ; def s[4:9] 9693; GFX900-NEXT: ;;#ASMEND 9694; GFX900-NEXT: ;;#ASMSTART 9695; GFX900-NEXT: ; def s[8:13] 9696; GFX900-NEXT: ;;#ASMEND 9697; GFX900-NEXT: s_mov_b32 s8, s12 9698; GFX900-NEXT: s_mov_b32 s9, s13 9699; GFX900-NEXT: s_mov_b32 s10, s12 9700; GFX900-NEXT: s_mov_b32 s11, s13 9701; GFX900-NEXT: s_mov_b32 s12, s4 9702; GFX900-NEXT: s_mov_b32 s13, s5 9703; GFX900-NEXT: s_mov_b32 s14, s4 9704; GFX900-NEXT: s_mov_b32 s15, s5 9705; GFX900-NEXT: ;;#ASMSTART 9706; GFX900-NEXT: ; use s[8:15] 9707; GFX900-NEXT: ;;#ASMEND 9708; GFX900-NEXT: s_setpc_b64 s[30:31] 9709; 9710; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_0_0: 9711; GFX90A: ; %bb.0: 9712; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9713; GFX90A-NEXT: ;;#ASMSTART 9714; GFX90A-NEXT: ; def s[4:9] 9715; GFX90A-NEXT: ;;#ASMEND 9716; GFX90A-NEXT: ;;#ASMSTART 9717; GFX90A-NEXT: ; def s[8:13] 9718; GFX90A-NEXT: ;;#ASMEND 9719; GFX90A-NEXT: s_mov_b32 s8, s12 9720; GFX90A-NEXT: s_mov_b32 s9, s13 9721; GFX90A-NEXT: s_mov_b32 s10, s12 9722; GFX90A-NEXT: s_mov_b32 s11, s13 9723; GFX90A-NEXT: s_mov_b32 s12, s4 9724; GFX90A-NEXT: s_mov_b32 s13, s5 9725; GFX90A-NEXT: s_mov_b32 s14, s4 9726; GFX90A-NEXT: s_mov_b32 s15, s5 9727; GFX90A-NEXT: ;;#ASMSTART 9728; GFX90A-NEXT: ; use s[8:15] 9729; GFX90A-NEXT: ;;#ASMEND 9730; GFX90A-NEXT: s_setpc_b64 s[30:31] 9731; 9732; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_0_0: 9733; GFX940: ; %bb.0: 9734; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9735; GFX940-NEXT: ;;#ASMSTART 9736; GFX940-NEXT: ; def s[8:13] 9737; GFX940-NEXT: ;;#ASMEND 9738; GFX940-NEXT: ;;#ASMSTART 9739; GFX940-NEXT: ; def s[0:5] 9740; GFX940-NEXT: ;;#ASMEND 9741; GFX940-NEXT: s_mov_b32 s8, s12 9742; GFX940-NEXT: s_mov_b32 s9, s13 9743; GFX940-NEXT: s_mov_b32 s10, s12 9744; GFX940-NEXT: s_mov_b32 s11, s13 9745; GFX940-NEXT: s_mov_b32 s12, s0 9746; GFX940-NEXT: s_mov_b32 s13, s1 9747; GFX940-NEXT: s_mov_b32 s14, s0 9748; GFX940-NEXT: s_mov_b32 
s15, s1 9749; GFX940-NEXT: ;;#ASMSTART 9750; GFX940-NEXT: ; use s[8:15] 9751; GFX940-NEXT: ;;#ASMEND 9752; GFX940-NEXT: s_setpc_b64 s[30:31] 9753 %vec0 = call <3 x i64> asm "; def $0", "=s"() 9754 %vec1 = call <3 x i64> asm "; def $0", "=s"() 9755 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 0, i32 0> 9756 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 9757 ret void 9758} 9759 9760define void @s_shuffle_v4i64_v3i64__5_5_u_0() { 9761; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_u_0: 9762; GFX900: ; %bb.0: 9763; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9764; GFX900-NEXT: ;;#ASMSTART 9765; GFX900-NEXT: ; def s[4:9] 9766; GFX900-NEXT: ;;#ASMEND 9767; GFX900-NEXT: ;;#ASMSTART 9768; GFX900-NEXT: ; def s[8:13] 9769; GFX900-NEXT: ;;#ASMEND 9770; GFX900-NEXT: s_mov_b32 s8, s12 9771; GFX900-NEXT: s_mov_b32 s9, s13 9772; GFX900-NEXT: s_mov_b32 s10, s12 9773; GFX900-NEXT: s_mov_b32 s11, s13 9774; GFX900-NEXT: s_mov_b32 s14, s4 9775; GFX900-NEXT: s_mov_b32 s15, s5 9776; GFX900-NEXT: ;;#ASMSTART 9777; GFX900-NEXT: ; use s[8:15] 9778; GFX900-NEXT: ;;#ASMEND 9779; GFX900-NEXT: s_setpc_b64 s[30:31] 9780; 9781; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_u_0: 9782; GFX90A: ; %bb.0: 9783; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9784; GFX90A-NEXT: ;;#ASMSTART 9785; GFX90A-NEXT: ; def s[4:9] 9786; GFX90A-NEXT: ;;#ASMEND 9787; GFX90A-NEXT: ;;#ASMSTART 9788; GFX90A-NEXT: ; def s[8:13] 9789; GFX90A-NEXT: ;;#ASMEND 9790; GFX90A-NEXT: s_mov_b32 s8, s12 9791; GFX90A-NEXT: s_mov_b32 s9, s13 9792; GFX90A-NEXT: s_mov_b32 s10, s12 9793; GFX90A-NEXT: s_mov_b32 s11, s13 9794; GFX90A-NEXT: s_mov_b32 s14, s4 9795; GFX90A-NEXT: s_mov_b32 s15, s5 9796; GFX90A-NEXT: ;;#ASMSTART 9797; GFX90A-NEXT: ; use s[8:15] 9798; GFX90A-NEXT: ;;#ASMEND 9799; GFX90A-NEXT: s_setpc_b64 s[30:31] 9800; 9801; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_u_0: 9802; GFX940: ; %bb.0: 9803; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9804; 
GFX940-NEXT: ;;#ASMSTART 9805; GFX940-NEXT: ; def s[8:13] 9806; GFX940-NEXT: ;;#ASMEND 9807; GFX940-NEXT: ;;#ASMSTART 9808; GFX940-NEXT: ; def s[0:5] 9809; GFX940-NEXT: ;;#ASMEND 9810; GFX940-NEXT: s_mov_b32 s8, s12 9811; GFX940-NEXT: s_mov_b32 s9, s13 9812; GFX940-NEXT: s_mov_b32 s10, s12 9813; GFX940-NEXT: s_mov_b32 s11, s13 9814; GFX940-NEXT: s_mov_b32 s14, s0 9815; GFX940-NEXT: s_mov_b32 s15, s1 9816; GFX940-NEXT: ;;#ASMSTART 9817; GFX940-NEXT: ; use s[8:15] 9818; GFX940-NEXT: ;;#ASMEND 9819; GFX940-NEXT: s_setpc_b64 s[30:31] 9820 %vec0 = call <3 x i64> asm "; def $0", "=s"() 9821 %vec1 = call <3 x i64> asm "; def $0", "=s"() 9822 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 poison, i32 0> 9823 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 9824 ret void 9825} 9826 9827define void @s_shuffle_v4i64_v3i64__5_5_1_0() { 9828; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_1_0: 9829; GFX900: ; %bb.0: 9830; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9831; GFX900-NEXT: ;;#ASMSTART 9832; GFX900-NEXT: ; def s[4:9] 9833; GFX900-NEXT: ;;#ASMEND 9834; GFX900-NEXT: ;;#ASMSTART 9835; GFX900-NEXT: ; def s[8:13] 9836; GFX900-NEXT: ;;#ASMEND 9837; GFX900-NEXT: s_mov_b32 s8, s12 9838; GFX900-NEXT: s_mov_b32 s9, s13 9839; GFX900-NEXT: s_mov_b32 s10, s12 9840; GFX900-NEXT: s_mov_b32 s11, s13 9841; GFX900-NEXT: s_mov_b32 s12, s6 9842; GFX900-NEXT: s_mov_b32 s13, s7 9843; GFX900-NEXT: s_mov_b32 s14, s4 9844; GFX900-NEXT: s_mov_b32 s15, s5 9845; GFX900-NEXT: ;;#ASMSTART 9846; GFX900-NEXT: ; use s[8:15] 9847; GFX900-NEXT: ;;#ASMEND 9848; GFX900-NEXT: s_setpc_b64 s[30:31] 9849; 9850; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_1_0: 9851; GFX90A: ; %bb.0: 9852; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9853; GFX90A-NEXT: ;;#ASMSTART 9854; GFX90A-NEXT: ; def s[4:9] 9855; GFX90A-NEXT: ;;#ASMEND 9856; GFX90A-NEXT: ;;#ASMSTART 9857; GFX90A-NEXT: ; def s[8:13] 9858; GFX90A-NEXT: ;;#ASMEND 9859; GFX90A-NEXT: s_mov_b32 s8, 
s12 9860; GFX90A-NEXT: s_mov_b32 s9, s13 9861; GFX90A-NEXT: s_mov_b32 s10, s12 9862; GFX90A-NEXT: s_mov_b32 s11, s13 9863; GFX90A-NEXT: s_mov_b32 s12, s6 9864; GFX90A-NEXT: s_mov_b32 s13, s7 9865; GFX90A-NEXT: s_mov_b32 s14, s4 9866; GFX90A-NEXT: s_mov_b32 s15, s5 9867; GFX90A-NEXT: ;;#ASMSTART 9868; GFX90A-NEXT: ; use s[8:15] 9869; GFX90A-NEXT: ;;#ASMEND 9870; GFX90A-NEXT: s_setpc_b64 s[30:31] 9871; 9872; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_1_0: 9873; GFX940: ; %bb.0: 9874; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9875; GFX940-NEXT: ;;#ASMSTART 9876; GFX940-NEXT: ; def s[8:13] 9877; GFX940-NEXT: ;;#ASMEND 9878; GFX940-NEXT: ;;#ASMSTART 9879; GFX940-NEXT: ; def s[0:5] 9880; GFX940-NEXT: ;;#ASMEND 9881; GFX940-NEXT: s_mov_b32 s8, s12 9882; GFX940-NEXT: s_mov_b32 s9, s13 9883; GFX940-NEXT: s_mov_b32 s10, s12 9884; GFX940-NEXT: s_mov_b32 s11, s13 9885; GFX940-NEXT: s_mov_b32 s12, s2 9886; GFX940-NEXT: s_mov_b32 s13, s3 9887; GFX940-NEXT: s_mov_b32 s14, s0 9888; GFX940-NEXT: s_mov_b32 s15, s1 9889; GFX940-NEXT: ;;#ASMSTART 9890; GFX940-NEXT: ; use s[8:15] 9891; GFX940-NEXT: ;;#ASMEND 9892; GFX940-NEXT: s_setpc_b64 s[30:31] 9893 %vec0 = call <3 x i64> asm "; def $0", "=s"() 9894 %vec1 = call <3 x i64> asm "; def $0", "=s"() 9895 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 1, i32 0> 9896 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 9897 ret void 9898} 9899 9900define void @s_shuffle_v4i64_v3i64__5_5_2_0() { 9901; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_2_0: 9902; GFX900: ; %bb.0: 9903; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9904; GFX900-NEXT: ;;#ASMSTART 9905; GFX900-NEXT: ; def s[8:13] 9906; GFX900-NEXT: ;;#ASMEND 9907; GFX900-NEXT: ;;#ASMSTART 9908; GFX900-NEXT: ; def s[16:21] 9909; GFX900-NEXT: ;;#ASMEND 9910; GFX900-NEXT: s_mov_b32 s8, s12 9911; GFX900-NEXT: s_mov_b32 s9, s13 9912; GFX900-NEXT: s_mov_b32 s10, s12 9913; GFX900-NEXT: s_mov_b32 s11, s13 9914; GFX900-NEXT: 
s_mov_b32 s12, s20 9915; GFX900-NEXT: s_mov_b32 s13, s21 9916; GFX900-NEXT: s_mov_b32 s14, s16 9917; GFX900-NEXT: s_mov_b32 s15, s17 9918; GFX900-NEXT: ;;#ASMSTART 9919; GFX900-NEXT: ; use s[8:15] 9920; GFX900-NEXT: ;;#ASMEND 9921; GFX900-NEXT: s_setpc_b64 s[30:31] 9922; 9923; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_2_0: 9924; GFX90A: ; %bb.0: 9925; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9926; GFX90A-NEXT: ;;#ASMSTART 9927; GFX90A-NEXT: ; def s[8:13] 9928; GFX90A-NEXT: ;;#ASMEND 9929; GFX90A-NEXT: ;;#ASMSTART 9930; GFX90A-NEXT: ; def s[16:21] 9931; GFX90A-NEXT: ;;#ASMEND 9932; GFX90A-NEXT: s_mov_b32 s8, s12 9933; GFX90A-NEXT: s_mov_b32 s9, s13 9934; GFX90A-NEXT: s_mov_b32 s10, s12 9935; GFX90A-NEXT: s_mov_b32 s11, s13 9936; GFX90A-NEXT: s_mov_b32 s12, s20 9937; GFX90A-NEXT: s_mov_b32 s13, s21 9938; GFX90A-NEXT: s_mov_b32 s14, s16 9939; GFX90A-NEXT: s_mov_b32 s15, s17 9940; GFX90A-NEXT: ;;#ASMSTART 9941; GFX90A-NEXT: ; use s[8:15] 9942; GFX90A-NEXT: ;;#ASMEND 9943; GFX90A-NEXT: s_setpc_b64 s[30:31] 9944; 9945; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_2_0: 9946; GFX940: ; %bb.0: 9947; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9948; GFX940-NEXT: ;;#ASMSTART 9949; GFX940-NEXT: ; def s[8:13] 9950; GFX940-NEXT: ;;#ASMEND 9951; GFX940-NEXT: ;;#ASMSTART 9952; GFX940-NEXT: ; def s[0:5] 9953; GFX940-NEXT: ;;#ASMEND 9954; GFX940-NEXT: s_mov_b32 s8, s12 9955; GFX940-NEXT: s_mov_b32 s9, s13 9956; GFX940-NEXT: s_mov_b32 s10, s12 9957; GFX940-NEXT: s_mov_b32 s11, s13 9958; GFX940-NEXT: s_mov_b32 s12, s4 9959; GFX940-NEXT: s_mov_b32 s13, s5 9960; GFX940-NEXT: s_mov_b32 s14, s0 9961; GFX940-NEXT: s_mov_b32 s15, s1 9962; GFX940-NEXT: ;;#ASMSTART 9963; GFX940-NEXT: ; use s[8:15] 9964; GFX940-NEXT: ;;#ASMEND 9965; GFX940-NEXT: s_setpc_b64 s[30:31] 9966 %vec0 = call <3 x i64> asm "; def $0", "=s"() 9967 %vec1 = call <3 x i64> asm "; def $0", "=s"() 9968 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 2, i32 0> 9969 call 
void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 9970 ret void 9971} 9972 9973define void @s_shuffle_v4i64_v3i64__5_5_3_0() { 9974; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_3_0: 9975; GFX900: ; %bb.0: 9976; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9977; GFX900-NEXT: ;;#ASMSTART 9978; GFX900-NEXT: ; def s[4:9] 9979; GFX900-NEXT: ;;#ASMEND 9980; GFX900-NEXT: ;;#ASMSTART 9981; GFX900-NEXT: ; def s[12:17] 9982; GFX900-NEXT: ;;#ASMEND 9983; GFX900-NEXT: s_mov_b32 s8, s16 9984; GFX900-NEXT: s_mov_b32 s9, s17 9985; GFX900-NEXT: s_mov_b32 s10, s16 9986; GFX900-NEXT: s_mov_b32 s11, s17 9987; GFX900-NEXT: s_mov_b32 s14, s4 9988; GFX900-NEXT: s_mov_b32 s15, s5 9989; GFX900-NEXT: ;;#ASMSTART 9990; GFX900-NEXT: ; use s[8:15] 9991; GFX900-NEXT: ;;#ASMEND 9992; GFX900-NEXT: s_setpc_b64 s[30:31] 9993; 9994; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_3_0: 9995; GFX90A: ; %bb.0: 9996; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9997; GFX90A-NEXT: ;;#ASMSTART 9998; GFX90A-NEXT: ; def s[4:9] 9999; GFX90A-NEXT: ;;#ASMEND 10000; GFX90A-NEXT: ;;#ASMSTART 10001; GFX90A-NEXT: ; def s[12:17] 10002; GFX90A-NEXT: ;;#ASMEND 10003; GFX90A-NEXT: s_mov_b32 s8, s16 10004; GFX90A-NEXT: s_mov_b32 s9, s17 10005; GFX90A-NEXT: s_mov_b32 s10, s16 10006; GFX90A-NEXT: s_mov_b32 s11, s17 10007; GFX90A-NEXT: s_mov_b32 s14, s4 10008; GFX90A-NEXT: s_mov_b32 s15, s5 10009; GFX90A-NEXT: ;;#ASMSTART 10010; GFX90A-NEXT: ; use s[8:15] 10011; GFX90A-NEXT: ;;#ASMEND 10012; GFX90A-NEXT: s_setpc_b64 s[30:31] 10013; 10014; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_3_0: 10015; GFX940: ; %bb.0: 10016; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10017; GFX940-NEXT: ;;#ASMSTART 10018; GFX940-NEXT: ; def s[12:17] 10019; GFX940-NEXT: ;;#ASMEND 10020; GFX940-NEXT: ;;#ASMSTART 10021; GFX940-NEXT: ; def s[0:5] 10022; GFX940-NEXT: ;;#ASMEND 10023; GFX940-NEXT: s_mov_b32 s8, s16 10024; GFX940-NEXT: s_mov_b32 s9, s17 10025; GFX940-NEXT: s_mov_b32 s10, s16 10026; GFX940-NEXT: s_mov_b32 s11, 
s17 10027; GFX940-NEXT: s_mov_b32 s14, s0 10028; GFX940-NEXT: s_mov_b32 s15, s1 10029; GFX940-NEXT: ;;#ASMSTART 10030; GFX940-NEXT: ; use s[8:15] 10031; GFX940-NEXT: ;;#ASMEND 10032; GFX940-NEXT: s_setpc_b64 s[30:31] 10033 %vec0 = call <3 x i64> asm "; def $0", "=s"() 10034 %vec1 = call <3 x i64> asm "; def $0", "=s"() 10035 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 3, i32 0> 10036 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 10037 ret void 10038} 10039 10040define void @s_shuffle_v4i64_v3i64__5_5_4_0() { 10041; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_4_0: 10042; GFX900: ; %bb.0: 10043; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10044; GFX900-NEXT: ;;#ASMSTART 10045; GFX900-NEXT: ; def s[4:9] 10046; GFX900-NEXT: ;;#ASMEND 10047; GFX900-NEXT: ;;#ASMSTART 10048; GFX900-NEXT: ; def s[12:17] 10049; GFX900-NEXT: ;;#ASMEND 10050; GFX900-NEXT: s_mov_b32 s8, s16 10051; GFX900-NEXT: s_mov_b32 s9, s17 10052; GFX900-NEXT: s_mov_b32 s10, s16 10053; GFX900-NEXT: s_mov_b32 s11, s17 10054; GFX900-NEXT: s_mov_b32 s12, s14 10055; GFX900-NEXT: s_mov_b32 s13, s15 10056; GFX900-NEXT: s_mov_b32 s14, s4 10057; GFX900-NEXT: s_mov_b32 s15, s5 10058; GFX900-NEXT: ;;#ASMSTART 10059; GFX900-NEXT: ; use s[8:15] 10060; GFX900-NEXT: ;;#ASMEND 10061; GFX900-NEXT: s_setpc_b64 s[30:31] 10062; 10063; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_4_0: 10064; GFX90A: ; %bb.0: 10065; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10066; GFX90A-NEXT: ;;#ASMSTART 10067; GFX90A-NEXT: ; def s[4:9] 10068; GFX90A-NEXT: ;;#ASMEND 10069; GFX90A-NEXT: ;;#ASMSTART 10070; GFX90A-NEXT: ; def s[12:17] 10071; GFX90A-NEXT: ;;#ASMEND 10072; GFX90A-NEXT: s_mov_b32 s8, s16 10073; GFX90A-NEXT: s_mov_b32 s9, s17 10074; GFX90A-NEXT: s_mov_b32 s10, s16 10075; GFX90A-NEXT: s_mov_b32 s11, s17 10076; GFX90A-NEXT: s_mov_b32 s12, s14 10077; GFX90A-NEXT: s_mov_b32 s13, s15 10078; GFX90A-NEXT: s_mov_b32 s14, s4 10079; GFX90A-NEXT: s_mov_b32 s15, s5 10080; 
GFX90A-NEXT: ;;#ASMSTART 10081; GFX90A-NEXT: ; use s[8:15] 10082; GFX90A-NEXT: ;;#ASMEND 10083; GFX90A-NEXT: s_setpc_b64 s[30:31] 10084; 10085; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_4_0: 10086; GFX940: ; %bb.0: 10087; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10088; GFX940-NEXT: ;;#ASMSTART 10089; GFX940-NEXT: ; def s[12:17] 10090; GFX940-NEXT: ;;#ASMEND 10091; GFX940-NEXT: ;;#ASMSTART 10092; GFX940-NEXT: ; def s[0:5] 10093; GFX940-NEXT: ;;#ASMEND 10094; GFX940-NEXT: s_mov_b32 s8, s16 10095; GFX940-NEXT: s_mov_b32 s9, s17 10096; GFX940-NEXT: s_mov_b32 s10, s16 10097; GFX940-NEXT: s_mov_b32 s11, s17 10098; GFX940-NEXT: s_mov_b32 s12, s14 10099; GFX940-NEXT: s_mov_b32 s13, s15 10100; GFX940-NEXT: s_mov_b32 s14, s0 10101; GFX940-NEXT: s_mov_b32 s15, s1 10102; GFX940-NEXT: ;;#ASMSTART 10103; GFX940-NEXT: ; use s[8:15] 10104; GFX940-NEXT: ;;#ASMEND 10105; GFX940-NEXT: s_setpc_b64 s[30:31] 10106 %vec0 = call <3 x i64> asm "; def $0", "=s"() 10107 %vec1 = call <3 x i64> asm "; def $0", "=s"() 10108 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 4, i32 0> 10109 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 10110 ret void 10111} 10112 10113define void @s_shuffle_v4i64_v3i64__u_1_1_1() { 10114; GFX9-LABEL: s_shuffle_v4i64_v3i64__u_1_1_1: 10115; GFX9: ; %bb.0: 10116; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10117; GFX9-NEXT: ;;#ASMSTART 10118; GFX9-NEXT: ; def s[8:13] 10119; GFX9-NEXT: ;;#ASMEND 10120; GFX9-NEXT: s_mov_b32 s12, s10 10121; GFX9-NEXT: s_mov_b32 s13, s11 10122; GFX9-NEXT: s_mov_b32 s14, s10 10123; GFX9-NEXT: s_mov_b32 s15, s11 10124; GFX9-NEXT: ;;#ASMSTART 10125; GFX9-NEXT: ; use s[8:15] 10126; GFX9-NEXT: ;;#ASMEND 10127; GFX9-NEXT: s_setpc_b64 s[30:31] 10128 %vec0 = call <3 x i64> asm "; def $0", "=s"() 10129 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 poison, i32 1, i32 1, i32 1> 10130 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 
10131 ret void 10132} 10133 10134define void @s_shuffle_v4i64_v3i64__0_1_1_1() { 10135; GFX9-LABEL: s_shuffle_v4i64_v3i64__0_1_1_1: 10136; GFX9: ; %bb.0: 10137; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10138; GFX9-NEXT: ;;#ASMSTART 10139; GFX9-NEXT: ; def s[8:13] 10140; GFX9-NEXT: ;;#ASMEND 10141; GFX9-NEXT: s_mov_b32 s12, s10 10142; GFX9-NEXT: s_mov_b32 s13, s11 10143; GFX9-NEXT: s_mov_b32 s14, s10 10144; GFX9-NEXT: s_mov_b32 s15, s11 10145; GFX9-NEXT: ;;#ASMSTART 10146; GFX9-NEXT: ; use s[8:15] 10147; GFX9-NEXT: ;;#ASMEND 10148; GFX9-NEXT: s_setpc_b64 s[30:31] 10149 %vec0 = call <3 x i64> asm "; def $0", "=s"() 10150 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 1> 10151 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 10152 ret void 10153} 10154 10155define void @s_shuffle_v4i64_v3i64__1_1_1_1() { 10156; GFX9-LABEL: s_shuffle_v4i64_v3i64__1_1_1_1: 10157; GFX9: ; %bb.0: 10158; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10159; GFX9-NEXT: ;;#ASMSTART 10160; GFX9-NEXT: ; def s[8:13] 10161; GFX9-NEXT: ;;#ASMEND 10162; GFX9-NEXT: s_mov_b32 s8, s10 10163; GFX9-NEXT: s_mov_b32 s9, s11 10164; GFX9-NEXT: s_mov_b32 s12, s10 10165; GFX9-NEXT: s_mov_b32 s13, s11 10166; GFX9-NEXT: s_mov_b32 s14, s10 10167; GFX9-NEXT: s_mov_b32 s15, s11 10168; GFX9-NEXT: ;;#ASMSTART 10169; GFX9-NEXT: ; use s[8:15] 10170; GFX9-NEXT: ;;#ASMEND 10171; GFX9-NEXT: s_setpc_b64 s[30:31] 10172 %vec0 = call <3 x i64> asm "; def $0", "=s"() 10173 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1> 10174 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 10175 ret void 10176} 10177 10178define void @s_shuffle_v4i64_v3i64__2_1_1_1() { 10179; GFX9-LABEL: s_shuffle_v4i64_v3i64__2_1_1_1: 10180; GFX9: ; %bb.0: 10181; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10182; GFX9-NEXT: ;;#ASMSTART 10183; GFX9-NEXT: ; def s[8:13] 10184; GFX9-NEXT: ;;#ASMEND 10185; 
GFX9-NEXT: s_mov_b32 s8, s12 10186; GFX9-NEXT: s_mov_b32 s9, s13 10187; GFX9-NEXT: s_mov_b32 s12, s10 10188; GFX9-NEXT: s_mov_b32 s13, s11 10189; GFX9-NEXT: s_mov_b32 s14, s10 10190; GFX9-NEXT: s_mov_b32 s15, s11 10191; GFX9-NEXT: ;;#ASMSTART 10192; GFX9-NEXT: ; use s[8:15] 10193; GFX9-NEXT: ;;#ASMEND 10194; GFX9-NEXT: s_setpc_b64 s[30:31] 10195 %vec0 = call <3 x i64> asm "; def $0", "=s"() 10196 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 2, i32 1, i32 1, i32 1> 10197 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 10198 ret void 10199} 10200 10201define void @s_shuffle_v4i64_v3i64__3_1_1_1() { 10202; GFX9-LABEL: s_shuffle_v4i64_v3i64__3_1_1_1: 10203; GFX9: ; %bb.0: 10204; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10205; GFX9-NEXT: ;;#ASMSTART 10206; GFX9-NEXT: ; def s[8:13] 10207; GFX9-NEXT: ;;#ASMEND 10208; GFX9-NEXT: s_mov_b32 s12, s10 10209; GFX9-NEXT: s_mov_b32 s13, s11 10210; GFX9-NEXT: s_mov_b32 s14, s10 10211; GFX9-NEXT: s_mov_b32 s15, s11 10212; GFX9-NEXT: ;;#ASMSTART 10213; GFX9-NEXT: ; use s[8:15] 10214; GFX9-NEXT: ;;#ASMEND 10215; GFX9-NEXT: s_setpc_b64 s[30:31] 10216 %vec0 = call <3 x i64> asm "; def $0", "=s"() 10217 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 3, i32 1, i32 1, i32 1> 10218 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 10219 ret void 10220} 10221 10222define void @s_shuffle_v4i64_v3i64__4_1_1_1() { 10223; GFX900-LABEL: s_shuffle_v4i64_v3i64__4_1_1_1: 10224; GFX900: ; %bb.0: 10225; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10226; GFX900-NEXT: ;;#ASMSTART 10227; GFX900-NEXT: ; def s[8:13] 10228; GFX900-NEXT: ;;#ASMEND 10229; GFX900-NEXT: ;;#ASMSTART 10230; GFX900-NEXT: ; def s[4:9] 10231; GFX900-NEXT: ;;#ASMEND 10232; GFX900-NEXT: s_mov_b32 s8, s6 10233; GFX900-NEXT: s_mov_b32 s9, s7 10234; GFX900-NEXT: s_mov_b32 s12, s10 10235; GFX900-NEXT: s_mov_b32 s13, s11 10236; GFX900-NEXT: s_mov_b32 s14, s10 10237; GFX900-NEXT: 
s_mov_b32 s15, s11 10238; GFX900-NEXT: ;;#ASMSTART 10239; GFX900-NEXT: ; use s[8:15] 10240; GFX900-NEXT: ;;#ASMEND 10241; GFX900-NEXT: s_setpc_b64 s[30:31] 10242; 10243; GFX90A-LABEL: s_shuffle_v4i64_v3i64__4_1_1_1: 10244; GFX90A: ; %bb.0: 10245; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10246; GFX90A-NEXT: ;;#ASMSTART 10247; GFX90A-NEXT: ; def s[8:13] 10248; GFX90A-NEXT: ;;#ASMEND 10249; GFX90A-NEXT: ;;#ASMSTART 10250; GFX90A-NEXT: ; def s[4:9] 10251; GFX90A-NEXT: ;;#ASMEND 10252; GFX90A-NEXT: s_mov_b32 s8, s6 10253; GFX90A-NEXT: s_mov_b32 s9, s7 10254; GFX90A-NEXT: s_mov_b32 s12, s10 10255; GFX90A-NEXT: s_mov_b32 s13, s11 10256; GFX90A-NEXT: s_mov_b32 s14, s10 10257; GFX90A-NEXT: s_mov_b32 s15, s11 10258; GFX90A-NEXT: ;;#ASMSTART 10259; GFX90A-NEXT: ; use s[8:15] 10260; GFX90A-NEXT: ;;#ASMEND 10261; GFX90A-NEXT: s_setpc_b64 s[30:31] 10262; 10263; GFX940-LABEL: s_shuffle_v4i64_v3i64__4_1_1_1: 10264; GFX940: ; %bb.0: 10265; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10266; GFX940-NEXT: ;;#ASMSTART 10267; GFX940-NEXT: ; def s[8:13] 10268; GFX940-NEXT: ;;#ASMEND 10269; GFX940-NEXT: ;;#ASMSTART 10270; GFX940-NEXT: ; def s[0:5] 10271; GFX940-NEXT: ;;#ASMEND 10272; GFX940-NEXT: s_mov_b32 s8, s2 10273; GFX940-NEXT: s_mov_b32 s9, s3 10274; GFX940-NEXT: s_mov_b32 s12, s10 10275; GFX940-NEXT: s_mov_b32 s13, s11 10276; GFX940-NEXT: s_mov_b32 s14, s10 10277; GFX940-NEXT: s_mov_b32 s15, s11 10278; GFX940-NEXT: ;;#ASMSTART 10279; GFX940-NEXT: ; use s[8:15] 10280; GFX940-NEXT: ;;#ASMEND 10281; GFX940-NEXT: s_setpc_b64 s[30:31] 10282 %vec0 = call <3 x i64> asm "; def $0", "=s"() 10283 %vec1 = call <3 x i64> asm "; def $0", "=s"() 10284 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 4, i32 1, i32 1, i32 1> 10285 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 10286 ret void 10287} 10288 10289define void @s_shuffle_v4i64_v3i64__5_1_1_1() { 10290; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_1_1_1: 10291; GFX900: ; %bb.0: 
10292; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10293; GFX900-NEXT: ;;#ASMSTART 10294; GFX900-NEXT: ; def s[8:13] 10295; GFX900-NEXT: ;;#ASMEND 10296; GFX900-NEXT: ;;#ASMSTART 10297; GFX900-NEXT: ; def s[4:9] 10298; GFX900-NEXT: ;;#ASMEND 10299; GFX900-NEXT: s_mov_b32 s12, s10 10300; GFX900-NEXT: s_mov_b32 s13, s11 10301; GFX900-NEXT: s_mov_b32 s14, s10 10302; GFX900-NEXT: s_mov_b32 s15, s11 10303; GFX900-NEXT: ;;#ASMSTART 10304; GFX900-NEXT: ; use s[8:15] 10305; GFX900-NEXT: ;;#ASMEND 10306; GFX900-NEXT: s_setpc_b64 s[30:31] 10307; 10308; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_1_1_1: 10309; GFX90A: ; %bb.0: 10310; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10311; GFX90A-NEXT: ;;#ASMSTART 10312; GFX90A-NEXT: ; def s[8:13] 10313; GFX90A-NEXT: ;;#ASMEND 10314; GFX90A-NEXT: ;;#ASMSTART 10315; GFX90A-NEXT: ; def s[4:9] 10316; GFX90A-NEXT: ;;#ASMEND 10317; GFX90A-NEXT: s_mov_b32 s12, s10 10318; GFX90A-NEXT: s_mov_b32 s13, s11 10319; GFX90A-NEXT: s_mov_b32 s14, s10 10320; GFX90A-NEXT: s_mov_b32 s15, s11 10321; GFX90A-NEXT: ;;#ASMSTART 10322; GFX90A-NEXT: ; use s[8:15] 10323; GFX90A-NEXT: ;;#ASMEND 10324; GFX90A-NEXT: s_setpc_b64 s[30:31] 10325; 10326; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_1_1_1: 10327; GFX940: ; %bb.0: 10328; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10329; GFX940-NEXT: ;;#ASMSTART 10330; GFX940-NEXT: ; def s[8:13] 10331; GFX940-NEXT: ;;#ASMEND 10332; GFX940-NEXT: ;;#ASMSTART 10333; GFX940-NEXT: ; def s[0:5] 10334; GFX940-NEXT: ;;#ASMEND 10335; GFX940-NEXT: s_mov_b32 s8, s4 10336; GFX940-NEXT: s_mov_b32 s9, s5 10337; GFX940-NEXT: s_mov_b32 s12, s10 10338; GFX940-NEXT: s_mov_b32 s13, s11 10339; GFX940-NEXT: s_mov_b32 s14, s10 10340; GFX940-NEXT: s_mov_b32 s15, s11 10341; GFX940-NEXT: ;;#ASMSTART 10342; GFX940-NEXT: ; use s[8:15] 10343; GFX940-NEXT: ;;#ASMEND 10344; GFX940-NEXT: s_setpc_b64 s[30:31] 10345 %vec0 = call <3 x i64> asm "; def $0", "=s"() 10346 %vec1 = call <3 x i64> asm "; def $0", "=s"() 10347 %shuf = 
shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 1, i32 1, i32 1> 10348 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 10349 ret void 10350} 10351 10352define void @s_shuffle_v4i64_v3i64__5_u_1_1() { 10353; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_u_1_1: 10354; GFX900: ; %bb.0: 10355; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10356; GFX900-NEXT: ;;#ASMSTART 10357; GFX900-NEXT: ; def s[4:9] 10358; GFX900-NEXT: ;;#ASMEND 10359; GFX900-NEXT: ;;#ASMSTART 10360; GFX900-NEXT: ; def s[8:13] 10361; GFX900-NEXT: ;;#ASMEND 10362; GFX900-NEXT: s_mov_b32 s8, s12 10363; GFX900-NEXT: s_mov_b32 s9, s13 10364; GFX900-NEXT: s_mov_b32 s12, s6 10365; GFX900-NEXT: s_mov_b32 s13, s7 10366; GFX900-NEXT: s_mov_b32 s14, s6 10367; GFX900-NEXT: s_mov_b32 s15, s7 10368; GFX900-NEXT: ;;#ASMSTART 10369; GFX900-NEXT: ; use s[8:15] 10370; GFX900-NEXT: ;;#ASMEND 10371; GFX900-NEXT: s_setpc_b64 s[30:31] 10372; 10373; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_u_1_1: 10374; GFX90A: ; %bb.0: 10375; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10376; GFX90A-NEXT: ;;#ASMSTART 10377; GFX90A-NEXT: ; def s[4:9] 10378; GFX90A-NEXT: ;;#ASMEND 10379; GFX90A-NEXT: ;;#ASMSTART 10380; GFX90A-NEXT: ; def s[8:13] 10381; GFX90A-NEXT: ;;#ASMEND 10382; GFX90A-NEXT: s_mov_b32 s8, s12 10383; GFX90A-NEXT: s_mov_b32 s9, s13 10384; GFX90A-NEXT: s_mov_b32 s12, s6 10385; GFX90A-NEXT: s_mov_b32 s13, s7 10386; GFX90A-NEXT: s_mov_b32 s14, s6 10387; GFX90A-NEXT: s_mov_b32 s15, s7 10388; GFX90A-NEXT: ;;#ASMSTART 10389; GFX90A-NEXT: ; use s[8:15] 10390; GFX90A-NEXT: ;;#ASMEND 10391; GFX90A-NEXT: s_setpc_b64 s[30:31] 10392; 10393; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_u_1_1: 10394; GFX940: ; %bb.0: 10395; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10396; GFX940-NEXT: ;;#ASMSTART 10397; GFX940-NEXT: ; def s[0:5] 10398; GFX940-NEXT: ;;#ASMEND 10399; GFX940-NEXT: s_mov_b32 s12, s2 10400; GFX940-NEXT: ;;#ASMSTART 10401; GFX940-NEXT: ; def s[4:9] 10402; GFX940-NEXT: 
;;#ASMEND 10403; GFX940-NEXT: s_mov_b32 s13, s3 10404; GFX940-NEXT: s_mov_b32 s14, s2 10405; GFX940-NEXT: s_mov_b32 s15, s3 10406; GFX940-NEXT: ;;#ASMSTART 10407; GFX940-NEXT: ; use s[8:15] 10408; GFX940-NEXT: ;;#ASMEND 10409; GFX940-NEXT: s_setpc_b64 s[30:31] 10410 %vec0 = call <3 x i64> asm "; def $0", "=s"() 10411 %vec1 = call <3 x i64> asm "; def $0", "=s"() 10412 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 poison, i32 1, i32 1> 10413 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 10414 ret void 10415} 10416 10417define void @s_shuffle_v4i64_v3i64__5_0_1_1() { 10418; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_0_1_1: 10419; GFX900: ; %bb.0: 10420; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10421; GFX900-NEXT: ;;#ASMSTART 10422; GFX900-NEXT: ; def s[4:9] 10423; GFX900-NEXT: ;;#ASMEND 10424; GFX900-NEXT: ;;#ASMSTART 10425; GFX900-NEXT: ; def s[8:13] 10426; GFX900-NEXT: ;;#ASMEND 10427; GFX900-NEXT: s_mov_b32 s8, s12 10428; GFX900-NEXT: s_mov_b32 s9, s13 10429; GFX900-NEXT: s_mov_b32 s10, s4 10430; GFX900-NEXT: s_mov_b32 s11, s5 10431; GFX900-NEXT: s_mov_b32 s12, s6 10432; GFX900-NEXT: s_mov_b32 s13, s7 10433; GFX900-NEXT: s_mov_b32 s14, s6 10434; GFX900-NEXT: s_mov_b32 s15, s7 10435; GFX900-NEXT: ;;#ASMSTART 10436; GFX900-NEXT: ; use s[8:15] 10437; GFX900-NEXT: ;;#ASMEND 10438; GFX900-NEXT: s_setpc_b64 s[30:31] 10439; 10440; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_0_1_1: 10441; GFX90A: ; %bb.0: 10442; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10443; GFX90A-NEXT: ;;#ASMSTART 10444; GFX90A-NEXT: ; def s[4:9] 10445; GFX90A-NEXT: ;;#ASMEND 10446; GFX90A-NEXT: ;;#ASMSTART 10447; GFX90A-NEXT: ; def s[8:13] 10448; GFX90A-NEXT: ;;#ASMEND 10449; GFX90A-NEXT: s_mov_b32 s8, s12 10450; GFX90A-NEXT: s_mov_b32 s9, s13 10451; GFX90A-NEXT: s_mov_b32 s10, s4 10452; GFX90A-NEXT: s_mov_b32 s11, s5 10453; GFX90A-NEXT: s_mov_b32 s12, s6 10454; GFX90A-NEXT: s_mov_b32 s13, s7 10455; GFX90A-NEXT: s_mov_b32 s14, s6 
10456; GFX90A-NEXT: s_mov_b32 s15, s7 10457; GFX90A-NEXT: ;;#ASMSTART 10458; GFX90A-NEXT: ; use s[8:15] 10459; GFX90A-NEXT: ;;#ASMEND 10460; GFX90A-NEXT: s_setpc_b64 s[30:31] 10461; 10462; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_0_1_1: 10463; GFX940: ; %bb.0: 10464; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10465; GFX940-NEXT: ;;#ASMSTART 10466; GFX940-NEXT: ; def s[0:5] 10467; GFX940-NEXT: ;;#ASMEND 10468; GFX940-NEXT: s_mov_b32 s10, s0 10469; GFX940-NEXT: ;;#ASMSTART 10470; GFX940-NEXT: ; def s[4:9] 10471; GFX940-NEXT: ;;#ASMEND 10472; GFX940-NEXT: s_mov_b32 s11, s1 10473; GFX940-NEXT: s_mov_b32 s12, s2 10474; GFX940-NEXT: s_mov_b32 s13, s3 10475; GFX940-NEXT: s_mov_b32 s14, s2 10476; GFX940-NEXT: s_mov_b32 s15, s3 10477; GFX940-NEXT: ;;#ASMSTART 10478; GFX940-NEXT: ; use s[8:15] 10479; GFX940-NEXT: ;;#ASMEND 10480; GFX940-NEXT: s_setpc_b64 s[30:31] 10481 %vec0 = call <3 x i64> asm "; def $0", "=s"() 10482 %vec1 = call <3 x i64> asm "; def $0", "=s"() 10483 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 0, i32 1, i32 1> 10484 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 10485 ret void 10486} 10487 10488define void @s_shuffle_v4i64_v3i64__5_2_1_1() { 10489; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_2_1_1: 10490; GFX900: ; %bb.0: 10491; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10492; GFX900-NEXT: ;;#ASMSTART 10493; GFX900-NEXT: ; def s[12:17] 10494; GFX900-NEXT: ;;#ASMEND 10495; GFX900-NEXT: ;;#ASMSTART 10496; GFX900-NEXT: ; def s[4:9] 10497; GFX900-NEXT: ;;#ASMEND 10498; GFX900-NEXT: s_mov_b32 s10, s16 10499; GFX900-NEXT: s_mov_b32 s11, s17 10500; GFX900-NEXT: s_mov_b32 s12, s14 10501; GFX900-NEXT: s_mov_b32 s13, s15 10502; GFX900-NEXT: ;;#ASMSTART 10503; GFX900-NEXT: ; use s[8:15] 10504; GFX900-NEXT: ;;#ASMEND 10505; GFX900-NEXT: s_setpc_b64 s[30:31] 10506; 10507; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_2_1_1: 10508; GFX90A: ; %bb.0: 10509; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 
10510; GFX90A-NEXT: ;;#ASMSTART 10511; GFX90A-NEXT: ; def s[12:17] 10512; GFX90A-NEXT: ;;#ASMEND 10513; GFX90A-NEXT: ;;#ASMSTART 10514; GFX90A-NEXT: ; def s[4:9] 10515; GFX90A-NEXT: ;;#ASMEND 10516; GFX90A-NEXT: s_mov_b32 s10, s16 10517; GFX90A-NEXT: s_mov_b32 s11, s17 10518; GFX90A-NEXT: s_mov_b32 s12, s14 10519; GFX90A-NEXT: s_mov_b32 s13, s15 10520; GFX90A-NEXT: ;;#ASMSTART 10521; GFX90A-NEXT: ; use s[8:15] 10522; GFX90A-NEXT: ;;#ASMEND 10523; GFX90A-NEXT: s_setpc_b64 s[30:31] 10524; 10525; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_2_1_1: 10526; GFX940: ; %bb.0: 10527; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10528; GFX940-NEXT: ;;#ASMSTART 10529; GFX940-NEXT: ; def s[8:13] 10530; GFX940-NEXT: ;;#ASMEND 10531; GFX940-NEXT: ;;#ASMSTART 10532; GFX940-NEXT: ; def s[0:5] 10533; GFX940-NEXT: ;;#ASMEND 10534; GFX940-NEXT: s_mov_b32 s8, s12 10535; GFX940-NEXT: s_mov_b32 s9, s13 10536; GFX940-NEXT: s_mov_b32 s10, s4 10537; GFX940-NEXT: s_mov_b32 s11, s5 10538; GFX940-NEXT: s_mov_b32 s12, s2 10539; GFX940-NEXT: s_mov_b32 s13, s3 10540; GFX940-NEXT: s_mov_b32 s14, s2 10541; GFX940-NEXT: s_mov_b32 s15, s3 10542; GFX940-NEXT: ;;#ASMSTART 10543; GFX940-NEXT: ; use s[8:15] 10544; GFX940-NEXT: ;;#ASMEND 10545; GFX940-NEXT: s_setpc_b64 s[30:31] 10546 %vec0 = call <3 x i64> asm "; def $0", "=s"() 10547 %vec1 = call <3 x i64> asm "; def $0", "=s"() 10548 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 2, i32 1, i32 1> 10549 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 10550 ret void 10551} 10552 10553define void @s_shuffle_v4i64_v3i64__5_3_1_1() { 10554; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_3_1_1: 10555; GFX900: ; %bb.0: 10556; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10557; GFX900-NEXT: ;;#ASMSTART 10558; GFX900-NEXT: ; def s[4:9] 10559; GFX900-NEXT: ;;#ASMEND 10560; GFX900-NEXT: ;;#ASMSTART 10561; GFX900-NEXT: ; def s[12:17] 10562; GFX900-NEXT: ;;#ASMEND 10563; GFX900-NEXT: s_mov_b32 s8, s16 10564; 
GFX900-NEXT: s_mov_b32 s9, s17 10565; GFX900-NEXT: s_mov_b32 s10, s12 10566; GFX900-NEXT: s_mov_b32 s11, s13 10567; GFX900-NEXT: s_mov_b32 s12, s6 10568; GFX900-NEXT: s_mov_b32 s13, s7 10569; GFX900-NEXT: s_mov_b32 s14, s6 10570; GFX900-NEXT: s_mov_b32 s15, s7 10571; GFX900-NEXT: ;;#ASMSTART 10572; GFX900-NEXT: ; use s[8:15] 10573; GFX900-NEXT: ;;#ASMEND 10574; GFX900-NEXT: s_setpc_b64 s[30:31] 10575; 10576; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_3_1_1: 10577; GFX90A: ; %bb.0: 10578; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10579; GFX90A-NEXT: ;;#ASMSTART 10580; GFX90A-NEXT: ; def s[4:9] 10581; GFX90A-NEXT: ;;#ASMEND 10582; GFX90A-NEXT: ;;#ASMSTART 10583; GFX90A-NEXT: ; def s[12:17] 10584; GFX90A-NEXT: ;;#ASMEND 10585; GFX90A-NEXT: s_mov_b32 s8, s16 10586; GFX90A-NEXT: s_mov_b32 s9, s17 10587; GFX90A-NEXT: s_mov_b32 s10, s12 10588; GFX90A-NEXT: s_mov_b32 s11, s13 10589; GFX90A-NEXT: s_mov_b32 s12, s6 10590; GFX90A-NEXT: s_mov_b32 s13, s7 10591; GFX90A-NEXT: s_mov_b32 s14, s6 10592; GFX90A-NEXT: s_mov_b32 s15, s7 10593; GFX90A-NEXT: ;;#ASMSTART 10594; GFX90A-NEXT: ; use s[8:15] 10595; GFX90A-NEXT: ;;#ASMEND 10596; GFX90A-NEXT: s_setpc_b64 s[30:31] 10597; 10598; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_3_1_1: 10599; GFX940: ; %bb.0: 10600; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10601; GFX940-NEXT: ;;#ASMSTART 10602; GFX940-NEXT: ; def s[0:5] 10603; GFX940-NEXT: ;;#ASMEND 10604; GFX940-NEXT: s_mov_b32 s12, s2 10605; GFX940-NEXT: ;;#ASMSTART 10606; GFX940-NEXT: ; def s[4:9] 10607; GFX940-NEXT: ;;#ASMEND 10608; GFX940-NEXT: s_mov_b32 s10, s4 10609; GFX940-NEXT: s_mov_b32 s11, s5 10610; GFX940-NEXT: s_mov_b32 s13, s3 10611; GFX940-NEXT: s_mov_b32 s14, s2 10612; GFX940-NEXT: s_mov_b32 s15, s3 10613; GFX940-NEXT: ;;#ASMSTART 10614; GFX940-NEXT: ; use s[8:15] 10615; GFX940-NEXT: ;;#ASMEND 10616; GFX940-NEXT: s_setpc_b64 s[30:31] 10617 %vec0 = call <3 x i64> asm "; def $0", "=s"() 10618 %vec1 = call <3 x i64> asm "; def $0", "=s"() 10619 %shuf = 
shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 3, i32 1, i32 1> 10620 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 10621 ret void 10622} 10623 10624define void @s_shuffle_v4i64_v3i64__5_4_1_1() { 10625; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_4_1_1: 10626; GFX900: ; %bb.0: 10627; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10628; GFX900-NEXT: ;;#ASMSTART 10629; GFX900-NEXT: ; def s[4:9] 10630; GFX900-NEXT: ;;#ASMEND 10631; GFX900-NEXT: ;;#ASMSTART 10632; GFX900-NEXT: ; def s[8:13] 10633; GFX900-NEXT: ;;#ASMEND 10634; GFX900-NEXT: s_mov_b32 s8, s12 10635; GFX900-NEXT: s_mov_b32 s9, s13 10636; GFX900-NEXT: s_mov_b32 s12, s6 10637; GFX900-NEXT: s_mov_b32 s13, s7 10638; GFX900-NEXT: s_mov_b32 s14, s6 10639; GFX900-NEXT: s_mov_b32 s15, s7 10640; GFX900-NEXT: ;;#ASMSTART 10641; GFX900-NEXT: ; use s[8:15] 10642; GFX900-NEXT: ;;#ASMEND 10643; GFX900-NEXT: s_setpc_b64 s[30:31] 10644; 10645; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_4_1_1: 10646; GFX90A: ; %bb.0: 10647; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10648; GFX90A-NEXT: ;;#ASMSTART 10649; GFX90A-NEXT: ; def s[4:9] 10650; GFX90A-NEXT: ;;#ASMEND 10651; GFX90A-NEXT: ;;#ASMSTART 10652; GFX90A-NEXT: ; def s[8:13] 10653; GFX90A-NEXT: ;;#ASMEND 10654; GFX90A-NEXT: s_mov_b32 s8, s12 10655; GFX90A-NEXT: s_mov_b32 s9, s13 10656; GFX90A-NEXT: s_mov_b32 s12, s6 10657; GFX90A-NEXT: s_mov_b32 s13, s7 10658; GFX90A-NEXT: s_mov_b32 s14, s6 10659; GFX90A-NEXT: s_mov_b32 s15, s7 10660; GFX90A-NEXT: ;;#ASMSTART 10661; GFX90A-NEXT: ; use s[8:15] 10662; GFX90A-NEXT: ;;#ASMEND 10663; GFX90A-NEXT: s_setpc_b64 s[30:31] 10664; 10665; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_4_1_1: 10666; GFX940: ; %bb.0: 10667; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10668; GFX940-NEXT: ;;#ASMSTART 10669; GFX940-NEXT: ; def s[8:13] 10670; GFX940-NEXT: ;;#ASMEND 10671; GFX940-NEXT: ;;#ASMSTART 10672; GFX940-NEXT: ; def s[0:5] 10673; GFX940-NEXT: ;;#ASMEND 10674; GFX940-NEXT: s_mov_b32 
s8, s12 10675; GFX940-NEXT: s_mov_b32 s9, s13 10676; GFX940-NEXT: s_mov_b32 s12, s2 10677; GFX940-NEXT: s_mov_b32 s13, s3 10678; GFX940-NEXT: s_mov_b32 s14, s2 10679; GFX940-NEXT: s_mov_b32 s15, s3 10680; GFX940-NEXT: ;;#ASMSTART 10681; GFX940-NEXT: ; use s[8:15] 10682; GFX940-NEXT: ;;#ASMEND 10683; GFX940-NEXT: s_setpc_b64 s[30:31] 10684 %vec0 = call <3 x i64> asm "; def $0", "=s"() 10685 %vec1 = call <3 x i64> asm "; def $0", "=s"() 10686 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 4, i32 1, i32 1> 10687 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 10688 ret void 10689} 10690 10691define void @s_shuffle_v4i64_v3i64__5_5_1_1() { 10692; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_1_1: 10693; GFX900: ; %bb.0: 10694; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10695; GFX900-NEXT: ;;#ASMSTART 10696; GFX900-NEXT: ; def s[4:9] 10697; GFX900-NEXT: ;;#ASMEND 10698; GFX900-NEXT: ;;#ASMSTART 10699; GFX900-NEXT: ; def s[8:13] 10700; GFX900-NEXT: ;;#ASMEND 10701; GFX900-NEXT: s_mov_b32 s8, s12 10702; GFX900-NEXT: s_mov_b32 s9, s13 10703; GFX900-NEXT: s_mov_b32 s10, s12 10704; GFX900-NEXT: s_mov_b32 s11, s13 10705; GFX900-NEXT: s_mov_b32 s12, s6 10706; GFX900-NEXT: s_mov_b32 s13, s7 10707; GFX900-NEXT: s_mov_b32 s14, s6 10708; GFX900-NEXT: s_mov_b32 s15, s7 10709; GFX900-NEXT: ;;#ASMSTART 10710; GFX900-NEXT: ; use s[8:15] 10711; GFX900-NEXT: ;;#ASMEND 10712; GFX900-NEXT: s_setpc_b64 s[30:31] 10713; 10714; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_1_1: 10715; GFX90A: ; %bb.0: 10716; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10717; GFX90A-NEXT: ;;#ASMSTART 10718; GFX90A-NEXT: ; def s[4:9] 10719; GFX90A-NEXT: ;;#ASMEND 10720; GFX90A-NEXT: ;;#ASMSTART 10721; GFX90A-NEXT: ; def s[8:13] 10722; GFX90A-NEXT: ;;#ASMEND 10723; GFX90A-NEXT: s_mov_b32 s8, s12 10724; GFX90A-NEXT: s_mov_b32 s9, s13 10725; GFX90A-NEXT: s_mov_b32 s10, s12 10726; GFX90A-NEXT: s_mov_b32 s11, s13 10727; GFX90A-NEXT: s_mov_b32 s12, s6 10728; 
GFX90A-NEXT: s_mov_b32 s13, s7 10729; GFX90A-NEXT: s_mov_b32 s14, s6 10730; GFX90A-NEXT: s_mov_b32 s15, s7 10731; GFX90A-NEXT: ;;#ASMSTART 10732; GFX90A-NEXT: ; use s[8:15] 10733; GFX90A-NEXT: ;;#ASMEND 10734; GFX90A-NEXT: s_setpc_b64 s[30:31] 10735; 10736; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_1_1: 10737; GFX940: ; %bb.0: 10738; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10739; GFX940-NEXT: ;;#ASMSTART 10740; GFX940-NEXT: ; def s[8:13] 10741; GFX940-NEXT: ;;#ASMEND 10742; GFX940-NEXT: ;;#ASMSTART 10743; GFX940-NEXT: ; def s[0:5] 10744; GFX940-NEXT: ;;#ASMEND 10745; GFX940-NEXT: s_mov_b32 s8, s12 10746; GFX940-NEXT: s_mov_b32 s9, s13 10747; GFX940-NEXT: s_mov_b32 s10, s12 10748; GFX940-NEXT: s_mov_b32 s11, s13 10749; GFX940-NEXT: s_mov_b32 s12, s2 10750; GFX940-NEXT: s_mov_b32 s13, s3 10751; GFX940-NEXT: s_mov_b32 s14, s2 10752; GFX940-NEXT: s_mov_b32 s15, s3 10753; GFX940-NEXT: ;;#ASMSTART 10754; GFX940-NEXT: ; use s[8:15] 10755; GFX940-NEXT: ;;#ASMEND 10756; GFX940-NEXT: s_setpc_b64 s[30:31] 10757 %vec0 = call <3 x i64> asm "; def $0", "=s"() 10758 %vec1 = call <3 x i64> asm "; def $0", "=s"() 10759 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 1, i32 1> 10760 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 10761 ret void 10762} 10763 10764define void @s_shuffle_v4i64_v3i64__5_5_u_1() { 10765; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_u_1: 10766; GFX900: ; %bb.0: 10767; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10768; GFX900-NEXT: ;;#ASMSTART 10769; GFX900-NEXT: ; def s[4:9] 10770; GFX900-NEXT: ;;#ASMEND 10771; GFX900-NEXT: ;;#ASMSTART 10772; GFX900-NEXT: ; def s[8:13] 10773; GFX900-NEXT: ;;#ASMEND 10774; GFX900-NEXT: s_mov_b32 s8, s12 10775; GFX900-NEXT: s_mov_b32 s9, s13 10776; GFX900-NEXT: s_mov_b32 s10, s12 10777; GFX900-NEXT: s_mov_b32 s11, s13 10778; GFX900-NEXT: s_mov_b32 s14, s6 10779; GFX900-NEXT: s_mov_b32 s15, s7 10780; GFX900-NEXT: ;;#ASMSTART 10781; GFX900-NEXT: ; use 
s[8:15] 10782; GFX900-NEXT: ;;#ASMEND 10783; GFX900-NEXT: s_setpc_b64 s[30:31] 10784; 10785; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_u_1: 10786; GFX90A: ; %bb.0: 10787; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10788; GFX90A-NEXT: ;;#ASMSTART 10789; GFX90A-NEXT: ; def s[4:9] 10790; GFX90A-NEXT: ;;#ASMEND 10791; GFX90A-NEXT: ;;#ASMSTART 10792; GFX90A-NEXT: ; def s[8:13] 10793; GFX90A-NEXT: ;;#ASMEND 10794; GFX90A-NEXT: s_mov_b32 s8, s12 10795; GFX90A-NEXT: s_mov_b32 s9, s13 10796; GFX90A-NEXT: s_mov_b32 s10, s12 10797; GFX90A-NEXT: s_mov_b32 s11, s13 10798; GFX90A-NEXT: s_mov_b32 s14, s6 10799; GFX90A-NEXT: s_mov_b32 s15, s7 10800; GFX90A-NEXT: ;;#ASMSTART 10801; GFX90A-NEXT: ; use s[8:15] 10802; GFX90A-NEXT: ;;#ASMEND 10803; GFX90A-NEXT: s_setpc_b64 s[30:31] 10804; 10805; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_u_1: 10806; GFX940: ; %bb.0: 10807; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10808; GFX940-NEXT: ;;#ASMSTART 10809; GFX940-NEXT: ; def s[8:13] 10810; GFX940-NEXT: ;;#ASMEND 10811; GFX940-NEXT: ;;#ASMSTART 10812; GFX940-NEXT: ; def s[0:5] 10813; GFX940-NEXT: ;;#ASMEND 10814; GFX940-NEXT: s_mov_b32 s8, s12 10815; GFX940-NEXT: s_mov_b32 s9, s13 10816; GFX940-NEXT: s_mov_b32 s10, s12 10817; GFX940-NEXT: s_mov_b32 s11, s13 10818; GFX940-NEXT: s_mov_b32 s14, s2 10819; GFX940-NEXT: s_mov_b32 s15, s3 10820; GFX940-NEXT: ;;#ASMSTART 10821; GFX940-NEXT: ; use s[8:15] 10822; GFX940-NEXT: ;;#ASMEND 10823; GFX940-NEXT: s_setpc_b64 s[30:31] 10824 %vec0 = call <3 x i64> asm "; def $0", "=s"() 10825 %vec1 = call <3 x i64> asm "; def $0", "=s"() 10826 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 poison, i32 1> 10827 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 10828 ret void 10829} 10830 10831define void @s_shuffle_v4i64_v3i64__5_5_0_1() { 10832; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_0_1: 10833; GFX900: ; %bb.0: 10834; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10835; 
GFX900-NEXT: ;;#ASMSTART 10836; GFX900-NEXT: ; def s[4:9] 10837; GFX900-NEXT: ;;#ASMEND 10838; GFX900-NEXT: ;;#ASMSTART 10839; GFX900-NEXT: ; def s[8:13] 10840; GFX900-NEXT: ;;#ASMEND 10841; GFX900-NEXT: s_mov_b32 s8, s12 10842; GFX900-NEXT: s_mov_b32 s9, s13 10843; GFX900-NEXT: s_mov_b32 s10, s12 10844; GFX900-NEXT: s_mov_b32 s11, s13 10845; GFX900-NEXT: s_mov_b32 s12, s4 10846; GFX900-NEXT: s_mov_b32 s13, s5 10847; GFX900-NEXT: s_mov_b32 s14, s6 10848; GFX900-NEXT: s_mov_b32 s15, s7 10849; GFX900-NEXT: ;;#ASMSTART 10850; GFX900-NEXT: ; use s[8:15] 10851; GFX900-NEXT: ;;#ASMEND 10852; GFX900-NEXT: s_setpc_b64 s[30:31] 10853; 10854; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_0_1: 10855; GFX90A: ; %bb.0: 10856; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10857; GFX90A-NEXT: ;;#ASMSTART 10858; GFX90A-NEXT: ; def s[4:9] 10859; GFX90A-NEXT: ;;#ASMEND 10860; GFX90A-NEXT: ;;#ASMSTART 10861; GFX90A-NEXT: ; def s[8:13] 10862; GFX90A-NEXT: ;;#ASMEND 10863; GFX90A-NEXT: s_mov_b32 s8, s12 10864; GFX90A-NEXT: s_mov_b32 s9, s13 10865; GFX90A-NEXT: s_mov_b32 s10, s12 10866; GFX90A-NEXT: s_mov_b32 s11, s13 10867; GFX90A-NEXT: s_mov_b32 s12, s4 10868; GFX90A-NEXT: s_mov_b32 s13, s5 10869; GFX90A-NEXT: s_mov_b32 s14, s6 10870; GFX90A-NEXT: s_mov_b32 s15, s7 10871; GFX90A-NEXT: ;;#ASMSTART 10872; GFX90A-NEXT: ; use s[8:15] 10873; GFX90A-NEXT: ;;#ASMEND 10874; GFX90A-NEXT: s_setpc_b64 s[30:31] 10875; 10876; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_0_1: 10877; GFX940: ; %bb.0: 10878; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10879; GFX940-NEXT: ;;#ASMSTART 10880; GFX940-NEXT: ; def s[8:13] 10881; GFX940-NEXT: ;;#ASMEND 10882; GFX940-NEXT: ;;#ASMSTART 10883; GFX940-NEXT: ; def s[0:5] 10884; GFX940-NEXT: ;;#ASMEND 10885; GFX940-NEXT: s_mov_b32 s8, s12 10886; GFX940-NEXT: s_mov_b32 s9, s13 10887; GFX940-NEXT: s_mov_b32 s10, s12 10888; GFX940-NEXT: s_mov_b32 s11, s13 10889; GFX940-NEXT: s_mov_b32 s12, s0 10890; GFX940-NEXT: s_mov_b32 s13, s1 10891; GFX940-NEXT: s_mov_b32 
s14, s2 10892; GFX940-NEXT: s_mov_b32 s15, s3 10893; GFX940-NEXT: ;;#ASMSTART 10894; GFX940-NEXT: ; use s[8:15] 10895; GFX940-NEXT: ;;#ASMEND 10896; GFX940-NEXT: s_setpc_b64 s[30:31] 10897 %vec0 = call <3 x i64> asm "; def $0", "=s"() 10898 %vec1 = call <3 x i64> asm "; def $0", "=s"() 10899 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 0, i32 1> 10900 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 10901 ret void 10902} 10903 10904define void @s_shuffle_v4i64_v3i64__5_5_2_1() { 10905; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_2_1: 10906; GFX900: ; %bb.0: 10907; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10908; GFX900-NEXT: ;;#ASMSTART 10909; GFX900-NEXT: ; def s[12:17] 10910; GFX900-NEXT: ;;#ASMEND 10911; GFX900-NEXT: ;;#ASMSTART 10912; GFX900-NEXT: ; def s[8:13] 10913; GFX900-NEXT: ;;#ASMEND 10914; GFX900-NEXT: s_mov_b32 s8, s12 10915; GFX900-NEXT: s_mov_b32 s9, s13 10916; GFX900-NEXT: s_mov_b32 s10, s12 10917; GFX900-NEXT: s_mov_b32 s11, s13 10918; GFX900-NEXT: s_mov_b32 s12, s16 10919; GFX900-NEXT: s_mov_b32 s13, s17 10920; GFX900-NEXT: ;;#ASMSTART 10921; GFX900-NEXT: ; use s[8:15] 10922; GFX900-NEXT: ;;#ASMEND 10923; GFX900-NEXT: s_setpc_b64 s[30:31] 10924; 10925; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_2_1: 10926; GFX90A: ; %bb.0: 10927; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10928; GFX90A-NEXT: ;;#ASMSTART 10929; GFX90A-NEXT: ; def s[12:17] 10930; GFX90A-NEXT: ;;#ASMEND 10931; GFX90A-NEXT: ;;#ASMSTART 10932; GFX90A-NEXT: ; def s[8:13] 10933; GFX90A-NEXT: ;;#ASMEND 10934; GFX90A-NEXT: s_mov_b32 s8, s12 10935; GFX90A-NEXT: s_mov_b32 s9, s13 10936; GFX90A-NEXT: s_mov_b32 s10, s12 10937; GFX90A-NEXT: s_mov_b32 s11, s13 10938; GFX90A-NEXT: s_mov_b32 s12, s16 10939; GFX90A-NEXT: s_mov_b32 s13, s17 10940; GFX90A-NEXT: ;;#ASMSTART 10941; GFX90A-NEXT: ; use s[8:15] 10942; GFX90A-NEXT: ;;#ASMEND 10943; GFX90A-NEXT: s_setpc_b64 s[30:31] 10944; 10945; GFX940-LABEL: 
s_shuffle_v4i64_v3i64__5_5_2_1: 10946; GFX940: ; %bb.0: 10947; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10948; GFX940-NEXT: ;;#ASMSTART 10949; GFX940-NEXT: ; def s[8:13] 10950; GFX940-NEXT: ;;#ASMEND 10951; GFX940-NEXT: ;;#ASMSTART 10952; GFX940-NEXT: ; def s[0:5] 10953; GFX940-NEXT: ;;#ASMEND 10954; GFX940-NEXT: s_mov_b32 s8, s12 10955; GFX940-NEXT: s_mov_b32 s9, s13 10956; GFX940-NEXT: s_mov_b32 s10, s12 10957; GFX940-NEXT: s_mov_b32 s11, s13 10958; GFX940-NEXT: s_mov_b32 s12, s4 10959; GFX940-NEXT: s_mov_b32 s13, s5 10960; GFX940-NEXT: s_mov_b32 s14, s2 10961; GFX940-NEXT: s_mov_b32 s15, s3 10962; GFX940-NEXT: ;;#ASMSTART 10963; GFX940-NEXT: ; use s[8:15] 10964; GFX940-NEXT: ;;#ASMEND 10965; GFX940-NEXT: s_setpc_b64 s[30:31] 10966 %vec0 = call <3 x i64> asm "; def $0", "=s"() 10967 %vec1 = call <3 x i64> asm "; def $0", "=s"() 10968 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 2, i32 1> 10969 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 10970 ret void 10971} 10972 10973define void @s_shuffle_v4i64_v3i64__5_5_3_1() { 10974; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_3_1: 10975; GFX900: ; %bb.0: 10976; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10977; GFX900-NEXT: ;;#ASMSTART 10978; GFX900-NEXT: ; def s[4:9] 10979; GFX900-NEXT: ;;#ASMEND 10980; GFX900-NEXT: ;;#ASMSTART 10981; GFX900-NEXT: ; def s[12:17] 10982; GFX900-NEXT: ;;#ASMEND 10983; GFX900-NEXT: s_mov_b32 s8, s16 10984; GFX900-NEXT: s_mov_b32 s9, s17 10985; GFX900-NEXT: s_mov_b32 s10, s16 10986; GFX900-NEXT: s_mov_b32 s11, s17 10987; GFX900-NEXT: s_mov_b32 s14, s6 10988; GFX900-NEXT: s_mov_b32 s15, s7 10989; GFX900-NEXT: ;;#ASMSTART 10990; GFX900-NEXT: ; use s[8:15] 10991; GFX900-NEXT: ;;#ASMEND 10992; GFX900-NEXT: s_setpc_b64 s[30:31] 10993; 10994; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_3_1: 10995; GFX90A: ; %bb.0: 10996; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10997; GFX90A-NEXT: ;;#ASMSTART 10998; GFX90A-NEXT: 
; def s[4:9] 10999; GFX90A-NEXT: ;;#ASMEND 11000; GFX90A-NEXT: ;;#ASMSTART 11001; GFX90A-NEXT: ; def s[12:17] 11002; GFX90A-NEXT: ;;#ASMEND 11003; GFX90A-NEXT: s_mov_b32 s8, s16 11004; GFX90A-NEXT: s_mov_b32 s9, s17 11005; GFX90A-NEXT: s_mov_b32 s10, s16 11006; GFX90A-NEXT: s_mov_b32 s11, s17 11007; GFX90A-NEXT: s_mov_b32 s14, s6 11008; GFX90A-NEXT: s_mov_b32 s15, s7 11009; GFX90A-NEXT: ;;#ASMSTART 11010; GFX90A-NEXT: ; use s[8:15] 11011; GFX90A-NEXT: ;;#ASMEND 11012; GFX90A-NEXT: s_setpc_b64 s[30:31] 11013; 11014; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_3_1: 11015; GFX940: ; %bb.0: 11016; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11017; GFX940-NEXT: ;;#ASMSTART 11018; GFX940-NEXT: ; def s[12:17] 11019; GFX940-NEXT: ;;#ASMEND 11020; GFX940-NEXT: ;;#ASMSTART 11021; GFX940-NEXT: ; def s[0:5] 11022; GFX940-NEXT: ;;#ASMEND 11023; GFX940-NEXT: s_mov_b32 s8, s16 11024; GFX940-NEXT: s_mov_b32 s9, s17 11025; GFX940-NEXT: s_mov_b32 s10, s16 11026; GFX940-NEXT: s_mov_b32 s11, s17 11027; GFX940-NEXT: s_mov_b32 s14, s2 11028; GFX940-NEXT: s_mov_b32 s15, s3 11029; GFX940-NEXT: ;;#ASMSTART 11030; GFX940-NEXT: ; use s[8:15] 11031; GFX940-NEXT: ;;#ASMEND 11032; GFX940-NEXT: s_setpc_b64 s[30:31] 11033 %vec0 = call <3 x i64> asm "; def $0", "=s"() 11034 %vec1 = call <3 x i64> asm "; def $0", "=s"() 11035 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 3, i32 1> 11036 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 11037 ret void 11038} 11039 11040define void @s_shuffle_v4i64_v3i64__5_5_4_1() { 11041; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_4_1: 11042; GFX900: ; %bb.0: 11043; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11044; GFX900-NEXT: ;;#ASMSTART 11045; GFX900-NEXT: ; def s[4:9] 11046; GFX900-NEXT: ;;#ASMEND 11047; GFX900-NEXT: ;;#ASMSTART 11048; GFX900-NEXT: ; def s[12:17] 11049; GFX900-NEXT: ;;#ASMEND 11050; GFX900-NEXT: s_mov_b32 s8, s16 11051; GFX900-NEXT: s_mov_b32 s9, s17 11052; GFX900-NEXT: 
s_mov_b32 s10, s16 11053; GFX900-NEXT: s_mov_b32 s11, s17 11054; GFX900-NEXT: s_mov_b32 s12, s14 11055; GFX900-NEXT: s_mov_b32 s13, s15 11056; GFX900-NEXT: s_mov_b32 s14, s6 11057; GFX900-NEXT: s_mov_b32 s15, s7 11058; GFX900-NEXT: ;;#ASMSTART 11059; GFX900-NEXT: ; use s[8:15] 11060; GFX900-NEXT: ;;#ASMEND 11061; GFX900-NEXT: s_setpc_b64 s[30:31] 11062; 11063; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_4_1: 11064; GFX90A: ; %bb.0: 11065; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11066; GFX90A-NEXT: ;;#ASMSTART 11067; GFX90A-NEXT: ; def s[4:9] 11068; GFX90A-NEXT: ;;#ASMEND 11069; GFX90A-NEXT: ;;#ASMSTART 11070; GFX90A-NEXT: ; def s[12:17] 11071; GFX90A-NEXT: ;;#ASMEND 11072; GFX90A-NEXT: s_mov_b32 s8, s16 11073; GFX90A-NEXT: s_mov_b32 s9, s17 11074; GFX90A-NEXT: s_mov_b32 s10, s16 11075; GFX90A-NEXT: s_mov_b32 s11, s17 11076; GFX90A-NEXT: s_mov_b32 s12, s14 11077; GFX90A-NEXT: s_mov_b32 s13, s15 11078; GFX90A-NEXT: s_mov_b32 s14, s6 11079; GFX90A-NEXT: s_mov_b32 s15, s7 11080; GFX90A-NEXT: ;;#ASMSTART 11081; GFX90A-NEXT: ; use s[8:15] 11082; GFX90A-NEXT: ;;#ASMEND 11083; GFX90A-NEXT: s_setpc_b64 s[30:31] 11084; 11085; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_4_1: 11086; GFX940: ; %bb.0: 11087; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11088; GFX940-NEXT: ;;#ASMSTART 11089; GFX940-NEXT: ; def s[12:17] 11090; GFX940-NEXT: ;;#ASMEND 11091; GFX940-NEXT: ;;#ASMSTART 11092; GFX940-NEXT: ; def s[0:5] 11093; GFX940-NEXT: ;;#ASMEND 11094; GFX940-NEXT: s_mov_b32 s8, s16 11095; GFX940-NEXT: s_mov_b32 s9, s17 11096; GFX940-NEXT: s_mov_b32 s10, s16 11097; GFX940-NEXT: s_mov_b32 s11, s17 11098; GFX940-NEXT: s_mov_b32 s12, s14 11099; GFX940-NEXT: s_mov_b32 s13, s15 11100; GFX940-NEXT: s_mov_b32 s14, s2 11101; GFX940-NEXT: s_mov_b32 s15, s3 11102; GFX940-NEXT: ;;#ASMSTART 11103; GFX940-NEXT: ; use s[8:15] 11104; GFX940-NEXT: ;;#ASMEND 11105; GFX940-NEXT: s_setpc_b64 s[30:31] 11106 %vec0 = call <3 x i64> asm "; def $0", "=s"() 11107 %vec1 = call <3 x i64> asm "; 
def $0", "=s"() 11108 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 4, i32 1> 11109 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 11110 ret void 11111} 11112 11113define void @s_shuffle_v4i64_v3i64__u_2_2_2() { 11114; GFX9-LABEL: s_shuffle_v4i64_v3i64__u_2_2_2: 11115; GFX9: ; %bb.0: 11116; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11117; GFX9-NEXT: ;;#ASMSTART 11118; GFX9-NEXT: ; def s[8:13] 11119; GFX9-NEXT: ;;#ASMEND 11120; GFX9-NEXT: s_mov_b32 s10, s12 11121; GFX9-NEXT: s_mov_b32 s11, s13 11122; GFX9-NEXT: s_mov_b32 s14, s12 11123; GFX9-NEXT: s_mov_b32 s15, s13 11124; GFX9-NEXT: ;;#ASMSTART 11125; GFX9-NEXT: ; use s[8:15] 11126; GFX9-NEXT: ;;#ASMEND 11127; GFX9-NEXT: s_setpc_b64 s[30:31] 11128 %vec0 = call <3 x i64> asm "; def $0", "=s"() 11129 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 poison, i32 2, i32 2, i32 2> 11130 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 11131 ret void 11132} 11133 11134define void @s_shuffle_v4i64_v3i64__0_2_2_2() { 11135; GFX9-LABEL: s_shuffle_v4i64_v3i64__0_2_2_2: 11136; GFX9: ; %bb.0: 11137; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11138; GFX9-NEXT: ;;#ASMSTART 11139; GFX9-NEXT: ; def s[8:13] 11140; GFX9-NEXT: ;;#ASMEND 11141; GFX9-NEXT: s_mov_b32 s10, s12 11142; GFX9-NEXT: s_mov_b32 s11, s13 11143; GFX9-NEXT: s_mov_b32 s14, s12 11144; GFX9-NEXT: s_mov_b32 s15, s13 11145; GFX9-NEXT: ;;#ASMSTART 11146; GFX9-NEXT: ; use s[8:15] 11147; GFX9-NEXT: ;;#ASMEND 11148; GFX9-NEXT: s_setpc_b64 s[30:31] 11149 %vec0 = call <3 x i64> asm "; def $0", "=s"() 11150 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 0, i32 2, i32 2, i32 2> 11151 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 11152 ret void 11153} 11154 11155define void @s_shuffle_v4i64_v3i64__1_2_2_2() { 11156; GFX9-LABEL: s_shuffle_v4i64_v3i64__1_2_2_2: 11157; GFX9: ; %bb.0: 11158; GFX9-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) 11159; GFX9-NEXT: ;;#ASMSTART 11160; GFX9-NEXT: ; def s[8:13] 11161; GFX9-NEXT: ;;#ASMEND 11162; GFX9-NEXT: s_mov_b32 s8, s10 11163; GFX9-NEXT: s_mov_b32 s9, s11 11164; GFX9-NEXT: s_mov_b32 s10, s12 11165; GFX9-NEXT: s_mov_b32 s11, s13 11166; GFX9-NEXT: s_mov_b32 s14, s12 11167; GFX9-NEXT: s_mov_b32 s15, s13 11168; GFX9-NEXT: ;;#ASMSTART 11169; GFX9-NEXT: ; use s[8:15] 11170; GFX9-NEXT: ;;#ASMEND 11171; GFX9-NEXT: s_setpc_b64 s[30:31] 11172 %vec0 = call <3 x i64> asm "; def $0", "=s"() 11173 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 1, i32 2, i32 2, i32 2> 11174 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 11175 ret void 11176} 11177 11178define void @s_shuffle_v4i64_v3i64__2_2_2_2() { 11179; GFX9-LABEL: s_shuffle_v4i64_v3i64__2_2_2_2: 11180; GFX9: ; %bb.0: 11181; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11182; GFX9-NEXT: ;;#ASMSTART 11183; GFX9-NEXT: ; def s[8:13] 11184; GFX9-NEXT: ;;#ASMEND 11185; GFX9-NEXT: s_mov_b32 s8, s12 11186; GFX9-NEXT: s_mov_b32 s9, s13 11187; GFX9-NEXT: s_mov_b32 s10, s12 11188; GFX9-NEXT: s_mov_b32 s11, s13 11189; GFX9-NEXT: s_mov_b32 s14, s12 11190; GFX9-NEXT: s_mov_b32 s15, s13 11191; GFX9-NEXT: ;;#ASMSTART 11192; GFX9-NEXT: ; use s[8:15] 11193; GFX9-NEXT: ;;#ASMEND 11194; GFX9-NEXT: s_setpc_b64 s[30:31] 11195 %vec0 = call <3 x i64> asm "; def $0", "=s"() 11196 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2> 11197 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 11198 ret void 11199} 11200 11201define void @s_shuffle_v4i64_v3i64__3_2_2_2() { 11202; GFX9-LABEL: s_shuffle_v4i64_v3i64__3_2_2_2: 11203; GFX9: ; %bb.0: 11204; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11205; GFX9-NEXT: ;;#ASMSTART 11206; GFX9-NEXT: ; def s[8:13] 11207; GFX9-NEXT: ;;#ASMEND 11208; GFX9-NEXT: s_mov_b32 s10, s12 11209; GFX9-NEXT: s_mov_b32 s11, s13 11210; GFX9-NEXT: s_mov_b32 s14, s12 11211; GFX9-NEXT: 
s_mov_b32 s15, s13 11212; GFX9-NEXT: ;;#ASMSTART 11213; GFX9-NEXT: ; use s[8:15] 11214; GFX9-NEXT: ;;#ASMEND 11215; GFX9-NEXT: s_setpc_b64 s[30:31] 11216 %vec0 = call <3 x i64> asm "; def $0", "=s"() 11217 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 3, i32 2, i32 2, i32 2> 11218 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 11219 ret void 11220} 11221 11222define void @s_shuffle_v4i64_v3i64__4_2_2_2() { 11223; GFX900-LABEL: s_shuffle_v4i64_v3i64__4_2_2_2: 11224; GFX900: ; %bb.0: 11225; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11226; GFX900-NEXT: ;;#ASMSTART 11227; GFX900-NEXT: ; def s[8:13] 11228; GFX900-NEXT: ;;#ASMEND 11229; GFX900-NEXT: ;;#ASMSTART 11230; GFX900-NEXT: ; def s[4:9] 11231; GFX900-NEXT: ;;#ASMEND 11232; GFX900-NEXT: s_mov_b32 s8, s6 11233; GFX900-NEXT: s_mov_b32 s9, s7 11234; GFX900-NEXT: s_mov_b32 s10, s12 11235; GFX900-NEXT: s_mov_b32 s11, s13 11236; GFX900-NEXT: s_mov_b32 s14, s12 11237; GFX900-NEXT: s_mov_b32 s15, s13 11238; GFX900-NEXT: ;;#ASMSTART 11239; GFX900-NEXT: ; use s[8:15] 11240; GFX900-NEXT: ;;#ASMEND 11241; GFX900-NEXT: s_setpc_b64 s[30:31] 11242; 11243; GFX90A-LABEL: s_shuffle_v4i64_v3i64__4_2_2_2: 11244; GFX90A: ; %bb.0: 11245; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11246; GFX90A-NEXT: ;;#ASMSTART 11247; GFX90A-NEXT: ; def s[8:13] 11248; GFX90A-NEXT: ;;#ASMEND 11249; GFX90A-NEXT: ;;#ASMSTART 11250; GFX90A-NEXT: ; def s[4:9] 11251; GFX90A-NEXT: ;;#ASMEND 11252; GFX90A-NEXT: s_mov_b32 s8, s6 11253; GFX90A-NEXT: s_mov_b32 s9, s7 11254; GFX90A-NEXT: s_mov_b32 s10, s12 11255; GFX90A-NEXT: s_mov_b32 s11, s13 11256; GFX90A-NEXT: s_mov_b32 s14, s12 11257; GFX90A-NEXT: s_mov_b32 s15, s13 11258; GFX90A-NEXT: ;;#ASMSTART 11259; GFX90A-NEXT: ; use s[8:15] 11260; GFX90A-NEXT: ;;#ASMEND 11261; GFX90A-NEXT: s_setpc_b64 s[30:31] 11262; 11263; GFX940-LABEL: s_shuffle_v4i64_v3i64__4_2_2_2: 11264; GFX940: ; %bb.0: 11265; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 
11266; GFX940-NEXT: ;;#ASMSTART 11267; GFX940-NEXT: ; def s[8:13] 11268; GFX940-NEXT: ;;#ASMEND 11269; GFX940-NEXT: ;;#ASMSTART 11270; GFX940-NEXT: ; def s[0:5] 11271; GFX940-NEXT: ;;#ASMEND 11272; GFX940-NEXT: s_mov_b32 s8, s2 11273; GFX940-NEXT: s_mov_b32 s9, s3 11274; GFX940-NEXT: s_mov_b32 s10, s12 11275; GFX940-NEXT: s_mov_b32 s11, s13 11276; GFX940-NEXT: s_mov_b32 s14, s12 11277; GFX940-NEXT: s_mov_b32 s15, s13 11278; GFX940-NEXT: ;;#ASMSTART 11279; GFX940-NEXT: ; use s[8:15] 11280; GFX940-NEXT: ;;#ASMEND 11281; GFX940-NEXT: s_setpc_b64 s[30:31] 11282 %vec0 = call <3 x i64> asm "; def $0", "=s"() 11283 %vec1 = call <3 x i64> asm "; def $0", "=s"() 11284 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 4, i32 2, i32 2, i32 2> 11285 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 11286 ret void 11287} 11288 11289define void @s_shuffle_v4i64_v3i64__5_2_2_2() { 11290; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_2_2_2: 11291; GFX900: ; %bb.0: 11292; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11293; GFX900-NEXT: ;;#ASMSTART 11294; GFX900-NEXT: ; def s[8:13] 11295; GFX900-NEXT: ;;#ASMEND 11296; GFX900-NEXT: ;;#ASMSTART 11297; GFX900-NEXT: ; def s[4:9] 11298; GFX900-NEXT: ;;#ASMEND 11299; GFX900-NEXT: s_mov_b32 s10, s12 11300; GFX900-NEXT: s_mov_b32 s11, s13 11301; GFX900-NEXT: s_mov_b32 s14, s12 11302; GFX900-NEXT: s_mov_b32 s15, s13 11303; GFX900-NEXT: ;;#ASMSTART 11304; GFX900-NEXT: ; use s[8:15] 11305; GFX900-NEXT: ;;#ASMEND 11306; GFX900-NEXT: s_setpc_b64 s[30:31] 11307; 11308; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_2_2_2: 11309; GFX90A: ; %bb.0: 11310; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11311; GFX90A-NEXT: ;;#ASMSTART 11312; GFX90A-NEXT: ; def s[8:13] 11313; GFX90A-NEXT: ;;#ASMEND 11314; GFX90A-NEXT: ;;#ASMSTART 11315; GFX90A-NEXT: ; def s[4:9] 11316; GFX90A-NEXT: ;;#ASMEND 11317; GFX90A-NEXT: s_mov_b32 s10, s12 11318; GFX90A-NEXT: s_mov_b32 s11, s13 11319; GFX90A-NEXT: s_mov_b32 s14, s12 11320; 
GFX90A-NEXT: s_mov_b32 s15, s13 11321; GFX90A-NEXT: ;;#ASMSTART 11322; GFX90A-NEXT: ; use s[8:15] 11323; GFX90A-NEXT: ;;#ASMEND 11324; GFX90A-NEXT: s_setpc_b64 s[30:31] 11325; 11326; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_2_2_2: 11327; GFX940: ; %bb.0: 11328; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11329; GFX940-NEXT: ;;#ASMSTART 11330; GFX940-NEXT: ; def s[8:13] 11331; GFX940-NEXT: ;;#ASMEND 11332; GFX940-NEXT: ;;#ASMSTART 11333; GFX940-NEXT: ; def s[0:5] 11334; GFX940-NEXT: ;;#ASMEND 11335; GFX940-NEXT: s_mov_b32 s8, s4 11336; GFX940-NEXT: s_mov_b32 s9, s5 11337; GFX940-NEXT: s_mov_b32 s10, s12 11338; GFX940-NEXT: s_mov_b32 s11, s13 11339; GFX940-NEXT: s_mov_b32 s14, s12 11340; GFX940-NEXT: s_mov_b32 s15, s13 11341; GFX940-NEXT: ;;#ASMSTART 11342; GFX940-NEXT: ; use s[8:15] 11343; GFX940-NEXT: ;;#ASMEND 11344; GFX940-NEXT: s_setpc_b64 s[30:31] 11345 %vec0 = call <3 x i64> asm "; def $0", "=s"() 11346 %vec1 = call <3 x i64> asm "; def $0", "=s"() 11347 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 2, i32 2, i32 2> 11348 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 11349 ret void 11350} 11351 11352define void @s_shuffle_v4i64_v3i64__5_u_2_2() { 11353; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_u_2_2: 11354; GFX900: ; %bb.0: 11355; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11356; GFX900-NEXT: ;;#ASMSTART 11357; GFX900-NEXT: ; def s[8:13] 11358; GFX900-NEXT: ;;#ASMEND 11359; GFX900-NEXT: ;;#ASMSTART 11360; GFX900-NEXT: ; def s[4:9] 11361; GFX900-NEXT: ;;#ASMEND 11362; GFX900-NEXT: s_mov_b32 s14, s12 11363; GFX900-NEXT: s_mov_b32 s15, s13 11364; GFX900-NEXT: ;;#ASMSTART 11365; GFX900-NEXT: ; use s[8:15] 11366; GFX900-NEXT: ;;#ASMEND 11367; GFX900-NEXT: s_setpc_b64 s[30:31] 11368; 11369; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_u_2_2: 11370; GFX90A: ; %bb.0: 11371; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11372; GFX90A-NEXT: ;;#ASMSTART 11373; GFX90A-NEXT: ; def s[8:13] 11374; 
GFX90A-NEXT: ;;#ASMEND 11375; GFX90A-NEXT: ;;#ASMSTART 11376; GFX90A-NEXT: ; def s[4:9] 11377; GFX90A-NEXT: ;;#ASMEND 11378; GFX90A-NEXT: s_mov_b32 s14, s12 11379; GFX90A-NEXT: s_mov_b32 s15, s13 11380; GFX90A-NEXT: ;;#ASMSTART 11381; GFX90A-NEXT: ; use s[8:15] 11382; GFX90A-NEXT: ;;#ASMEND 11383; GFX90A-NEXT: s_setpc_b64 s[30:31] 11384; 11385; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_u_2_2: 11386; GFX940: ; %bb.0: 11387; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11388; GFX940-NEXT: ;;#ASMSTART 11389; GFX940-NEXT: ; def s[8:13] 11390; GFX940-NEXT: ;;#ASMEND 11391; GFX940-NEXT: ;;#ASMSTART 11392; GFX940-NEXT: ; def s[0:5] 11393; GFX940-NEXT: ;;#ASMEND 11394; GFX940-NEXT: s_mov_b32 s8, s4 11395; GFX940-NEXT: s_mov_b32 s9, s5 11396; GFX940-NEXT: s_mov_b32 s14, s12 11397; GFX940-NEXT: s_mov_b32 s15, s13 11398; GFX940-NEXT: ;;#ASMSTART 11399; GFX940-NEXT: ; use s[8:15] 11400; GFX940-NEXT: ;;#ASMEND 11401; GFX940-NEXT: s_setpc_b64 s[30:31] 11402 %vec0 = call <3 x i64> asm "; def $0", "=s"() 11403 %vec1 = call <3 x i64> asm "; def $0", "=s"() 11404 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 poison, i32 2, i32 2> 11405 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 11406 ret void 11407} 11408 11409define void @s_shuffle_v4i64_v3i64__5_0_2_2() { 11410; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_0_2_2: 11411; GFX900: ; %bb.0: 11412; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11413; GFX900-NEXT: ;;#ASMSTART 11414; GFX900-NEXT: ; def s[12:17] 11415; GFX900-NEXT: ;;#ASMEND 11416; GFX900-NEXT: ;;#ASMSTART 11417; GFX900-NEXT: ; def s[4:9] 11418; GFX900-NEXT: ;;#ASMEND 11419; GFX900-NEXT: s_mov_b32 s10, s12 11420; GFX900-NEXT: s_mov_b32 s11, s13 11421; GFX900-NEXT: s_mov_b32 s12, s16 11422; GFX900-NEXT: s_mov_b32 s13, s17 11423; GFX900-NEXT: s_mov_b32 s14, s16 11424; GFX900-NEXT: s_mov_b32 s15, s17 11425; GFX900-NEXT: ;;#ASMSTART 11426; GFX900-NEXT: ; use s[8:15] 11427; GFX900-NEXT: ;;#ASMEND 11428; GFX900-NEXT: 
s_setpc_b64 s[30:31] 11429; 11430; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_0_2_2: 11431; GFX90A: ; %bb.0: 11432; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11433; GFX90A-NEXT: ;;#ASMSTART 11434; GFX90A-NEXT: ; def s[12:17] 11435; GFX90A-NEXT: ;;#ASMEND 11436; GFX90A-NEXT: ;;#ASMSTART 11437; GFX90A-NEXT: ; def s[4:9] 11438; GFX90A-NEXT: ;;#ASMEND 11439; GFX90A-NEXT: s_mov_b32 s10, s12 11440; GFX90A-NEXT: s_mov_b32 s11, s13 11441; GFX90A-NEXT: s_mov_b32 s12, s16 11442; GFX90A-NEXT: s_mov_b32 s13, s17 11443; GFX90A-NEXT: s_mov_b32 s14, s16 11444; GFX90A-NEXT: s_mov_b32 s15, s17 11445; GFX90A-NEXT: ;;#ASMSTART 11446; GFX90A-NEXT: ; use s[8:15] 11447; GFX90A-NEXT: ;;#ASMEND 11448; GFX90A-NEXT: s_setpc_b64 s[30:31] 11449; 11450; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_0_2_2: 11451; GFX940: ; %bb.0: 11452; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11453; GFX940-NEXT: ;;#ASMSTART 11454; GFX940-NEXT: ; def s[8:13] 11455; GFX940-NEXT: ;;#ASMEND 11456; GFX940-NEXT: ;;#ASMSTART 11457; GFX940-NEXT: ; def s[0:5] 11458; GFX940-NEXT: ;;#ASMEND 11459; GFX940-NEXT: s_mov_b32 s8, s12 11460; GFX940-NEXT: s_mov_b32 s9, s13 11461; GFX940-NEXT: s_mov_b32 s10, s0 11462; GFX940-NEXT: s_mov_b32 s11, s1 11463; GFX940-NEXT: s_mov_b32 s12, s4 11464; GFX940-NEXT: s_mov_b32 s13, s5 11465; GFX940-NEXT: s_mov_b32 s14, s4 11466; GFX940-NEXT: s_mov_b32 s15, s5 11467; GFX940-NEXT: ;;#ASMSTART 11468; GFX940-NEXT: ; use s[8:15] 11469; GFX940-NEXT: ;;#ASMEND 11470; GFX940-NEXT: s_setpc_b64 s[30:31] 11471 %vec0 = call <3 x i64> asm "; def $0", "=s"() 11472 %vec1 = call <3 x i64> asm "; def $0", "=s"() 11473 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 0, i32 2, i32 2> 11474 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 11475 ret void 11476} 11477 11478define void @s_shuffle_v4i64_v3i64__5_1_2_2() { 11479; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_1_2_2: 11480; GFX900: ; %bb.0: 11481; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 
11482; GFX900-NEXT: ;;#ASMSTART 11483; GFX900-NEXT: ; def s[8:13] 11484; GFX900-NEXT: ;;#ASMEND 11485; GFX900-NEXT: ;;#ASMSTART 11486; GFX900-NEXT: ; def s[4:9] 11487; GFX900-NEXT: ;;#ASMEND 11488; GFX900-NEXT: s_mov_b32 s14, s12 11489; GFX900-NEXT: s_mov_b32 s15, s13 11490; GFX900-NEXT: ;;#ASMSTART 11491; GFX900-NEXT: ; use s[8:15] 11492; GFX900-NEXT: ;;#ASMEND 11493; GFX900-NEXT: s_setpc_b64 s[30:31] 11494; 11495; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_1_2_2: 11496; GFX90A: ; %bb.0: 11497; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11498; GFX90A-NEXT: ;;#ASMSTART 11499; GFX90A-NEXT: ; def s[8:13] 11500; GFX90A-NEXT: ;;#ASMEND 11501; GFX90A-NEXT: ;;#ASMSTART 11502; GFX90A-NEXT: ; def s[4:9] 11503; GFX90A-NEXT: ;;#ASMEND 11504; GFX90A-NEXT: s_mov_b32 s14, s12 11505; GFX90A-NEXT: s_mov_b32 s15, s13 11506; GFX90A-NEXT: ;;#ASMSTART 11507; GFX90A-NEXT: ; use s[8:15] 11508; GFX90A-NEXT: ;;#ASMEND 11509; GFX90A-NEXT: s_setpc_b64 s[30:31] 11510; 11511; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_1_2_2: 11512; GFX940: ; %bb.0: 11513; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11514; GFX940-NEXT: ;;#ASMSTART 11515; GFX940-NEXT: ; def s[8:13] 11516; GFX940-NEXT: ;;#ASMEND 11517; GFX940-NEXT: ;;#ASMSTART 11518; GFX940-NEXT: ; def s[0:5] 11519; GFX940-NEXT: ;;#ASMEND 11520; GFX940-NEXT: s_mov_b32 s8, s4 11521; GFX940-NEXT: s_mov_b32 s9, s5 11522; GFX940-NEXT: s_mov_b32 s14, s12 11523; GFX940-NEXT: s_mov_b32 s15, s13 11524; GFX940-NEXT: ;;#ASMSTART 11525; GFX940-NEXT: ; use s[8:15] 11526; GFX940-NEXT: ;;#ASMEND 11527; GFX940-NEXT: s_setpc_b64 s[30:31] 11528 %vec0 = call <3 x i64> asm "; def $0", "=s"() 11529 %vec1 = call <3 x i64> asm "; def $0", "=s"() 11530 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 1, i32 2, i32 2> 11531 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 11532 ret void 11533} 11534 11535define void @s_shuffle_v4i64_v3i64__5_3_2_2() { 11536; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_3_2_2: 11537; 
GFX900: ; %bb.0: 11538; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11539; GFX900-NEXT: ;;#ASMSTART 11540; GFX900-NEXT: ; def s[8:13] 11541; GFX900-NEXT: ;;#ASMEND 11542; GFX900-NEXT: ;;#ASMSTART 11543; GFX900-NEXT: ; def s[4:9] 11544; GFX900-NEXT: ;;#ASMEND 11545; GFX900-NEXT: s_mov_b32 s10, s4 11546; GFX900-NEXT: s_mov_b32 s11, s5 11547; GFX900-NEXT: s_mov_b32 s14, s12 11548; GFX900-NEXT: s_mov_b32 s15, s13 11549; GFX900-NEXT: ;;#ASMSTART 11550; GFX900-NEXT: ; use s[8:15] 11551; GFX900-NEXT: ;;#ASMEND 11552; GFX900-NEXT: s_setpc_b64 s[30:31] 11553; 11554; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_3_2_2: 11555; GFX90A: ; %bb.0: 11556; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11557; GFX90A-NEXT: ;;#ASMSTART 11558; GFX90A-NEXT: ; def s[8:13] 11559; GFX90A-NEXT: ;;#ASMEND 11560; GFX90A-NEXT: ;;#ASMSTART 11561; GFX90A-NEXT: ; def s[4:9] 11562; GFX90A-NEXT: ;;#ASMEND 11563; GFX90A-NEXT: s_mov_b32 s10, s4 11564; GFX90A-NEXT: s_mov_b32 s11, s5 11565; GFX90A-NEXT: s_mov_b32 s14, s12 11566; GFX90A-NEXT: s_mov_b32 s15, s13 11567; GFX90A-NEXT: ;;#ASMSTART 11568; GFX90A-NEXT: ; use s[8:15] 11569; GFX90A-NEXT: ;;#ASMEND 11570; GFX90A-NEXT: s_setpc_b64 s[30:31] 11571; 11572; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_3_2_2: 11573; GFX940: ; %bb.0: 11574; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11575; GFX940-NEXT: ;;#ASMSTART 11576; GFX940-NEXT: ; def s[8:13] 11577; GFX940-NEXT: ;;#ASMEND 11578; GFX940-NEXT: ;;#ASMSTART 11579; GFX940-NEXT: ; def s[0:5] 11580; GFX940-NEXT: ;;#ASMEND 11581; GFX940-NEXT: s_mov_b32 s8, s4 11582; GFX940-NEXT: s_mov_b32 s9, s5 11583; GFX940-NEXT: s_mov_b32 s10, s0 11584; GFX940-NEXT: s_mov_b32 s11, s1 11585; GFX940-NEXT: s_mov_b32 s14, s12 11586; GFX940-NEXT: s_mov_b32 s15, s13 11587; GFX940-NEXT: ;;#ASMSTART 11588; GFX940-NEXT: ; use s[8:15] 11589; GFX940-NEXT: ;;#ASMEND 11590; GFX940-NEXT: s_setpc_b64 s[30:31] 11591 %vec0 = call <3 x i64> asm "; def $0", "=s"() 11592 %vec1 = call <3 x i64> asm "; def $0", "=s"() 11593 
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 3, i32 2, i32 2> 11594 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 11595 ret void 11596} 11597 11598define void @s_shuffle_v4i64_v3i64__5_4_2_2() { 11599; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_4_2_2: 11600; GFX900: ; %bb.0: 11601; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11602; GFX900-NEXT: ;;#ASMSTART 11603; GFX900-NEXT: ; def s[12:17] 11604; GFX900-NEXT: ;;#ASMEND 11605; GFX900-NEXT: ;;#ASMSTART 11606; GFX900-NEXT: ; def s[8:13] 11607; GFX900-NEXT: ;;#ASMEND 11608; GFX900-NEXT: s_mov_b32 s8, s12 11609; GFX900-NEXT: s_mov_b32 s9, s13 11610; GFX900-NEXT: s_mov_b32 s12, s16 11611; GFX900-NEXT: s_mov_b32 s13, s17 11612; GFX900-NEXT: s_mov_b32 s14, s16 11613; GFX900-NEXT: s_mov_b32 s15, s17 11614; GFX900-NEXT: ;;#ASMSTART 11615; GFX900-NEXT: ; use s[8:15] 11616; GFX900-NEXT: ;;#ASMEND 11617; GFX900-NEXT: s_setpc_b64 s[30:31] 11618; 11619; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_4_2_2: 11620; GFX90A: ; %bb.0: 11621; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11622; GFX90A-NEXT: ;;#ASMSTART 11623; GFX90A-NEXT: ; def s[12:17] 11624; GFX90A-NEXT: ;;#ASMEND 11625; GFX90A-NEXT: ;;#ASMSTART 11626; GFX90A-NEXT: ; def s[8:13] 11627; GFX90A-NEXT: ;;#ASMEND 11628; GFX90A-NEXT: s_mov_b32 s8, s12 11629; GFX90A-NEXT: s_mov_b32 s9, s13 11630; GFX90A-NEXT: s_mov_b32 s12, s16 11631; GFX90A-NEXT: s_mov_b32 s13, s17 11632; GFX90A-NEXT: s_mov_b32 s14, s16 11633; GFX90A-NEXT: s_mov_b32 s15, s17 11634; GFX90A-NEXT: ;;#ASMSTART 11635; GFX90A-NEXT: ; use s[8:15] 11636; GFX90A-NEXT: ;;#ASMEND 11637; GFX90A-NEXT: s_setpc_b64 s[30:31] 11638; 11639; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_4_2_2: 11640; GFX940: ; %bb.0: 11641; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11642; GFX940-NEXT: ;;#ASMSTART 11643; GFX940-NEXT: ; def s[8:13] 11644; GFX940-NEXT: ;;#ASMEND 11645; GFX940-NEXT: ;;#ASMSTART 11646; GFX940-NEXT: ; def s[0:5] 11647; GFX940-NEXT: ;;#ASMEND 11648; 
GFX940-NEXT: s_mov_b32 s8, s12 11649; GFX940-NEXT: s_mov_b32 s9, s13 11650; GFX940-NEXT: s_mov_b32 s12, s4 11651; GFX940-NEXT: s_mov_b32 s13, s5 11652; GFX940-NEXT: s_mov_b32 s14, s4 11653; GFX940-NEXT: s_mov_b32 s15, s5 11654; GFX940-NEXT: ;;#ASMSTART 11655; GFX940-NEXT: ; use s[8:15] 11656; GFX940-NEXT: ;;#ASMEND 11657; GFX940-NEXT: s_setpc_b64 s[30:31] 11658 %vec0 = call <3 x i64> asm "; def $0", "=s"() 11659 %vec1 = call <3 x i64> asm "; def $0", "=s"() 11660 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 4, i32 2, i32 2> 11661 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 11662 ret void 11663} 11664 11665define void @s_shuffle_v4i64_v3i64__5_5_2_2() { 11666; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_2_2: 11667; GFX900: ; %bb.0: 11668; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11669; GFX900-NEXT: ;;#ASMSTART 11670; GFX900-NEXT: ; def s[8:13] 11671; GFX900-NEXT: ;;#ASMEND 11672; GFX900-NEXT: ;;#ASMSTART 11673; GFX900-NEXT: ; def s[16:21] 11674; GFX900-NEXT: ;;#ASMEND 11675; GFX900-NEXT: s_mov_b32 s8, s20 11676; GFX900-NEXT: s_mov_b32 s9, s21 11677; GFX900-NEXT: s_mov_b32 s10, s20 11678; GFX900-NEXT: s_mov_b32 s11, s21 11679; GFX900-NEXT: s_mov_b32 s14, s12 11680; GFX900-NEXT: s_mov_b32 s15, s13 11681; GFX900-NEXT: ;;#ASMSTART 11682; GFX900-NEXT: ; use s[8:15] 11683; GFX900-NEXT: ;;#ASMEND 11684; GFX900-NEXT: s_setpc_b64 s[30:31] 11685; 11686; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_2_2: 11687; GFX90A: ; %bb.0: 11688; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11689; GFX90A-NEXT: ;;#ASMSTART 11690; GFX90A-NEXT: ; def s[8:13] 11691; GFX90A-NEXT: ;;#ASMEND 11692; GFX90A-NEXT: ;;#ASMSTART 11693; GFX90A-NEXT: ; def s[16:21] 11694; GFX90A-NEXT: ;;#ASMEND 11695; GFX90A-NEXT: s_mov_b32 s8, s20 11696; GFX90A-NEXT: s_mov_b32 s9, s21 11697; GFX90A-NEXT: s_mov_b32 s10, s20 11698; GFX90A-NEXT: s_mov_b32 s11, s21 11699; GFX90A-NEXT: s_mov_b32 s14, s12 11700; GFX90A-NEXT: s_mov_b32 s15, s13 11701; 
GFX90A-NEXT: ;;#ASMSTART 11702; GFX90A-NEXT: ; use s[8:15] 11703; GFX90A-NEXT: ;;#ASMEND 11704; GFX90A-NEXT: s_setpc_b64 s[30:31] 11705; 11706; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_2_2: 11707; GFX940: ; %bb.0: 11708; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11709; GFX940-NEXT: ;;#ASMSTART 11710; GFX940-NEXT: ; def s[8:13] 11711; GFX940-NEXT: ;;#ASMEND 11712; GFX940-NEXT: ;;#ASMSTART 11713; GFX940-NEXT: ; def s[0:5] 11714; GFX940-NEXT: ;;#ASMEND 11715; GFX940-NEXT: s_mov_b32 s8, s4 11716; GFX940-NEXT: s_mov_b32 s9, s5 11717; GFX940-NEXT: s_mov_b32 s10, s4 11718; GFX940-NEXT: s_mov_b32 s11, s5 11719; GFX940-NEXT: s_mov_b32 s14, s12 11720; GFX940-NEXT: s_mov_b32 s15, s13 11721; GFX940-NEXT: ;;#ASMSTART 11722; GFX940-NEXT: ; use s[8:15] 11723; GFX940-NEXT: ;;#ASMEND 11724; GFX940-NEXT: s_setpc_b64 s[30:31] 11725 %vec0 = call <3 x i64> asm "; def $0", "=s"() 11726 %vec1 = call <3 x i64> asm "; def $0", "=s"() 11727 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 2, i32 2> 11728 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 11729 ret void 11730} 11731 11732define void @s_shuffle_v4i64_v3i64__5_5_u_2() { 11733; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_u_2: 11734; GFX900: ; %bb.0: 11735; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11736; GFX900-NEXT: ;;#ASMSTART 11737; GFX900-NEXT: ; def s[8:13] 11738; GFX900-NEXT: ;;#ASMEND 11739; GFX900-NEXT: ;;#ASMSTART 11740; GFX900-NEXT: ; def s[16:21] 11741; GFX900-NEXT: ;;#ASMEND 11742; GFX900-NEXT: s_mov_b32 s8, s20 11743; GFX900-NEXT: s_mov_b32 s9, s21 11744; GFX900-NEXT: s_mov_b32 s10, s20 11745; GFX900-NEXT: s_mov_b32 s11, s21 11746; GFX900-NEXT: s_mov_b32 s14, s12 11747; GFX900-NEXT: s_mov_b32 s15, s13 11748; GFX900-NEXT: ;;#ASMSTART 11749; GFX900-NEXT: ; use s[8:15] 11750; GFX900-NEXT: ;;#ASMEND 11751; GFX900-NEXT: s_setpc_b64 s[30:31] 11752; 11753; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_u_2: 11754; GFX90A: ; %bb.0: 11755; GFX90A-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11756; GFX90A-NEXT: ;;#ASMSTART 11757; GFX90A-NEXT: ; def s[8:13] 11758; GFX90A-NEXT: ;;#ASMEND 11759; GFX90A-NEXT: ;;#ASMSTART 11760; GFX90A-NEXT: ; def s[16:21] 11761; GFX90A-NEXT: ;;#ASMEND 11762; GFX90A-NEXT: s_mov_b32 s8, s20 11763; GFX90A-NEXT: s_mov_b32 s9, s21 11764; GFX90A-NEXT: s_mov_b32 s10, s20 11765; GFX90A-NEXT: s_mov_b32 s11, s21 11766; GFX90A-NEXT: s_mov_b32 s14, s12 11767; GFX90A-NEXT: s_mov_b32 s15, s13 11768; GFX90A-NEXT: ;;#ASMSTART 11769; GFX90A-NEXT: ; use s[8:15] 11770; GFX90A-NEXT: ;;#ASMEND 11771; GFX90A-NEXT: s_setpc_b64 s[30:31] 11772; 11773; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_u_2: 11774; GFX940: ; %bb.0: 11775; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11776; GFX940-NEXT: ;;#ASMSTART 11777; GFX940-NEXT: ; def s[8:13] 11778; GFX940-NEXT: ;;#ASMEND 11779; GFX940-NEXT: ;;#ASMSTART 11780; GFX940-NEXT: ; def s[0:5] 11781; GFX940-NEXT: ;;#ASMEND 11782; GFX940-NEXT: s_mov_b32 s8, s12 11783; GFX940-NEXT: s_mov_b32 s9, s13 11784; GFX940-NEXT: s_mov_b32 s10, s12 11785; GFX940-NEXT: s_mov_b32 s11, s13 11786; GFX940-NEXT: s_mov_b32 s14, s4 11787; GFX940-NEXT: s_mov_b32 s15, s5 11788; GFX940-NEXT: ;;#ASMSTART 11789; GFX940-NEXT: ; use s[8:15] 11790; GFX940-NEXT: ;;#ASMEND 11791; GFX940-NEXT: s_setpc_b64 s[30:31] 11792 %vec0 = call <3 x i64> asm "; def $0", "=s"() 11793 %vec1 = call <3 x i64> asm "; def $0", "=s"() 11794 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 poison, i32 2> 11795 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 11796 ret void 11797} 11798 11799define void @s_shuffle_v4i64_v3i64__5_5_0_2() { 11800; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_0_2: 11801; GFX900: ; %bb.0: 11802; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11803; GFX900-NEXT: ;;#ASMSTART 11804; GFX900-NEXT: ; def s[12:17] 11805; GFX900-NEXT: ;;#ASMEND 11806; GFX900-NEXT: ;;#ASMSTART 11807; GFX900-NEXT: ; def s[20:25] 11808; GFX900-NEXT: ;;#ASMEND 
11809; GFX900-NEXT: s_mov_b32 s8, s24 11810; GFX900-NEXT: s_mov_b32 s9, s25 11811; GFX900-NEXT: s_mov_b32 s10, s24 11812; GFX900-NEXT: s_mov_b32 s11, s25 11813; GFX900-NEXT: s_mov_b32 s14, s16 11814; GFX900-NEXT: s_mov_b32 s15, s17 11815; GFX900-NEXT: ;;#ASMSTART 11816; GFX900-NEXT: ; use s[8:15] 11817; GFX900-NEXT: ;;#ASMEND 11818; GFX900-NEXT: s_setpc_b64 s[30:31] 11819; 11820; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_0_2: 11821; GFX90A: ; %bb.0: 11822; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11823; GFX90A-NEXT: ;;#ASMSTART 11824; GFX90A-NEXT: ; def s[12:17] 11825; GFX90A-NEXT: ;;#ASMEND 11826; GFX90A-NEXT: ;;#ASMSTART 11827; GFX90A-NEXT: ; def s[20:25] 11828; GFX90A-NEXT: ;;#ASMEND 11829; GFX90A-NEXT: s_mov_b32 s8, s24 11830; GFX90A-NEXT: s_mov_b32 s9, s25 11831; GFX90A-NEXT: s_mov_b32 s10, s24 11832; GFX90A-NEXT: s_mov_b32 s11, s25 11833; GFX90A-NEXT: s_mov_b32 s14, s16 11834; GFX90A-NEXT: s_mov_b32 s15, s17 11835; GFX90A-NEXT: ;;#ASMSTART 11836; GFX90A-NEXT: ; use s[8:15] 11837; GFX90A-NEXT: ;;#ASMEND 11838; GFX90A-NEXT: s_setpc_b64 s[30:31] 11839; 11840; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_0_2: 11841; GFX940: ; %bb.0: 11842; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11843; GFX940-NEXT: ;;#ASMSTART 11844; GFX940-NEXT: ; def s[8:13] 11845; GFX940-NEXT: ;;#ASMEND 11846; GFX940-NEXT: ;;#ASMSTART 11847; GFX940-NEXT: ; def s[0:5] 11848; GFX940-NEXT: ;;#ASMEND 11849; GFX940-NEXT: s_mov_b32 s8, s12 11850; GFX940-NEXT: s_mov_b32 s9, s13 11851; GFX940-NEXT: s_mov_b32 s10, s12 11852; GFX940-NEXT: s_mov_b32 s11, s13 11853; GFX940-NEXT: s_mov_b32 s12, s0 11854; GFX940-NEXT: s_mov_b32 s13, s1 11855; GFX940-NEXT: s_mov_b32 s14, s4 11856; GFX940-NEXT: s_mov_b32 s15, s5 11857; GFX940-NEXT: ;;#ASMSTART 11858; GFX940-NEXT: ; use s[8:15] 11859; GFX940-NEXT: ;;#ASMEND 11860; GFX940-NEXT: s_setpc_b64 s[30:31] 11861 %vec0 = call <3 x i64> asm "; def $0", "=s"() 11862 %vec1 = call <3 x i64> asm "; def $0", "=s"() 11863 %shuf = shufflevector <3 x i64> 
%vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 0, i32 2> 11864 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 11865 ret void 11866} 11867 11868define void @s_shuffle_v4i64_v3i64__5_5_1_2() { 11869; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_1_2: 11870; GFX900: ; %bb.0: 11871; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11872; GFX900-NEXT: ;;#ASMSTART 11873; GFX900-NEXT: ; def s[12:17] 11874; GFX900-NEXT: ;;#ASMEND 11875; GFX900-NEXT: ;;#ASMSTART 11876; GFX900-NEXT: ; def s[8:13] 11877; GFX900-NEXT: ;;#ASMEND 11878; GFX900-NEXT: s_mov_b32 s8, s12 11879; GFX900-NEXT: s_mov_b32 s9, s13 11880; GFX900-NEXT: s_mov_b32 s10, s12 11881; GFX900-NEXT: s_mov_b32 s11, s13 11882; GFX900-NEXT: s_mov_b32 s12, s14 11883; GFX900-NEXT: s_mov_b32 s13, s15 11884; GFX900-NEXT: s_mov_b32 s14, s16 11885; GFX900-NEXT: s_mov_b32 s15, s17 11886; GFX900-NEXT: ;;#ASMSTART 11887; GFX900-NEXT: ; use s[8:15] 11888; GFX900-NEXT: ;;#ASMEND 11889; GFX900-NEXT: s_setpc_b64 s[30:31] 11890; 11891; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_1_2: 11892; GFX90A: ; %bb.0: 11893; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11894; GFX90A-NEXT: ;;#ASMSTART 11895; GFX90A-NEXT: ; def s[12:17] 11896; GFX90A-NEXT: ;;#ASMEND 11897; GFX90A-NEXT: ;;#ASMSTART 11898; GFX90A-NEXT: ; def s[8:13] 11899; GFX90A-NEXT: ;;#ASMEND 11900; GFX90A-NEXT: s_mov_b32 s8, s12 11901; GFX90A-NEXT: s_mov_b32 s9, s13 11902; GFX90A-NEXT: s_mov_b32 s10, s12 11903; GFX90A-NEXT: s_mov_b32 s11, s13 11904; GFX90A-NEXT: s_mov_b32 s12, s14 11905; GFX90A-NEXT: s_mov_b32 s13, s15 11906; GFX90A-NEXT: s_mov_b32 s14, s16 11907; GFX90A-NEXT: s_mov_b32 s15, s17 11908; GFX90A-NEXT: ;;#ASMSTART 11909; GFX90A-NEXT: ; use s[8:15] 11910; GFX90A-NEXT: ;;#ASMEND 11911; GFX90A-NEXT: s_setpc_b64 s[30:31] 11912; 11913; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_1_2: 11914; GFX940: ; %bb.0: 11915; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11916; GFX940-NEXT: ;;#ASMSTART 11917; GFX940-NEXT: ; def s[8:13] 11918; 
GFX940-NEXT: ;;#ASMEND 11919; GFX940-NEXT: ;;#ASMSTART 11920; GFX940-NEXT: ; def s[0:5] 11921; GFX940-NEXT: ;;#ASMEND 11922; GFX940-NEXT: s_mov_b32 s8, s12 11923; GFX940-NEXT: s_mov_b32 s9, s13 11924; GFX940-NEXT: s_mov_b32 s10, s12 11925; GFX940-NEXT: s_mov_b32 s11, s13 11926; GFX940-NEXT: s_mov_b32 s12, s2 11927; GFX940-NEXT: s_mov_b32 s13, s3 11928; GFX940-NEXT: s_mov_b32 s14, s4 11929; GFX940-NEXT: s_mov_b32 s15, s5 11930; GFX940-NEXT: ;;#ASMSTART 11931; GFX940-NEXT: ; use s[8:15] 11932; GFX940-NEXT: ;;#ASMEND 11933; GFX940-NEXT: s_setpc_b64 s[30:31] 11934 %vec0 = call <3 x i64> asm "; def $0", "=s"() 11935 %vec1 = call <3 x i64> asm "; def $0", "=s"() 11936 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 1, i32 2> 11937 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 11938 ret void 11939} 11940 11941define void @s_shuffle_v4i64_v3i64__5_5_3_2() { 11942; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_3_2: 11943; GFX900: ; %bb.0: 11944; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11945; GFX900-NEXT: ;;#ASMSTART 11946; GFX900-NEXT: ; def s[12:17] 11947; GFX900-NEXT: ;;#ASMEND 11948; GFX900-NEXT: ;;#ASMSTART 11949; GFX900-NEXT: ; def s[20:25] 11950; GFX900-NEXT: ;;#ASMEND 11951; GFX900-NEXT: s_mov_b32 s8, s24 11952; GFX900-NEXT: s_mov_b32 s9, s25 11953; GFX900-NEXT: s_mov_b32 s10, s24 11954; GFX900-NEXT: s_mov_b32 s11, s25 11955; GFX900-NEXT: s_mov_b32 s12, s20 11956; GFX900-NEXT: s_mov_b32 s13, s21 11957; GFX900-NEXT: s_mov_b32 s14, s16 11958; GFX900-NEXT: s_mov_b32 s15, s17 11959; GFX900-NEXT: ;;#ASMSTART 11960; GFX900-NEXT: ; use s[8:15] 11961; GFX900-NEXT: ;;#ASMEND 11962; GFX900-NEXT: s_setpc_b64 s[30:31] 11963; 11964; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_3_2: 11965; GFX90A: ; %bb.0: 11966; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11967; GFX90A-NEXT: ;;#ASMSTART 11968; GFX90A-NEXT: ; def s[12:17] 11969; GFX90A-NEXT: ;;#ASMEND 11970; GFX90A-NEXT: ;;#ASMSTART 11971; GFX90A-NEXT: ; def 
s[20:25] 11972; GFX90A-NEXT: ;;#ASMEND 11973; GFX90A-NEXT: s_mov_b32 s8, s24 11974; GFX90A-NEXT: s_mov_b32 s9, s25 11975; GFX90A-NEXT: s_mov_b32 s10, s24 11976; GFX90A-NEXT: s_mov_b32 s11, s25 11977; GFX90A-NEXT: s_mov_b32 s12, s20 11978; GFX90A-NEXT: s_mov_b32 s13, s21 11979; GFX90A-NEXT: s_mov_b32 s14, s16 11980; GFX90A-NEXT: s_mov_b32 s15, s17 11981; GFX90A-NEXT: ;;#ASMSTART 11982; GFX90A-NEXT: ; use s[8:15] 11983; GFX90A-NEXT: ;;#ASMEND 11984; GFX90A-NEXT: s_setpc_b64 s[30:31] 11985; 11986; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_3_2: 11987; GFX940: ; %bb.0: 11988; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11989; GFX940-NEXT: ;;#ASMSTART 11990; GFX940-NEXT: ; def s[12:17] 11991; GFX940-NEXT: ;;#ASMEND 11992; GFX940-NEXT: ;;#ASMSTART 11993; GFX940-NEXT: ; def s[0:5] 11994; GFX940-NEXT: ;;#ASMEND 11995; GFX940-NEXT: s_mov_b32 s8, s16 11996; GFX940-NEXT: s_mov_b32 s9, s17 11997; GFX940-NEXT: s_mov_b32 s10, s16 11998; GFX940-NEXT: s_mov_b32 s11, s17 11999; GFX940-NEXT: s_mov_b32 s14, s4 12000; GFX940-NEXT: s_mov_b32 s15, s5 12001; GFX940-NEXT: ;;#ASMSTART 12002; GFX940-NEXT: ; use s[8:15] 12003; GFX940-NEXT: ;;#ASMEND 12004; GFX940-NEXT: s_setpc_b64 s[30:31] 12005 %vec0 = call <3 x i64> asm "; def $0", "=s"() 12006 %vec1 = call <3 x i64> asm "; def $0", "=s"() 12007 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 3, i32 2> 12008 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 12009 ret void 12010} 12011 12012define void @s_shuffle_v4i64_v3i64__5_5_4_2() { 12013; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_4_2: 12014; GFX900: ; %bb.0: 12015; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12016; GFX900-NEXT: ;;#ASMSTART 12017; GFX900-NEXT: ; def s[12:17] 12018; GFX900-NEXT: ;;#ASMEND 12019; GFX900-NEXT: ;;#ASMSTART 12020; GFX900-NEXT: ; def s[20:25] 12021; GFX900-NEXT: ;;#ASMEND 12022; GFX900-NEXT: s_mov_b32 s8, s24 12023; GFX900-NEXT: s_mov_b32 s9, s25 12024; GFX900-NEXT: s_mov_b32 s10, s24 12025; 
GFX900-NEXT: s_mov_b32 s11, s25 12026; GFX900-NEXT: s_mov_b32 s12, s22 12027; GFX900-NEXT: s_mov_b32 s13, s23 12028; GFX900-NEXT: s_mov_b32 s14, s16 12029; GFX900-NEXT: s_mov_b32 s15, s17 12030; GFX900-NEXT: ;;#ASMSTART 12031; GFX900-NEXT: ; use s[8:15] 12032; GFX900-NEXT: ;;#ASMEND 12033; GFX900-NEXT: s_setpc_b64 s[30:31] 12034; 12035; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_4_2: 12036; GFX90A: ; %bb.0: 12037; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12038; GFX90A-NEXT: ;;#ASMSTART 12039; GFX90A-NEXT: ; def s[12:17] 12040; GFX90A-NEXT: ;;#ASMEND 12041; GFX90A-NEXT: ;;#ASMSTART 12042; GFX90A-NEXT: ; def s[20:25] 12043; GFX90A-NEXT: ;;#ASMEND 12044; GFX90A-NEXT: s_mov_b32 s8, s24 12045; GFX90A-NEXT: s_mov_b32 s9, s25 12046; GFX90A-NEXT: s_mov_b32 s10, s24 12047; GFX90A-NEXT: s_mov_b32 s11, s25 12048; GFX90A-NEXT: s_mov_b32 s12, s22 12049; GFX90A-NEXT: s_mov_b32 s13, s23 12050; GFX90A-NEXT: s_mov_b32 s14, s16 12051; GFX90A-NEXT: s_mov_b32 s15, s17 12052; GFX90A-NEXT: ;;#ASMSTART 12053; GFX90A-NEXT: ; use s[8:15] 12054; GFX90A-NEXT: ;;#ASMEND 12055; GFX90A-NEXT: s_setpc_b64 s[30:31] 12056; 12057; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_4_2: 12058; GFX940: ; %bb.0: 12059; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12060; GFX940-NEXT: ;;#ASMSTART 12061; GFX940-NEXT: ; def s[12:17] 12062; GFX940-NEXT: ;;#ASMEND 12063; GFX940-NEXT: ;;#ASMSTART 12064; GFX940-NEXT: ; def s[0:5] 12065; GFX940-NEXT: ;;#ASMEND 12066; GFX940-NEXT: s_mov_b32 s8, s16 12067; GFX940-NEXT: s_mov_b32 s9, s17 12068; GFX940-NEXT: s_mov_b32 s10, s16 12069; GFX940-NEXT: s_mov_b32 s11, s17 12070; GFX940-NEXT: s_mov_b32 s12, s14 12071; GFX940-NEXT: s_mov_b32 s13, s15 12072; GFX940-NEXT: s_mov_b32 s14, s4 12073; GFX940-NEXT: s_mov_b32 s15, s5 12074; GFX940-NEXT: ;;#ASMSTART 12075; GFX940-NEXT: ; use s[8:15] 12076; GFX940-NEXT: ;;#ASMEND 12077; GFX940-NEXT: s_setpc_b64 s[30:31] 12078 %vec0 = call <3 x i64> asm "; def $0", "=s"() 12079 %vec1 = call <3 x i64> asm "; def $0", "=s"() 
; The long line that follows closes @s_shuffle_v4i64_v3i64__5_5_4_2 and then holds:
;  - @s_shuffle_v4i64_v3i64__u_3_3_3: mask <poison, 3, 3, 3> against a poison second
;    operand, so no lane reads %vec0; its checks expect only the "use s[8:15]" asm.
;  - the start of @s_shuffle_v4i64_v3i64__0_3_3_3: mask <0, 3, 3, 3>, where lane 0 is
;    %vec0 element 0 and lanes 1-3 index the poison second operand.
; Both tests hand the <4 x i64> shuffle result to inline asm constrained to {s[8:15]}.
12080 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 4, i32 2> 12081 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 12082 ret void 12083} 12084 12085define void @s_shuffle_v4i64_v3i64__u_3_3_3() { 12086; GFX9-LABEL: s_shuffle_v4i64_v3i64__u_3_3_3: 12087; GFX9: ; %bb.0: 12088; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12089; GFX9-NEXT: ;;#ASMSTART 12090; GFX9-NEXT: ; use s[8:15] 12091; GFX9-NEXT: ;;#ASMEND 12092; GFX9-NEXT: s_setpc_b64 s[30:31] 12093 %vec0 = call <3 x i64> asm "; def $0", "=s"() 12094 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 poison, i32 3, i32 3, i32 3> 12095 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 12096 ret void 12097} 12098 12099define void @s_shuffle_v4i64_v3i64__0_3_3_3() { 12100; GFX900-LABEL: s_shuffle_v4i64_v3i64__0_3_3_3: 12101; GFX900: ; %bb.0: 12102; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12103; GFX900-NEXT: ;;#ASMSTART 12104; GFX900-NEXT: ; def s[8:13] 12105; GFX900-NEXT: ;;#ASMEND 12106; GFX900-NEXT: ;;#ASMSTART 12107; GFX900-NEXT: ; use s[8:15] 12108; GFX900-NEXT: ;;#ASMEND 12109; GFX900-NEXT: s_setpc_b64 s[30:31] 12110; 12111; GFX90A-LABEL: s_shuffle_v4i64_v3i64__0_3_3_3: 12112; GFX90A: ; %bb.0: 12113; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12114; GFX90A-NEXT: ;;#ASMSTART 12115; GFX90A-NEXT: ; def s[8:13] 12116; GFX90A-NEXT: ;;#ASMEND 12117; GFX90A-NEXT: ;;#ASMSTART 12118; GFX90A-NEXT: ; use s[8:15] 12119; GFX90A-NEXT: ;;#ASMEND 12120; GFX90A-NEXT: s_setpc_b64 s[30:31] 12121; 12122; GFX940-LABEL: s_shuffle_v4i64_v3i64__0_3_3_3: 12123; GFX940: ; %bb.0: 12124; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12125; GFX940-NEXT: ;;#ASMSTART 12126; GFX940-NEXT: ; def s[8:13] 12127; GFX940-NEXT: ;;#ASMEND 12128; GFX940-NEXT: s_nop 0 12129; GFX940-NEXT: ;;#ASMSTART 12130; GFX940-NEXT: ; use s[8:15] 12131; GFX940-NEXT: ;;#ASMEND 12132; GFX940-NEXT: s_setpc_b64 s[30:31] 12133 %vec0 = call <3 
; The long line that follows completes @s_shuffle_v4i64_v3i64__0_3_3_3 and then holds:
;  - @s_shuffle_v4i64_v3i64__1_3_3_3: mask <1, 3, 3, 3> with a poison second operand;
;    lane 0 is %vec0 element 1, which the checks copy into s8/s9 with two s_mov_b32.
;  - the start of @s_shuffle_v4i64_v3i64__2_3_3_3: mask <2, 3, 3, 3>; lane 0 is %vec0
;    element 2 and lanes 1-3 index the poison second operand.
; Both tests hand the <4 x i64> shuffle result to inline asm constrained to {s[8:15]}.
x i64> asm "; def $0", "=s"() 12134 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 0, i32 3, i32 3, i32 3> 12135 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 12136 ret void 12137} 12138 12139define void @s_shuffle_v4i64_v3i64__1_3_3_3() { 12140; GFX900-LABEL: s_shuffle_v4i64_v3i64__1_3_3_3: 12141; GFX900: ; %bb.0: 12142; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12143; GFX900-NEXT: ;;#ASMSTART 12144; GFX900-NEXT: ; def s[4:9] 12145; GFX900-NEXT: ;;#ASMEND 12146; GFX900-NEXT: s_mov_b32 s8, s6 12147; GFX900-NEXT: s_mov_b32 s9, s7 12148; GFX900-NEXT: ;;#ASMSTART 12149; GFX900-NEXT: ; use s[8:15] 12150; GFX900-NEXT: ;;#ASMEND 12151; GFX900-NEXT: s_setpc_b64 s[30:31] 12152; 12153; GFX90A-LABEL: s_shuffle_v4i64_v3i64__1_3_3_3: 12154; GFX90A: ; %bb.0: 12155; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12156; GFX90A-NEXT: ;;#ASMSTART 12157; GFX90A-NEXT: ; def s[4:9] 12158; GFX90A-NEXT: ;;#ASMEND 12159; GFX90A-NEXT: s_mov_b32 s8, s6 12160; GFX90A-NEXT: s_mov_b32 s9, s7 12161; GFX90A-NEXT: ;;#ASMSTART 12162; GFX90A-NEXT: ; use s[8:15] 12163; GFX90A-NEXT: ;;#ASMEND 12164; GFX90A-NEXT: s_setpc_b64 s[30:31] 12165; 12166; GFX940-LABEL: s_shuffle_v4i64_v3i64__1_3_3_3: 12167; GFX940: ; %bb.0: 12168; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12169; GFX940-NEXT: ;;#ASMSTART 12170; GFX940-NEXT: ; def s[0:5] 12171; GFX940-NEXT: ;;#ASMEND 12172; GFX940-NEXT: s_mov_b32 s8, s2 12173; GFX940-NEXT: s_mov_b32 s9, s3 12174; GFX940-NEXT: ;;#ASMSTART 12175; GFX940-NEXT: ; use s[8:15] 12176; GFX940-NEXT: ;;#ASMEND 12177; GFX940-NEXT: s_setpc_b64 s[30:31] 12178 %vec0 = call <3 x i64> asm "; def $0", "=s"() 12179 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 1, i32 3, i32 3, i32 3> 12180 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 12181 ret void 12182} 12183 12184define void @s_shuffle_v4i64_v3i64__2_3_3_3() { 12185; GFX900-LABEL: s_shuffle_v4i64_v3i64__2_3_3_3: 12186; 
; The long line that follows completes @s_shuffle_v4i64_v3i64__2_3_3_3 and then holds:
;  - @s_shuffle_v4i64_v3i64__3_3_3_3: mask <3, 3, 3, 3> with a poison second operand,
;    so every lane indexes the poison operand; the checks expect only the use asm.
;  - the opening "define void" of @s_shuffle_v4i64_v3i64__4_3_3_3 (mask <4, 3, 3, 3>
;    over %vec0/%vec1: lane 0 is %vec1 element 1, lanes 1-3 are %vec1 element 0).
GFX900: ; %bb.0: 12187; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12188; GFX900-NEXT: ;;#ASMSTART 12189; GFX900-NEXT: ; def s[4:9] 12190; GFX900-NEXT: ;;#ASMEND 12191; GFX900-NEXT: ;;#ASMSTART 12192; GFX900-NEXT: ; use s[8:15] 12193; GFX900-NEXT: ;;#ASMEND 12194; GFX900-NEXT: s_setpc_b64 s[30:31] 12195; 12196; GFX90A-LABEL: s_shuffle_v4i64_v3i64__2_3_3_3: 12197; GFX90A: ; %bb.0: 12198; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12199; GFX90A-NEXT: ;;#ASMSTART 12200; GFX90A-NEXT: ; def s[4:9] 12201; GFX90A-NEXT: ;;#ASMEND 12202; GFX90A-NEXT: ;;#ASMSTART 12203; GFX90A-NEXT: ; use s[8:15] 12204; GFX90A-NEXT: ;;#ASMEND 12205; GFX90A-NEXT: s_setpc_b64 s[30:31] 12206; 12207; GFX940-LABEL: s_shuffle_v4i64_v3i64__2_3_3_3: 12208; GFX940: ; %bb.0: 12209; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12210; GFX940-NEXT: ;;#ASMSTART 12211; GFX940-NEXT: ; def s[0:5] 12212; GFX940-NEXT: ;;#ASMEND 12213; GFX940-NEXT: s_mov_b32 s8, s4 12214; GFX940-NEXT: s_mov_b32 s9, s5 12215; GFX940-NEXT: ;;#ASMSTART 12216; GFX940-NEXT: ; use s[8:15] 12217; GFX940-NEXT: ;;#ASMEND 12218; GFX940-NEXT: s_setpc_b64 s[30:31] 12219 %vec0 = call <3 x i64> asm "; def $0", "=s"() 12220 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 2, i32 3, i32 3, i32 3> 12221 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 12222 ret void 12223} 12224 12225define void @s_shuffle_v4i64_v3i64__3_3_3_3() { 12226; GFX9-LABEL: s_shuffle_v4i64_v3i64__3_3_3_3: 12227; GFX9: ; %bb.0: 12228; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12229; GFX9-NEXT: ;;#ASMSTART 12230; GFX9-NEXT: ; use s[8:15] 12231; GFX9-NEXT: ;;#ASMEND 12232; GFX9-NEXT: s_setpc_b64 s[30:31] 12233 %vec0 = call <3 x i64> asm "; def $0", "=s"() 12234 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 12235 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 12236 ret void 12237} 12238 12239define void 
@s_shuffle_v4i64_v3i64__4_3_3_3() { 12240; GFX900-LABEL: s_shuffle_v4i64_v3i64__4_3_3_3: 12241; GFX900: ; %bb.0: 12242; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12243; GFX900-NEXT: ;;#ASMSTART 12244; GFX900-NEXT: ; def s[4:9] 12245; GFX900-NEXT: ;;#ASMEND 12246; GFX900-NEXT: s_mov_b32 s8, s6 12247; GFX900-NEXT: s_mov_b32 s9, s7 12248; GFX900-NEXT: s_mov_b32 s10, s4 12249; GFX900-NEXT: s_mov_b32 s11, s5 12250; GFX900-NEXT: s_mov_b32 s12, s4 12251; GFX900-NEXT: s_mov_b32 s13, s5 12252; GFX900-NEXT: s_mov_b32 s14, s4 12253; GFX900-NEXT: s_mov_b32 s15, s5 12254; GFX900-NEXT: ;;#ASMSTART 12255; GFX900-NEXT: ; use s[8:15] 12256; GFX900-NEXT: ;;#ASMEND 12257; GFX900-NEXT: s_setpc_b64 s[30:31] 12258; 12259; GFX90A-LABEL: s_shuffle_v4i64_v3i64__4_3_3_3: 12260; GFX90A: ; %bb.0: 12261; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12262; GFX90A-NEXT: ;;#ASMSTART 12263; GFX90A-NEXT: ; def s[4:9] 12264; GFX90A-NEXT: ;;#ASMEND 12265; GFX90A-NEXT: s_mov_b32 s8, s6 12266; GFX90A-NEXT: s_mov_b32 s9, s7 12267; GFX90A-NEXT: s_mov_b32 s10, s4 12268; GFX90A-NEXT: s_mov_b32 s11, s5 12269; GFX90A-NEXT: s_mov_b32 s12, s4 12270; GFX90A-NEXT: s_mov_b32 s13, s5 12271; GFX90A-NEXT: s_mov_b32 s14, s4 12272; GFX90A-NEXT: s_mov_b32 s15, s5 12273; GFX90A-NEXT: ;;#ASMSTART 12274; GFX90A-NEXT: ; use s[8:15] 12275; GFX90A-NEXT: ;;#ASMEND 12276; GFX90A-NEXT: s_setpc_b64 s[30:31] 12277; 12278; GFX940-LABEL: s_shuffle_v4i64_v3i64__4_3_3_3: 12279; GFX940: ; %bb.0: 12280; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12281; GFX940-NEXT: ;;#ASMSTART 12282; GFX940-NEXT: ; def s[0:5] 12283; GFX940-NEXT: ;;#ASMEND 12284; GFX940-NEXT: s_mov_b32 s8, s2 12285; GFX940-NEXT: s_mov_b32 s9, s3 12286; GFX940-NEXT: s_mov_b32 s10, s0 12287; GFX940-NEXT: s_mov_b32 s11, s1 12288; GFX940-NEXT: s_mov_b32 s12, s0 12289; GFX940-NEXT: s_mov_b32 s13, s1 12290; GFX940-NEXT: s_mov_b32 s14, s0 12291; GFX940-NEXT: s_mov_b32 s15, s1 12292; GFX940-NEXT: ;;#ASMSTART 12293; GFX940-NEXT: ; use s[8:15] 12294; 
; The long line that follows completes @s_shuffle_v4i64_v3i64__4_3_3_3 and then starts
; @s_shuffle_v4i64_v3i64__5_3_3_3: mask <5, 3, 3, 3> over %vec0/%vec1, i.e. lane 0 is
; %vec1 element 2 and lanes 1-3 are %vec1 element 0. No mask index selects %vec0, and
; the checks contain a single "def" asm block. The <4 x i64> result is handed to
; inline asm constrained to {s[8:15]}.
GFX940-NEXT: ;;#ASMEND 12295; GFX940-NEXT: s_setpc_b64 s[30:31] 12296 %vec0 = call <3 x i64> asm "; def $0", "=s"() 12297 %vec1 = call <3 x i64> asm "; def $0", "=s"() 12298 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 4, i32 3, i32 3, i32 3> 12299 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 12300 ret void 12301} 12302 12303define void @s_shuffle_v4i64_v3i64__5_3_3_3() { 12304; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_3_3_3: 12305; GFX900: ; %bb.0: 12306; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12307; GFX900-NEXT: ;;#ASMSTART 12308; GFX900-NEXT: ; def s[4:9] 12309; GFX900-NEXT: ;;#ASMEND 12310; GFX900-NEXT: s_mov_b32 s10, s4 12311; GFX900-NEXT: s_mov_b32 s11, s5 12312; GFX900-NEXT: s_mov_b32 s12, s4 12313; GFX900-NEXT: s_mov_b32 s13, s5 12314; GFX900-NEXT: s_mov_b32 s14, s4 12315; GFX900-NEXT: s_mov_b32 s15, s5 12316; GFX900-NEXT: ;;#ASMSTART 12317; GFX900-NEXT: ; use s[8:15] 12318; GFX900-NEXT: ;;#ASMEND 12319; GFX900-NEXT: s_setpc_b64 s[30:31] 12320; 12321; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_3_3_3: 12322; GFX90A: ; %bb.0: 12323; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12324; GFX90A-NEXT: ;;#ASMSTART 12325; GFX90A-NEXT: ; def s[4:9] 12326; GFX90A-NEXT: ;;#ASMEND 12327; GFX90A-NEXT: s_mov_b32 s10, s4 12328; GFX90A-NEXT: s_mov_b32 s11, s5 12329; GFX90A-NEXT: s_mov_b32 s12, s4 12330; GFX90A-NEXT: s_mov_b32 s13, s5 12331; GFX90A-NEXT: s_mov_b32 s14, s4 12332; GFX90A-NEXT: s_mov_b32 s15, s5 12333; GFX90A-NEXT: ;;#ASMSTART 12334; GFX90A-NEXT: ; use s[8:15] 12335; GFX90A-NEXT: ;;#ASMEND 12336; GFX90A-NEXT: s_setpc_b64 s[30:31] 12337; 12338; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_3_3_3: 12339; GFX940: ; %bb.0: 12340; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12341; GFX940-NEXT: ;;#ASMSTART 12342; GFX940-NEXT: ; def s[0:5] 12343; GFX940-NEXT: ;;#ASMEND 12344; GFX940-NEXT: s_mov_b32 s8, s4 12345; GFX940-NEXT: s_mov_b32 s9, s5 12346; GFX940-NEXT: s_mov_b32 s10, s0 12347; GFX940-NEXT: 
; The long line that follows completes @s_shuffle_v4i64_v3i64__5_3_3_3 and then starts
; @s_shuffle_v4i64_v3i64__5_u_3_3: mask <5, poison, 3, 3> over %vec0/%vec1. Lane 0 is
; %vec1 element 2, lane 1 is left unspecified, lanes 2-3 are %vec1 element 0; the
; checks accordingly contain no s_mov_b32 into s10/s11. The <4 x i64> result is handed
; to inline asm constrained to {s[8:15]}.
s_mov_b32 s11, s1 12348; GFX940-NEXT: s_mov_b32 s12, s0 12349; GFX940-NEXT: s_mov_b32 s13, s1 12350; GFX940-NEXT: s_mov_b32 s14, s0 12351; GFX940-NEXT: s_mov_b32 s15, s1 12352; GFX940-NEXT: ;;#ASMSTART 12353; GFX940-NEXT: ; use s[8:15] 12354; GFX940-NEXT: ;;#ASMEND 12355; GFX940-NEXT: s_setpc_b64 s[30:31] 12356 %vec0 = call <3 x i64> asm "; def $0", "=s"() 12357 %vec1 = call <3 x i64> asm "; def $0", "=s"() 12358 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 3, i32 3, i32 3> 12359 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 12360 ret void 12361} 12362 12363define void @s_shuffle_v4i64_v3i64__5_u_3_3() { 12364; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_u_3_3: 12365; GFX900: ; %bb.0: 12366; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12367; GFX900-NEXT: ;;#ASMSTART 12368; GFX900-NEXT: ; def s[4:9] 12369; GFX900-NEXT: ;;#ASMEND 12370; GFX900-NEXT: s_mov_b32 s12, s4 12371; GFX900-NEXT: s_mov_b32 s13, s5 12372; GFX900-NEXT: s_mov_b32 s14, s4 12373; GFX900-NEXT: s_mov_b32 s15, s5 12374; GFX900-NEXT: ;;#ASMSTART 12375; GFX900-NEXT: ; use s[8:15] 12376; GFX900-NEXT: ;;#ASMEND 12377; GFX900-NEXT: s_setpc_b64 s[30:31] 12378; 12379; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_u_3_3: 12380; GFX90A: ; %bb.0: 12381; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12382; GFX90A-NEXT: ;;#ASMSTART 12383; GFX90A-NEXT: ; def s[4:9] 12384; GFX90A-NEXT: ;;#ASMEND 12385; GFX90A-NEXT: s_mov_b32 s12, s4 12386; GFX90A-NEXT: s_mov_b32 s13, s5 12387; GFX90A-NEXT: s_mov_b32 s14, s4 12388; GFX90A-NEXT: s_mov_b32 s15, s5 12389; GFX90A-NEXT: ;;#ASMSTART 12390; GFX90A-NEXT: ; use s[8:15] 12391; GFX90A-NEXT: ;;#ASMEND 12392; GFX90A-NEXT: s_setpc_b64 s[30:31] 12393; 12394; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_u_3_3: 12395; GFX940: ; %bb.0: 12396; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12397; GFX940-NEXT: ;;#ASMSTART 12398; GFX940-NEXT: ; def s[0:5] 12399; GFX940-NEXT: ;;#ASMEND 12400; GFX940-NEXT: s_mov_b32 s8, s4 12401; 
; The long line that follows completes @s_shuffle_v4i64_v3i64__5_u_3_3 and then starts
; @s_shuffle_v4i64_v3i64__5_0_3_3: mask <5, 0, 3, 3> over %vec0/%vec1. Lane 0 is %vec1
; element 2, lane 1 is %vec0 element 0, lanes 2-3 are %vec1 element 0, so both "def"
; asm blocks show up in the checks. The <4 x i64> result is handed to inline asm
; constrained to {s[8:15]}.
GFX940-NEXT: s_mov_b32 s9, s5 12402; GFX940-NEXT: s_mov_b32 s12, s0 12403; GFX940-NEXT: s_mov_b32 s13, s1 12404; GFX940-NEXT: s_mov_b32 s14, s0 12405; GFX940-NEXT: s_mov_b32 s15, s1 12406; GFX940-NEXT: ;;#ASMSTART 12407; GFX940-NEXT: ; use s[8:15] 12408; GFX940-NEXT: ;;#ASMEND 12409; GFX940-NEXT: s_setpc_b64 s[30:31] 12410 %vec0 = call <3 x i64> asm "; def $0", "=s"() 12411 %vec1 = call <3 x i64> asm "; def $0", "=s"() 12412 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 poison, i32 3, i32 3> 12413 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 12414 ret void 12415} 12416 12417define void @s_shuffle_v4i64_v3i64__5_0_3_3() { 12418; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_0_3_3: 12419; GFX900: ; %bb.0: 12420; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12421; GFX900-NEXT: ;;#ASMSTART 12422; GFX900-NEXT: ; def s[4:9] 12423; GFX900-NEXT: ;;#ASMEND 12424; GFX900-NEXT: ;;#ASMSTART 12425; GFX900-NEXT: ; def s[16:21] 12426; GFX900-NEXT: ;;#ASMEND 12427; GFX900-NEXT: s_mov_b32 s8, s20 12428; GFX900-NEXT: s_mov_b32 s9, s21 12429; GFX900-NEXT: s_mov_b32 s10, s4 12430; GFX900-NEXT: s_mov_b32 s11, s5 12431; GFX900-NEXT: s_mov_b32 s12, s16 12432; GFX900-NEXT: s_mov_b32 s13, s17 12433; GFX900-NEXT: s_mov_b32 s14, s16 12434; GFX900-NEXT: s_mov_b32 s15, s17 12435; GFX900-NEXT: ;;#ASMSTART 12436; GFX900-NEXT: ; use s[8:15] 12437; GFX900-NEXT: ;;#ASMEND 12438; GFX900-NEXT: s_setpc_b64 s[30:31] 12439; 12440; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_0_3_3: 12441; GFX90A: ; %bb.0: 12442; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12443; GFX90A-NEXT: ;;#ASMSTART 12444; GFX90A-NEXT: ; def s[4:9] 12445; GFX90A-NEXT: ;;#ASMEND 12446; GFX90A-NEXT: ;;#ASMSTART 12447; GFX90A-NEXT: ; def s[16:21] 12448; GFX90A-NEXT: ;;#ASMEND 12449; GFX90A-NEXT: s_mov_b32 s8, s20 12450; GFX90A-NEXT: s_mov_b32 s9, s21 12451; GFX90A-NEXT: s_mov_b32 s10, s4 12452; GFX90A-NEXT: s_mov_b32 s11, s5 12453; GFX90A-NEXT: s_mov_b32 s12, s16 12454; 
; The long line that follows completes @s_shuffle_v4i64_v3i64__5_0_3_3 and then starts
; @s_shuffle_v4i64_v3i64__5_1_3_3: mask <5, 1, 3, 3> over %vec0/%vec1. Lane 0 is %vec1
; element 2, lane 1 is %vec0 element 1, lanes 2-3 are %vec1 element 0. The <4 x i64>
; result is handed to inline asm constrained to {s[8:15]}.
GFX90A-NEXT: s_mov_b32 s13, s17 12455; GFX90A-NEXT: s_mov_b32 s14, s16 12456; GFX90A-NEXT: s_mov_b32 s15, s17 12457; GFX90A-NEXT: ;;#ASMSTART 12458; GFX90A-NEXT: ; use s[8:15] 12459; GFX90A-NEXT: ;;#ASMEND 12460; GFX90A-NEXT: s_setpc_b64 s[30:31] 12461; 12462; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_0_3_3: 12463; GFX940: ; %bb.0: 12464; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12465; GFX940-NEXT: ;;#ASMSTART 12466; GFX940-NEXT: ; def s[0:5] 12467; GFX940-NEXT: ;;#ASMEND 12468; GFX940-NEXT: s_mov_b32 s10, s0 12469; GFX940-NEXT: ;;#ASMSTART 12470; GFX940-NEXT: ; def s[4:9] 12471; GFX940-NEXT: ;;#ASMEND 12472; GFX940-NEXT: s_mov_b32 s11, s1 12473; GFX940-NEXT: s_mov_b32 s12, s4 12474; GFX940-NEXT: s_mov_b32 s13, s5 12475; GFX940-NEXT: s_mov_b32 s14, s4 12476; GFX940-NEXT: s_mov_b32 s15, s5 12477; GFX940-NEXT: ;;#ASMSTART 12478; GFX940-NEXT: ; use s[8:15] 12479; GFX940-NEXT: ;;#ASMEND 12480; GFX940-NEXT: s_setpc_b64 s[30:31] 12481 %vec0 = call <3 x i64> asm "; def $0", "=s"() 12482 %vec1 = call <3 x i64> asm "; def $0", "=s"() 12483 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 0, i32 3, i32 3> 12484 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 12485 ret void 12486} 12487 12488define void @s_shuffle_v4i64_v3i64__5_1_3_3() { 12489; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_1_3_3: 12490; GFX900: ; %bb.0: 12491; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12492; GFX900-NEXT: ;;#ASMSTART 12493; GFX900-NEXT: ; def s[8:13] 12494; GFX900-NEXT: ;;#ASMEND 12495; GFX900-NEXT: ;;#ASMSTART 12496; GFX900-NEXT: ; def s[4:9] 12497; GFX900-NEXT: ;;#ASMEND 12498; GFX900-NEXT: s_mov_b32 s12, s4 12499; GFX900-NEXT: s_mov_b32 s13, s5 12500; GFX900-NEXT: s_mov_b32 s14, s4 12501; GFX900-NEXT: s_mov_b32 s15, s5 12502; GFX900-NEXT: ;;#ASMSTART 12503; GFX900-NEXT: ; use s[8:15] 12504; GFX900-NEXT: ;;#ASMEND 12505; GFX900-NEXT: s_setpc_b64 s[30:31] 12506; 12507; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_1_3_3: 12508; GFX90A: ; 
; The long line that follows completes @s_shuffle_v4i64_v3i64__5_1_3_3 and then starts
; @s_shuffle_v4i64_v3i64__5_2_3_3: mask <5, 2, 3, 3> over %vec0/%vec1. Lane 0 is %vec1
; element 2, lane 1 is %vec0 element 2, lanes 2-3 are %vec1 element 0. The <4 x i64>
; result is handed to inline asm constrained to {s[8:15]}.
%bb.0: 12509; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12510; GFX90A-NEXT: ;;#ASMSTART 12511; GFX90A-NEXT: ; def s[8:13] 12512; GFX90A-NEXT: ;;#ASMEND 12513; GFX90A-NEXT: ;;#ASMSTART 12514; GFX90A-NEXT: ; def s[4:9] 12515; GFX90A-NEXT: ;;#ASMEND 12516; GFX90A-NEXT: s_mov_b32 s12, s4 12517; GFX90A-NEXT: s_mov_b32 s13, s5 12518; GFX90A-NEXT: s_mov_b32 s14, s4 12519; GFX90A-NEXT: s_mov_b32 s15, s5 12520; GFX90A-NEXT: ;;#ASMSTART 12521; GFX90A-NEXT: ; use s[8:15] 12522; GFX90A-NEXT: ;;#ASMEND 12523; GFX90A-NEXT: s_setpc_b64 s[30:31] 12524; 12525; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_1_3_3: 12526; GFX940: ; %bb.0: 12527; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12528; GFX940-NEXT: ;;#ASMSTART 12529; GFX940-NEXT: ; def s[8:13] 12530; GFX940-NEXT: ;;#ASMEND 12531; GFX940-NEXT: ;;#ASMSTART 12532; GFX940-NEXT: ; def s[0:5] 12533; GFX940-NEXT: ;;#ASMEND 12534; GFX940-NEXT: s_mov_b32 s8, s4 12535; GFX940-NEXT: s_mov_b32 s9, s5 12536; GFX940-NEXT: s_mov_b32 s12, s0 12537; GFX940-NEXT: s_mov_b32 s13, s1 12538; GFX940-NEXT: s_mov_b32 s14, s0 12539; GFX940-NEXT: s_mov_b32 s15, s1 12540; GFX940-NEXT: ;;#ASMSTART 12541; GFX940-NEXT: ; use s[8:15] 12542; GFX940-NEXT: ;;#ASMEND 12543; GFX940-NEXT: s_setpc_b64 s[30:31] 12544 %vec0 = call <3 x i64> asm "; def $0", "=s"() 12545 %vec1 = call <3 x i64> asm "; def $0", "=s"() 12546 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 1, i32 3, i32 3> 12547 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 12548 ret void 12549} 12550 12551define void @s_shuffle_v4i64_v3i64__5_2_3_3() { 12552; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_2_3_3: 12553; GFX900: ; %bb.0: 12554; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12555; GFX900-NEXT: ;;#ASMSTART 12556; GFX900-NEXT: ; def s[8:13] 12557; GFX900-NEXT: ;;#ASMEND 12558; GFX900-NEXT: ;;#ASMSTART 12559; GFX900-NEXT: ; def s[4:9] 12560; GFX900-NEXT: ;;#ASMEND 12561; GFX900-NEXT: s_mov_b32 s10, s12 12562; GFX900-NEXT: s_mov_b32 
s11, s13 12563; GFX900-NEXT: s_mov_b32 s12, s4 12564; GFX900-NEXT: s_mov_b32 s13, s5 12565; GFX900-NEXT: s_mov_b32 s14, s4 12566; GFX900-NEXT: s_mov_b32 s15, s5 12567; GFX900-NEXT: ;;#ASMSTART 12568; GFX900-NEXT: ; use s[8:15] 12569; GFX900-NEXT: ;;#ASMEND 12570; GFX900-NEXT: s_setpc_b64 s[30:31] 12571; 12572; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_2_3_3: 12573; GFX90A: ; %bb.0: 12574; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12575; GFX90A-NEXT: ;;#ASMSTART 12576; GFX90A-NEXT: ; def s[8:13] 12577; GFX90A-NEXT: ;;#ASMEND 12578; GFX90A-NEXT: ;;#ASMSTART 12579; GFX90A-NEXT: ; def s[4:9] 12580; GFX90A-NEXT: ;;#ASMEND 12581; GFX90A-NEXT: s_mov_b32 s10, s12 12582; GFX90A-NEXT: s_mov_b32 s11, s13 12583; GFX90A-NEXT: s_mov_b32 s12, s4 12584; GFX90A-NEXT: s_mov_b32 s13, s5 12585; GFX90A-NEXT: s_mov_b32 s14, s4 12586; GFX90A-NEXT: s_mov_b32 s15, s5 12587; GFX90A-NEXT: ;;#ASMSTART 12588; GFX90A-NEXT: ; use s[8:15] 12589; GFX90A-NEXT: ;;#ASMEND 12590; GFX90A-NEXT: s_setpc_b64 s[30:31] 12591; 12592; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_2_3_3: 12593; GFX940: ; %bb.0: 12594; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12595; GFX940-NEXT: ;;#ASMSTART 12596; GFX940-NEXT: ; def s[0:5] 12597; GFX940-NEXT: ;;#ASMEND 12598; GFX940-NEXT: ;;#ASMSTART 12599; GFX940-NEXT: ; def s[16:21] 12600; GFX940-NEXT: ;;#ASMEND 12601; GFX940-NEXT: s_mov_b32 s8, s20 12602; GFX940-NEXT: s_mov_b32 s9, s21 12603; GFX940-NEXT: s_mov_b32 s10, s4 12604; GFX940-NEXT: s_mov_b32 s11, s5 12605; GFX940-NEXT: s_mov_b32 s12, s16 12606; GFX940-NEXT: s_mov_b32 s13, s17 12607; GFX940-NEXT: s_mov_b32 s14, s16 12608; GFX940-NEXT: s_mov_b32 s15, s17 12609; GFX940-NEXT: ;;#ASMSTART 12610; GFX940-NEXT: ; use s[8:15] 12611; GFX940-NEXT: ;;#ASMEND 12612; GFX940-NEXT: s_setpc_b64 s[30:31] 12613 %vec0 = call <3 x i64> asm "; def $0", "=s"() 12614 %vec1 = call <3 x i64> asm "; def $0", "=s"() 12615 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 2, i32 3, i32 3> 12616 call 
; The long line that follows completes @s_shuffle_v4i64_v3i64__5_2_3_3 and then starts
; @s_shuffle_v4i64_v3i64__5_4_3_3: mask <5, 4, 3, 3> over %vec0/%vec1. Lane 0 is %vec1
; element 2, lane 1 is %vec1 element 1, lanes 2-3 are %vec1 element 0. No mask index
; selects %vec0, and the checks contain a single "def" asm block. The <4 x i64> result
; is handed to inline asm constrained to {s[8:15]}.
void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 12617 ret void 12618} 12619 12620define void @s_shuffle_v4i64_v3i64__5_4_3_3() { 12621; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_4_3_3: 12622; GFX900: ; %bb.0: 12623; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12624; GFX900-NEXT: ;;#ASMSTART 12625; GFX900-NEXT: ; def s[4:9] 12626; GFX900-NEXT: ;;#ASMEND 12627; GFX900-NEXT: s_mov_b32 s10, s6 12628; GFX900-NEXT: s_mov_b32 s11, s7 12629; GFX900-NEXT: s_mov_b32 s12, s4 12630; GFX900-NEXT: s_mov_b32 s13, s5 12631; GFX900-NEXT: s_mov_b32 s14, s4 12632; GFX900-NEXT: s_mov_b32 s15, s5 12633; GFX900-NEXT: ;;#ASMSTART 12634; GFX900-NEXT: ; use s[8:15] 12635; GFX900-NEXT: ;;#ASMEND 12636; GFX900-NEXT: s_setpc_b64 s[30:31] 12637; 12638; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_4_3_3: 12639; GFX90A: ; %bb.0: 12640; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12641; GFX90A-NEXT: ;;#ASMSTART 12642; GFX90A-NEXT: ; def s[4:9] 12643; GFX90A-NEXT: ;;#ASMEND 12644; GFX90A-NEXT: s_mov_b32 s10, s6 12645; GFX90A-NEXT: s_mov_b32 s11, s7 12646; GFX90A-NEXT: s_mov_b32 s12, s4 12647; GFX90A-NEXT: s_mov_b32 s13, s5 12648; GFX90A-NEXT: s_mov_b32 s14, s4 12649; GFX90A-NEXT: s_mov_b32 s15, s5 12650; GFX90A-NEXT: ;;#ASMSTART 12651; GFX90A-NEXT: ; use s[8:15] 12652; GFX90A-NEXT: ;;#ASMEND 12653; GFX90A-NEXT: s_setpc_b64 s[30:31] 12654; 12655; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_4_3_3: 12656; GFX940: ; %bb.0: 12657; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12658; GFX940-NEXT: ;;#ASMSTART 12659; GFX940-NEXT: ; def s[0:5] 12660; GFX940-NEXT: ;;#ASMEND 12661; GFX940-NEXT: s_mov_b32 s8, s4 12662; GFX940-NEXT: s_mov_b32 s9, s5 12663; GFX940-NEXT: s_mov_b32 s10, s2 12664; GFX940-NEXT: s_mov_b32 s11, s3 12665; GFX940-NEXT: s_mov_b32 s12, s0 12666; GFX940-NEXT: s_mov_b32 s13, s1 12667; GFX940-NEXT: s_mov_b32 s14, s0 12668; GFX940-NEXT: s_mov_b32 s15, s1 12669; GFX940-NEXT: ;;#ASMSTART 12670; GFX940-NEXT: ; use s[8:15] 12671; GFX940-NEXT: ;;#ASMEND 12672; GFX940-NEXT: 
; The long line that follows completes @s_shuffle_v4i64_v3i64__5_4_3_3 and then starts
; @s_shuffle_v4i64_v3i64__5_5_3_3: mask <5, 5, 3, 3> over %vec0/%vec1. Lanes 0-1 are
; %vec1 element 2 and lanes 2-3 are %vec1 element 0. No mask index selects %vec0, and
; the checks contain a single "def" asm block. The <4 x i64> result is handed to
; inline asm constrained to {s[8:15]}.
s_setpc_b64 s[30:31] 12673 %vec0 = call <3 x i64> asm "; def $0", "=s"() 12674 %vec1 = call <3 x i64> asm "; def $0", "=s"() 12675 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 4, i32 3, i32 3> 12676 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 12677 ret void 12678} 12679 12680define void @s_shuffle_v4i64_v3i64__5_5_3_3() { 12681; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_3_3: 12682; GFX900: ; %bb.0: 12683; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12684; GFX900-NEXT: ;;#ASMSTART 12685; GFX900-NEXT: ; def s[16:21] 12686; GFX900-NEXT: ;;#ASMEND 12687; GFX900-NEXT: s_mov_b32 s8, s20 12688; GFX900-NEXT: s_mov_b32 s9, s21 12689; GFX900-NEXT: s_mov_b32 s10, s20 12690; GFX900-NEXT: s_mov_b32 s11, s21 12691; GFX900-NEXT: s_mov_b32 s12, s16 12692; GFX900-NEXT: s_mov_b32 s13, s17 12693; GFX900-NEXT: s_mov_b32 s14, s16 12694; GFX900-NEXT: s_mov_b32 s15, s17 12695; GFX900-NEXT: ;;#ASMSTART 12696; GFX900-NEXT: ; use s[8:15] 12697; GFX900-NEXT: ;;#ASMEND 12698; GFX900-NEXT: s_setpc_b64 s[30:31] 12699; 12700; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_3_3: 12701; GFX90A: ; %bb.0: 12702; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12703; GFX90A-NEXT: ;;#ASMSTART 12704; GFX90A-NEXT: ; def s[16:21] 12705; GFX90A-NEXT: ;;#ASMEND 12706; GFX90A-NEXT: s_mov_b32 s8, s20 12707; GFX90A-NEXT: s_mov_b32 s9, s21 12708; GFX90A-NEXT: s_mov_b32 s10, s20 12709; GFX90A-NEXT: s_mov_b32 s11, s21 12710; GFX90A-NEXT: s_mov_b32 s12, s16 12711; GFX90A-NEXT: s_mov_b32 s13, s17 12712; GFX90A-NEXT: s_mov_b32 s14, s16 12713; GFX90A-NEXT: s_mov_b32 s15, s17 12714; GFX90A-NEXT: ;;#ASMSTART 12715; GFX90A-NEXT: ; use s[8:15] 12716; GFX90A-NEXT: ;;#ASMEND 12717; GFX90A-NEXT: s_setpc_b64 s[30:31] 12718; 12719; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_3_3: 12720; GFX940: ; %bb.0: 12721; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12722; GFX940-NEXT: ;;#ASMSTART 12723; GFX940-NEXT: ; def s[0:5] 12724; GFX940-NEXT: ;;#ASMEND 12725; 
; The long line that follows completes @s_shuffle_v4i64_v3i64__5_5_3_3 and then starts
; @s_shuffle_v4i64_v3i64__5_5_u_3: mask <5, 5, poison, 3> over %vec0/%vec1. Lanes 0-1
; are %vec1 element 2, lane 2 is left unspecified, lane 3 is %vec1 element 0; the
; checks accordingly contain no s_mov_b32 into s12/s13. The <4 x i64> result is handed
; to inline asm constrained to {s[8:15]}.
GFX940-NEXT: s_mov_b32 s8, s4 12726; GFX940-NEXT: s_mov_b32 s9, s5 12727; GFX940-NEXT: s_mov_b32 s10, s4 12728; GFX940-NEXT: s_mov_b32 s11, s5 12729; GFX940-NEXT: s_mov_b32 s12, s0 12730; GFX940-NEXT: s_mov_b32 s13, s1 12731; GFX940-NEXT: s_mov_b32 s14, s0 12732; GFX940-NEXT: s_mov_b32 s15, s1 12733; GFX940-NEXT: ;;#ASMSTART 12734; GFX940-NEXT: ; use s[8:15] 12735; GFX940-NEXT: ;;#ASMEND 12736; GFX940-NEXT: s_setpc_b64 s[30:31] 12737 %vec0 = call <3 x i64> asm "; def $0", "=s"() 12738 %vec1 = call <3 x i64> asm "; def $0", "=s"() 12739 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 3, i32 3> 12740 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 12741 ret void 12742} 12743 12744define void @s_shuffle_v4i64_v3i64__5_5_u_3() { 12745; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_u_3: 12746; GFX900: ; %bb.0: 12747; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12748; GFX900-NEXT: ;;#ASMSTART 12749; GFX900-NEXT: ; def s[12:17] 12750; GFX900-NEXT: ;;#ASMEND 12751; GFX900-NEXT: s_mov_b32 s8, s16 12752; GFX900-NEXT: s_mov_b32 s9, s17 12753; GFX900-NEXT: s_mov_b32 s10, s16 12754; GFX900-NEXT: s_mov_b32 s11, s17 12755; GFX900-NEXT: s_mov_b32 s14, s12 12756; GFX900-NEXT: s_mov_b32 s15, s13 12757; GFX900-NEXT: ;;#ASMSTART 12758; GFX900-NEXT: ; use s[8:15] 12759; GFX900-NEXT: ;;#ASMEND 12760; GFX900-NEXT: s_setpc_b64 s[30:31] 12761; 12762; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_u_3: 12763; GFX90A: ; %bb.0: 12764; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12765; GFX90A-NEXT: ;;#ASMSTART 12766; GFX90A-NEXT: ; def s[12:17] 12767; GFX90A-NEXT: ;;#ASMEND 12768; GFX90A-NEXT: s_mov_b32 s8, s16 12769; GFX90A-NEXT: s_mov_b32 s9, s17 12770; GFX90A-NEXT: s_mov_b32 s10, s16 12771; GFX90A-NEXT: s_mov_b32 s11, s17 12772; GFX90A-NEXT: s_mov_b32 s14, s12 12773; GFX90A-NEXT: s_mov_b32 s15, s13 12774; GFX90A-NEXT: ;;#ASMSTART 12775; GFX90A-NEXT: ; use s[8:15] 12776; GFX90A-NEXT: ;;#ASMEND 12777; GFX90A-NEXT: s_setpc_b64 
; The long line that follows completes @s_shuffle_v4i64_v3i64__5_5_u_3 and then starts
; @s_shuffle_v4i64_v3i64__5_5_0_3: mask <5, 5, 0, 3> over %vec0/%vec1. Lanes 0-1 are
; %vec1 element 2, lane 2 is %vec0 element 0, lane 3 is %vec1 element 0, so both "def"
; asm blocks show up in the checks. The <4 x i64> result is handed to inline asm
; constrained to {s[8:15]}.
s[30:31] 12778; 12779; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_u_3: 12780; GFX940: ; %bb.0: 12781; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12782; GFX940-NEXT: ;;#ASMSTART 12783; GFX940-NEXT: ; def s[0:5] 12784; GFX940-NEXT: ;;#ASMEND 12785; GFX940-NEXT: s_mov_b32 s8, s4 12786; GFX940-NEXT: s_mov_b32 s9, s5 12787; GFX940-NEXT: s_mov_b32 s10, s4 12788; GFX940-NEXT: s_mov_b32 s11, s5 12789; GFX940-NEXT: s_mov_b32 s14, s0 12790; GFX940-NEXT: s_mov_b32 s15, s1 12791; GFX940-NEXT: ;;#ASMSTART 12792; GFX940-NEXT: ; use s[8:15] 12793; GFX940-NEXT: ;;#ASMEND 12794; GFX940-NEXT: s_setpc_b64 s[30:31] 12795 %vec0 = call <3 x i64> asm "; def $0", "=s"() 12796 %vec1 = call <3 x i64> asm "; def $0", "=s"() 12797 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 poison, i32 3> 12798 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 12799 ret void 12800} 12801 12802define void @s_shuffle_v4i64_v3i64__5_5_0_3() { 12803; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_0_3: 12804; GFX900: ; %bb.0: 12805; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12806; GFX900-NEXT: ;;#ASMSTART 12807; GFX900-NEXT: ; def s[4:9] 12808; GFX900-NEXT: ;;#ASMEND 12809; GFX900-NEXT: ;;#ASMSTART 12810; GFX900-NEXT: ; def s[16:21] 12811; GFX900-NEXT: ;;#ASMEND 12812; GFX900-NEXT: s_mov_b32 s8, s20 12813; GFX900-NEXT: s_mov_b32 s9, s21 12814; GFX900-NEXT: s_mov_b32 s10, s20 12815; GFX900-NEXT: s_mov_b32 s11, s21 12816; GFX900-NEXT: s_mov_b32 s12, s4 12817; GFX900-NEXT: s_mov_b32 s13, s5 12818; GFX900-NEXT: s_mov_b32 s14, s16 12819; GFX900-NEXT: s_mov_b32 s15, s17 12820; GFX900-NEXT: ;;#ASMSTART 12821; GFX900-NEXT: ; use s[8:15] 12822; GFX900-NEXT: ;;#ASMEND 12823; GFX900-NEXT: s_setpc_b64 s[30:31] 12824; 12825; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_0_3: 12826; GFX90A: ; %bb.0: 12827; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12828; GFX90A-NEXT: ;;#ASMSTART 12829; GFX90A-NEXT: ; def s[4:9] 12830; GFX90A-NEXT: ;;#ASMEND 12831; 
; The long line that follows completes @s_shuffle_v4i64_v3i64__5_5_0_3 and then starts
; @s_shuffle_v4i64_v3i64__5_5_1_3: mask <5, 5, 1, 3> over %vec0/%vec1. Lanes 0-1 are
; %vec1 element 2, lane 2 is %vec0 element 1, lane 3 is %vec1 element 0. The <4 x i64>
; result is handed to inline asm constrained to {s[8:15]}.
GFX90A-NEXT: ;;#ASMSTART 12832; GFX90A-NEXT: ; def s[16:21] 12833; GFX90A-NEXT: ;;#ASMEND 12834; GFX90A-NEXT: s_mov_b32 s8, s20 12835; GFX90A-NEXT: s_mov_b32 s9, s21 12836; GFX90A-NEXT: s_mov_b32 s10, s20 12837; GFX90A-NEXT: s_mov_b32 s11, s21 12838; GFX90A-NEXT: s_mov_b32 s12, s4 12839; GFX90A-NEXT: s_mov_b32 s13, s5 12840; GFX90A-NEXT: s_mov_b32 s14, s16 12841; GFX90A-NEXT: s_mov_b32 s15, s17 12842; GFX90A-NEXT: ;;#ASMSTART 12843; GFX90A-NEXT: ; use s[8:15] 12844; GFX90A-NEXT: ;;#ASMEND 12845; GFX90A-NEXT: s_setpc_b64 s[30:31] 12846; 12847; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_0_3: 12848; GFX940: ; %bb.0: 12849; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12850; GFX940-NEXT: ;;#ASMSTART 12851; GFX940-NEXT: ; def s[0:5] 12852; GFX940-NEXT: ;;#ASMEND 12853; GFX940-NEXT: ;;#ASMSTART 12854; GFX940-NEXT: ; def s[16:21] 12855; GFX940-NEXT: ;;#ASMEND 12856; GFX940-NEXT: s_mov_b32 s8, s20 12857; GFX940-NEXT: s_mov_b32 s9, s21 12858; GFX940-NEXT: s_mov_b32 s10, s20 12859; GFX940-NEXT: s_mov_b32 s11, s21 12860; GFX940-NEXT: s_mov_b32 s12, s0 12861; GFX940-NEXT: s_mov_b32 s13, s1 12862; GFX940-NEXT: s_mov_b32 s14, s16 12863; GFX940-NEXT: s_mov_b32 s15, s17 12864; GFX940-NEXT: ;;#ASMSTART 12865; GFX940-NEXT: ; use s[8:15] 12866; GFX940-NEXT: ;;#ASMEND 12867; GFX940-NEXT: s_setpc_b64 s[30:31] 12868 %vec0 = call <3 x i64> asm "; def $0", "=s"() 12869 %vec1 = call <3 x i64> asm "; def $0", "=s"() 12870 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 0, i32 3> 12871 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 12872 ret void 12873} 12874 12875define void @s_shuffle_v4i64_v3i64__5_5_1_3() { 12876; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_1_3: 12877; GFX900: ; %bb.0: 12878; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12879; GFX900-NEXT: ;;#ASMSTART 12880; GFX900-NEXT: ; def s[4:9] 12881; GFX900-NEXT: ;;#ASMEND 12882; GFX900-NEXT: ;;#ASMSTART 12883; GFX900-NEXT: ; def s[16:21] 12884; GFX900-NEXT: 
;;#ASMEND 12885; GFX900-NEXT: s_mov_b32 s8, s20 12886; GFX900-NEXT: s_mov_b32 s9, s21 12887; GFX900-NEXT: s_mov_b32 s10, s20 12888; GFX900-NEXT: s_mov_b32 s11, s21 12889; GFX900-NEXT: s_mov_b32 s12, s6 12890; GFX900-NEXT: s_mov_b32 s13, s7 12891; GFX900-NEXT: s_mov_b32 s14, s16 12892; GFX900-NEXT: s_mov_b32 s15, s17 12893; GFX900-NEXT: ;;#ASMSTART 12894; GFX900-NEXT: ; use s[8:15] 12895; GFX900-NEXT: ;;#ASMEND 12896; GFX900-NEXT: s_setpc_b64 s[30:31] 12897; 12898; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_1_3: 12899; GFX90A: ; %bb.0: 12900; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12901; GFX90A-NEXT: ;;#ASMSTART 12902; GFX90A-NEXT: ; def s[4:9] 12903; GFX90A-NEXT: ;;#ASMEND 12904; GFX90A-NEXT: ;;#ASMSTART 12905; GFX90A-NEXT: ; def s[16:21] 12906; GFX90A-NEXT: ;;#ASMEND 12907; GFX90A-NEXT: s_mov_b32 s8, s20 12908; GFX90A-NEXT: s_mov_b32 s9, s21 12909; GFX90A-NEXT: s_mov_b32 s10, s20 12910; GFX90A-NEXT: s_mov_b32 s11, s21 12911; GFX90A-NEXT: s_mov_b32 s12, s6 12912; GFX90A-NEXT: s_mov_b32 s13, s7 12913; GFX90A-NEXT: s_mov_b32 s14, s16 12914; GFX90A-NEXT: s_mov_b32 s15, s17 12915; GFX90A-NEXT: ;;#ASMSTART 12916; GFX90A-NEXT: ; use s[8:15] 12917; GFX90A-NEXT: ;;#ASMEND 12918; GFX90A-NEXT: s_setpc_b64 s[30:31] 12919; 12920; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_1_3: 12921; GFX940: ; %bb.0: 12922; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12923; GFX940-NEXT: ;;#ASMSTART 12924; GFX940-NEXT: ; def s[0:5] 12925; GFX940-NEXT: ;;#ASMEND 12926; GFX940-NEXT: ;;#ASMSTART 12927; GFX940-NEXT: ; def s[16:21] 12928; GFX940-NEXT: ;;#ASMEND 12929; GFX940-NEXT: s_mov_b32 s8, s20 12930; GFX940-NEXT: s_mov_b32 s9, s21 12931; GFX940-NEXT: s_mov_b32 s10, s20 12932; GFX940-NEXT: s_mov_b32 s11, s21 12933; GFX940-NEXT: s_mov_b32 s12, s2 12934; GFX940-NEXT: s_mov_b32 s13, s3 12935; GFX940-NEXT: s_mov_b32 s14, s16 12936; GFX940-NEXT: s_mov_b32 s15, s17 12937; GFX940-NEXT: ;;#ASMSTART 12938; GFX940-NEXT: ; use s[8:15] 12939; GFX940-NEXT: ;;#ASMEND 12940; GFX940-NEXT: 
s_setpc_b64 s[30:31] 12941 %vec0 = call <3 x i64> asm "; def $0", "=s"() 12942 %vec1 = call <3 x i64> asm "; def $0", "=s"() 12943 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 1, i32 3> 12944 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 12945 ret void 12946} 12947 12948define void @s_shuffle_v4i64_v3i64__5_5_2_3() { 12949; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_2_3: 12950; GFX900: ; %bb.0: 12951; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12952; GFX900-NEXT: ;;#ASMSTART 12953; GFX900-NEXT: ; def s[8:13] 12954; GFX900-NEXT: ;;#ASMEND 12955; GFX900-NEXT: ;;#ASMSTART 12956; GFX900-NEXT: ; def s[16:21] 12957; GFX900-NEXT: ;;#ASMEND 12958; GFX900-NEXT: s_mov_b32 s8, s20 12959; GFX900-NEXT: s_mov_b32 s9, s21 12960; GFX900-NEXT: s_mov_b32 s10, s20 12961; GFX900-NEXT: s_mov_b32 s11, s21 12962; GFX900-NEXT: s_mov_b32 s14, s16 12963; GFX900-NEXT: s_mov_b32 s15, s17 12964; GFX900-NEXT: ;;#ASMSTART 12965; GFX900-NEXT: ; use s[8:15] 12966; GFX900-NEXT: ;;#ASMEND 12967; GFX900-NEXT: s_setpc_b64 s[30:31] 12968; 12969; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_2_3: 12970; GFX90A: ; %bb.0: 12971; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12972; GFX90A-NEXT: ;;#ASMSTART 12973; GFX90A-NEXT: ; def s[8:13] 12974; GFX90A-NEXT: ;;#ASMEND 12975; GFX90A-NEXT: ;;#ASMSTART 12976; GFX90A-NEXT: ; def s[16:21] 12977; GFX90A-NEXT: ;;#ASMEND 12978; GFX90A-NEXT: s_mov_b32 s8, s20 12979; GFX90A-NEXT: s_mov_b32 s9, s21 12980; GFX90A-NEXT: s_mov_b32 s10, s20 12981; GFX90A-NEXT: s_mov_b32 s11, s21 12982; GFX90A-NEXT: s_mov_b32 s14, s16 12983; GFX90A-NEXT: s_mov_b32 s15, s17 12984; GFX90A-NEXT: ;;#ASMSTART 12985; GFX90A-NEXT: ; use s[8:15] 12986; GFX90A-NEXT: ;;#ASMEND 12987; GFX90A-NEXT: s_setpc_b64 s[30:31] 12988; 12989; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_2_3: 12990; GFX940: ; %bb.0: 12991; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12992; GFX940-NEXT: ;;#ASMSTART 12993; GFX940-NEXT: ; def s[8:13] 
12994; GFX940-NEXT: ;;#ASMEND 12995; GFX940-NEXT: ;;#ASMSTART 12996; GFX940-NEXT: ; def s[0:5] 12997; GFX940-NEXT: ;;#ASMEND 12998; GFX940-NEXT: s_mov_b32 s8, s4 12999; GFX940-NEXT: s_mov_b32 s9, s5 13000; GFX940-NEXT: s_mov_b32 s10, s4 13001; GFX940-NEXT: s_mov_b32 s11, s5 13002; GFX940-NEXT: s_mov_b32 s14, s0 13003; GFX940-NEXT: s_mov_b32 s15, s1 13004; GFX940-NEXT: ;;#ASMSTART 13005; GFX940-NEXT: ; use s[8:15] 13006; GFX940-NEXT: ;;#ASMEND 13007; GFX940-NEXT: s_setpc_b64 s[30:31] 13008 %vec0 = call <3 x i64> asm "; def $0", "=s"() 13009 %vec1 = call <3 x i64> asm "; def $0", "=s"() 13010 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 2, i32 3> 13011 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 13012 ret void 13013} 13014 13015define void @s_shuffle_v4i64_v3i64__5_5_4_3() { 13016; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_4_3: 13017; GFX900: ; %bb.0: 13018; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13019; GFX900-NEXT: ;;#ASMSTART 13020; GFX900-NEXT: ; def s[16:21] 13021; GFX900-NEXT: ;;#ASMEND 13022; GFX900-NEXT: s_mov_b32 s8, s20 13023; GFX900-NEXT: s_mov_b32 s9, s21 13024; GFX900-NEXT: s_mov_b32 s10, s20 13025; GFX900-NEXT: s_mov_b32 s11, s21 13026; GFX900-NEXT: s_mov_b32 s12, s18 13027; GFX900-NEXT: s_mov_b32 s13, s19 13028; GFX900-NEXT: s_mov_b32 s14, s16 13029; GFX900-NEXT: s_mov_b32 s15, s17 13030; GFX900-NEXT: ;;#ASMSTART 13031; GFX900-NEXT: ; use s[8:15] 13032; GFX900-NEXT: ;;#ASMEND 13033; GFX900-NEXT: s_setpc_b64 s[30:31] 13034; 13035; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_4_3: 13036; GFX90A: ; %bb.0: 13037; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13038; GFX90A-NEXT: ;;#ASMSTART 13039; GFX90A-NEXT: ; def s[16:21] 13040; GFX90A-NEXT: ;;#ASMEND 13041; GFX90A-NEXT: s_mov_b32 s8, s20 13042; GFX90A-NEXT: s_mov_b32 s9, s21 13043; GFX90A-NEXT: s_mov_b32 s10, s20 13044; GFX90A-NEXT: s_mov_b32 s11, s21 13045; GFX90A-NEXT: s_mov_b32 s12, s18 13046; GFX90A-NEXT: s_mov_b32 s13, s19 
13047; GFX90A-NEXT: s_mov_b32 s14, s16 13048; GFX90A-NEXT: s_mov_b32 s15, s17 13049; GFX90A-NEXT: ;;#ASMSTART 13050; GFX90A-NEXT: ; use s[8:15] 13051; GFX90A-NEXT: ;;#ASMEND 13052; GFX90A-NEXT: s_setpc_b64 s[30:31] 13053; 13054; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_4_3: 13055; GFX940: ; %bb.0: 13056; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13057; GFX940-NEXT: ;;#ASMSTART 13058; GFX940-NEXT: ; def s[0:5] 13059; GFX940-NEXT: ;;#ASMEND 13060; GFX940-NEXT: s_mov_b32 s8, s4 13061; GFX940-NEXT: s_mov_b32 s9, s5 13062; GFX940-NEXT: s_mov_b32 s10, s4 13063; GFX940-NEXT: s_mov_b32 s11, s5 13064; GFX940-NEXT: s_mov_b32 s12, s2 13065; GFX940-NEXT: s_mov_b32 s13, s3 13066; GFX940-NEXT: s_mov_b32 s14, s0 13067; GFX940-NEXT: s_mov_b32 s15, s1 13068; GFX940-NEXT: ;;#ASMSTART 13069; GFX940-NEXT: ; use s[8:15] 13070; GFX940-NEXT: ;;#ASMEND 13071; GFX940-NEXT: s_setpc_b64 s[30:31] 13072 %vec0 = call <3 x i64> asm "; def $0", "=s"() 13073 %vec1 = call <3 x i64> asm "; def $0", "=s"() 13074 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 4, i32 3> 13075 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 13076 ret void 13077} 13078 13079define void @s_shuffle_v4i64_v3i64__u_4_4_4() { 13080; GFX9-LABEL: s_shuffle_v4i64_v3i64__u_4_4_4: 13081; GFX9: ; %bb.0: 13082; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13083; GFX9-NEXT: ;;#ASMSTART 13084; GFX9-NEXT: ; def s[8:13] 13085; GFX9-NEXT: ;;#ASMEND 13086; GFX9-NEXT: s_mov_b32 s12, s10 13087; GFX9-NEXT: s_mov_b32 s13, s11 13088; GFX9-NEXT: s_mov_b32 s14, s10 13089; GFX9-NEXT: s_mov_b32 s15, s11 13090; GFX9-NEXT: ;;#ASMSTART 13091; GFX9-NEXT: ; use s[8:15] 13092; GFX9-NEXT: ;;#ASMEND 13093; GFX9-NEXT: s_setpc_b64 s[30:31] 13094 %vec0 = call <3 x i64> asm "; def $0", "=s"() 13095 %vec1 = call <3 x i64> asm "; def $0", "=s"() 13096 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 poison, i32 4, i32 4, i32 4> 13097 call void asm sideeffect "; use 
$0", "{s[8:15]}"(<4 x i64> %shuf) 13098 ret void 13099} 13100 13101define void @s_shuffle_v4i64_v3i64__0_4_4_4() { 13102; GFX900-LABEL: s_shuffle_v4i64_v3i64__0_4_4_4: 13103; GFX900: ; %bb.0: 13104; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13105; GFX900-NEXT: ;;#ASMSTART 13106; GFX900-NEXT: ; def s[8:13] 13107; GFX900-NEXT: ;;#ASMEND 13108; GFX900-NEXT: ;;#ASMSTART 13109; GFX900-NEXT: ; def s[12:17] 13110; GFX900-NEXT: ;;#ASMEND 13111; GFX900-NEXT: s_mov_b32 s10, s14 13112; GFX900-NEXT: s_mov_b32 s11, s15 13113; GFX900-NEXT: s_mov_b32 s12, s14 13114; GFX900-NEXT: s_mov_b32 s13, s15 13115; GFX900-NEXT: ;;#ASMSTART 13116; GFX900-NEXT: ; use s[8:15] 13117; GFX900-NEXT: ;;#ASMEND 13118; GFX900-NEXT: s_setpc_b64 s[30:31] 13119; 13120; GFX90A-LABEL: s_shuffle_v4i64_v3i64__0_4_4_4: 13121; GFX90A: ; %bb.0: 13122; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13123; GFX90A-NEXT: ;;#ASMSTART 13124; GFX90A-NEXT: ; def s[8:13] 13125; GFX90A-NEXT: ;;#ASMEND 13126; GFX90A-NEXT: ;;#ASMSTART 13127; GFX90A-NEXT: ; def s[12:17] 13128; GFX90A-NEXT: ;;#ASMEND 13129; GFX90A-NEXT: s_mov_b32 s10, s14 13130; GFX90A-NEXT: s_mov_b32 s11, s15 13131; GFX90A-NEXT: s_mov_b32 s12, s14 13132; GFX90A-NEXT: s_mov_b32 s13, s15 13133; GFX90A-NEXT: ;;#ASMSTART 13134; GFX90A-NEXT: ; use s[8:15] 13135; GFX90A-NEXT: ;;#ASMEND 13136; GFX90A-NEXT: s_setpc_b64 s[30:31] 13137; 13138; GFX940-LABEL: s_shuffle_v4i64_v3i64__0_4_4_4: 13139; GFX940: ; %bb.0: 13140; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13141; GFX940-NEXT: ;;#ASMSTART 13142; GFX940-NEXT: ; def s[8:13] 13143; GFX940-NEXT: ;;#ASMEND 13144; GFX940-NEXT: ;;#ASMSTART 13145; GFX940-NEXT: ; def s[0:5] 13146; GFX940-NEXT: ;;#ASMEND 13147; GFX940-NEXT: s_mov_b32 s10, s2 13148; GFX940-NEXT: s_mov_b32 s11, s3 13149; GFX940-NEXT: s_mov_b32 s12, s2 13150; GFX940-NEXT: s_mov_b32 s13, s3 13151; GFX940-NEXT: s_mov_b32 s14, s2 13152; GFX940-NEXT: s_mov_b32 s15, s3 13153; GFX940-NEXT: ;;#ASMSTART 13154; GFX940-NEXT: ; use s[8:15] 
13155; GFX940-NEXT: ;;#ASMEND 13156; GFX940-NEXT: s_setpc_b64 s[30:31] 13157 %vec0 = call <3 x i64> asm "; def $0", "=s"() 13158 %vec1 = call <3 x i64> asm "; def $0", "=s"() 13159 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 0, i32 4, i32 4, i32 4> 13160 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 13161 ret void 13162} 13163 13164define void @s_shuffle_v4i64_v3i64__1_4_4_4() { 13165; GFX900-LABEL: s_shuffle_v4i64_v3i64__1_4_4_4: 13166; GFX900: ; %bb.0: 13167; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13168; GFX900-NEXT: ;;#ASMSTART 13169; GFX900-NEXT: ; def s[4:9] 13170; GFX900-NEXT: ;;#ASMEND 13171; GFX900-NEXT: ;;#ASMSTART 13172; GFX900-NEXT: ; def s[8:13] 13173; GFX900-NEXT: ;;#ASMEND 13174; GFX900-NEXT: s_mov_b32 s8, s6 13175; GFX900-NEXT: s_mov_b32 s9, s7 13176; GFX900-NEXT: s_mov_b32 s12, s10 13177; GFX900-NEXT: s_mov_b32 s13, s11 13178; GFX900-NEXT: s_mov_b32 s14, s10 13179; GFX900-NEXT: s_mov_b32 s15, s11 13180; GFX900-NEXT: ;;#ASMSTART 13181; GFX900-NEXT: ; use s[8:15] 13182; GFX900-NEXT: ;;#ASMEND 13183; GFX900-NEXT: s_setpc_b64 s[30:31] 13184; 13185; GFX90A-LABEL: s_shuffle_v4i64_v3i64__1_4_4_4: 13186; GFX90A: ; %bb.0: 13187; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13188; GFX90A-NEXT: ;;#ASMSTART 13189; GFX90A-NEXT: ; def s[4:9] 13190; GFX90A-NEXT: ;;#ASMEND 13191; GFX90A-NEXT: ;;#ASMSTART 13192; GFX90A-NEXT: ; def s[8:13] 13193; GFX90A-NEXT: ;;#ASMEND 13194; GFX90A-NEXT: s_mov_b32 s8, s6 13195; GFX90A-NEXT: s_mov_b32 s9, s7 13196; GFX90A-NEXT: s_mov_b32 s12, s10 13197; GFX90A-NEXT: s_mov_b32 s13, s11 13198; GFX90A-NEXT: s_mov_b32 s14, s10 13199; GFX90A-NEXT: s_mov_b32 s15, s11 13200; GFX90A-NEXT: ;;#ASMSTART 13201; GFX90A-NEXT: ; use s[8:15] 13202; GFX90A-NEXT: ;;#ASMEND 13203; GFX90A-NEXT: s_setpc_b64 s[30:31] 13204; 13205; GFX940-LABEL: s_shuffle_v4i64_v3i64__1_4_4_4: 13206; GFX940: ; %bb.0: 13207; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13208; GFX940-NEXT: 
;;#ASMSTART 13209; GFX940-NEXT: ; def s[8:13] 13210; GFX940-NEXT: ;;#ASMEND 13211; GFX940-NEXT: ;;#ASMSTART 13212; GFX940-NEXT: ; def s[0:5] 13213; GFX940-NEXT: ;;#ASMEND 13214; GFX940-NEXT: s_mov_b32 s8, s2 13215; GFX940-NEXT: s_mov_b32 s9, s3 13216; GFX940-NEXT: s_mov_b32 s12, s10 13217; GFX940-NEXT: s_mov_b32 s13, s11 13218; GFX940-NEXT: s_mov_b32 s14, s10 13219; GFX940-NEXT: s_mov_b32 s15, s11 13220; GFX940-NEXT: ;;#ASMSTART 13221; GFX940-NEXT: ; use s[8:15] 13222; GFX940-NEXT: ;;#ASMEND 13223; GFX940-NEXT: s_setpc_b64 s[30:31] 13224 %vec0 = call <3 x i64> asm "; def $0", "=s"() 13225 %vec1 = call <3 x i64> asm "; def $0", "=s"() 13226 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 1, i32 4, i32 4, i32 4> 13227 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 13228 ret void 13229} 13230 13231define void @s_shuffle_v4i64_v3i64__2_4_4_4() { 13232; GFX900-LABEL: s_shuffle_v4i64_v3i64__2_4_4_4: 13233; GFX900: ; %bb.0: 13234; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13235; GFX900-NEXT: ;;#ASMSTART 13236; GFX900-NEXT: ; def s[12:17] 13237; GFX900-NEXT: ;;#ASMEND 13238; GFX900-NEXT: ;;#ASMSTART 13239; GFX900-NEXT: ; def s[8:13] 13240; GFX900-NEXT: ;;#ASMEND 13241; GFX900-NEXT: s_mov_b32 s8, s16 13242; GFX900-NEXT: s_mov_b32 s9, s17 13243; GFX900-NEXT: s_mov_b32 s12, s10 13244; GFX900-NEXT: s_mov_b32 s13, s11 13245; GFX900-NEXT: s_mov_b32 s14, s10 13246; GFX900-NEXT: s_mov_b32 s15, s11 13247; GFX900-NEXT: ;;#ASMSTART 13248; GFX900-NEXT: ; use s[8:15] 13249; GFX900-NEXT: ;;#ASMEND 13250; GFX900-NEXT: s_setpc_b64 s[30:31] 13251; 13252; GFX90A-LABEL: s_shuffle_v4i64_v3i64__2_4_4_4: 13253; GFX90A: ; %bb.0: 13254; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13255; GFX90A-NEXT: ;;#ASMSTART 13256; GFX90A-NEXT: ; def s[12:17] 13257; GFX90A-NEXT: ;;#ASMEND 13258; GFX90A-NEXT: ;;#ASMSTART 13259; GFX90A-NEXT: ; def s[8:13] 13260; GFX90A-NEXT: ;;#ASMEND 13261; GFX90A-NEXT: s_mov_b32 s8, s16 13262; GFX90A-NEXT: s_mov_b32 
s9, s17 13263; GFX90A-NEXT: s_mov_b32 s12, s10 13264; GFX90A-NEXT: s_mov_b32 s13, s11 13265; GFX90A-NEXT: s_mov_b32 s14, s10 13266; GFX90A-NEXT: s_mov_b32 s15, s11 13267; GFX90A-NEXT: ;;#ASMSTART 13268; GFX90A-NEXT: ; use s[8:15] 13269; GFX90A-NEXT: ;;#ASMEND 13270; GFX90A-NEXT: s_setpc_b64 s[30:31] 13271; 13272; GFX940-LABEL: s_shuffle_v4i64_v3i64__2_4_4_4: 13273; GFX940: ; %bb.0: 13274; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13275; GFX940-NEXT: ;;#ASMSTART 13276; GFX940-NEXT: ; def s[8:13] 13277; GFX940-NEXT: ;;#ASMEND 13278; GFX940-NEXT: ;;#ASMSTART 13279; GFX940-NEXT: ; def s[0:5] 13280; GFX940-NEXT: ;;#ASMEND 13281; GFX940-NEXT: s_mov_b32 s8, s4 13282; GFX940-NEXT: s_mov_b32 s9, s5 13283; GFX940-NEXT: s_mov_b32 s12, s10 13284; GFX940-NEXT: s_mov_b32 s13, s11 13285; GFX940-NEXT: s_mov_b32 s14, s10 13286; GFX940-NEXT: s_mov_b32 s15, s11 13287; GFX940-NEXT: ;;#ASMSTART 13288; GFX940-NEXT: ; use s[8:15] 13289; GFX940-NEXT: ;;#ASMEND 13290; GFX940-NEXT: s_setpc_b64 s[30:31] 13291 %vec0 = call <3 x i64> asm "; def $0", "=s"() 13292 %vec1 = call <3 x i64> asm "; def $0", "=s"() 13293 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 2, i32 4, i32 4, i32 4> 13294 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 13295 ret void 13296} 13297 13298define void @s_shuffle_v4i64_v3i64__3_4_4_4() { 13299; GFX9-LABEL: s_shuffle_v4i64_v3i64__3_4_4_4: 13300; GFX9: ; %bb.0: 13301; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13302; GFX9-NEXT: ;;#ASMSTART 13303; GFX9-NEXT: ; def s[8:13] 13304; GFX9-NEXT: ;;#ASMEND 13305; GFX9-NEXT: s_mov_b32 s12, s10 13306; GFX9-NEXT: s_mov_b32 s13, s11 13307; GFX9-NEXT: s_mov_b32 s14, s10 13308; GFX9-NEXT: s_mov_b32 s15, s11 13309; GFX9-NEXT: ;;#ASMSTART 13310; GFX9-NEXT: ; use s[8:15] 13311; GFX9-NEXT: ;;#ASMEND 13312; GFX9-NEXT: s_setpc_b64 s[30:31] 13313 %vec0 = call <3 x i64> asm "; def $0", "=s"() 13314 %vec1 = call <3 x i64> asm "; def $0", "=s"() 13315 %shuf = shufflevector <3 x 
i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 3, i32 4, i32 4, i32 4> 13316 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 13317 ret void 13318} 13319 13320define void @s_shuffle_v4i64_v3i64__4_4_4_4() { 13321; GFX9-LABEL: s_shuffle_v4i64_v3i64__4_4_4_4: 13322; GFX9: ; %bb.0: 13323; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13324; GFX9-NEXT: ;;#ASMSTART 13325; GFX9-NEXT: ; def s[8:13] 13326; GFX9-NEXT: ;;#ASMEND 13327; GFX9-NEXT: s_mov_b32 s8, s10 13328; GFX9-NEXT: s_mov_b32 s9, s11 13329; GFX9-NEXT: s_mov_b32 s12, s10 13330; GFX9-NEXT: s_mov_b32 s13, s11 13331; GFX9-NEXT: s_mov_b32 s14, s10 13332; GFX9-NEXT: s_mov_b32 s15, s11 13333; GFX9-NEXT: ;;#ASMSTART 13334; GFX9-NEXT: ; use s[8:15] 13335; GFX9-NEXT: ;;#ASMEND 13336; GFX9-NEXT: s_setpc_b64 s[30:31] 13337 %vec0 = call <3 x i64> asm "; def $0", "=s"() 13338 %vec1 = call <3 x i64> asm "; def $0", "=s"() 13339 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 4, i32 4, i32 4, i32 4> 13340 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 13341 ret void 13342} 13343 13344define void @s_shuffle_v4i64_v3i64__5_4_4_4() { 13345; GFX9-LABEL: s_shuffle_v4i64_v3i64__5_4_4_4: 13346; GFX9: ; %bb.0: 13347; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13348; GFX9-NEXT: ;;#ASMSTART 13349; GFX9-NEXT: ; def s[8:13] 13350; GFX9-NEXT: ;;#ASMEND 13351; GFX9-NEXT: s_mov_b32 s8, s12 13352; GFX9-NEXT: s_mov_b32 s9, s13 13353; GFX9-NEXT: s_mov_b32 s12, s10 13354; GFX9-NEXT: s_mov_b32 s13, s11 13355; GFX9-NEXT: s_mov_b32 s14, s10 13356; GFX9-NEXT: s_mov_b32 s15, s11 13357; GFX9-NEXT: ;;#ASMSTART 13358; GFX9-NEXT: ; use s[8:15] 13359; GFX9-NEXT: ;;#ASMEND 13360; GFX9-NEXT: s_setpc_b64 s[30:31] 13361 %vec0 = call <3 x i64> asm "; def $0", "=s"() 13362 %vec1 = call <3 x i64> asm "; def $0", "=s"() 13363 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 4, i32 4, i32 4> 13364 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 
13365 ret void 13366} 13367 13368define void @s_shuffle_v4i64_v3i64__5_u_4_4() { 13369; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_u_4_4: 13370; GFX900: ; %bb.0: 13371; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13372; GFX900-NEXT: ;;#ASMSTART 13373; GFX900-NEXT: ; def s[4:9] 13374; GFX900-NEXT: ;;#ASMEND 13375; GFX900-NEXT: s_mov_b32 s12, s6 13376; GFX900-NEXT: s_mov_b32 s13, s7 13377; GFX900-NEXT: s_mov_b32 s14, s6 13378; GFX900-NEXT: s_mov_b32 s15, s7 13379; GFX900-NEXT: ;;#ASMSTART 13380; GFX900-NEXT: ; use s[8:15] 13381; GFX900-NEXT: ;;#ASMEND 13382; GFX900-NEXT: s_setpc_b64 s[30:31] 13383; 13384; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_u_4_4: 13385; GFX90A: ; %bb.0: 13386; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13387; GFX90A-NEXT: ;;#ASMSTART 13388; GFX90A-NEXT: ; def s[4:9] 13389; GFX90A-NEXT: ;;#ASMEND 13390; GFX90A-NEXT: s_mov_b32 s12, s6 13391; GFX90A-NEXT: s_mov_b32 s13, s7 13392; GFX90A-NEXT: s_mov_b32 s14, s6 13393; GFX90A-NEXT: s_mov_b32 s15, s7 13394; GFX90A-NEXT: ;;#ASMSTART 13395; GFX90A-NEXT: ; use s[8:15] 13396; GFX90A-NEXT: ;;#ASMEND 13397; GFX90A-NEXT: s_setpc_b64 s[30:31] 13398; 13399; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_u_4_4: 13400; GFX940: ; %bb.0: 13401; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13402; GFX940-NEXT: ;;#ASMSTART 13403; GFX940-NEXT: ; def s[0:5] 13404; GFX940-NEXT: ;;#ASMEND 13405; GFX940-NEXT: s_mov_b32 s8, s4 13406; GFX940-NEXT: s_mov_b32 s9, s5 13407; GFX940-NEXT: s_mov_b32 s12, s2 13408; GFX940-NEXT: s_mov_b32 s13, s3 13409; GFX940-NEXT: s_mov_b32 s14, s2 13410; GFX940-NEXT: s_mov_b32 s15, s3 13411; GFX940-NEXT: ;;#ASMSTART 13412; GFX940-NEXT: ; use s[8:15] 13413; GFX940-NEXT: ;;#ASMEND 13414; GFX940-NEXT: s_setpc_b64 s[30:31] 13415 %vec0 = call <3 x i64> asm "; def $0", "=s"() 13416 %vec1 = call <3 x i64> asm "; def $0", "=s"() 13417 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 poison, i32 4, i32 4> 13418 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 
x i64> %shuf) 13419 ret void 13420} 13421 13422define void @s_shuffle_v4i64_v3i64__5_0_4_4() { 13423; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_0_4_4: 13424; GFX900: ; %bb.0: 13425; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13426; GFX900-NEXT: ;;#ASMSTART 13427; GFX900-NEXT: ; def s[4:9] 13428; GFX900-NEXT: ;;#ASMEND 13429; GFX900-NEXT: ;;#ASMSTART 13430; GFX900-NEXT: ; def s[12:17] 13431; GFX900-NEXT: ;;#ASMEND 13432; GFX900-NEXT: s_mov_b32 s8, s16 13433; GFX900-NEXT: s_mov_b32 s9, s17 13434; GFX900-NEXT: s_mov_b32 s10, s4 13435; GFX900-NEXT: s_mov_b32 s11, s5 13436; GFX900-NEXT: s_mov_b32 s12, s14 13437; GFX900-NEXT: s_mov_b32 s13, s15 13438; GFX900-NEXT: ;;#ASMSTART 13439; GFX900-NEXT: ; use s[8:15] 13440; GFX900-NEXT: ;;#ASMEND 13441; GFX900-NEXT: s_setpc_b64 s[30:31] 13442; 13443; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_0_4_4: 13444; GFX90A: ; %bb.0: 13445; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13446; GFX90A-NEXT: ;;#ASMSTART 13447; GFX90A-NEXT: ; def s[4:9] 13448; GFX90A-NEXT: ;;#ASMEND 13449; GFX90A-NEXT: ;;#ASMSTART 13450; GFX90A-NEXT: ; def s[12:17] 13451; GFX90A-NEXT: ;;#ASMEND 13452; GFX90A-NEXT: s_mov_b32 s8, s16 13453; GFX90A-NEXT: s_mov_b32 s9, s17 13454; GFX90A-NEXT: s_mov_b32 s10, s4 13455; GFX90A-NEXT: s_mov_b32 s11, s5 13456; GFX90A-NEXT: s_mov_b32 s12, s14 13457; GFX90A-NEXT: s_mov_b32 s13, s15 13458; GFX90A-NEXT: ;;#ASMSTART 13459; GFX90A-NEXT: ; use s[8:15] 13460; GFX90A-NEXT: ;;#ASMEND 13461; GFX90A-NEXT: s_setpc_b64 s[30:31] 13462; 13463; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_0_4_4: 13464; GFX940: ; %bb.0: 13465; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13466; GFX940-NEXT: ;;#ASMSTART 13467; GFX940-NEXT: ; def s[0:5] 13468; GFX940-NEXT: ;;#ASMEND 13469; GFX940-NEXT: s_mov_b32 s10, s0 13470; GFX940-NEXT: ;;#ASMSTART 13471; GFX940-NEXT: ; def s[4:9] 13472; GFX940-NEXT: ;;#ASMEND 13473; GFX940-NEXT: s_mov_b32 s11, s1 13474; GFX940-NEXT: s_mov_b32 s12, s6 13475; GFX940-NEXT: s_mov_b32 s13, s7 13476; GFX940-NEXT: 
s_mov_b32 s14, s6 13477; GFX940-NEXT: s_mov_b32 s15, s7 13478; GFX940-NEXT: ;;#ASMSTART 13479; GFX940-NEXT: ; use s[8:15] 13480; GFX940-NEXT: ;;#ASMEND 13481; GFX940-NEXT: s_setpc_b64 s[30:31] 13482 %vec0 = call <3 x i64> asm "; def $0", "=s"() 13483 %vec1 = call <3 x i64> asm "; def $0", "=s"() 13484 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 0, i32 4, i32 4> 13485 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 13486 ret void 13487} 13488 13489define void @s_shuffle_v4i64_v3i64__5_1_4_4() { 13490; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_1_4_4: 13491; GFX900: ; %bb.0: 13492; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13493; GFX900-NEXT: ;;#ASMSTART 13494; GFX900-NEXT: ; def s[8:13] 13495; GFX900-NEXT: ;;#ASMEND 13496; GFX900-NEXT: ;;#ASMSTART 13497; GFX900-NEXT: ; def s[4:9] 13498; GFX900-NEXT: ;;#ASMEND 13499; GFX900-NEXT: s_mov_b32 s12, s6 13500; GFX900-NEXT: s_mov_b32 s13, s7 13501; GFX900-NEXT: s_mov_b32 s14, s6 13502; GFX900-NEXT: s_mov_b32 s15, s7 13503; GFX900-NEXT: ;;#ASMSTART 13504; GFX900-NEXT: ; use s[8:15] 13505; GFX900-NEXT: ;;#ASMEND 13506; GFX900-NEXT: s_setpc_b64 s[30:31] 13507; 13508; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_1_4_4: 13509; GFX90A: ; %bb.0: 13510; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13511; GFX90A-NEXT: ;;#ASMSTART 13512; GFX90A-NEXT: ; def s[8:13] 13513; GFX90A-NEXT: ;;#ASMEND 13514; GFX90A-NEXT: ;;#ASMSTART 13515; GFX90A-NEXT: ; def s[4:9] 13516; GFX90A-NEXT: ;;#ASMEND 13517; GFX90A-NEXT: s_mov_b32 s12, s6 13518; GFX90A-NEXT: s_mov_b32 s13, s7 13519; GFX90A-NEXT: s_mov_b32 s14, s6 13520; GFX90A-NEXT: s_mov_b32 s15, s7 13521; GFX90A-NEXT: ;;#ASMSTART 13522; GFX90A-NEXT: ; use s[8:15] 13523; GFX90A-NEXT: ;;#ASMEND 13524; GFX90A-NEXT: s_setpc_b64 s[30:31] 13525; 13526; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_1_4_4: 13527; GFX940: ; %bb.0: 13528; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13529; GFX940-NEXT: ;;#ASMSTART 13530; GFX940-NEXT: ; def 
s[8:13] 13531; GFX940-NEXT: ;;#ASMEND 13532; GFX940-NEXT: ;;#ASMSTART 13533; GFX940-NEXT: ; def s[0:5] 13534; GFX940-NEXT: ;;#ASMEND 13535; GFX940-NEXT: s_mov_b32 s8, s4 13536; GFX940-NEXT: s_mov_b32 s9, s5 13537; GFX940-NEXT: s_mov_b32 s12, s2 13538; GFX940-NEXT: s_mov_b32 s13, s3 13539; GFX940-NEXT: s_mov_b32 s14, s2 13540; GFX940-NEXT: s_mov_b32 s15, s3 13541; GFX940-NEXT: ;;#ASMSTART 13542; GFX940-NEXT: ; use s[8:15] 13543; GFX940-NEXT: ;;#ASMEND 13544; GFX940-NEXT: s_setpc_b64 s[30:31] 13545 %vec0 = call <3 x i64> asm "; def $0", "=s"() 13546 %vec1 = call <3 x i64> asm "; def $0", "=s"() 13547 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 1, i32 4, i32 4> 13548 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 13549 ret void 13550} 13551 13552define void @s_shuffle_v4i64_v3i64__5_2_4_4() { 13553; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_2_4_4: 13554; GFX900: ; %bb.0: 13555; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13556; GFX900-NEXT: ;;#ASMSTART 13557; GFX900-NEXT: ; def s[8:13] 13558; GFX900-NEXT: ;;#ASMEND 13559; GFX900-NEXT: ;;#ASMSTART 13560; GFX900-NEXT: ; def s[4:9] 13561; GFX900-NEXT: ;;#ASMEND 13562; GFX900-NEXT: s_mov_b32 s10, s12 13563; GFX900-NEXT: s_mov_b32 s11, s13 13564; GFX900-NEXT: s_mov_b32 s12, s6 13565; GFX900-NEXT: s_mov_b32 s13, s7 13566; GFX900-NEXT: s_mov_b32 s14, s6 13567; GFX900-NEXT: s_mov_b32 s15, s7 13568; GFX900-NEXT: ;;#ASMSTART 13569; GFX900-NEXT: ; use s[8:15] 13570; GFX900-NEXT: ;;#ASMEND 13571; GFX900-NEXT: s_setpc_b64 s[30:31] 13572; 13573; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_2_4_4: 13574; GFX90A: ; %bb.0: 13575; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13576; GFX90A-NEXT: ;;#ASMSTART 13577; GFX90A-NEXT: ; def s[8:13] 13578; GFX90A-NEXT: ;;#ASMEND 13579; GFX90A-NEXT: ;;#ASMSTART 13580; GFX90A-NEXT: ; def s[4:9] 13581; GFX90A-NEXT: ;;#ASMEND 13582; GFX90A-NEXT: s_mov_b32 s10, s12 13583; GFX90A-NEXT: s_mov_b32 s11, s13 13584; GFX90A-NEXT: s_mov_b32 s12, s6 
13585; GFX90A-NEXT: s_mov_b32 s13, s7 13586; GFX90A-NEXT: s_mov_b32 s14, s6 13587; GFX90A-NEXT: s_mov_b32 s15, s7 13588; GFX90A-NEXT: ;;#ASMSTART 13589; GFX90A-NEXT: ; use s[8:15] 13590; GFX90A-NEXT: ;;#ASMEND 13591; GFX90A-NEXT: s_setpc_b64 s[30:31] 13592; 13593; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_2_4_4: 13594; GFX940: ; %bb.0: 13595; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13596; GFX940-NEXT: ;;#ASMSTART 13597; GFX940-NEXT: ; def s[12:17] 13598; GFX940-NEXT: ;;#ASMEND 13599; GFX940-NEXT: ;;#ASMSTART 13600; GFX940-NEXT: ; def s[0:5] 13601; GFX940-NEXT: ;;#ASMEND 13602; GFX940-NEXT: s_mov_b32 s8, s16 13603; GFX940-NEXT: s_mov_b32 s9, s17 13604; GFX940-NEXT: s_mov_b32 s10, s4 13605; GFX940-NEXT: s_mov_b32 s11, s5 13606; GFX940-NEXT: s_mov_b32 s12, s14 13607; GFX940-NEXT: s_mov_b32 s13, s15 13608; GFX940-NEXT: ;;#ASMSTART 13609; GFX940-NEXT: ; use s[8:15] 13610; GFX940-NEXT: ;;#ASMEND 13611; GFX940-NEXT: s_setpc_b64 s[30:31] 13612 %vec0 = call <3 x i64> asm "; def $0", "=s"() 13613 %vec1 = call <3 x i64> asm "; def $0", "=s"() 13614 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 2, i32 4, i32 4> 13615 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 13616 ret void 13617} 13618 13619define void @s_shuffle_v4i64_v3i64__5_3_4_4() { 13620; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_3_4_4: 13621; GFX900: ; %bb.0: 13622; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13623; GFX900-NEXT: ;;#ASMSTART 13624; GFX900-NEXT: ; def s[4:9] 13625; GFX900-NEXT: ;;#ASMEND 13626; GFX900-NEXT: s_mov_b32 s10, s4 13627; GFX900-NEXT: s_mov_b32 s11, s5 13628; GFX900-NEXT: s_mov_b32 s12, s6 13629; GFX900-NEXT: s_mov_b32 s13, s7 13630; GFX900-NEXT: s_mov_b32 s14, s6 13631; GFX900-NEXT: s_mov_b32 s15, s7 13632; GFX900-NEXT: ;;#ASMSTART 13633; GFX900-NEXT: ; use s[8:15] 13634; GFX900-NEXT: ;;#ASMEND 13635; GFX900-NEXT: s_setpc_b64 s[30:31] 13636; 13637; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_3_4_4: 13638; GFX90A: ; %bb.0: 13639; 
GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13640; GFX90A-NEXT: ;;#ASMSTART 13641; GFX90A-NEXT: ; def s[4:9] 13642; GFX90A-NEXT: ;;#ASMEND 13643; GFX90A-NEXT: s_mov_b32 s10, s4 13644; GFX90A-NEXT: s_mov_b32 s11, s5 13645; GFX90A-NEXT: s_mov_b32 s12, s6 13646; GFX90A-NEXT: s_mov_b32 s13, s7 13647; GFX90A-NEXT: s_mov_b32 s14, s6 13648; GFX90A-NEXT: s_mov_b32 s15, s7 13649; GFX90A-NEXT: ;;#ASMSTART 13650; GFX90A-NEXT: ; use s[8:15] 13651; GFX90A-NEXT: ;;#ASMEND 13652; GFX90A-NEXT: s_setpc_b64 s[30:31] 13653; 13654; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_3_4_4: 13655; GFX940: ; %bb.0: 13656; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13657; GFX940-NEXT: ;;#ASMSTART 13658; GFX940-NEXT: ; def s[0:5] 13659; GFX940-NEXT: ;;#ASMEND 13660; GFX940-NEXT: s_mov_b32 s8, s4 13661; GFX940-NEXT: s_mov_b32 s9, s5 13662; GFX940-NEXT: s_mov_b32 s10, s0 13663; GFX940-NEXT: s_mov_b32 s11, s1 13664; GFX940-NEXT: s_mov_b32 s12, s2 13665; GFX940-NEXT: s_mov_b32 s13, s3 13666; GFX940-NEXT: s_mov_b32 s14, s2 13667; GFX940-NEXT: s_mov_b32 s15, s3 13668; GFX940-NEXT: ;;#ASMSTART 13669; GFX940-NEXT: ; use s[8:15] 13670; GFX940-NEXT: ;;#ASMEND 13671; GFX940-NEXT: s_setpc_b64 s[30:31] 13672 %vec0 = call <3 x i64> asm "; def $0", "=s"() 13673 %vec1 = call <3 x i64> asm "; def $0", "=s"() 13674 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 3, i32 4, i32 4> 13675 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 13676 ret void 13677} 13678 13679define void @s_shuffle_v4i64_v3i64__5_5_4_4() { 13680; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_4_4: 13681; GFX900: ; %bb.0: 13682; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13683; GFX900-NEXT: ;;#ASMSTART 13684; GFX900-NEXT: ; def s[12:17] 13685; GFX900-NEXT: ;;#ASMEND 13686; GFX900-NEXT: s_mov_b32 s8, s16 13687; GFX900-NEXT: s_mov_b32 s9, s17 13688; GFX900-NEXT: s_mov_b32 s10, s16 13689; GFX900-NEXT: s_mov_b32 s11, s17 13690; GFX900-NEXT: s_mov_b32 s12, s14 13691; GFX900-NEXT: 
s_mov_b32 s13, s15 13692; GFX900-NEXT: ;;#ASMSTART 13693; GFX900-NEXT: ; use s[8:15] 13694; GFX900-NEXT: ;;#ASMEND 13695; GFX900-NEXT: s_setpc_b64 s[30:31] 13696; 13697; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_4_4: 13698; GFX90A: ; %bb.0: 13699; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13700; GFX90A-NEXT: ;;#ASMSTART 13701; GFX90A-NEXT: ; def s[12:17] 13702; GFX90A-NEXT: ;;#ASMEND 13703; GFX90A-NEXT: s_mov_b32 s8, s16 13704; GFX90A-NEXT: s_mov_b32 s9, s17 13705; GFX90A-NEXT: s_mov_b32 s10, s16 13706; GFX90A-NEXT: s_mov_b32 s11, s17 13707; GFX90A-NEXT: s_mov_b32 s12, s14 13708; GFX90A-NEXT: s_mov_b32 s13, s15 13709; GFX90A-NEXT: ;;#ASMSTART 13710; GFX90A-NEXT: ; use s[8:15] 13711; GFX90A-NEXT: ;;#ASMEND 13712; GFX90A-NEXT: s_setpc_b64 s[30:31] 13713; 13714; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_4_4: 13715; GFX940: ; %bb.0: 13716; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13717; GFX940-NEXT: ;;#ASMSTART 13718; GFX940-NEXT: ; def s[0:5] 13719; GFX940-NEXT: ;;#ASMEND 13720; GFX940-NEXT: s_mov_b32 s8, s4 13721; GFX940-NEXT: s_mov_b32 s9, s5 13722; GFX940-NEXT: s_mov_b32 s10, s4 13723; GFX940-NEXT: s_mov_b32 s11, s5 13724; GFX940-NEXT: s_mov_b32 s12, s2 13725; GFX940-NEXT: s_mov_b32 s13, s3 13726; GFX940-NEXT: s_mov_b32 s14, s2 13727; GFX940-NEXT: s_mov_b32 s15, s3 13728; GFX940-NEXT: ;;#ASMSTART 13729; GFX940-NEXT: ; use s[8:15] 13730; GFX940-NEXT: ;;#ASMEND 13731; GFX940-NEXT: s_setpc_b64 s[30:31] 13732 %vec0 = call <3 x i64> asm "; def $0", "=s"() 13733 %vec1 = call <3 x i64> asm "; def $0", "=s"() 13734 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 4, i32 4> 13735 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 13736 ret void 13737} 13738 13739define void @s_shuffle_v4i64_v3i64__5_5_u_4() { 13740; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_u_4: 13741; GFX900: ; %bb.0: 13742; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13743; GFX900-NEXT: ;;#ASMSTART 13744; GFX900-NEXT: ; def 
s[12:17] 13745; GFX900-NEXT: ;;#ASMEND 13746; GFX900-NEXT: s_mov_b32 s8, s16 13747; GFX900-NEXT: s_mov_b32 s9, s17 13748; GFX900-NEXT: s_mov_b32 s10, s16 13749; GFX900-NEXT: s_mov_b32 s11, s17 13750; GFX900-NEXT: ;;#ASMSTART 13751; GFX900-NEXT: ; use s[8:15] 13752; GFX900-NEXT: ;;#ASMEND 13753; GFX900-NEXT: s_setpc_b64 s[30:31] 13754; 13755; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_u_4: 13756; GFX90A: ; %bb.0: 13757; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13758; GFX90A-NEXT: ;;#ASMSTART 13759; GFX90A-NEXT: ; def s[12:17] 13760; GFX90A-NEXT: ;;#ASMEND 13761; GFX90A-NEXT: s_mov_b32 s8, s16 13762; GFX90A-NEXT: s_mov_b32 s9, s17 13763; GFX90A-NEXT: s_mov_b32 s10, s16 13764; GFX90A-NEXT: s_mov_b32 s11, s17 13765; GFX90A-NEXT: ;;#ASMSTART 13766; GFX90A-NEXT: ; use s[8:15] 13767; GFX90A-NEXT: ;;#ASMEND 13768; GFX90A-NEXT: s_setpc_b64 s[30:31] 13769; 13770; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_u_4: 13771; GFX940: ; %bb.0: 13772; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13773; GFX940-NEXT: ;;#ASMSTART 13774; GFX940-NEXT: ; def s[0:5] 13775; GFX940-NEXT: ;;#ASMEND 13776; GFX940-NEXT: s_mov_b32 s8, s4 13777; GFX940-NEXT: s_mov_b32 s9, s5 13778; GFX940-NEXT: s_mov_b32 s10, s4 13779; GFX940-NEXT: s_mov_b32 s11, s5 13780; GFX940-NEXT: s_mov_b32 s14, s2 13781; GFX940-NEXT: s_mov_b32 s15, s3 13782; GFX940-NEXT: ;;#ASMSTART 13783; GFX940-NEXT: ; use s[8:15] 13784; GFX940-NEXT: ;;#ASMEND 13785; GFX940-NEXT: s_setpc_b64 s[30:31] 13786 %vec0 = call <3 x i64> asm "; def $0", "=s"() 13787 %vec1 = call <3 x i64> asm "; def $0", "=s"() 13788 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 poison, i32 4> 13789 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 13790 ret void 13791} 13792 13793define void @s_shuffle_v4i64_v3i64__5_5_0_4() { 13794; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_0_4: 13795; GFX900: ; %bb.0: 13796; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13797; GFX900-NEXT: ;;#ASMSTART 
13798; GFX900-NEXT: ; def s[4:9] 13799; GFX900-NEXT: ;;#ASMEND 13800; GFX900-NEXT: ;;#ASMSTART 13801; GFX900-NEXT: ; def s[12:17] 13802; GFX900-NEXT: ;;#ASMEND 13803; GFX900-NEXT: s_mov_b32 s8, s16 13804; GFX900-NEXT: s_mov_b32 s9, s17 13805; GFX900-NEXT: s_mov_b32 s10, s16 13806; GFX900-NEXT: s_mov_b32 s11, s17 13807; GFX900-NEXT: s_mov_b32 s12, s4 13808; GFX900-NEXT: s_mov_b32 s13, s5 13809; GFX900-NEXT: ;;#ASMSTART 13810; GFX900-NEXT: ; use s[8:15] 13811; GFX900-NEXT: ;;#ASMEND 13812; GFX900-NEXT: s_setpc_b64 s[30:31] 13813; 13814; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_0_4: 13815; GFX90A: ; %bb.0: 13816; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13817; GFX90A-NEXT: ;;#ASMSTART 13818; GFX90A-NEXT: ; def s[4:9] 13819; GFX90A-NEXT: ;;#ASMEND 13820; GFX90A-NEXT: ;;#ASMSTART 13821; GFX90A-NEXT: ; def s[12:17] 13822; GFX90A-NEXT: ;;#ASMEND 13823; GFX90A-NEXT: s_mov_b32 s8, s16 13824; GFX90A-NEXT: s_mov_b32 s9, s17 13825; GFX90A-NEXT: s_mov_b32 s10, s16 13826; GFX90A-NEXT: s_mov_b32 s11, s17 13827; GFX90A-NEXT: s_mov_b32 s12, s4 13828; GFX90A-NEXT: s_mov_b32 s13, s5 13829; GFX90A-NEXT: ;;#ASMSTART 13830; GFX90A-NEXT: ; use s[8:15] 13831; GFX90A-NEXT: ;;#ASMEND 13832; GFX90A-NEXT: s_setpc_b64 s[30:31] 13833; 13834; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_0_4: 13835; GFX940: ; %bb.0: 13836; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13837; GFX940-NEXT: ;;#ASMSTART 13838; GFX940-NEXT: ; def s[12:17] 13839; GFX940-NEXT: ;;#ASMEND 13840; GFX940-NEXT: ;;#ASMSTART 13841; GFX940-NEXT: ; def s[0:5] 13842; GFX940-NEXT: ;;#ASMEND 13843; GFX940-NEXT: s_mov_b32 s8, s16 13844; GFX940-NEXT: s_mov_b32 s9, s17 13845; GFX940-NEXT: s_mov_b32 s10, s16 13846; GFX940-NEXT: s_mov_b32 s11, s17 13847; GFX940-NEXT: s_mov_b32 s12, s0 13848; GFX940-NEXT: s_mov_b32 s13, s1 13849; GFX940-NEXT: ;;#ASMSTART 13850; GFX940-NEXT: ; use s[8:15] 13851; GFX940-NEXT: ;;#ASMEND 13852; GFX940-NEXT: s_setpc_b64 s[30:31] 13853 %vec0 = call <3 x i64> asm "; def $0", "=s"() 13854 %vec1 = 
call <3 x i64> asm "; def $0", "=s"()
  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 0, i32 4>
  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
  ret void
}

; Scalar-register shuffle, mask <5, 5, 1, 4>
;   result[0] = result[1] = %vec1[2]; result[2] = %vec0[1]; result[3] = %vec1[1]
; Both inputs are "=s" inline-asm defs; the result is consumed in s[8:15].
define void @s_shuffle_v4i64_v3i64__5_5_1_4() {
; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_1_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[12:17]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s16
; GFX900-NEXT: s_mov_b32 s9, s17
; GFX900-NEXT: s_mov_b32 s10, s16
; GFX900-NEXT: s_mov_b32 s11, s17
; GFX900-NEXT: s_mov_b32 s12, s6
; GFX900-NEXT: s_mov_b32 s13, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_1_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[12:17]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s16
; GFX90A-NEXT: s_mov_b32 s9, s17
; GFX90A-NEXT: s_mov_b32 s10, s16
; GFX90A-NEXT: s_mov_b32 s11, s17
; GFX90A-NEXT: s_mov_b32 s12, s6
; GFX90A-NEXT: s_mov_b32 s13, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_1_4:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[12:17]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[0:5]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_mov_b32 s8, s16
; GFX940-NEXT: s_mov_b32 s9, s17
; GFX940-NEXT: s_mov_b32 s10, s16
; GFX940-NEXT: s_mov_b32 s11, s17
; GFX940-NEXT: s_mov_b32 s12, s2
; GFX940-NEXT: s_mov_b32 s13, s3
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; use s[8:15]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <3 x i64> asm "; def $0", "=s"()
  %vec1 = call <3 x i64> asm "; def $0", "=s"()
  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 1, i32 4>
  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
  ret void
}

; Scalar-register shuffle, mask <5, 5, 2, 4>
;   result[0] = result[1] = %vec1[2]; result[2] = %vec0[2]; result[3] = %vec1[1]
; Both inputs are "=s" inline-asm defs; the result is consumed in s[8:15].
define void @s_shuffle_v4i64_v3i64__5_5_2_4() {
; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_2_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[16:21]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s20
; GFX900-NEXT: s_mov_b32 s9, s21
; GFX900-NEXT: s_mov_b32 s10, s20
; GFX900-NEXT: s_mov_b32 s11, s21
; GFX900-NEXT: s_mov_b32 s14, s18
; GFX900-NEXT: s_mov_b32 s15, s19
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_2_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[16:21]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s20
; GFX90A-NEXT: s_mov_b32 s9, s21
; GFX90A-NEXT: s_mov_b32 s10, s20
; GFX90A-NEXT: s_mov_b32 s11, s21
; GFX90A-NEXT: s_mov_b32 s14, s18
; GFX90A-NEXT: s_mov_b32 s15, s19
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_2_4:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[8:13]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[0:5]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_mov_b32 s8, s4
; GFX940-NEXT: s_mov_b32 s9, s5
; GFX940-NEXT: s_mov_b32 s10, s4
; GFX940-NEXT: s_mov_b32 s11, s5
; GFX940-NEXT: s_mov_b32 s14, s2
; GFX940-NEXT: s_mov_b32 s15, s3
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; use s[8:15]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <3 x i64> asm "; def $0", "=s"()
  %vec1 = call <3 x i64> asm "; def $0", "=s"()
  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 2, i32 4>
  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
  ret void
}

; Scalar-register shuffle, mask <5, 5, 3, 4>
;   result[0] = result[1] = %vec1[2]; result[2] = %vec1[0]; result[3] = %vec1[1]
; Only %vec1 is referenced by the mask; the expected output below contains a
; single "def" block. The result is consumed in s[8:15].
define void @s_shuffle_v4i64_v3i64__5_5_3_4() {
; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_3_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[12:17]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s16
; GFX900-NEXT: s_mov_b32 s9, s17
; GFX900-NEXT: s_mov_b32 s10, s16
; GFX900-NEXT: s_mov_b32 s11, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_3_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[12:17]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s16
; GFX90A-NEXT: s_mov_b32 s9, s17
; GFX90A-NEXT: s_mov_b32 s10, s16
; GFX90A-NEXT: s_mov_b32 s11, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_3_4:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[0:5]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_mov_b32 s8, s4
; GFX940-NEXT: s_mov_b32 s9, s5
; GFX940-NEXT: s_mov_b32 s10, s4
; GFX940-NEXT: s_mov_b32 s11, s5
; GFX940-NEXT: s_mov_b32 s12, s0
; GFX940-NEXT: s_mov_b32 s13, s1
; GFX940-NEXT: s_mov_b32 s14, s2
; GFX940-NEXT: s_mov_b32 s15, s3
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; use s[8:15]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <3 x i64> asm "; def $0", "=s"()
  %vec1 = call <3 x i64> asm "; def $0", "=s"()
  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 3, i32 4>
  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
  ret void
}

; Scalar-register shuffle, mask <poison, 5, 5, 5>
;   result[0] is poison; result[1] = result[2] = result[3] = %vec1[2]
; Only %vec1 is referenced by the mask; one set of checks is shared by all
; three run configurations. The result is consumed in s[8:15].
define void @s_shuffle_v4i64_v3i64__u_5_5_5() {
; GFX9-LABEL: s_shuffle_v4i64_v3i64__u_5_5_5:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; def s[8:13]
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_mov_b32 s10, s12
; GFX9-NEXT: s_mov_b32 s11, s13
; GFX9-NEXT: s_mov_b32 s14, s12
; GFX9-NEXT: s_mov_b32 s15, s13
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:15]
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <3 x i64> asm "; def $0", "=s"()
  %vec1 = call <3 x i64> asm "; def $0", "=s"()
  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 poison, i32 5, i32 5, i32 5>
  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
  ret void
}

; Scalar-register shuffle, mask <0, 5, 5, 5>
;   result[0] = %vec0[0]; result[1] = result[2] = result[3] = %vec1[2]
; Both inputs are "=s" inline-asm defs; the result is consumed in s[8:15].
define void @s_shuffle_v4i64_v3i64__0_5_5_5() {
; GFX900-LABEL: s_shuffle_v4i64_v3i64__0_5_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[12:17]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s10, s16
; GFX900-NEXT: s_mov_b32 s11, s17
; GFX900-NEXT: s_mov_b32 s12, s16
; GFX900-NEXT: s_mov_b32 s13, s17
; GFX900-NEXT: s_mov_b32 s14, s16
; GFX900-NEXT: s_mov_b32 s15, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v4i64_v3i64__0_5_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[12:17]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s10, s16
; GFX90A-NEXT: s_mov_b32 s11, s17
; GFX90A-NEXT: s_mov_b32 s12, s16
; GFX90A-NEXT: s_mov_b32 s13, s17
; GFX90A-NEXT: s_mov_b32 s14, s16
; GFX90A-NEXT: s_mov_b32 s15, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v4i64_v3i64__0_5_5_5:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[8:13]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[0:5]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_mov_b32 s10, s4
; GFX940-NEXT: s_mov_b32 s11, s5
; GFX940-NEXT: s_mov_b32 s12, s4
; GFX940-NEXT: s_mov_b32 s13, s5
; GFX940-NEXT: s_mov_b32 s14, s4
; GFX940-NEXT: s_mov_b32 s15, s5
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; use s[8:15]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <3 x i64> asm "; def $0", "=s"()
  %vec1 = call <3 x i64> asm "; def $0", "=s"()
  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 0, i32 5, i32 5, i32 5>
  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
  ret void
}

; Scalar-register shuffle, mask <1, 5, 5, 5>
;   result[0] = %vec0[1]; result[1] = result[2] = result[3] = %vec1[2]
; Both inputs are "=s" inline-asm defs; the result is consumed in s[8:15].
define void @s_shuffle_v4i64_v3i64__1_5_5_5() {
; GFX900-LABEL: s_shuffle_v4i64_v3i64__1_5_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s6
; GFX900-NEXT: s_mov_b32 s9, s7
; GFX900-NEXT: s_mov_b32 s10, s12
; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: s_mov_b32 s14, s12
; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v4i64_v3i64__1_5_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s6
; GFX90A-NEXT: s_mov_b32 s9, s7
; GFX90A-NEXT: s_mov_b32 s10, s12
; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: s_mov_b32 s14, s12
; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v4i64_v3i64__1_5_5_5:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[8:13]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[0:5]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_mov_b32 s8, s2
; GFX940-NEXT: s_mov_b32 s9, s3
; GFX940-NEXT: s_mov_b32 s10, s12
; GFX940-NEXT: s_mov_b32 s11, s13
; GFX940-NEXT: s_mov_b32 s14, s12
; GFX940-NEXT: s_mov_b32 s15, s13
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; use s[8:15]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <3 x i64> asm "; def $0", "=s"()
  %vec1 = call <3 x i64> asm "; def $0", "=s"()
  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 1, i32 5, i32 5, i32 5>
  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
  ret void
}

; Scalar-register shuffle, mask <2, 5, 5, 5>
;   result[0] = %vec0[2]; result[1] = result[2] = result[3] = %vec1[2]
; Both inputs are "=s" inline-asm defs; the result is consumed in s[8:15].
define void @s_shuffle_v4i64_v3i64__2_5_5_5() {
; GFX900-LABEL: s_shuffle_v4i64_v3i64__2_5_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[12:17]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s16
; GFX900-NEXT: s_mov_b32 s9, s17
; GFX900-NEXT: s_mov_b32 s10, s12
; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: s_mov_b32 s14, s12
; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v4i64_v3i64__2_5_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[12:17]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s16
; GFX90A-NEXT: s_mov_b32 s9, s17
; GFX90A-NEXT: s_mov_b32 s10, s12
; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: s_mov_b32 s14, s12
; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v4i64_v3i64__2_5_5_5:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[8:13]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[0:5]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_mov_b32 s8, s4
; GFX940-NEXT: s_mov_b32 s9, s5
; GFX940-NEXT: s_mov_b32 s10, s12
; GFX940-NEXT: s_mov_b32 s11, s13
; GFX940-NEXT: s_mov_b32 s14, s12
; GFX940-NEXT: s_mov_b32 s15, s13
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; use s[8:15]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <3 x i64> asm "; def $0", "=s"()
  %vec1 = call <3 x i64> asm "; def $0", "=s"()
  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 2, i32 5, i32 5, i32 5>
  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
  ret void
}

; Scalar-register shuffle, mask <3, 5, 5, 5>
;   result[0] = %vec1[0]; result[1] = result[2] = result[3] = %vec1[2]
; Only %vec1 is referenced by the mask; one set of checks is shared by all
; three run configurations. The result is consumed in s[8:15].
define void @s_shuffle_v4i64_v3i64__3_5_5_5() {
; GFX9-LABEL: s_shuffle_v4i64_v3i64__3_5_5_5:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; def s[8:13]
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_mov_b32 s10, s12
; GFX9-NEXT: s_mov_b32 s11, s13
; GFX9-NEXT: s_mov_b32 s14, s12
; GFX9-NEXT: s_mov_b32 s15, s13
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:15]
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <3 x i64> asm "; def $0", "=s"()
  %vec1 = call <3 x i64> asm "; def $0", "=s"()
  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 3, i32 5, i32 5, i32 5>
  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
  ret void
}

; Scalar-register shuffle, mask <4, 5, 5, 5>
;   result[0] = %vec1[1]; result[1] = result[2] = result[3] = %vec1[2]
; Only %vec1 is referenced by the mask; one set of checks is shared by all
; three run configurations. The result is consumed in s[8:15].
define void @s_shuffle_v4i64_v3i64__4_5_5_5() {
; GFX9-LABEL: s_shuffle_v4i64_v3i64__4_5_5_5:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; def s[8:13]
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_mov_b32 s8, s10
; GFX9-NEXT: s_mov_b32 s9, s11
; GFX9-NEXT: s_mov_b32 s10, s12
; GFX9-NEXT: s_mov_b32 s11, s13
; GFX9-NEXT: s_mov_b32 s14, s12
; GFX9-NEXT: s_mov_b32 s15, s13
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:15]
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <3 x i64> asm "; def $0", "=s"()
  %vec1 = call <3 x i64> asm "; def $0", "=s"()
  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 4, i32 5, i32 5, i32 5>
  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
  ret void
}

; Scalar-register shuffle, mask <5, poison, 5, 5>
;   result[0] = result[2] = result[3] = %vec1[2]; result[1] is poison
; Only %vec1 is referenced by the mask; one set of checks is shared by all
; three run configurations. The result is consumed in s[8:15].
define void @s_shuffle_v4i64_v3i64__5_u_5_5() {
; GFX9-LABEL: s_shuffle_v4i64_v3i64__5_u_5_5:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; def s[8:13]
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_mov_b32 s8, s12
; GFX9-NEXT: s_mov_b32 s9, s13
; GFX9-NEXT: s_mov_b32 s14, s12
; GFX9-NEXT: s_mov_b32 s15, s13
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:15]
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <3 x i64> asm "; def $0", "=s"()
  %vec1 = call <3 x i64> asm "; def $0", "=s"()
  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 poison, i32 5, i32 5>
  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
  ret void
}

; Scalar-register shuffle, mask <5, 0, 5, 5>
;   result[0] = result[2] = result[3] = %vec1[2]; result[1] = %vec0[0]
; Both inputs are "=s" inline-asm defs; the result is consumed in s[8:15].
define void @s_shuffle_v4i64_v3i64__5_0_5_5() {
; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_0_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s12
; GFX900-NEXT: s_mov_b32 s9, s13
; GFX900-NEXT: s_mov_b32 s10, s4
; GFX900-NEXT: s_mov_b32 s11, s5
; GFX900-NEXT: s_mov_b32 s14, s12
; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_0_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s12
; GFX90A-NEXT: s_mov_b32 s9, s13
; GFX90A-NEXT: s_mov_b32 s10, s4
; GFX90A-NEXT: s_mov_b32 s11, s5
; GFX90A-NEXT: s_mov_b32 s14, s12
; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_0_5_5:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[8:13]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[0:5]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_mov_b32 s8, s12
; GFX940-NEXT: s_mov_b32 s9, s13
; GFX940-NEXT: s_mov_b32 s10, s0
; GFX940-NEXT: s_mov_b32 s11, s1
; GFX940-NEXT: s_mov_b32 s14, s12
; GFX940-NEXT: s_mov_b32 s15, s13
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; use s[8:15]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <3 x i64> asm "; def $0", "=s"()
  %vec1 = call <3 x i64> asm "; def $0", "=s"()
  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 0, i32 5, i32 5>
  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
  ret void
}

; Scalar-register shuffle, mask <5, 1, 5, 5>
;   result[0] = result[2] = result[3] = %vec1[2]; result[1] = %vec0[1]
; Both inputs are "=s" inline-asm defs; the result is consumed in s[8:15].
define void @s_shuffle_v4i64_v3i64__5_1_5_5() {
; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_1_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[12:17]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s16
; GFX900-NEXT: s_mov_b32 s9, s17
; GFX900-NEXT: s_mov_b32 s12, s16
; GFX900-NEXT: s_mov_b32 s13, s17
; GFX900-NEXT: s_mov_b32 s14, s16
; GFX900-NEXT: s_mov_b32 s15, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_1_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[12:17]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s16
; GFX90A-NEXT: s_mov_b32 s9, s17
; GFX90A-NEXT: s_mov_b32 s12, s16
; GFX90A-NEXT: s_mov_b32 s13, s17
; GFX90A-NEXT: s_mov_b32 s14, s16
; GFX90A-NEXT: s_mov_b32 s15, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_1_5_5:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[8:13]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[0:5]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_mov_b32 s8, s4
; GFX940-NEXT: s_mov_b32 s9, s5
; GFX940-NEXT: s_mov_b32 s12, s4
; GFX940-NEXT: s_mov_b32 s13, s5
; GFX940-NEXT: s_mov_b32 s14, s4
; GFX940-NEXT: s_mov_b32 s15, s5
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; use s[8:15]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <3 x i64> asm "; def $0", "=s"()
  %vec1 = call <3 x i64> asm "; def $0", "=s"()
  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 1, i32 5, i32 5>
  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
  ret void
}

; Scalar-register shuffle, mask <5, 2, 5, 5>
;   result[0] = result[2] = result[3] = %vec1[2]; result[1] = %vec0[2]
; Both inputs are "=s" inline-asm defs; the result is consumed in s[8:15].
define void @s_shuffle_v4i64_v3i64__5_2_5_5() {
; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_2_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[12:17]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s12
; GFX900-NEXT: s_mov_b32 s9, s13
; GFX900-NEXT: s_mov_b32 s10, s16
; GFX900-NEXT: s_mov_b32 s11, s17
; GFX900-NEXT: s_mov_b32 s14, s12
; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_2_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14499; GFX90A-NEXT: ;;#ASMSTART 14500; GFX90A-NEXT: ; def s[12:17] 14501; GFX90A-NEXT: ;;#ASMEND 14502; GFX90A-NEXT: ;;#ASMSTART 14503; GFX90A-NEXT: ; def s[8:13] 14504; GFX90A-NEXT: ;;#ASMEND 14505; GFX90A-NEXT: s_mov_b32 s8, s12 14506; GFX90A-NEXT: s_mov_b32 s9, s13 14507; GFX90A-NEXT: s_mov_b32 s10, s16 14508; GFX90A-NEXT: s_mov_b32 s11, s17 14509; GFX90A-NEXT: s_mov_b32 s14, s12 14510; GFX90A-NEXT: s_mov_b32 s15, s13 14511; GFX90A-NEXT: ;;#ASMSTART 14512; GFX90A-NEXT: ; use s[8:15] 14513; GFX90A-NEXT: ;;#ASMEND 14514; GFX90A-NEXT: s_setpc_b64 s[30:31] 14515; 14516; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_2_5_5: 14517; GFX940: ; %bb.0: 14518; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14519; GFX940-NEXT: ;;#ASMSTART 14520; GFX940-NEXT: ; def s[8:13] 14521; GFX940-NEXT: ;;#ASMEND 14522; GFX940-NEXT: ;;#ASMSTART 14523; GFX940-NEXT: ; def s[0:5] 14524; GFX940-NEXT: ;;#ASMEND 14525; GFX940-NEXT: s_mov_b32 s8, s12 14526; GFX940-NEXT: s_mov_b32 s9, s13 14527; GFX940-NEXT: s_mov_b32 s10, s4 14528; GFX940-NEXT: s_mov_b32 s11, s5 14529; GFX940-NEXT: s_mov_b32 s14, s12 14530; GFX940-NEXT: s_mov_b32 s15, s13 14531; GFX940-NEXT: ;;#ASMSTART 14532; GFX940-NEXT: ; use s[8:15] 14533; GFX940-NEXT: ;;#ASMEND 14534; GFX940-NEXT: s_setpc_b64 s[30:31] 14535 %vec0 = call <3 x i64> asm "; def $0", "=s"() 14536 %vec1 = call <3 x i64> asm "; def $0", "=s"() 14537 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 2, i32 5, i32 5> 14538 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 14539 ret void 14540} 14541 14542define void @s_shuffle_v4i64_v3i64__5_3_5_5() { 14543; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_3_5_5: 14544; GFX900: ; %bb.0: 14545; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14546; GFX900-NEXT: ;;#ASMSTART 14547; GFX900-NEXT: ; def s[12:17] 14548; GFX900-NEXT: ;;#ASMEND 14549; GFX900-NEXT: s_mov_b32 s8, s16 14550; GFX900-NEXT: s_mov_b32 s9, s17 14551; GFX900-NEXT: s_mov_b32 s10, s12 14552; GFX900-NEXT: 
s_mov_b32 s11, s13 14553; GFX900-NEXT: s_mov_b32 s12, s16 14554; GFX900-NEXT: s_mov_b32 s13, s17 14555; GFX900-NEXT: s_mov_b32 s14, s16 14556; GFX900-NEXT: s_mov_b32 s15, s17 14557; GFX900-NEXT: ;;#ASMSTART 14558; GFX900-NEXT: ; use s[8:15] 14559; GFX900-NEXT: ;;#ASMEND 14560; GFX900-NEXT: s_setpc_b64 s[30:31] 14561; 14562; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_3_5_5: 14563; GFX90A: ; %bb.0: 14564; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14565; GFX90A-NEXT: ;;#ASMSTART 14566; GFX90A-NEXT: ; def s[12:17] 14567; GFX90A-NEXT: ;;#ASMEND 14568; GFX90A-NEXT: s_mov_b32 s8, s16 14569; GFX90A-NEXT: s_mov_b32 s9, s17 14570; GFX90A-NEXT: s_mov_b32 s10, s12 14571; GFX90A-NEXT: s_mov_b32 s11, s13 14572; GFX90A-NEXT: s_mov_b32 s12, s16 14573; GFX90A-NEXT: s_mov_b32 s13, s17 14574; GFX90A-NEXT: s_mov_b32 s14, s16 14575; GFX90A-NEXT: s_mov_b32 s15, s17 14576; GFX90A-NEXT: ;;#ASMSTART 14577; GFX90A-NEXT: ; use s[8:15] 14578; GFX90A-NEXT: ;;#ASMEND 14579; GFX90A-NEXT: s_setpc_b64 s[30:31] 14580; 14581; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_3_5_5: 14582; GFX940: ; %bb.0: 14583; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14584; GFX940-NEXT: ;;#ASMSTART 14585; GFX940-NEXT: ; def s[0:5] 14586; GFX940-NEXT: ;;#ASMEND 14587; GFX940-NEXT: s_mov_b32 s8, s4 14588; GFX940-NEXT: s_mov_b32 s9, s5 14589; GFX940-NEXT: s_mov_b32 s10, s0 14590; GFX940-NEXT: s_mov_b32 s11, s1 14591; GFX940-NEXT: s_mov_b32 s12, s4 14592; GFX940-NEXT: s_mov_b32 s13, s5 14593; GFX940-NEXT: s_mov_b32 s14, s4 14594; GFX940-NEXT: s_mov_b32 s15, s5 14595; GFX940-NEXT: ;;#ASMSTART 14596; GFX940-NEXT: ; use s[8:15] 14597; GFX940-NEXT: ;;#ASMEND 14598; GFX940-NEXT: s_setpc_b64 s[30:31] 14599 %vec0 = call <3 x i64> asm "; def $0", "=s"() 14600 %vec1 = call <3 x i64> asm "; def $0", "=s"() 14601 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 3, i32 5, i32 5> 14602 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 14603 ret void 14604} 14605 14606define void 
@s_shuffle_v4i64_v3i64__5_4_5_5() { 14607; GFX9-LABEL: s_shuffle_v4i64_v3i64__5_4_5_5: 14608; GFX9: ; %bb.0: 14609; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14610; GFX9-NEXT: ;;#ASMSTART 14611; GFX9-NEXT: ; def s[8:13] 14612; GFX9-NEXT: ;;#ASMEND 14613; GFX9-NEXT: s_mov_b32 s8, s12 14614; GFX9-NEXT: s_mov_b32 s9, s13 14615; GFX9-NEXT: s_mov_b32 s14, s12 14616; GFX9-NEXT: s_mov_b32 s15, s13 14617; GFX9-NEXT: ;;#ASMSTART 14618; GFX9-NEXT: ; use s[8:15] 14619; GFX9-NEXT: ;;#ASMEND 14620; GFX9-NEXT: s_setpc_b64 s[30:31] 14621 %vec0 = call <3 x i64> asm "; def $0", "=s"() 14622 %vec1 = call <3 x i64> asm "; def $0", "=s"() 14623 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 4, i32 5, i32 5> 14624 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 14625 ret void 14626} 14627; Mask <5,5,poison,5>: result elements 0, 1 and 3 replicate element 2 of %vec1, element 2 is left unspecified, and %vec0 is never read; the result is pinned to s[8:15]. 14628define void @s_shuffle_v4i64_v3i64__5_5_u_5() { 14629; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_u_5: 14630; GFX900: ; %bb.0: 14631; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14632; GFX900-NEXT: ;;#ASMSTART 14633; GFX900-NEXT: ; def s[8:13] 14634; GFX900-NEXT: ;;#ASMEND 14635; GFX900-NEXT: s_mov_b32 s8, s12 14636; GFX900-NEXT: s_mov_b32 s9, s13 14637; GFX900-NEXT: s_mov_b32 s10, s12 14638; GFX900-NEXT: s_mov_b32 s11, s13 14639; GFX900-NEXT: s_mov_b32 s14, s12 14640; GFX900-NEXT: s_mov_b32 s15, s13 14641; GFX900-NEXT: ;;#ASMSTART 14642; GFX900-NEXT: ; use s[8:15] 14643; GFX900-NEXT: ;;#ASMEND 14644; GFX900-NEXT: s_setpc_b64 s[30:31] 14645; 14646; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_u_5: 14647; GFX90A: ; %bb.0: 14648; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14649; GFX90A-NEXT: ;;#ASMSTART 14650; GFX90A-NEXT: ; def s[8:13] 14651; GFX90A-NEXT: ;;#ASMEND 14652; GFX90A-NEXT: s_mov_b32 s8, s12 14653; GFX90A-NEXT: s_mov_b32 s9, s13 14654; GFX90A-NEXT: s_mov_b32 s10, s12 14655; GFX90A-NEXT: s_mov_b32 s11, s13 14656; GFX90A-NEXT: s_mov_b32 s14, s12 14657; GFX90A-NEXT: s_mov_b32 s15, s13 14658; GFX90A-NEXT: ;;#ASMSTART
14659; GFX90A-NEXT: ; use s[8:15] 14660; GFX90A-NEXT: ;;#ASMEND 14661; GFX90A-NEXT: s_setpc_b64 s[30:31] 14662; 14663; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_u_5: 14664; GFX940: ; %bb.0: 14665; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14666; GFX940-NEXT: ;;#ASMSTART 14667; GFX940-NEXT: ; def s[0:5] 14668; GFX940-NEXT: ;;#ASMEND 14669; GFX940-NEXT: s_mov_b32 s8, s4 14670; GFX940-NEXT: s_mov_b32 s9, s5 14671; GFX940-NEXT: s_mov_b32 s10, s4 14672; GFX940-NEXT: s_mov_b32 s11, s5 14673; GFX940-NEXT: s_mov_b32 s14, s4 14674; GFX940-NEXT: s_mov_b32 s15, s5 14675; GFX940-NEXT: ;;#ASMSTART 14676; GFX940-NEXT: ; use s[8:15] 14677; GFX940-NEXT: ;;#ASMEND 14678; GFX940-NEXT: s_setpc_b64 s[30:31] 14679 %vec0 = call <3 x i64> asm "; def $0", "=s"() 14680 %vec1 = call <3 x i64> asm "; def $0", "=s"() 14681 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 poison, i32 5> 14682 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 14683 ret void 14684} 14685; Mask <5,5,0,5>: result element 2 is element 0 of %vec0, while elements 0, 1 and 3 replicate element 2 of %vec1, so both inputs are read; the result is pinned to s[8:15]. 14686define void @s_shuffle_v4i64_v3i64__5_5_0_5() { 14687; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_0_5: 14688; GFX900: ; %bb.0: 14689; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14690; GFX900-NEXT: ;;#ASMSTART 14691; GFX900-NEXT: ; def s[4:9] 14692; GFX900-NEXT: ;;#ASMEND 14693; GFX900-NEXT: ;;#ASMSTART 14694; GFX900-NEXT: ; def s[12:17] 14695; GFX900-NEXT: ;;#ASMEND 14696; GFX900-NEXT: s_mov_b32 s8, s16 14697; GFX900-NEXT: s_mov_b32 s9, s17 14698; GFX900-NEXT: s_mov_b32 s10, s16 14699; GFX900-NEXT: s_mov_b32 s11, s17 14700; GFX900-NEXT: s_mov_b32 s12, s4 14701; GFX900-NEXT: s_mov_b32 s13, s5 14702; GFX900-NEXT: s_mov_b32 s14, s16 14703; GFX900-NEXT: s_mov_b32 s15, s17 14704; GFX900-NEXT: ;;#ASMSTART 14705; GFX900-NEXT: ; use s[8:15] 14706; GFX900-NEXT: ;;#ASMEND 14707; GFX900-NEXT: s_setpc_b64 s[30:31] 14708; 14709; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_0_5: 14710; GFX90A: ; %bb.0: 14711; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14712;
GFX90A-NEXT: ;;#ASMSTART 14713; GFX90A-NEXT: ; def s[4:9] 14714; GFX90A-NEXT: ;;#ASMEND 14715; GFX90A-NEXT: ;;#ASMSTART 14716; GFX90A-NEXT: ; def s[12:17] 14717; GFX90A-NEXT: ;;#ASMEND 14718; GFX90A-NEXT: s_mov_b32 s8, s16 14719; GFX90A-NEXT: s_mov_b32 s9, s17 14720; GFX90A-NEXT: s_mov_b32 s10, s16 14721; GFX90A-NEXT: s_mov_b32 s11, s17 14722; GFX90A-NEXT: s_mov_b32 s12, s4 14723; GFX90A-NEXT: s_mov_b32 s13, s5 14724; GFX90A-NEXT: s_mov_b32 s14, s16 14725; GFX90A-NEXT: s_mov_b32 s15, s17 14726; GFX90A-NEXT: ;;#ASMSTART 14727; GFX90A-NEXT: ; use s[8:15] 14728; GFX90A-NEXT: ;;#ASMEND 14729; GFX90A-NEXT: s_setpc_b64 s[30:31] 14730; 14731; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_0_5: 14732; GFX940: ; %bb.0: 14733; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14734; GFX940-NEXT: ;;#ASMSTART 14735; GFX940-NEXT: ; def s[12:17] 14736; GFX940-NEXT: ;;#ASMEND 14737; GFX940-NEXT: ;;#ASMSTART 14738; GFX940-NEXT: ; def s[0:5] 14739; GFX940-NEXT: ;;#ASMEND 14740; GFX940-NEXT: s_mov_b32 s8, s16 14741; GFX940-NEXT: s_mov_b32 s9, s17 14742; GFX940-NEXT: s_mov_b32 s10, s16 14743; GFX940-NEXT: s_mov_b32 s11, s17 14744; GFX940-NEXT: s_mov_b32 s12, s0 14745; GFX940-NEXT: s_mov_b32 s13, s1 14746; GFX940-NEXT: s_mov_b32 s14, s16 14747; GFX940-NEXT: s_mov_b32 s15, s17 14748; GFX940-NEXT: ;;#ASMSTART 14749; GFX940-NEXT: ; use s[8:15] 14750; GFX940-NEXT: ;;#ASMEND 14751; GFX940-NEXT: s_setpc_b64 s[30:31] 14752 %vec0 = call <3 x i64> asm "; def $0", "=s"() 14753 %vec1 = call <3 x i64> asm "; def $0", "=s"() 14754 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 0, i32 5> 14755 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 14756 ret void 14757} 14758; Mask <5,5,1,5>: result element 2 is element 1 of %vec0, while elements 0, 1 and 3 replicate element 2 of %vec1, so both inputs are read; the result is pinned to s[8:15]. 14759define void @s_shuffle_v4i64_v3i64__5_5_1_5() { 14760; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_1_5: 14761; GFX900: ; %bb.0: 14762; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14763; GFX900-NEXT: ;;#ASMSTART 14764; GFX900-NEXT: ; def s[4:9] 14765; GFX900-NEXT:
;;#ASMEND 14766; GFX900-NEXT: ;;#ASMSTART 14767; GFX900-NEXT: ; def s[12:17] 14768; GFX900-NEXT: ;;#ASMEND 14769; GFX900-NEXT: s_mov_b32 s8, s16 14770; GFX900-NEXT: s_mov_b32 s9, s17 14771; GFX900-NEXT: s_mov_b32 s10, s16 14772; GFX900-NEXT: s_mov_b32 s11, s17 14773; GFX900-NEXT: s_mov_b32 s12, s6 14774; GFX900-NEXT: s_mov_b32 s13, s7 14775; GFX900-NEXT: s_mov_b32 s14, s16 14776; GFX900-NEXT: s_mov_b32 s15, s17 14777; GFX900-NEXT: ;;#ASMSTART 14778; GFX900-NEXT: ; use s[8:15] 14779; GFX900-NEXT: ;;#ASMEND 14780; GFX900-NEXT: s_setpc_b64 s[30:31] 14781; 14782; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_1_5: 14783; GFX90A: ; %bb.0: 14784; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14785; GFX90A-NEXT: ;;#ASMSTART 14786; GFX90A-NEXT: ; def s[4:9] 14787; GFX90A-NEXT: ;;#ASMEND 14788; GFX90A-NEXT: ;;#ASMSTART 14789; GFX90A-NEXT: ; def s[12:17] 14790; GFX90A-NEXT: ;;#ASMEND 14791; GFX90A-NEXT: s_mov_b32 s8, s16 14792; GFX90A-NEXT: s_mov_b32 s9, s17 14793; GFX90A-NEXT: s_mov_b32 s10, s16 14794; GFX90A-NEXT: s_mov_b32 s11, s17 14795; GFX90A-NEXT: s_mov_b32 s12, s6 14796; GFX90A-NEXT: s_mov_b32 s13, s7 14797; GFX90A-NEXT: s_mov_b32 s14, s16 14798; GFX90A-NEXT: s_mov_b32 s15, s17 14799; GFX90A-NEXT: ;;#ASMSTART 14800; GFX90A-NEXT: ; use s[8:15] 14801; GFX90A-NEXT: ;;#ASMEND 14802; GFX90A-NEXT: s_setpc_b64 s[30:31] 14803; 14804; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_1_5: 14805; GFX940: ; %bb.0: 14806; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14807; GFX940-NEXT: ;;#ASMSTART 14808; GFX940-NEXT: ; def s[12:17] 14809; GFX940-NEXT: ;;#ASMEND 14810; GFX940-NEXT: ;;#ASMSTART 14811; GFX940-NEXT: ; def s[0:5] 14812; GFX940-NEXT: ;;#ASMEND 14813; GFX940-NEXT: s_mov_b32 s8, s16 14814; GFX940-NEXT: s_mov_b32 s9, s17 14815; GFX940-NEXT: s_mov_b32 s10, s16 14816; GFX940-NEXT: s_mov_b32 s11, s17 14817; GFX940-NEXT: s_mov_b32 s12, s2 14818; GFX940-NEXT: s_mov_b32 s13, s3 14819; GFX940-NEXT: s_mov_b32 s14, s16 14820; GFX940-NEXT: s_mov_b32 s15, s17 14821; GFX940-NEXT:
;;#ASMSTART 14822; GFX940-NEXT: ; use s[8:15] 14823; GFX940-NEXT: ;;#ASMEND 14824; GFX940-NEXT: s_setpc_b64 s[30:31] 14825 %vec0 = call <3 x i64> asm "; def $0", "=s"() 14826 %vec1 = call <3 x i64> asm "; def $0", "=s"() 14827 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 1, i32 5> 14828 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 14829 ret void 14830} 14831; Mask <5,5,2,5>: result element 2 is element 2 of %vec0, while elements 0, 1 and 3 replicate element 2 of %vec1, so both inputs are read; the result is pinned to s[8:15]. 14832define void @s_shuffle_v4i64_v3i64__5_5_2_5() { 14833; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_2_5: 14834; GFX900: ; %bb.0: 14835; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14836; GFX900-NEXT: ;;#ASMSTART 14837; GFX900-NEXT: ; def s[8:13] 14838; GFX900-NEXT: ;;#ASMEND 14839; GFX900-NEXT: ;;#ASMSTART 14840; GFX900-NEXT: ; def s[16:21] 14841; GFX900-NEXT: ;;#ASMEND 14842; GFX900-NEXT: s_mov_b32 s8, s20 14843; GFX900-NEXT: s_mov_b32 s9, s21 14844; GFX900-NEXT: s_mov_b32 s10, s20 14845; GFX900-NEXT: s_mov_b32 s11, s21 14846; GFX900-NEXT: s_mov_b32 s14, s20 14847; GFX900-NEXT: s_mov_b32 s15, s21 14848; GFX900-NEXT: ;;#ASMSTART 14849; GFX900-NEXT: ; use s[8:15] 14850; GFX900-NEXT: ;;#ASMEND 14851; GFX900-NEXT: s_setpc_b64 s[30:31] 14852; 14853; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_2_5: 14854; GFX90A: ; %bb.0: 14855; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14856; GFX90A-NEXT: ;;#ASMSTART 14857; GFX90A-NEXT: ; def s[8:13] 14858; GFX90A-NEXT: ;;#ASMEND 14859; GFX90A-NEXT: ;;#ASMSTART 14860; GFX90A-NEXT: ; def s[16:21] 14861; GFX90A-NEXT: ;;#ASMEND 14862; GFX90A-NEXT: s_mov_b32 s8, s20 14863; GFX90A-NEXT: s_mov_b32 s9, s21 14864; GFX90A-NEXT: s_mov_b32 s10, s20 14865; GFX90A-NEXT: s_mov_b32 s11, s21 14866; GFX90A-NEXT: s_mov_b32 s14, s20 14867; GFX90A-NEXT: s_mov_b32 s15, s21 14868; GFX90A-NEXT: ;;#ASMSTART 14869; GFX90A-NEXT: ; use s[8:15] 14870; GFX90A-NEXT: ;;#ASMEND 14871; GFX90A-NEXT: s_setpc_b64 s[30:31] 14872; 14873; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_2_5: 14874; GFX940: ; %bb.0: 14875; GFX940-NEXT: s_waitcnt
vmcnt(0) expcnt(0) lgkmcnt(0) 14876; GFX940-NEXT: ;;#ASMSTART 14877; GFX940-NEXT: ; def s[8:13] 14878; GFX940-NEXT: ;;#ASMEND 14879; GFX940-NEXT: ;;#ASMSTART 14880; GFX940-NEXT: ; def s[0:5] 14881; GFX940-NEXT: ;;#ASMEND 14882; GFX940-NEXT: s_mov_b32 s8, s4 14883; GFX940-NEXT: s_mov_b32 s9, s5 14884; GFX940-NEXT: s_mov_b32 s10, s4 14885; GFX940-NEXT: s_mov_b32 s11, s5 14886; GFX940-NEXT: s_mov_b32 s14, s4 14887; GFX940-NEXT: s_mov_b32 s15, s5 14888; GFX940-NEXT: ;;#ASMSTART 14889; GFX940-NEXT: ; use s[8:15] 14890; GFX940-NEXT: ;;#ASMEND 14891; GFX940-NEXT: s_setpc_b64 s[30:31] 14892 %vec0 = call <3 x i64> asm "; def $0", "=s"() 14893 %vec1 = call <3 x i64> asm "; def $0", "=s"() 14894 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 2, i32 5> 14895 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 14896 ret void 14897} 14898; Mask <5,5,3,5>: result element 2 is element 0 of %vec1 and elements 0, 1 and 3 replicate element 2 of %vec1, so %vec0 is never read; the result is pinned to s[8:15]. 14899define void @s_shuffle_v4i64_v3i64__5_5_3_5() { 14900; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_3_5: 14901; GFX900: ; %bb.0: 14902; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14903; GFX900-NEXT: ;;#ASMSTART 14904; GFX900-NEXT: ; def s[12:17] 14905; GFX900-NEXT: ;;#ASMEND 14906; GFX900-NEXT: s_mov_b32 s8, s16 14907; GFX900-NEXT: s_mov_b32 s9, s17 14908; GFX900-NEXT: s_mov_b32 s10, s16 14909; GFX900-NEXT: s_mov_b32 s11, s17 14910; GFX900-NEXT: s_mov_b32 s14, s16 14911; GFX900-NEXT: s_mov_b32 s15, s17 14912; GFX900-NEXT: ;;#ASMSTART 14913; GFX900-NEXT: ; use s[8:15] 14914; GFX900-NEXT: ;;#ASMEND 14915; GFX900-NEXT: s_setpc_b64 s[30:31] 14916; 14917; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_3_5: 14918; GFX90A: ; %bb.0: 14919; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14920; GFX90A-NEXT: ;;#ASMSTART 14921; GFX90A-NEXT: ; def s[12:17] 14922; GFX90A-NEXT: ;;#ASMEND 14923; GFX90A-NEXT: s_mov_b32 s8, s16 14924; GFX90A-NEXT: s_mov_b32 s9, s17 14925; GFX90A-NEXT: s_mov_b32 s10, s16 14926; GFX90A-NEXT: s_mov_b32 s11, s17 14927; GFX90A-NEXT: s_mov_b32 s14, s16 14928; GFX90A-NEXT:
s_mov_b32 s15, s17 14929; GFX90A-NEXT: ;;#ASMSTART 14930; GFX90A-NEXT: ; use s[8:15] 14931; GFX90A-NEXT: ;;#ASMEND 14932; GFX90A-NEXT: s_setpc_b64 s[30:31] 14933; 14934; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_3_5: 14935; GFX940: ; %bb.0: 14936; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14937; GFX940-NEXT: ;;#ASMSTART 14938; GFX940-NEXT: ; def s[0:5] 14939; GFX940-NEXT: ;;#ASMEND 14940; GFX940-NEXT: s_mov_b32 s8, s4 14941; GFX940-NEXT: s_mov_b32 s9, s5 14942; GFX940-NEXT: s_mov_b32 s10, s4 14943; GFX940-NEXT: s_mov_b32 s11, s5 14944; GFX940-NEXT: s_mov_b32 s12, s0 14945; GFX940-NEXT: s_mov_b32 s13, s1 14946; GFX940-NEXT: s_mov_b32 s14, s4 14947; GFX940-NEXT: s_mov_b32 s15, s5 14948; GFX940-NEXT: ;;#ASMSTART 14949; GFX940-NEXT: ; use s[8:15] 14950; GFX940-NEXT: ;;#ASMEND 14951; GFX940-NEXT: s_setpc_b64 s[30:31] 14952 %vec0 = call <3 x i64> asm "; def $0", "=s"() 14953 %vec1 = call <3 x i64> asm "; def $0", "=s"() 14954 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 3, i32 5> 14955 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 14956 ret void 14957} 14958; Mask <5,5,4,5>: result element 2 is element 1 of %vec1 and elements 0, 1 and 3 replicate element 2 of %vec1, so %vec0 is never read; the result is pinned to s[8:15]. 14959define void @s_shuffle_v4i64_v3i64__5_5_4_5() { 14960; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_4_5: 14961; GFX900: ; %bb.0: 14962; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14963; GFX900-NEXT: ;;#ASMSTART 14964; GFX900-NEXT: ; def s[12:17] 14965; GFX900-NEXT: ;;#ASMEND 14966; GFX900-NEXT: s_mov_b32 s8, s16 14967; GFX900-NEXT: s_mov_b32 s9, s17 14968; GFX900-NEXT: s_mov_b32 s10, s16 14969; GFX900-NEXT: s_mov_b32 s11, s17 14970; GFX900-NEXT: s_mov_b32 s12, s14 14971; GFX900-NEXT: s_mov_b32 s13, s15 14972; GFX900-NEXT: s_mov_b32 s14, s16 14973; GFX900-NEXT: s_mov_b32 s15, s17 14974; GFX900-NEXT: ;;#ASMSTART 14975; GFX900-NEXT: ; use s[8:15] 14976; GFX900-NEXT: ;;#ASMEND 14977; GFX900-NEXT: s_setpc_b64 s[30:31] 14978; 14979; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_4_5: 14980; GFX90A: ; %bb.0: 14981; GFX90A-NEXT: s_waitcnt vmcnt(0)
expcnt(0) lgkmcnt(0) 14982; GFX90A-NEXT: ;;#ASMSTART 14983; GFX90A-NEXT: ; def s[12:17] 14984; GFX90A-NEXT: ;;#ASMEND 14985; GFX90A-NEXT: s_mov_b32 s8, s16 14986; GFX90A-NEXT: s_mov_b32 s9, s17 14987; GFX90A-NEXT: s_mov_b32 s10, s16 14988; GFX90A-NEXT: s_mov_b32 s11, s17 14989; GFX90A-NEXT: s_mov_b32 s12, s14 14990; GFX90A-NEXT: s_mov_b32 s13, s15 14991; GFX90A-NEXT: s_mov_b32 s14, s16 14992; GFX90A-NEXT: s_mov_b32 s15, s17 14993; GFX90A-NEXT: ;;#ASMSTART 14994; GFX90A-NEXT: ; use s[8:15] 14995; GFX90A-NEXT: ;;#ASMEND 14996; GFX90A-NEXT: s_setpc_b64 s[30:31] 14997; 14998; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_4_5: 14999; GFX940: ; %bb.0: 15000; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15001; GFX940-NEXT: ;;#ASMSTART 15002; GFX940-NEXT: ; def s[0:5] 15003; GFX940-NEXT: ;;#ASMEND 15004; GFX940-NEXT: s_mov_b32 s8, s4 15005; GFX940-NEXT: s_mov_b32 s9, s5 15006; GFX940-NEXT: s_mov_b32 s10, s4 15007; GFX940-NEXT: s_mov_b32 s11, s5 15008; GFX940-NEXT: s_mov_b32 s12, s2 15009; GFX940-NEXT: s_mov_b32 s13, s3 15010; GFX940-NEXT: s_mov_b32 s14, s4 15011; GFX940-NEXT: s_mov_b32 s15, s5 15012; GFX940-NEXT: ;;#ASMSTART 15013; GFX940-NEXT: ; use s[8:15] 15014; GFX940-NEXT: ;;#ASMEND 15015; GFX940-NEXT: s_setpc_b64 s[30:31] 15016 %vec0 = call <3 x i64> asm "; def $0", "=s"() 15017 %vec1 = call <3 x i64> asm "; def $0", "=s"() 15018 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 4, i32 5> 15019 call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) 15020 ret void 15021} 15022;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: 15023; GFX90APLUS: {{.*}} 15024