; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX90A %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX940 %s

; Each function below shuffles <3 x i64> values that are defined by inline asm
; (register constraint "=v") and stores the shuffled vector through %ptr.
; Mask indices 0-2 select from the first shufflevector operand, 3-5 from the
; second operand.

; The whole mask is poison; the expected output contains no store.
define void @v_shuffle_v3i64_v3i64__u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX9-LABEL: v_shuffle_v3i64_v3i64__u_u_u:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %src0 = call <3 x i64> asm "; def $0", "=v"()
  %result = shufflevector <3 x i64> %src0, <3 x i64> poison, <3 x i32> poison
  store <3 x i64> %result, ptr addrspace(1) %ptr, align 32
  ret void
}

; Lane 0 takes element 0 of the first operand; lanes 1 and 2 are poison.
define void @v_shuffle_v3i64_v3i64__0_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v3i64__0_u_u:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    v_mov_b32_e32 v6, 0
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:5]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    global_store_dwordx2 v6, v[4:5], s[16:17] offset:16
; GFX900-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v3i64__0_u_u:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:5]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    global_store_dwordx2 v6, v[4:5], s[16:17] offset:16
; GFX90A-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v3i64_v3i64__0_u_u:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    v_mov_b32_e32 v6, 0
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:5]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    global_store_dwordx2 v6, v[4:5], s[0:1] offset:16 sc0 sc1
; GFX940-NEXT:    global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %src0 = call <3 x i64> asm "; def $0", "=v"()
  %result = shufflevector <3 x i64> %src0, <3 x i64> poison, <3 x i32> <i32 0, i32 poison, i32 poison>
  store <3 x i64> %result, ptr addrspace(1) %ptr, align 32
  ret void
}

; Lane 0 takes element 1 of the first operand; lanes 1 and 2 are poison.
define void @v_shuffle_v3i64_v3i64__1_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v3i64__1_u_u:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    v_mov_b32_e32 v6, 0
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:5]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v3i64__1_u_u:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:5]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v3i64_v3i64__1_u_u:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    v_mov_b32_e32 v6, 0
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:5]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %src0 = call <3 x i64> asm "; def $0", "=v"()
  %result = shufflevector <3 x i64> %src0, <3 x i64> poison, <3 x i32> <i32 1, i32 poison, i32 poison>
  store <3 x i64> %result, ptr addrspace(1) %ptr, align 32
  ret void
}

; Lane 0 takes element 2 of the first operand; lanes 1 and 2 are poison.
define void @v_shuffle_v3i64_v3i64__2_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v3i64__2_u_u:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:5]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_mov_b32_e32 v6, 0
; GFX900-NEXT:    v_mov_b32_e32 v0, v4
; GFX900-NEXT:    v_mov_b32_e32 v1, v5
; GFX900-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v3i64__2_u_u:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:5]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
; GFX90A-NEXT:    v_mov_b32_e32 v0, v4
; GFX90A-NEXT:    v_mov_b32_e32 v1, v5
; GFX90A-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v3i64_v3i64__2_u_u:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:5]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    v_mov_b32_e32 v6, 0
; GFX940-NEXT:    v_mov_b32_e32 v0, v4
; GFX940-NEXT:    v_mov_b32_e32 v1, v5
; GFX940-NEXT:    global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %src0 = call <3 x i64> asm "; def $0", "=v"()
  %result = shufflevector <3 x i64> %src0, <3 x i64> poison, <3 x i32> <i32 2, i32 poison, i32 poison>
  store <3 x i64> %result, ptr addrspace(1) %ptr, align 32
  ret void
}

; Mask index 3 names element 0 of the second operand, which is poison here;
; the expected output contains no store.
define void @v_shuffle_v3i64_v3i64__3_u_u(ptr addrspace(1) inreg %ptr) {
; GFX9-LABEL: v_shuffle_v3i64_v3i64__3_u_u:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %src0 = call <3 x i64> asm "; def $0", "=v"()
  %result = shufflevector <3 x i64> %src0, <3 x i64> poison, <3 x i32> <i32 3, i32 poison, i32 poison>
  store <3 x i64> %result, ptr addrspace(1) %ptr, align 32
  ret void
}

; Lane 0 takes element 1 of the second operand; lanes 1 and 2 are poison.
define void @v_shuffle_v3i64_v3i64__4_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v3i64__4_u_u:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    v_mov_b32_e32 v6, 0
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:5]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v3i64__4_u_u:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:5]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v3i64_v3i64__4_u_u:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    v_mov_b32_e32 v6, 0
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:5]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %src0 = call <3 x i64> asm "; def $0", "=v"()
  %src1 = call <3 x i64> asm "; def $0", "=v"()
  %result = shufflevector <3 x i64> %src0, <3 x i64> %src1, <3 x i32> <i32 4, i32 poison, i32 poison>
  store <3 x i64> %result, ptr addrspace(1) %ptr, align 32
  ret void
}

; Lane 0 takes element 2 of the second operand; lanes 1 and 2 are poison.
define void @v_shuffle_v3i64_v3i64__5_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v3i64__5_u_u:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:5]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_mov_b32_e32 v6, 0
; GFX900-NEXT:    v_mov_b32_e32 v0, v4
; GFX900-NEXT:    v_mov_b32_e32 v1, v5
; GFX900-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v3i64__5_u_u:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:5]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
; GFX90A-NEXT:    v_mov_b32_e32 v0, v4
; GFX90A-NEXT:    v_mov_b32_e32 v1, v5
; GFX90A-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v3i64_v3i64__5_u_u:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:5]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    v_mov_b32_e32 v6, 0
; GFX940-NEXT:    v_mov_b32_e32 v0, v4
; GFX940-NEXT:    v_mov_b32_e32 v1, v5
; GFX940-NEXT:    global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %src0 = call <3 x i64> asm "; def $0", "=v"()
  %src1 = call <3 x i64> asm "; def $0", "=v"()
  %result = shufflevector <3 x i64> %src0, <3 x i64> %src1, <3 x i32> <i32 5, i32 poison, i32 poison>
  store <3 x i64> %result, ptr addrspace(1) %ptr, align 32
  ret void
}

; Lane 0 takes element 2 of the second operand, lane 1 takes element 0 of the
; first operand; lane 2 is poison.
define void @v_shuffle_v3i64_v3i64__5_0_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v3i64__5_0_u:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:5]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[2:7]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_mov_b32_e32 v8, 0
; GFX900-NEXT:    v_mov_b32_e32 v2, v6
; GFX900-NEXT:    v_mov_b32_e32 v3, v7
; GFX900-NEXT:    v_mov_b32_e32 v4, v0
; GFX900-NEXT:    v_mov_b32_e32 v5, v1
; GFX900-NEXT:    global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v3i64__5_0_u:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:5]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[2:7]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_mov_b32_e32 v8, 0
; GFX90A-NEXT:    v_mov_b32_e32 v2, v6
; GFX90A-NEXT:    v_mov_b32_e32 v3, v7
; GFX90A-NEXT:    v_mov_b32_e32 v4, v0
; GFX90A-NEXT:    v_mov_b32_e32 v5, v1
; GFX90A-NEXT:    global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v3i64_v3i64__5_0_u:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:5]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    v_mov_b32_e32 v8, 0
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[2:7]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_nop 0
; GFX940-NEXT:    v_mov_b32_e32 v2, v6
; GFX940-NEXT:    v_mov_b32_e32 v3, v7
; GFX940-NEXT:    v_mov_b32_e32 v4, v0
; GFX940-NEXT:    v_mov_b32_e32 v5, v1
; GFX940-NEXT:    global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %src0 = call <3 x i64> asm "; def $0", "=v"()
  %src1 = call <3 x i64> asm "; def $0", "=v"()
  %result = shufflevector <3 x i64> %src0, <3 x i64> %src1, <3 x i32> <i32 5, i32 0, i32 poison>
  store <3 x i64> %result, ptr addrspace(1) %ptr, align 32
  ret void
}

; Lane 0 takes element 2 of the second operand, lane 1 takes element 1 of the
; first operand; lane 2 is poison.
define void @v_shuffle_v3i64_v3i64__5_1_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v3i64__5_1_u:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:5]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_mov_b32_e32 v10, 0
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[4:9]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_mov_b32_e32 v0, v8
; GFX900-NEXT:    v_mov_b32_e32 v1, v9
; GFX900-NEXT:    global_store_dwordx4 v10, v[0:3], s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v3i64__5_1_u:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:5]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_mov_b32_e32 v10, 0
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[4:9]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_mov_b32_e32 v0, v8
; GFX90A-NEXT:    v_mov_b32_e32 v1, v9
; GFX90A-NEXT:    global_store_dwordx4 v10, v[0:3], s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v3i64_v3i64__5_1_u:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:5]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    v_mov_b32_e32 v10, 0
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[4:9]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_nop 0
; GFX940-NEXT:    v_mov_b32_e32 v0, v8
; GFX940-NEXT:    v_mov_b32_e32 v1, v9
; GFX940-NEXT:    global_store_dwordx4 v10, v[0:3], s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %src0 = call <3 x i64> asm "; def $0", "=v"()
  %src1 = call <3 x i64> asm "; def $0", "=v"()
  %result = shufflevector <3 x i64> %src0, <3 x i64> %src1, <3 x i32> <i32 5, i32 1, i32 poison>
  store <3 x i64> %result, ptr addrspace(1) %ptr, align 32
  ret void
}

; Lane 0 takes element 2 of the second operand, lane 1 takes element 2 of the
; first operand; lane 2 is poison.
define void @v_shuffle_v3i64_v3i64__5_2_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v3i64__5_2_u:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:5]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_mov_b32_e32 v12, 0
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[6:11]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_mov_b32_e32 v2, v10
; GFX900-NEXT:    v_mov_b32_e32 v3, v11
; GFX900-NEXT:    global_store_dwordx4 v12, v[2:5], s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v3i64__5_2_u:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:5]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_mov_b32_e32 v12, 0
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[6:11]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_mov_b32_e32 v2, v10
; GFX90A-NEXT:    v_mov_b32_e32 v3, v11
; GFX90A-NEXT:    global_store_dwordx4 v12, v[2:5], s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v3i64_v3i64__5_2_u:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:5]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    v_mov_b32_e32 v12, 0
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[6:11]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_nop 0
; GFX940-NEXT:    v_mov_b32_e32 v2, v10
; GFX940-NEXT:    v_mov_b32_e32 v3, v11
; GFX940-NEXT:    global_store_dwordx4 v12, v[2:5], s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %src0 = call <3 x i64> asm "; def $0", "=v"()
  %src1 = call <3 x i64> asm "; def $0", "=v"()
  %result = shufflevector <3 x i64> %src0, <3 x i64> %src1, <3 x i32> <i32 5, i32 2, i32 poison>
  store <3 x i64> %result, ptr addrspace(1) %ptr, align 32
  ret void
}

; Lanes 0 and 1 take elements 2 and 0 of the second operand; lane 2 is poison.
define void @v_shuffle_v3i64_v3i64__5_3_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v3i64__5_3_u:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:5]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_mov_b32_e32 v6, 0
; GFX900-NEXT:    v_mov_b32_e32 v2, v4
; GFX900-NEXT:    v_mov_b32_e32 v3, v5
; GFX900-NEXT:    v_mov_b32_e32 v4, v0
; GFX900-NEXT:    v_mov_b32_e32 v5, v1
; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v3i64__5_3_u:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:5]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
; GFX90A-NEXT:    v_mov_b32_e32 v2, v4
; GFX90A-NEXT:    v_mov_b32_e32 v3, v5
; GFX90A-NEXT:    v_mov_b32_e32 v4, v0
; GFX90A-NEXT:    v_mov_b32_e32 v5, v1
; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v3i64_v3i64__5_3_u:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:5]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    v_mov_b32_e32 v6, 0
; GFX940-NEXT:    v_mov_b32_e32 v2, v4
; GFX940-NEXT:    v_mov_b32_e32 v3, v5
; GFX940-NEXT:    v_mov_b32_e32 v4, v0
; GFX940-NEXT:    v_mov_b32_e32 v5, v1
; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %src0 = call <3 x i64> asm "; def $0", "=v"()
  %src1 = call <3 x i64> asm "; def $0", "=v"()
  %result = shufflevector <3 x i64> %src0, <3 x i64> %src1, <3 x i32> <i32 5, i32 3, i32 poison>
  store <3 x i64> %result, ptr addrspace(1) %ptr, align 32
  ret void
}

; Lanes 0 and 1 take elements 2 and 1 of the second operand; lane 2 is poison.
define void @v_shuffle_v3i64_v3i64__5_4_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v3i64__5_4_u:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:5]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_mov_b32_e32 v6, 0
; GFX900-NEXT:    v_mov_b32_e32 v0, v4
; GFX900-NEXT:    v_mov_b32_e32 v1, v5
; GFX900-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v3i64__5_4_u:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:5]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
; GFX90A-NEXT:    v_mov_b32_e32 v0, v4
; GFX90A-NEXT:    v_mov_b32_e32 v1, v5
; GFX90A-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v3i64_v3i64__5_4_u:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:5]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    v_mov_b32_e32 v6, 0
; GFX940-NEXT:    v_mov_b32_e32 v0, v4
; GFX940-NEXT:    v_mov_b32_e32 v1, v5
; GFX940-NEXT:    global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %src0 = call <3 x i64> asm "; def $0", "=v"()
  %src1 = call <3 x i64> asm "; def $0", "=v"()
  %result = shufflevector <3 x i64> %src0, <3 x i64> %src1, <3 x i32> <i32 5, i32 4, i32 poison>
  store <3 x i64> %result, ptr addrspace(1) %ptr, align 32
  ret void
}

; Lanes 0 and 1 both take element 2 of the second operand; lane 2 is poison.
define void @v_shuffle_v3i64_v3i64__5_5_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v3i64__5_5_u:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:5]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_mov_b32_e32 v6, 0
; GFX900-NEXT:    v_mov_b32_e32 v2, v4
; GFX900-NEXT:    v_mov_b32_e32 v3, v5
; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v3i64__5_5_u:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:5]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
; GFX90A-NEXT:    v_mov_b32_e32 v2, v4
; GFX90A-NEXT:    v_mov_b32_e32 v3, v5
; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v3i64_v3i64__5_5_u:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:5]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    v_mov_b32_e32 v6, 0
; GFX940-NEXT:    v_mov_b32_e32 v2, v4
; GFX940-NEXT:    v_mov_b32_e32 v3, v5
; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %src0 = call <3 x i64> asm "; def $0", "=v"()
  %src1 = call <3 x i64> asm "; def $0", "=v"()
  %result = shufflevector <3 x i64> %src0, <3 x i64> %src1, <3 x i32> <i32 5, i32 5, i32 poison>
  store <3 x i64> %result, ptr addrspace(1) %ptr, align 32
  ret void
}

; Lanes 0 and 1 take element 2 of the second operand; lane 2 takes element 0
; of the first operand.
define void @v_shuffle_v3i64_v3i64__5_5_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v3i64__5_5_0:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:5]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[2:7]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_mov_b32_e32 v8, 0
; GFX900-NEXT:    v_mov_b32_e32 v4, v6
; GFX900-NEXT:    v_mov_b32_e32 v5, v7
; GFX900-NEXT:    global_store_dwordx2 v8, v[0:1], s[16:17] offset:16
; GFX900-NEXT:    global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v3i64__5_5_0:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:5]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[2:7]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_mov_b32_e32 v8, 0
; GFX90A-NEXT:    v_mov_b32_e32 v4, v6
; GFX90A-NEXT:    v_mov_b32_e32 v5, v7
; GFX90A-NEXT:    global_store_dwordx2 v8, v[0:1], s[16:17] offset:16
; GFX90A-NEXT:    global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v3i64_v3i64__5_5_0:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:5]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    v_mov_b32_e32 v8, 0
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[2:7]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    global_store_dwordx2 v8, v[0:1], s[0:1] offset:16 sc0 sc1
; GFX940-NEXT:    v_mov_b32_e32 v4, v6
; GFX940-NEXT:    v_mov_b32_e32 v5, v7
; GFX940-NEXT:    global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %src0 = call <3 x i64> asm "; def $0", "=v"()
  %src1 = call <3 x i64> asm "; def $0", "=v"()
  %result = shufflevector <3 x i64> %src0, <3 x i64> %src1, <3 x i32> <i32 5, i32 5, i32 0>
  store <3 x i64> %result, ptr addrspace(1) %ptr, align 32
  ret void
}

; Lanes 0 and 1 take element 2 of the second operand; lane 2 takes element 1
; of the first operand.
define void @v_shuffle_v3i64_v3i64__5_5_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v3i64__5_5_1:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:5]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[4:9]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_mov_b32_e32 v10, 0
; GFX900-NEXT:    v_mov_b32_e32 v6, v8
; GFX900-NEXT:    v_mov_b32_e32 v7, v9
; GFX900-NEXT:    global_store_dwordx2 v10, v[2:3], s[16:17] offset:16
; GFX900-NEXT:    global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v3i64__5_5_1:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:5]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[4:9]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_mov_b32_e32 v10, 0
; GFX90A-NEXT:    v_mov_b32_e32 v6, v8
; GFX90A-NEXT:    v_mov_b32_e32 v7, v9
; GFX90A-NEXT:    global_store_dwordx2 v10, v[2:3], s[16:17] offset:16
; GFX90A-NEXT:    global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v3i64_v3i64__5_5_1:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:5]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    v_mov_b32_e32 v10, 0
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[4:9]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    global_store_dwordx2 v10, v[2:3], s[0:1] offset:16 sc0 sc1
; GFX940-NEXT:    v_mov_b32_e32 v6, v8
; GFX940-NEXT:    v_mov_b32_e32 v7, v9
; GFX940-NEXT:    global_store_dwordx4 v10, v[6:9], s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %src0 = call <3 x i64> asm "; def $0", "=v"()
  %src1 = call <3 x i64> asm "; def $0", "=v"()
  %result = shufflevector <3 x i64> %src0, <3 x i64> %src1, <3 x i32> <i32 5, i32 5, i32 1>
  store <3 x i64> %result, ptr addrspace(1) %ptr, align 32
  ret void
}

; Lanes 0 and 1 take element 2 of the second operand; lane 2 takes element 2
; of the first operand.
define void @v_shuffle_v3i64_v3i64__5_5_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v3i64__5_5_2:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[6:11]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_mov_b32_e32 v12, 0
; GFX900-NEXT:    v_mov_b32_e32 v8, v10
; GFX900-NEXT:    v_mov_b32_e32 v9, v11
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:5]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    global_store_dwordx2 v12, v[4:5], s[16:17] offset:16
; GFX900-NEXT:    global_store_dwordx4 v12, v[8:11], s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v3i64__5_5_2:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[6:11]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_mov_b32_e32 v12, 0
; GFX90A-NEXT:    v_mov_b32_e32 v8, v10
; GFX90A-NEXT:    v_mov_b32_e32 v9, v11
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:5]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    global_store_dwordx2 v12, v[4:5], s[16:17] offset:16
; GFX90A-NEXT:    global_store_dwordx4 v12, v[8:11], s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v3i64_v3i64__5_5_2:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[6:11]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    v_mov_b32_e32 v12, 0
; GFX940-NEXT:    v_mov_b32_e32 v8, v10
; GFX940-NEXT:    v_mov_b32_e32 v9, v11
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:5]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    global_store_dwordx2 v12, v[4:5], s[0:1] offset:16 sc0 sc1
; GFX940-NEXT:    global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %src0 = call <3 x i64> asm "; def $0", "=v"()
  %src1 = call <3 x i64> asm "; def $0", "=v"()
  %result = shufflevector <3 x i64> %src0, <3 x i64> %src1, <3 x i32> <i32 5, i32 5, i32 2>
  store <3 x i64> %result, ptr addrspace(1) %ptr, align 32
  ret void
}

; All lanes come from the second operand: elements 2, 2 and 0.
define void @v_shuffle_v3i64_v3i64__5_5_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v3i64__5_5_3:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:5]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_mov_b32_e32 v6, 0
; GFX900-NEXT:    v_mov_b32_e32 v2, v4
; GFX900-NEXT:    v_mov_b32_e32 v3, v5
; GFX900-NEXT:    global_store_dwordx2 v6, v[0:1], s[16:17] offset:16
; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v3i64__5_5_3:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:5]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
; GFX90A-NEXT:    v_mov_b32_e32 v2, v4
; GFX90A-NEXT:    v_mov_b32_e32 v3, v5
; GFX90A-NEXT:    global_store_dwordx2 v6, v[0:1], s[16:17] offset:16
; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v3i64_v3i64__5_5_3:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:5]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    v_mov_b32_e32 v6, 0
; GFX940-NEXT:    v_mov_b32_e32 v2, v4
; GFX940-NEXT:    v_mov_b32_e32 v3, v5
; GFX940-NEXT:    global_store_dwordx2 v6, v[0:1], s[0:1] offset:16 sc0 sc1
; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %src0 = call <3 x i64> asm "; def $0", "=v"()
  %src1 = call <3 x i64> asm "; def $0", "=v"()
  %result = shufflevector <3 x i64> %src0, <3 x i64> %src1, <3 x i32> <i32 5, i32 5, i32 3>
  store <3 x i64> %result, ptr addrspace(1) %ptr, align 32
  ret void
}

; All lanes come from the second operand: elements 2, 2 and 1.
define void @v_shuffle_v3i64_v3i64__5_5_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v3i64__5_5_4:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    v_mov_b32_e32 v6, 0
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:5]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    global_store_dwordx2 v6, v[2:3], s[16:17] offset:16
; GFX900-NEXT:    v_mov_b32_e32 v2, v4
; GFX900-NEXT:    v_mov_b32_e32 v3, v5
; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v3i64__5_5_4:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:5]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    global_store_dwordx2 v6, v[2:3], s[16:17] offset:16
; GFX90A-NEXT:    v_mov_b32_e32 v2, v4
; GFX90A-NEXT:    v_mov_b32_e32 v3, v5
; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v3i64_v3i64__5_5_4:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    v_mov_b32_e32 v6, 0
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:5]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    global_store_dwordx2 v6, v[2:3], s[0:1] offset:16 sc0 sc1
; GFX940-NEXT:    v_mov_b32_e32 v2, v4
; GFX940-NEXT:    v_mov_b32_e32 v3, v5
; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %src0 = call <3 x i64> asm "; def $0", "=v"()
  %src1 = call <3 x i64> asm "; def $0", "=v"()
  %result = shufflevector <3 x i64> %src0, <3 x i64> %src1, <3 x i32> <i32 5, i32 5, i32 4>
  store <3 x i64> %result, ptr addrspace(1) %ptr, align 32
  ret void
}

; Every lane takes element 2 of the second operand.
define void @v_shuffle_v3i64_v3i64__5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v3i64__5_5_5:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:5]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_mov_b32_e32 v6, 0
; GFX900-NEXT:    v_mov_b32_e32 v2, v4
; GFX900-NEXT:    v_mov_b32_e32 v3, v5
; GFX900-NEXT:    global_store_dwordx2 v6, v[4:5], s[16:17] offset:16
; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v3i64__5_5_5:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:5]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
; GFX90A-NEXT:    v_mov_b32_e32 v2, v4
; GFX90A-NEXT:    v_mov_b32_e32 v3, v5
; GFX90A-NEXT:    global_store_dwordx2 v6, v[4:5], s[16:17] offset:16
; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v3i64_v3i64__5_5_5:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:5]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    v_mov_b32_e32 v6, 0
; GFX940-NEXT:    v_mov_b32_e32 v2, v4
; GFX940-NEXT:    v_mov_b32_e32 v3, v5
; GFX940-NEXT:    global_store_dwordx2 v6, v[4:5], s[0:1] offset:16 sc0 sc1
; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %src0 = call <3 x i64> asm "; def $0", "=v"()
  %src1 = call <3 x i64> asm "; def $0", "=v"()
  %result = shufflevector <3 x i64> %src0, <3 x i64> %src1, <3 x i32> <i32 5, i32 5, i32 5>
  store <3 x i64> %result, ptr addrspace(1) %ptr, align 32
  ret void
}

; Lane 0 is poison; lanes 1 and 2 both take element 0 of the first operand.
define void @v_shuffle_v3i64_v3i64__u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v3i64__u_0_0:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:5]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_mov_b32_e32 v6, 0
; GFX900-NEXT:    v_mov_b32_e32 v2, v0
; GFX900-NEXT:    v_mov_b32_e32 v3, v1
; GFX900-NEXT:    global_store_dwordx2 v6, v[0:1], s[16:17] offset:16
; GFX900-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v3i64__u_0_0:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:5]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
; GFX90A-NEXT:    v_mov_b32_e32 v2, v0
; GFX90A-NEXT:    v_mov_b32_e32 v3, v1
; GFX90A-NEXT:    global_store_dwordx2 v6, v[0:1], s[16:17] offset:16
; GFX90A-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v3i64_v3i64__u_0_0:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:5]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    v_mov_b32_e32 v6, 0
; GFX940-NEXT:    v_mov_b32_e32 v2, v0
; GFX940-NEXT:    v_mov_b32_e32 v3, v1
; GFX940-NEXT:    global_store_dwordx2 v6, v[0:1], s[0:1] offset:16 sc0 sc1
; GFX940-NEXT:    global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %src0 = call <3 x i64> asm "; def $0", "=v"()
  %result = shufflevector <3 x i64> %src0, <3 x i64> poison, <3 x i32> <i32 poison, i32 0, i32 0>
  store <3 x i64> %result, ptr addrspace(1) %ptr, align 32
  ret void
}

define void @v_shuffle_v3i64_v3i64__0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v3i64__0_0_0:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:5]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_mov_b32_e32 v6, 0
; GFX900-NEXT:    v_mov_b32_e32 v2, v0
; GFX900-NEXT:    v_mov_b32_e32 v3, v1
; GFX900-NEXT:    global_store_dwordx2 v6, v[0:1], s[16:17] offset:16
; GFX900-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v3i64__0_0_0:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:5]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
; GFX90A-NEXT:    v_mov_b32_e32 v2, v0
; GFX90A-NEXT:    v_mov_b32_e32 v3, v1
; GFX90A-NEXT:    global_store_dwordx2 v6, v[0:1], s[16:17] offset:16
; GFX90A-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v3i64_v3i64__0_0_0:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:5]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    v_mov_b32_e32 v6, 0
; GFX940-NEXT:    v_mov_b32_e32 v2, v0
; GFX940-NEXT:    v_mov_b32_e32 v3, v1
; GFX940-NEXT:    global_store_dwordx2 v6, v[0:1], s[0:1] offset:16 sc0 sc1
; GFX940-NEXT:
global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 968; GFX940-NEXT: s_waitcnt vmcnt(0) 969; GFX940-NEXT: s_setpc_b64 s[30:31] 970 %vec0 = call <3 x i64> asm "; def $0", "=v"() 971 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> zeroinitializer 972 store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 973 ret void 974} 975 976define void @v_shuffle_v3i64_v3i64__1_0_0(ptr addrspace(1) inreg %ptr) { 977; GFX900-LABEL: v_shuffle_v3i64_v3i64__1_0_0: 978; GFX900: ; %bb.0: 979; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 980; GFX900-NEXT: ;;#ASMSTART 981; GFX900-NEXT: ; def v[0:5] 982; GFX900-NEXT: ;;#ASMEND 983; GFX900-NEXT: v_mov_b32_e32 v6, 0 984; GFX900-NEXT: v_mov_b32_e32 v4, v0 985; GFX900-NEXT: v_mov_b32_e32 v5, v1 986; GFX900-NEXT: global_store_dwordx2 v6, v[0:1], s[16:17] offset:16 987; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] 988; GFX900-NEXT: s_waitcnt vmcnt(0) 989; GFX900-NEXT: s_setpc_b64 s[30:31] 990; 991; GFX90A-LABEL: v_shuffle_v3i64_v3i64__1_0_0: 992; GFX90A: ; %bb.0: 993; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 994; GFX90A-NEXT: ;;#ASMSTART 995; GFX90A-NEXT: ; def v[0:5] 996; GFX90A-NEXT: ;;#ASMEND 997; GFX90A-NEXT: v_mov_b32_e32 v6, 0 998; GFX90A-NEXT: v_mov_b32_e32 v4, v0 999; GFX90A-NEXT: v_mov_b32_e32 v5, v1 1000; GFX90A-NEXT: global_store_dwordx2 v6, v[0:1], s[16:17] offset:16 1001; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] 1002; GFX90A-NEXT: s_waitcnt vmcnt(0) 1003; GFX90A-NEXT: s_setpc_b64 s[30:31] 1004; 1005; GFX940-LABEL: v_shuffle_v3i64_v3i64__1_0_0: 1006; GFX940: ; %bb.0: 1007; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1008; GFX940-NEXT: ;;#ASMSTART 1009; GFX940-NEXT: ; def v[0:5] 1010; GFX940-NEXT: ;;#ASMEND 1011; GFX940-NEXT: v_mov_b32_e32 v6, 0 1012; GFX940-NEXT: v_mov_b32_e32 v4, v0 1013; GFX940-NEXT: v_mov_b32_e32 v5, v1 1014; GFX940-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16 sc0 sc1 1015; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 
1016; GFX940-NEXT: s_waitcnt vmcnt(0) 1017; GFX940-NEXT: s_setpc_b64 s[30:31] 1018 %vec0 = call <3 x i64> asm "; def $0", "=v"() 1019 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> <i32 1, i32 0, i32 0> 1020 store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 1021 ret void 1022} 1023 1024define void @v_shuffle_v3i64_v3i64__2_0_0(ptr addrspace(1) inreg %ptr) { 1025; GFX900-LABEL: v_shuffle_v3i64_v3i64__2_0_0: 1026; GFX900: ; %bb.0: 1027; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1028; GFX900-NEXT: ;;#ASMSTART 1029; GFX900-NEXT: ; def v[0:5] 1030; GFX900-NEXT: ;;#ASMEND 1031; GFX900-NEXT: v_mov_b32_e32 v6, 0 1032; GFX900-NEXT: v_mov_b32_e32 v2, v4 1033; GFX900-NEXT: v_mov_b32_e32 v3, v5 1034; GFX900-NEXT: v_mov_b32_e32 v4, v0 1035; GFX900-NEXT: v_mov_b32_e32 v5, v1 1036; GFX900-NEXT: global_store_dwordx2 v6, v[0:1], s[16:17] offset:16 1037; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] 1038; GFX900-NEXT: s_waitcnt vmcnt(0) 1039; GFX900-NEXT: s_setpc_b64 s[30:31] 1040; 1041; GFX90A-LABEL: v_shuffle_v3i64_v3i64__2_0_0: 1042; GFX90A: ; %bb.0: 1043; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1044; GFX90A-NEXT: ;;#ASMSTART 1045; GFX90A-NEXT: ; def v[0:5] 1046; GFX90A-NEXT: ;;#ASMEND 1047; GFX90A-NEXT: v_mov_b32_e32 v6, 0 1048; GFX90A-NEXT: v_mov_b32_e32 v2, v4 1049; GFX90A-NEXT: v_mov_b32_e32 v3, v5 1050; GFX90A-NEXT: v_mov_b32_e32 v4, v0 1051; GFX90A-NEXT: v_mov_b32_e32 v5, v1 1052; GFX90A-NEXT: global_store_dwordx2 v6, v[0:1], s[16:17] offset:16 1053; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] 1054; GFX90A-NEXT: s_waitcnt vmcnt(0) 1055; GFX90A-NEXT: s_setpc_b64 s[30:31] 1056; 1057; GFX940-LABEL: v_shuffle_v3i64_v3i64__2_0_0: 1058; GFX940: ; %bb.0: 1059; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1060; GFX940-NEXT: ;;#ASMSTART 1061; GFX940-NEXT: ; def v[0:5] 1062; GFX940-NEXT: ;;#ASMEND 1063; GFX940-NEXT: v_mov_b32_e32 v6, 0 1064; GFX940-NEXT: v_mov_b32_e32 v2, v4 1065; GFX940-NEXT: v_mov_b32_e32 
v3, v5 1066; GFX940-NEXT: v_mov_b32_e32 v4, v0 1067; GFX940-NEXT: v_mov_b32_e32 v5, v1 1068; GFX940-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16 sc0 sc1 1069; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 1070; GFX940-NEXT: s_waitcnt vmcnt(0) 1071; GFX940-NEXT: s_setpc_b64 s[30:31] 1072 %vec0 = call <3 x i64> asm "; def $0", "=v"() 1073 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> <i32 2, i32 0, i32 0> 1074 store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 1075 ret void 1076} 1077 1078define void @v_shuffle_v3i64_v3i64__3_0_0(ptr addrspace(1) inreg %ptr) { 1079; GFX900-LABEL: v_shuffle_v3i64_v3i64__3_0_0: 1080; GFX900: ; %bb.0: 1081; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1082; GFX900-NEXT: ;;#ASMSTART 1083; GFX900-NEXT: ; def v[0:5] 1084; GFX900-NEXT: ;;#ASMEND 1085; GFX900-NEXT: v_mov_b32_e32 v6, 0 1086; GFX900-NEXT: v_mov_b32_e32 v2, v0 1087; GFX900-NEXT: v_mov_b32_e32 v3, v1 1088; GFX900-NEXT: global_store_dwordx2 v6, v[0:1], s[16:17] offset:16 1089; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] 1090; GFX900-NEXT: s_waitcnt vmcnt(0) 1091; GFX900-NEXT: s_setpc_b64 s[30:31] 1092; 1093; GFX90A-LABEL: v_shuffle_v3i64_v3i64__3_0_0: 1094; GFX90A: ; %bb.0: 1095; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1096; GFX90A-NEXT: ;;#ASMSTART 1097; GFX90A-NEXT: ; def v[0:5] 1098; GFX90A-NEXT: ;;#ASMEND 1099; GFX90A-NEXT: v_mov_b32_e32 v6, 0 1100; GFX90A-NEXT: v_mov_b32_e32 v2, v0 1101; GFX90A-NEXT: v_mov_b32_e32 v3, v1 1102; GFX90A-NEXT: global_store_dwordx2 v6, v[0:1], s[16:17] offset:16 1103; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] 1104; GFX90A-NEXT: s_waitcnt vmcnt(0) 1105; GFX90A-NEXT: s_setpc_b64 s[30:31] 1106; 1107; GFX940-LABEL: v_shuffle_v3i64_v3i64__3_0_0: 1108; GFX940: ; %bb.0: 1109; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1110; GFX940-NEXT: ;;#ASMSTART 1111; GFX940-NEXT: ; def v[0:5] 1112; GFX940-NEXT: ;;#ASMEND 1113; GFX940-NEXT: v_mov_b32_e32 v6, 0 
1114; GFX940-NEXT: v_mov_b32_e32 v2, v0 1115; GFX940-NEXT: v_mov_b32_e32 v3, v1 1116; GFX940-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16 sc0 sc1 1117; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 1118; GFX940-NEXT: s_waitcnt vmcnt(0) 1119; GFX940-NEXT: s_setpc_b64 s[30:31] 1120 %vec0 = call <3 x i64> asm "; def $0", "=v"() 1121 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> <i32 3, i32 0, i32 0> 1122 store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 1123 ret void 1124} 1125 1126define void @v_shuffle_v3i64_v3i64__4_0_0(ptr addrspace(1) inreg %ptr) { 1127; GFX900-LABEL: v_shuffle_v3i64_v3i64__4_0_0: 1128; GFX900: ; %bb.0: 1129; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1130; GFX900-NEXT: ;;#ASMSTART 1131; GFX900-NEXT: ; def v[0:5] 1132; GFX900-NEXT: ;;#ASMEND 1133; GFX900-NEXT: ;;#ASMSTART 1134; GFX900-NEXT: ; def v[2:7] 1135; GFX900-NEXT: ;;#ASMEND 1136; GFX900-NEXT: v_mov_b32_e32 v8, 0 1137; GFX900-NEXT: v_mov_b32_e32 v6, v0 1138; GFX900-NEXT: v_mov_b32_e32 v7, v1 1139; GFX900-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17] offset:16 1140; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] 1141; GFX900-NEXT: s_waitcnt vmcnt(0) 1142; GFX900-NEXT: s_setpc_b64 s[30:31] 1143; 1144; GFX90A-LABEL: v_shuffle_v3i64_v3i64__4_0_0: 1145; GFX90A: ; %bb.0: 1146; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1147; GFX90A-NEXT: ;;#ASMSTART 1148; GFX90A-NEXT: ; def v[0:5] 1149; GFX90A-NEXT: ;;#ASMEND 1150; GFX90A-NEXT: ;;#ASMSTART 1151; GFX90A-NEXT: ; def v[2:7] 1152; GFX90A-NEXT: ;;#ASMEND 1153; GFX90A-NEXT: v_mov_b32_e32 v8, 0 1154; GFX90A-NEXT: v_mov_b32_e32 v6, v0 1155; GFX90A-NEXT: v_mov_b32_e32 v7, v1 1156; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17] offset:16 1157; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] 1158; GFX90A-NEXT: s_waitcnt vmcnt(0) 1159; GFX90A-NEXT: s_setpc_b64 s[30:31] 1160; 1161; GFX940-LABEL: v_shuffle_v3i64_v3i64__4_0_0: 1162; GFX940: ; %bb.0: 1163; 
GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1164; GFX940-NEXT: ;;#ASMSTART 1165; GFX940-NEXT: ; def v[0:5] 1166; GFX940-NEXT: ;;#ASMEND 1167; GFX940-NEXT: v_mov_b32_e32 v8, 0 1168; GFX940-NEXT: ;;#ASMSTART 1169; GFX940-NEXT: ; def v[2:7] 1170; GFX940-NEXT: ;;#ASMEND 1171; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16 sc0 sc1 1172; GFX940-NEXT: v_mov_b32_e32 v6, v0 1173; GFX940-NEXT: v_mov_b32_e32 v7, v1 1174; GFX940-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1 1175; GFX940-NEXT: s_waitcnt vmcnt(0) 1176; GFX940-NEXT: s_setpc_b64 s[30:31] 1177 %vec0 = call <3 x i64> asm "; def $0", "=v"() 1178 %vec1 = call <3 x i64> asm "; def $0", "=v"() 1179 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 4, i32 0, i32 0> 1180 store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 1181 ret void 1182} 1183 1184define void @v_shuffle_v3i64_v3i64__5_0_0(ptr addrspace(1) inreg %ptr) { 1185; GFX900-LABEL: v_shuffle_v3i64_v3i64__5_0_0: 1186; GFX900: ; %bb.0: 1187; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1188; GFX900-NEXT: ;;#ASMSTART 1189; GFX900-NEXT: ; def v[0:5] 1190; GFX900-NEXT: ;;#ASMEND 1191; GFX900-NEXT: ;;#ASMSTART 1192; GFX900-NEXT: ; def v[2:7] 1193; GFX900-NEXT: ;;#ASMEND 1194; GFX900-NEXT: v_mov_b32_e32 v8, 0 1195; GFX900-NEXT: v_mov_b32_e32 v2, v6 1196; GFX900-NEXT: v_mov_b32_e32 v3, v7 1197; GFX900-NEXT: v_mov_b32_e32 v4, v0 1198; GFX900-NEXT: v_mov_b32_e32 v5, v1 1199; GFX900-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17] offset:16 1200; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] 1201; GFX900-NEXT: s_waitcnt vmcnt(0) 1202; GFX900-NEXT: s_setpc_b64 s[30:31] 1203; 1204; GFX90A-LABEL: v_shuffle_v3i64_v3i64__5_0_0: 1205; GFX90A: ; %bb.0: 1206; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1207; GFX90A-NEXT: ;;#ASMSTART 1208; GFX90A-NEXT: ; def v[0:5] 1209; GFX90A-NEXT: ;;#ASMEND 1210; GFX90A-NEXT: ;;#ASMSTART 1211; GFX90A-NEXT: ; def v[2:7] 1212; GFX90A-NEXT: ;;#ASMEND 1213; 
GFX90A-NEXT: v_mov_b32_e32 v8, 0 1214; GFX90A-NEXT: v_mov_b32_e32 v2, v6 1215; GFX90A-NEXT: v_mov_b32_e32 v3, v7 1216; GFX90A-NEXT: v_mov_b32_e32 v4, v0 1217; GFX90A-NEXT: v_mov_b32_e32 v5, v1 1218; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17] offset:16 1219; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] 1220; GFX90A-NEXT: s_waitcnt vmcnt(0) 1221; GFX90A-NEXT: s_setpc_b64 s[30:31] 1222; 1223; GFX940-LABEL: v_shuffle_v3i64_v3i64__5_0_0: 1224; GFX940: ; %bb.0: 1225; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1226; GFX940-NEXT: ;;#ASMSTART 1227; GFX940-NEXT: ; def v[0:5] 1228; GFX940-NEXT: ;;#ASMEND 1229; GFX940-NEXT: v_mov_b32_e32 v8, 0 1230; GFX940-NEXT: ;;#ASMSTART 1231; GFX940-NEXT: ; def v[2:7] 1232; GFX940-NEXT: ;;#ASMEND 1233; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16 sc0 sc1 1234; GFX940-NEXT: v_mov_b32_e32 v2, v6 1235; GFX940-NEXT: v_mov_b32_e32 v3, v7 1236; GFX940-NEXT: v_mov_b32_e32 v4, v0 1237; GFX940-NEXT: v_mov_b32_e32 v5, v1 1238; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 1239; GFX940-NEXT: s_waitcnt vmcnt(0) 1240; GFX940-NEXT: s_setpc_b64 s[30:31] 1241 %vec0 = call <3 x i64> asm "; def $0", "=v"() 1242 %vec1 = call <3 x i64> asm "; def $0", "=v"() 1243 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 0, i32 0> 1244 store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 1245 ret void 1246} 1247 1248define void @v_shuffle_v3i64_v3i64__5_u_0(ptr addrspace(1) inreg %ptr) { 1249; GFX900-LABEL: v_shuffle_v3i64_v3i64__5_u_0: 1250; GFX900: ; %bb.0: 1251; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1252; GFX900-NEXT: v_mov_b32_e32 v8, 0 1253; GFX900-NEXT: ;;#ASMSTART 1254; GFX900-NEXT: ; def v[0:5] 1255; GFX900-NEXT: ;;#ASMEND 1256; GFX900-NEXT: ;;#ASMSTART 1257; GFX900-NEXT: ; def v[2:7] 1258; GFX900-NEXT: ;;#ASMEND 1259; GFX900-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17] offset:16 1260; GFX900-NEXT: v_mov_b32_e32 v0, v6 1261; GFX900-NEXT: 
v_mov_b32_e32 v1, v7 1262; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] 1263; GFX900-NEXT: s_waitcnt vmcnt(0) 1264; GFX900-NEXT: s_setpc_b64 s[30:31] 1265; 1266; GFX90A-LABEL: v_shuffle_v3i64_v3i64__5_u_0: 1267; GFX90A: ; %bb.0: 1268; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1269; GFX90A-NEXT: v_mov_b32_e32 v8, 0 1270; GFX90A-NEXT: ;;#ASMSTART 1271; GFX90A-NEXT: ; def v[0:5] 1272; GFX90A-NEXT: ;;#ASMEND 1273; GFX90A-NEXT: ;;#ASMSTART 1274; GFX90A-NEXT: ; def v[2:7] 1275; GFX90A-NEXT: ;;#ASMEND 1276; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17] offset:16 1277; GFX90A-NEXT: v_mov_b32_e32 v0, v6 1278; GFX90A-NEXT: v_mov_b32_e32 v1, v7 1279; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] 1280; GFX90A-NEXT: s_waitcnt vmcnt(0) 1281; GFX90A-NEXT: s_setpc_b64 s[30:31] 1282; 1283; GFX940-LABEL: v_shuffle_v3i64_v3i64__5_u_0: 1284; GFX940: ; %bb.0: 1285; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1286; GFX940-NEXT: v_mov_b32_e32 v8, 0 1287; GFX940-NEXT: ;;#ASMSTART 1288; GFX940-NEXT: ; def v[0:5] 1289; GFX940-NEXT: ;;#ASMEND 1290; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16 sc0 sc1 1291; GFX940-NEXT: ;;#ASMSTART 1292; GFX940-NEXT: ; def v[2:7] 1293; GFX940-NEXT: ;;#ASMEND 1294; GFX940-NEXT: s_nop 0 1295; GFX940-NEXT: v_mov_b32_e32 v0, v6 1296; GFX940-NEXT: v_mov_b32_e32 v1, v7 1297; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 1298; GFX940-NEXT: s_waitcnt vmcnt(0) 1299; GFX940-NEXT: s_setpc_b64 s[30:31] 1300 %vec0 = call <3 x i64> asm "; def $0", "=v"() 1301 %vec1 = call <3 x i64> asm "; def $0", "=v"() 1302 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 poison, i32 0> 1303 store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 1304 ret void 1305} 1306 1307define void @v_shuffle_v3i64_v3i64__5_1_0(ptr addrspace(1) inreg %ptr) { 1308; GFX900-LABEL: v_shuffle_v3i64_v3i64__5_1_0: 1309; GFX900: ; %bb.0: 1310; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 
1311; GFX900-NEXT: v_mov_b32_e32 v10, 0 1312; GFX900-NEXT: ;;#ASMSTART 1313; GFX900-NEXT: ; def v[0:5] 1314; GFX900-NEXT: ;;#ASMEND 1315; GFX900-NEXT: ;;#ASMSTART 1316; GFX900-NEXT: ; def v[4:9] 1317; GFX900-NEXT: ;;#ASMEND 1318; GFX900-NEXT: global_store_dwordx2 v10, v[0:1], s[16:17] offset:16 1319; GFX900-NEXT: v_mov_b32_e32 v0, v8 1320; GFX900-NEXT: v_mov_b32_e32 v1, v9 1321; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] 1322; GFX900-NEXT: s_waitcnt vmcnt(0) 1323; GFX900-NEXT: s_setpc_b64 s[30:31] 1324; 1325; GFX90A-LABEL: v_shuffle_v3i64_v3i64__5_1_0: 1326; GFX90A: ; %bb.0: 1327; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1328; GFX90A-NEXT: v_mov_b32_e32 v10, 0 1329; GFX90A-NEXT: ;;#ASMSTART 1330; GFX90A-NEXT: ; def v[0:5] 1331; GFX90A-NEXT: ;;#ASMEND 1332; GFX90A-NEXT: ;;#ASMSTART 1333; GFX90A-NEXT: ; def v[4:9] 1334; GFX90A-NEXT: ;;#ASMEND 1335; GFX90A-NEXT: global_store_dwordx2 v10, v[0:1], s[16:17] offset:16 1336; GFX90A-NEXT: v_mov_b32_e32 v0, v8 1337; GFX90A-NEXT: v_mov_b32_e32 v1, v9 1338; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] 1339; GFX90A-NEXT: s_waitcnt vmcnt(0) 1340; GFX90A-NEXT: s_setpc_b64 s[30:31] 1341; 1342; GFX940-LABEL: v_shuffle_v3i64_v3i64__5_1_0: 1343; GFX940: ; %bb.0: 1344; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1345; GFX940-NEXT: v_mov_b32_e32 v10, 0 1346; GFX940-NEXT: ;;#ASMSTART 1347; GFX940-NEXT: ; def v[0:5] 1348; GFX940-NEXT: ;;#ASMEND 1349; GFX940-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] offset:16 sc0 sc1 1350; GFX940-NEXT: ;;#ASMSTART 1351; GFX940-NEXT: ; def v[4:9] 1352; GFX940-NEXT: ;;#ASMEND 1353; GFX940-NEXT: s_nop 0 1354; GFX940-NEXT: v_mov_b32_e32 v0, v8 1355; GFX940-NEXT: v_mov_b32_e32 v1, v9 1356; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] sc0 sc1 1357; GFX940-NEXT: s_waitcnt vmcnt(0) 1358; GFX940-NEXT: s_setpc_b64 s[30:31] 1359 %vec0 = call <3 x i64> asm "; def $0", "=v"() 1360 %vec1 = call <3 x i64> asm "; def $0", "=v"() 1361 %shuf = shufflevector <3 x 
i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 1, i32 0> 1362 store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 1363 ret void 1364} 1365 1366define void @v_shuffle_v3i64_v3i64__5_2_0(ptr addrspace(1) inreg %ptr) { 1367; GFX900-LABEL: v_shuffle_v3i64_v3i64__5_2_0: 1368; GFX900: ; %bb.0: 1369; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1370; GFX900-NEXT: ;;#ASMSTART 1371; GFX900-NEXT: ; def v[0:5] 1372; GFX900-NEXT: ;;#ASMEND 1373; GFX900-NEXT: v_mov_b32_e32 v12, 0 1374; GFX900-NEXT: ;;#ASMSTART 1375; GFX900-NEXT: ; def v[6:11] 1376; GFX900-NEXT: ;;#ASMEND 1377; GFX900-NEXT: v_mov_b32_e32 v2, v10 1378; GFX900-NEXT: v_mov_b32_e32 v3, v11 1379; GFX900-NEXT: global_store_dwordx2 v12, v[0:1], s[16:17] offset:16 1380; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] 1381; GFX900-NEXT: s_waitcnt vmcnt(0) 1382; GFX900-NEXT: s_setpc_b64 s[30:31] 1383; 1384; GFX90A-LABEL: v_shuffle_v3i64_v3i64__5_2_0: 1385; GFX90A: ; %bb.0: 1386; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1387; GFX90A-NEXT: ;;#ASMSTART 1388; GFX90A-NEXT: ; def v[0:5] 1389; GFX90A-NEXT: ;;#ASMEND 1390; GFX90A-NEXT: v_mov_b32_e32 v12, 0 1391; GFX90A-NEXT: ;;#ASMSTART 1392; GFX90A-NEXT: ; def v[6:11] 1393; GFX90A-NEXT: ;;#ASMEND 1394; GFX90A-NEXT: v_mov_b32_e32 v2, v10 1395; GFX90A-NEXT: v_mov_b32_e32 v3, v11 1396; GFX90A-NEXT: global_store_dwordx2 v12, v[0:1], s[16:17] offset:16 1397; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] 1398; GFX90A-NEXT: s_waitcnt vmcnt(0) 1399; GFX90A-NEXT: s_setpc_b64 s[30:31] 1400; 1401; GFX940-LABEL: v_shuffle_v3i64_v3i64__5_2_0: 1402; GFX940: ; %bb.0: 1403; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1404; GFX940-NEXT: ;;#ASMSTART 1405; GFX940-NEXT: ; def v[0:5] 1406; GFX940-NEXT: ;;#ASMEND 1407; GFX940-NEXT: v_mov_b32_e32 v12, 0 1408; GFX940-NEXT: ;;#ASMSTART 1409; GFX940-NEXT: ; def v[6:11] 1410; GFX940-NEXT: ;;#ASMEND 1411; GFX940-NEXT: global_store_dwordx2 v12, v[0:1], s[0:1] offset:16 sc0 sc1 1412; GFX940-NEXT: 
v_mov_b32_e32 v2, v10 1413; GFX940-NEXT: v_mov_b32_e32 v3, v11 1414; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] sc0 sc1 1415; GFX940-NEXT: s_waitcnt vmcnt(0) 1416; GFX940-NEXT: s_setpc_b64 s[30:31] 1417 %vec0 = call <3 x i64> asm "; def $0", "=v"() 1418 %vec1 = call <3 x i64> asm "; def $0", "=v"() 1419 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 2, i32 0> 1420 store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 1421 ret void 1422} 1423 1424define void @v_shuffle_v3i64_v3i64__5_3_0(ptr addrspace(1) inreg %ptr) { 1425; GFX900-LABEL: v_shuffle_v3i64_v3i64__5_3_0: 1426; GFX900: ; %bb.0: 1427; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1428; GFX900-NEXT: v_mov_b32_e32 v8, 0 1429; GFX900-NEXT: ;;#ASMSTART 1430; GFX900-NEXT: ; def v[0:5] 1431; GFX900-NEXT: ;;#ASMEND 1432; GFX900-NEXT: ;;#ASMSTART 1433; GFX900-NEXT: ; def v[2:7] 1434; GFX900-NEXT: ;;#ASMEND 1435; GFX900-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17] offset:16 1436; GFX900-NEXT: v_mov_b32_e32 v0, v6 1437; GFX900-NEXT: v_mov_b32_e32 v1, v7 1438; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] 1439; GFX900-NEXT: s_waitcnt vmcnt(0) 1440; GFX900-NEXT: s_setpc_b64 s[30:31] 1441; 1442; GFX90A-LABEL: v_shuffle_v3i64_v3i64__5_3_0: 1443; GFX90A: ; %bb.0: 1444; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1445; GFX90A-NEXT: v_mov_b32_e32 v8, 0 1446; GFX90A-NEXT: ;;#ASMSTART 1447; GFX90A-NEXT: ; def v[0:5] 1448; GFX90A-NEXT: ;;#ASMEND 1449; GFX90A-NEXT: ;;#ASMSTART 1450; GFX90A-NEXT: ; def v[2:7] 1451; GFX90A-NEXT: ;;#ASMEND 1452; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17] offset:16 1453; GFX90A-NEXT: v_mov_b32_e32 v0, v6 1454; GFX90A-NEXT: v_mov_b32_e32 v1, v7 1455; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] 1456; GFX90A-NEXT: s_waitcnt vmcnt(0) 1457; GFX90A-NEXT: s_setpc_b64 s[30:31] 1458; 1459; GFX940-LABEL: v_shuffle_v3i64_v3i64__5_3_0: 1460; GFX940: ; %bb.0: 1461; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 
1462; GFX940-NEXT: v_mov_b32_e32 v8, 0 1463; GFX940-NEXT: ;;#ASMSTART 1464; GFX940-NEXT: ; def v[0:5] 1465; GFX940-NEXT: ;;#ASMEND 1466; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16 sc0 sc1 1467; GFX940-NEXT: ;;#ASMSTART 1468; GFX940-NEXT: ; def v[2:7] 1469; GFX940-NEXT: ;;#ASMEND 1470; GFX940-NEXT: s_nop 0 1471; GFX940-NEXT: v_mov_b32_e32 v0, v6 1472; GFX940-NEXT: v_mov_b32_e32 v1, v7 1473; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 1474; GFX940-NEXT: s_waitcnt vmcnt(0) 1475; GFX940-NEXT: s_setpc_b64 s[30:31] 1476 %vec0 = call <3 x i64> asm "; def $0", "=v"() 1477 %vec1 = call <3 x i64> asm "; def $0", "=v"() 1478 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 3, i32 0> 1479 store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 1480 ret void 1481} 1482 1483define void @v_shuffle_v3i64_v3i64__5_4_0(ptr addrspace(1) inreg %ptr) { 1484; GFX900-LABEL: v_shuffle_v3i64_v3i64__5_4_0: 1485; GFX900: ; %bb.0: 1486; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1487; GFX900-NEXT: ;;#ASMSTART 1488; GFX900-NEXT: ; def v[0:5] 1489; GFX900-NEXT: ;;#ASMEND 1490; GFX900-NEXT: ;;#ASMSTART 1491; GFX900-NEXT: ; def v[2:7] 1492; GFX900-NEXT: ;;#ASMEND 1493; GFX900-NEXT: v_mov_b32_e32 v8, 0 1494; GFX900-NEXT: v_mov_b32_e32 v2, v6 1495; GFX900-NEXT: v_mov_b32_e32 v3, v7 1496; GFX900-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17] offset:16 1497; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] 1498; GFX900-NEXT: s_waitcnt vmcnt(0) 1499; GFX900-NEXT: s_setpc_b64 s[30:31] 1500; 1501; GFX90A-LABEL: v_shuffle_v3i64_v3i64__5_4_0: 1502; GFX90A: ; %bb.0: 1503; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1504; GFX90A-NEXT: ;;#ASMSTART 1505; GFX90A-NEXT: ; def v[0:5] 1506; GFX90A-NEXT: ;;#ASMEND 1507; GFX90A-NEXT: ;;#ASMSTART 1508; GFX90A-NEXT: ; def v[2:7] 1509; GFX90A-NEXT: ;;#ASMEND 1510; GFX90A-NEXT: v_mov_b32_e32 v8, 0 1511; GFX90A-NEXT: v_mov_b32_e32 v2, v6 1512; GFX90A-NEXT: v_mov_b32_e32 v3, v7 
1513; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17] offset:16 1514; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] 1515; GFX90A-NEXT: s_waitcnt vmcnt(0) 1516; GFX90A-NEXT: s_setpc_b64 s[30:31] 1517; 1518; GFX940-LABEL: v_shuffle_v3i64_v3i64__5_4_0: 1519; GFX940: ; %bb.0: 1520; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1521; GFX940-NEXT: ;;#ASMSTART 1522; GFX940-NEXT: ; def v[0:5] 1523; GFX940-NEXT: ;;#ASMEND 1524; GFX940-NEXT: v_mov_b32_e32 v8, 0 1525; GFX940-NEXT: ;;#ASMSTART 1526; GFX940-NEXT: ; def v[2:7] 1527; GFX940-NEXT: ;;#ASMEND 1528; GFX940-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16 sc0 sc1 1529; GFX940-NEXT: v_mov_b32_e32 v2, v6 1530; GFX940-NEXT: v_mov_b32_e32 v3, v7 1531; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 1532; GFX940-NEXT: s_waitcnt vmcnt(0) 1533; GFX940-NEXT: s_setpc_b64 s[30:31] 1534 %vec0 = call <3 x i64> asm "; def $0", "=v"() 1535 %vec1 = call <3 x i64> asm "; def $0", "=v"() 1536 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 4, i32 0> 1537 store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 1538 ret void 1539} 1540 1541define void @v_shuffle_v3i64_v3i64__u_1_1(ptr addrspace(1) inreg %ptr) { 1542; GFX900-LABEL: v_shuffle_v3i64_v3i64__u_1_1: 1543; GFX900: ; %bb.0: 1544; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1545; GFX900-NEXT: v_mov_b32_e32 v6, 0 1546; GFX900-NEXT: ;;#ASMSTART 1547; GFX900-NEXT: ; def v[0:5] 1548; GFX900-NEXT: ;;#ASMEND 1549; GFX900-NEXT: global_store_dwordx2 v6, v[2:3], s[16:17] offset:16 1550; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] 1551; GFX900-NEXT: s_waitcnt vmcnt(0) 1552; GFX900-NEXT: s_setpc_b64 s[30:31] 1553; 1554; GFX90A-LABEL: v_shuffle_v3i64_v3i64__u_1_1: 1555; GFX90A: ; %bb.0: 1556; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1557; GFX90A-NEXT: v_mov_b32_e32 v6, 0 1558; GFX90A-NEXT: ;;#ASMSTART 1559; GFX90A-NEXT: ; def v[0:5] 1560; GFX90A-NEXT: ;;#ASMEND 1561; GFX90A-NEXT: 
global_store_dwordx2 v6, v[2:3], s[16:17] offset:16 1562; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] 1563; GFX90A-NEXT: s_waitcnt vmcnt(0) 1564; GFX90A-NEXT: s_setpc_b64 s[30:31] 1565; 1566; GFX940-LABEL: v_shuffle_v3i64_v3i64__u_1_1: 1567; GFX940: ; %bb.0: 1568; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1569; GFX940-NEXT: v_mov_b32_e32 v6, 0 1570; GFX940-NEXT: ;;#ASMSTART 1571; GFX940-NEXT: ; def v[0:5] 1572; GFX940-NEXT: ;;#ASMEND 1573; GFX940-NEXT: global_store_dwordx2 v6, v[2:3], s[0:1] offset:16 sc0 sc1 1574; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 1575; GFX940-NEXT: s_waitcnt vmcnt(0) 1576; GFX940-NEXT: s_setpc_b64 s[30:31] 1577 %vec0 = call <3 x i64> asm "; def $0", "=v"() 1578 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> <i32 poison, i32 1, i32 1> 1579 store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 1580 ret void 1581} 1582 1583define void @v_shuffle_v3i64_v3i64__0_1_1(ptr addrspace(1) inreg %ptr) { 1584; GFX900-LABEL: v_shuffle_v3i64_v3i64__0_1_1: 1585; GFX900: ; %bb.0: 1586; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1587; GFX900-NEXT: v_mov_b32_e32 v6, 0 1588; GFX900-NEXT: ;;#ASMSTART 1589; GFX900-NEXT: ; def v[0:5] 1590; GFX900-NEXT: ;;#ASMEND 1591; GFX900-NEXT: global_store_dwordx2 v6, v[2:3], s[16:17] offset:16 1592; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] 1593; GFX900-NEXT: s_waitcnt vmcnt(0) 1594; GFX900-NEXT: s_setpc_b64 s[30:31] 1595; 1596; GFX90A-LABEL: v_shuffle_v3i64_v3i64__0_1_1: 1597; GFX90A: ; %bb.0: 1598; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1599; GFX90A-NEXT: v_mov_b32_e32 v6, 0 1600; GFX90A-NEXT: ;;#ASMSTART 1601; GFX90A-NEXT: ; def v[0:5] 1602; GFX90A-NEXT: ;;#ASMEND 1603; GFX90A-NEXT: global_store_dwordx2 v6, v[2:3], s[16:17] offset:16 1604; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] 1605; GFX90A-NEXT: s_waitcnt vmcnt(0) 1606; GFX90A-NEXT: s_setpc_b64 s[30:31] 1607; 1608; GFX940-LABEL: 
v_shuffle_v3i64_v3i64__0_1_1: 1609; GFX940: ; %bb.0: 1610; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1611; GFX940-NEXT: v_mov_b32_e32 v6, 0 1612; GFX940-NEXT: ;;#ASMSTART 1613; GFX940-NEXT: ; def v[0:5] 1614; GFX940-NEXT: ;;#ASMEND 1615; GFX940-NEXT: global_store_dwordx2 v6, v[2:3], s[0:1] offset:16 sc0 sc1 1616; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 1617; GFX940-NEXT: s_waitcnt vmcnt(0) 1618; GFX940-NEXT: s_setpc_b64 s[30:31] 1619 %vec0 = call <3 x i64> asm "; def $0", "=v"() 1620 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> <i32 0, i32 1, i32 1> 1621 store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 1622 ret void 1623} 1624 1625define void @v_shuffle_v3i64_v3i64__1_1_1(ptr addrspace(1) inreg %ptr) { 1626; GFX900-LABEL: v_shuffle_v3i64_v3i64__1_1_1: 1627; GFX900: ; %bb.0: 1628; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1629; GFX900-NEXT: ;;#ASMSTART 1630; GFX900-NEXT: ; def v[0:5] 1631; GFX900-NEXT: ;;#ASMEND 1632; GFX900-NEXT: v_mov_b32_e32 v6, 0 1633; GFX900-NEXT: v_mov_b32_e32 v4, v2 1634; GFX900-NEXT: v_mov_b32_e32 v5, v3 1635; GFX900-NEXT: global_store_dwordx2 v6, v[2:3], s[16:17] offset:16 1636; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] 1637; GFX900-NEXT: s_waitcnt vmcnt(0) 1638; GFX900-NEXT: s_setpc_b64 s[30:31] 1639; 1640; GFX90A-LABEL: v_shuffle_v3i64_v3i64__1_1_1: 1641; GFX90A: ; %bb.0: 1642; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1643; GFX90A-NEXT: ;;#ASMSTART 1644; GFX90A-NEXT: ; def v[0:5] 1645; GFX90A-NEXT: ;;#ASMEND 1646; GFX90A-NEXT: v_mov_b32_e32 v6, 0 1647; GFX90A-NEXT: v_mov_b32_e32 v4, v2 1648; GFX90A-NEXT: v_mov_b32_e32 v5, v3 1649; GFX90A-NEXT: global_store_dwordx2 v6, v[2:3], s[16:17] offset:16 1650; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] 1651; GFX90A-NEXT: s_waitcnt vmcnt(0) 1652; GFX90A-NEXT: s_setpc_b64 s[30:31] 1653; 1654; GFX940-LABEL: v_shuffle_v3i64_v3i64__1_1_1: 1655; GFX940: ; %bb.0: 1656; GFX940-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) 1657; GFX940-NEXT: ;;#ASMSTART 1658; GFX940-NEXT: ; def v[0:5] 1659; GFX940-NEXT: ;;#ASMEND 1660; GFX940-NEXT: v_mov_b32_e32 v6, 0 1661; GFX940-NEXT: v_mov_b32_e32 v4, v2 1662; GFX940-NEXT: v_mov_b32_e32 v5, v3 1663; GFX940-NEXT: global_store_dwordx2 v6, v[2:3], s[0:1] offset:16 sc0 sc1 1664; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 1665; GFX940-NEXT: s_waitcnt vmcnt(0) 1666; GFX940-NEXT: s_setpc_b64 s[30:31] 1667 %vec0 = call <3 x i64> asm "; def $0", "=v"() 1668 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> <i32 1, i32 1, i32 1> 1669 store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 1670 ret void 1671} 1672 1673define void @v_shuffle_v3i64_v3i64__2_1_1(ptr addrspace(1) inreg %ptr) { 1674; GFX900-LABEL: v_shuffle_v3i64_v3i64__2_1_1: 1675; GFX900: ; %bb.0: 1676; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1677; GFX900-NEXT: ;;#ASMSTART 1678; GFX900-NEXT: ; def v[0:5] 1679; GFX900-NEXT: ;;#ASMEND 1680; GFX900-NEXT: v_mov_b32_e32 v6, 0 1681; GFX900-NEXT: v_mov_b32_e32 v0, v4 1682; GFX900-NEXT: v_mov_b32_e32 v1, v5 1683; GFX900-NEXT: global_store_dwordx2 v6, v[2:3], s[16:17] offset:16 1684; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] 1685; GFX900-NEXT: s_waitcnt vmcnt(0) 1686; GFX900-NEXT: s_setpc_b64 s[30:31] 1687; 1688; GFX90A-LABEL: v_shuffle_v3i64_v3i64__2_1_1: 1689; GFX90A: ; %bb.0: 1690; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1691; GFX90A-NEXT: ;;#ASMSTART 1692; GFX90A-NEXT: ; def v[0:5] 1693; GFX90A-NEXT: ;;#ASMEND 1694; GFX90A-NEXT: v_mov_b32_e32 v6, 0 1695; GFX90A-NEXT: v_mov_b32_e32 v0, v4 1696; GFX90A-NEXT: v_mov_b32_e32 v1, v5 1697; GFX90A-NEXT: global_store_dwordx2 v6, v[2:3], s[16:17] offset:16 1698; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] 1699; GFX90A-NEXT: s_waitcnt vmcnt(0) 1700; GFX90A-NEXT: s_setpc_b64 s[30:31] 1701; 1702; GFX940-LABEL: v_shuffle_v3i64_v3i64__2_1_1: 1703; GFX940: ; %bb.0: 1704; GFX940-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) 1705; GFX940-NEXT: ;;#ASMSTART 1706; GFX940-NEXT: ; def v[0:5] 1707; GFX940-NEXT: ;;#ASMEND 1708; GFX940-NEXT: v_mov_b32_e32 v6, 0 1709; GFX940-NEXT: v_mov_b32_e32 v0, v4 1710; GFX940-NEXT: v_mov_b32_e32 v1, v5 1711; GFX940-NEXT: global_store_dwordx2 v6, v[2:3], s[0:1] offset:16 sc0 sc1 1712; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 1713; GFX940-NEXT: s_waitcnt vmcnt(0) 1714; GFX940-NEXT: s_setpc_b64 s[30:31] 1715 %vec0 = call <3 x i64> asm "; def $0", "=v"() 1716 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> <i32 2, i32 1, i32 1> 1717 store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 1718 ret void 1719} 1720 1721define void @v_shuffle_v3i64_v3i64__3_1_1(ptr addrspace(1) inreg %ptr) { 1722; GFX900-LABEL: v_shuffle_v3i64_v3i64__3_1_1: 1723; GFX900: ; %bb.0: 1724; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1725; GFX900-NEXT: v_mov_b32_e32 v6, 0 1726; GFX900-NEXT: ;;#ASMSTART 1727; GFX900-NEXT: ; def v[0:5] 1728; GFX900-NEXT: ;;#ASMEND 1729; GFX900-NEXT: global_store_dwordx2 v6, v[2:3], s[16:17] offset:16 1730; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] 1731; GFX900-NEXT: s_waitcnt vmcnt(0) 1732; GFX900-NEXT: s_setpc_b64 s[30:31] 1733; 1734; GFX90A-LABEL: v_shuffle_v3i64_v3i64__3_1_1: 1735; GFX90A: ; %bb.0: 1736; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1737; GFX90A-NEXT: v_mov_b32_e32 v6, 0 1738; GFX90A-NEXT: ;;#ASMSTART 1739; GFX90A-NEXT: ; def v[0:5] 1740; GFX90A-NEXT: ;;#ASMEND 1741; GFX90A-NEXT: global_store_dwordx2 v6, v[2:3], s[16:17] offset:16 1742; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] 1743; GFX90A-NEXT: s_waitcnt vmcnt(0) 1744; GFX90A-NEXT: s_setpc_b64 s[30:31] 1745; 1746; GFX940-LABEL: v_shuffle_v3i64_v3i64__3_1_1: 1747; GFX940: ; %bb.0: 1748; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1749; GFX940-NEXT: v_mov_b32_e32 v6, 0 1750; GFX940-NEXT: ;;#ASMSTART 1751; GFX940-NEXT: ; def v[0:5] 1752; GFX940-NEXT: ;;#ASMEND 1753; 
GFX940-NEXT: global_store_dwordx2 v6, v[2:3], s[0:1] offset:16 sc0 sc1 1754; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 1755; GFX940-NEXT: s_waitcnt vmcnt(0) 1756; GFX940-NEXT: s_setpc_b64 s[30:31] 1757 %vec0 = call <3 x i64> asm "; def $0", "=v"() 1758 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> <i32 3, i32 1, i32 1> 1759 store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 1760 ret void 1761} 1762 1763define void @v_shuffle_v3i64_v3i64__4_1_1(ptr addrspace(1) inreg %ptr) { 1764; GFX900-LABEL: v_shuffle_v3i64_v3i64__4_1_1: 1765; GFX900: ; %bb.0: 1766; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1767; GFX900-NEXT: ;;#ASMSTART 1768; GFX900-NEXT: ; def v[0:5] 1769; GFX900-NEXT: ;;#ASMEND 1770; GFX900-NEXT: ;;#ASMSTART 1771; GFX900-NEXT: ; def v[4:9] 1772; GFX900-NEXT: ;;#ASMEND 1773; GFX900-NEXT: v_mov_b32_e32 v10, 0 1774; GFX900-NEXT: v_mov_b32_e32 v8, v2 1775; GFX900-NEXT: v_mov_b32_e32 v9, v3 1776; GFX900-NEXT: global_store_dwordx2 v10, v[2:3], s[16:17] offset:16 1777; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] 1778; GFX900-NEXT: s_waitcnt vmcnt(0) 1779; GFX900-NEXT: s_setpc_b64 s[30:31] 1780; 1781; GFX90A-LABEL: v_shuffle_v3i64_v3i64__4_1_1: 1782; GFX90A: ; %bb.0: 1783; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1784; GFX90A-NEXT: ;;#ASMSTART 1785; GFX90A-NEXT: ; def v[0:5] 1786; GFX90A-NEXT: ;;#ASMEND 1787; GFX90A-NEXT: ;;#ASMSTART 1788; GFX90A-NEXT: ; def v[4:9] 1789; GFX90A-NEXT: ;;#ASMEND 1790; GFX90A-NEXT: v_mov_b32_e32 v10, 0 1791; GFX90A-NEXT: v_mov_b32_e32 v8, v2 1792; GFX90A-NEXT: v_mov_b32_e32 v9, v3 1793; GFX90A-NEXT: global_store_dwordx2 v10, v[2:3], s[16:17] offset:16 1794; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] 1795; GFX90A-NEXT: s_waitcnt vmcnt(0) 1796; GFX90A-NEXT: s_setpc_b64 s[30:31] 1797; 1798; GFX940-LABEL: v_shuffle_v3i64_v3i64__4_1_1: 1799; GFX940: ; %bb.0: 1800; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1801; GFX940-NEXT: ;;#ASMSTART 1802; 
GFX940-NEXT: ; def v[0:5] 1803; GFX940-NEXT: ;;#ASMEND 1804; GFX940-NEXT: v_mov_b32_e32 v10, 0 1805; GFX940-NEXT: ;;#ASMSTART 1806; GFX940-NEXT: ; def v[4:9] 1807; GFX940-NEXT: ;;#ASMEND 1808; GFX940-NEXT: global_store_dwordx2 v10, v[2:3], s[0:1] offset:16 sc0 sc1 1809; GFX940-NEXT: v_mov_b32_e32 v8, v2 1810; GFX940-NEXT: v_mov_b32_e32 v9, v3 1811; GFX940-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] sc0 sc1 1812; GFX940-NEXT: s_waitcnt vmcnt(0) 1813; GFX940-NEXT: s_setpc_b64 s[30:31] 1814 %vec0 = call <3 x i64> asm "; def $0", "=v"() 1815 %vec1 = call <3 x i64> asm "; def $0", "=v"() 1816 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 4, i32 1, i32 1> 1817 store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 1818 ret void 1819} 1820 1821define void @v_shuffle_v3i64_v3i64__5_1_1(ptr addrspace(1) inreg %ptr) { 1822; GFX900-LABEL: v_shuffle_v3i64_v3i64__5_1_1: 1823; GFX900: ; %bb.0: 1824; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1825; GFX900-NEXT: ;;#ASMSTART 1826; GFX900-NEXT: ; def v[0:5] 1827; GFX900-NEXT: ;;#ASMEND 1828; GFX900-NEXT: v_mov_b32_e32 v10, 0 1829; GFX900-NEXT: ;;#ASMSTART 1830; GFX900-NEXT: ; def v[4:9] 1831; GFX900-NEXT: ;;#ASMEND 1832; GFX900-NEXT: v_mov_b32_e32 v0, v8 1833; GFX900-NEXT: v_mov_b32_e32 v1, v9 1834; GFX900-NEXT: global_store_dwordx2 v10, v[2:3], s[16:17] offset:16 1835; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] 1836; GFX900-NEXT: s_waitcnt vmcnt(0) 1837; GFX900-NEXT: s_setpc_b64 s[30:31] 1838; 1839; GFX90A-LABEL: v_shuffle_v3i64_v3i64__5_1_1: 1840; GFX90A: ; %bb.0: 1841; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1842; GFX90A-NEXT: ;;#ASMSTART 1843; GFX90A-NEXT: ; def v[0:5] 1844; GFX90A-NEXT: ;;#ASMEND 1845; GFX90A-NEXT: v_mov_b32_e32 v10, 0 1846; GFX90A-NEXT: ;;#ASMSTART 1847; GFX90A-NEXT: ; def v[4:9] 1848; GFX90A-NEXT: ;;#ASMEND 1849; GFX90A-NEXT: v_mov_b32_e32 v0, v8 1850; GFX90A-NEXT: v_mov_b32_e32 v1, v9 1851; GFX90A-NEXT: global_store_dwordx2 v10, v[2:3], s[16:17] 
offset:16 1852; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] 1853; GFX90A-NEXT: s_waitcnt vmcnt(0) 1854; GFX90A-NEXT: s_setpc_b64 s[30:31] 1855; 1856; GFX940-LABEL: v_shuffle_v3i64_v3i64__5_1_1: 1857; GFX940: ; %bb.0: 1858; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1859; GFX940-NEXT: ;;#ASMSTART 1860; GFX940-NEXT: ; def v[0:5] 1861; GFX940-NEXT: ;;#ASMEND 1862; GFX940-NEXT: v_mov_b32_e32 v10, 0 1863; GFX940-NEXT: ;;#ASMSTART 1864; GFX940-NEXT: ; def v[4:9] 1865; GFX940-NEXT: ;;#ASMEND 1866; GFX940-NEXT: global_store_dwordx2 v10, v[2:3], s[0:1] offset:16 sc0 sc1 1867; GFX940-NEXT: v_mov_b32_e32 v0, v8 1868; GFX940-NEXT: v_mov_b32_e32 v1, v9 1869; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] sc0 sc1 1870; GFX940-NEXT: s_waitcnt vmcnt(0) 1871; GFX940-NEXT: s_setpc_b64 s[30:31] 1872 %vec0 = call <3 x i64> asm "; def $0", "=v"() 1873 %vec1 = call <3 x i64> asm "; def $0", "=v"() 1874 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 1, i32 1> 1875 store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 1876 ret void 1877} 1878 1879define void @v_shuffle_v3i64_v3i64__5_u_1(ptr addrspace(1) inreg %ptr) { 1880; GFX900-LABEL: v_shuffle_v3i64_v3i64__5_u_1: 1881; GFX900: ; %bb.0: 1882; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1883; GFX900-NEXT: ;;#ASMSTART 1884; GFX900-NEXT: ; def v[0:5] 1885; GFX900-NEXT: ;;#ASMEND 1886; GFX900-NEXT: v_mov_b32_e32 v10, 0 1887; GFX900-NEXT: ;;#ASMSTART 1888; GFX900-NEXT: ; def v[4:9] 1889; GFX900-NEXT: ;;#ASMEND 1890; GFX900-NEXT: v_mov_b32_e32 v0, v8 1891; GFX900-NEXT: v_mov_b32_e32 v1, v9 1892; GFX900-NEXT: global_store_dwordx2 v10, v[2:3], s[16:17] offset:16 1893; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] 1894; GFX900-NEXT: s_waitcnt vmcnt(0) 1895; GFX900-NEXT: s_setpc_b64 s[30:31] 1896; 1897; GFX90A-LABEL: v_shuffle_v3i64_v3i64__5_u_1: 1898; GFX90A: ; %bb.0: 1899; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1900; GFX90A-NEXT: ;;#ASMSTART 1901; 
GFX90A-NEXT: ; def v[0:5] 1902; GFX90A-NEXT: ;;#ASMEND 1903; GFX90A-NEXT: v_mov_b32_e32 v10, 0 1904; GFX90A-NEXT: ;;#ASMSTART 1905; GFX90A-NEXT: ; def v[4:9] 1906; GFX90A-NEXT: ;;#ASMEND 1907; GFX90A-NEXT: v_mov_b32_e32 v0, v8 1908; GFX90A-NEXT: v_mov_b32_e32 v1, v9 1909; GFX90A-NEXT: global_store_dwordx2 v10, v[2:3], s[16:17] offset:16 1910; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] 1911; GFX90A-NEXT: s_waitcnt vmcnt(0) 1912; GFX90A-NEXT: s_setpc_b64 s[30:31] 1913; 1914; GFX940-LABEL: v_shuffle_v3i64_v3i64__5_u_1: 1915; GFX940: ; %bb.0: 1916; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1917; GFX940-NEXT: ;;#ASMSTART 1918; GFX940-NEXT: ; def v[0:5] 1919; GFX940-NEXT: ;;#ASMEND 1920; GFX940-NEXT: v_mov_b32_e32 v10, 0 1921; GFX940-NEXT: ;;#ASMSTART 1922; GFX940-NEXT: ; def v[4:9] 1923; GFX940-NEXT: ;;#ASMEND 1924; GFX940-NEXT: global_store_dwordx2 v10, v[2:3], s[0:1] offset:16 sc0 sc1 1925; GFX940-NEXT: v_mov_b32_e32 v0, v8 1926; GFX940-NEXT: v_mov_b32_e32 v1, v9 1927; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] sc0 sc1 1928; GFX940-NEXT: s_waitcnt vmcnt(0) 1929; GFX940-NEXT: s_setpc_b64 s[30:31] 1930 %vec0 = call <3 x i64> asm "; def $0", "=v"() 1931 %vec1 = call <3 x i64> asm "; def $0", "=v"() 1932 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 poison, i32 1> 1933 store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 1934 ret void 1935} 1936 1937define void @v_shuffle_v3i64_v3i64__5_0_1(ptr addrspace(1) inreg %ptr) { 1938; GFX900-LABEL: v_shuffle_v3i64_v3i64__5_0_1: 1939; GFX900: ; %bb.0: 1940; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1941; GFX900-NEXT: ;;#ASMSTART 1942; GFX900-NEXT: ; def v[0:5] 1943; GFX900-NEXT: ;;#ASMEND 1944; GFX900-NEXT: v_mov_b32_e32 v10, 0 1945; GFX900-NEXT: ;;#ASMSTART 1946; GFX900-NEXT: ; def v[4:9] 1947; GFX900-NEXT: ;;#ASMEND 1948; GFX900-NEXT: global_store_dwordx2 v10, v[2:3], s[16:17] offset:16 1949; GFX900-NEXT: v_mov_b32_e32 v2, v8 1950; GFX900-NEXT: 
v_mov_b32_e32 v3, v9 1951; GFX900-NEXT: v_mov_b32_e32 v4, v0 1952; GFX900-NEXT: v_mov_b32_e32 v5, v1 1953; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] 1954; GFX900-NEXT: s_waitcnt vmcnt(0) 1955; GFX900-NEXT: s_setpc_b64 s[30:31] 1956; 1957; GFX90A-LABEL: v_shuffle_v3i64_v3i64__5_0_1: 1958; GFX90A: ; %bb.0: 1959; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1960; GFX90A-NEXT: ;;#ASMSTART 1961; GFX90A-NEXT: ; def v[0:5] 1962; GFX90A-NEXT: ;;#ASMEND 1963; GFX90A-NEXT: v_mov_b32_e32 v10, 0 1964; GFX90A-NEXT: ;;#ASMSTART 1965; GFX90A-NEXT: ; def v[4:9] 1966; GFX90A-NEXT: ;;#ASMEND 1967; GFX90A-NEXT: global_store_dwordx2 v10, v[2:3], s[16:17] offset:16 1968; GFX90A-NEXT: v_mov_b32_e32 v2, v8 1969; GFX90A-NEXT: v_mov_b32_e32 v3, v9 1970; GFX90A-NEXT: v_mov_b32_e32 v4, v0 1971; GFX90A-NEXT: v_mov_b32_e32 v5, v1 1972; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] 1973; GFX90A-NEXT: s_waitcnt vmcnt(0) 1974; GFX90A-NEXT: s_setpc_b64 s[30:31] 1975; 1976; GFX940-LABEL: v_shuffle_v3i64_v3i64__5_0_1: 1977; GFX940: ; %bb.0: 1978; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1979; GFX940-NEXT: ;;#ASMSTART 1980; GFX940-NEXT: ; def v[0:5] 1981; GFX940-NEXT: ;;#ASMEND 1982; GFX940-NEXT: v_mov_b32_e32 v10, 0 1983; GFX940-NEXT: ;;#ASMSTART 1984; GFX940-NEXT: ; def v[4:9] 1985; GFX940-NEXT: ;;#ASMEND 1986; GFX940-NEXT: global_store_dwordx2 v10, v[2:3], s[0:1] offset:16 sc0 sc1 1987; GFX940-NEXT: v_mov_b32_e32 v2, v8 1988; GFX940-NEXT: v_mov_b32_e32 v3, v9 1989; GFX940-NEXT: v_mov_b32_e32 v4, v0 1990; GFX940-NEXT: v_mov_b32_e32 v5, v1 1991; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] sc0 sc1 1992; GFX940-NEXT: s_waitcnt vmcnt(0) 1993; GFX940-NEXT: s_setpc_b64 s[30:31] 1994 %vec0 = call <3 x i64> asm "; def $0", "=v"() 1995 %vec1 = call <3 x i64> asm "; def $0", "=v"() 1996 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 0, i32 1> 1997 store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 1998 ret void 1999} 
2000 2001define void @v_shuffle_v3i64_v3i64__5_2_1(ptr addrspace(1) inreg %ptr) { 2002; GFX900-LABEL: v_shuffle_v3i64_v3i64__5_2_1: 2003; GFX900: ; %bb.0: 2004; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2005; GFX900-NEXT: v_mov_b32_e32 v12, 0 2006; GFX900-NEXT: ;;#ASMSTART 2007; GFX900-NEXT: ; def v[0:5] 2008; GFX900-NEXT: ;;#ASMEND 2009; GFX900-NEXT: ;;#ASMSTART 2010; GFX900-NEXT: ; def v[6:11] 2011; GFX900-NEXT: ;;#ASMEND 2012; GFX900-NEXT: global_store_dwordx2 v12, v[2:3], s[16:17] offset:16 2013; GFX900-NEXT: v_mov_b32_e32 v2, v10 2014; GFX900-NEXT: v_mov_b32_e32 v3, v11 2015; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] 2016; GFX900-NEXT: s_waitcnt vmcnt(0) 2017; GFX900-NEXT: s_setpc_b64 s[30:31] 2018; 2019; GFX90A-LABEL: v_shuffle_v3i64_v3i64__5_2_1: 2020; GFX90A: ; %bb.0: 2021; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2022; GFX90A-NEXT: v_mov_b32_e32 v12, 0 2023; GFX90A-NEXT: ;;#ASMSTART 2024; GFX90A-NEXT: ; def v[0:5] 2025; GFX90A-NEXT: ;;#ASMEND 2026; GFX90A-NEXT: ;;#ASMSTART 2027; GFX90A-NEXT: ; def v[6:11] 2028; GFX90A-NEXT: ;;#ASMEND 2029; GFX90A-NEXT: global_store_dwordx2 v12, v[2:3], s[16:17] offset:16 2030; GFX90A-NEXT: v_mov_b32_e32 v2, v10 2031; GFX90A-NEXT: v_mov_b32_e32 v3, v11 2032; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] 2033; GFX90A-NEXT: s_waitcnt vmcnt(0) 2034; GFX90A-NEXT: s_setpc_b64 s[30:31] 2035; 2036; GFX940-LABEL: v_shuffle_v3i64_v3i64__5_2_1: 2037; GFX940: ; %bb.0: 2038; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2039; GFX940-NEXT: v_mov_b32_e32 v12, 0 2040; GFX940-NEXT: ;;#ASMSTART 2041; GFX940-NEXT: ; def v[0:5] 2042; GFX940-NEXT: ;;#ASMEND 2043; GFX940-NEXT: ;;#ASMSTART 2044; GFX940-NEXT: ; def v[6:11] 2045; GFX940-NEXT: ;;#ASMEND 2046; GFX940-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] offset:16 sc0 sc1 2047; GFX940-NEXT: v_mov_b32_e32 v2, v10 2048; GFX940-NEXT: v_mov_b32_e32 v3, v11 2049; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] sc0 sc1 2050; GFX940-NEXT: 
s_waitcnt vmcnt(0) 2051; GFX940-NEXT: s_setpc_b64 s[30:31] 2052 %vec0 = call <3 x i64> asm "; def $0", "=v"() 2053 %vec1 = call <3 x i64> asm "; def $0", "=v"() 2054 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 2, i32 1> 2055 store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 2056 ret void 2057} 2058 2059define void @v_shuffle_v3i64_v3i64__5_3_1(ptr addrspace(1) inreg %ptr) { 2060; GFX900-LABEL: v_shuffle_v3i64_v3i64__5_3_1: 2061; GFX900: ; %bb.0: 2062; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2063; GFX900-NEXT: v_mov_b32_e32 v10, 0 2064; GFX900-NEXT: ;;#ASMSTART 2065; GFX900-NEXT: ; def v[0:5] 2066; GFX900-NEXT: ;;#ASMEND 2067; GFX900-NEXT: ;;#ASMSTART 2068; GFX900-NEXT: ; def v[4:9] 2069; GFX900-NEXT: ;;#ASMEND 2070; GFX900-NEXT: global_store_dwordx2 v10, v[2:3], s[16:17] offset:16 2071; GFX900-NEXT: v_mov_b32_e32 v0, v8 2072; GFX900-NEXT: v_mov_b32_e32 v1, v9 2073; GFX900-NEXT: v_mov_b32_e32 v2, v4 2074; GFX900-NEXT: v_mov_b32_e32 v3, v5 2075; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] 2076; GFX900-NEXT: s_waitcnt vmcnt(0) 2077; GFX900-NEXT: s_setpc_b64 s[30:31] 2078; 2079; GFX90A-LABEL: v_shuffle_v3i64_v3i64__5_3_1: 2080; GFX90A: ; %bb.0: 2081; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2082; GFX90A-NEXT: v_mov_b32_e32 v10, 0 2083; GFX90A-NEXT: ;;#ASMSTART 2084; GFX90A-NEXT: ; def v[0:5] 2085; GFX90A-NEXT: ;;#ASMEND 2086; GFX90A-NEXT: ;;#ASMSTART 2087; GFX90A-NEXT: ; def v[4:9] 2088; GFX90A-NEXT: ;;#ASMEND 2089; GFX90A-NEXT: global_store_dwordx2 v10, v[2:3], s[16:17] offset:16 2090; GFX90A-NEXT: v_mov_b32_e32 v0, v8 2091; GFX90A-NEXT: v_mov_b32_e32 v1, v9 2092; GFX90A-NEXT: v_mov_b32_e32 v2, v4 2093; GFX90A-NEXT: v_mov_b32_e32 v3, v5 2094; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] 2095; GFX90A-NEXT: s_waitcnt vmcnt(0) 2096; GFX90A-NEXT: s_setpc_b64 s[30:31] 2097; 2098; GFX940-LABEL: v_shuffle_v3i64_v3i64__5_3_1: 2099; GFX940: ; %bb.0: 2100; GFX940-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) 2101; GFX940-NEXT: v_mov_b32_e32 v10, 0 2102; GFX940-NEXT: ;;#ASMSTART 2103; GFX940-NEXT: ; def v[0:5] 2104; GFX940-NEXT: ;;#ASMEND 2105; GFX940-NEXT: global_store_dwordx2 v10, v[2:3], s[0:1] offset:16 sc0 sc1 2106; GFX940-NEXT: ;;#ASMSTART 2107; GFX940-NEXT: ; def v[4:9] 2108; GFX940-NEXT: ;;#ASMEND 2109; GFX940-NEXT: s_nop 0 2110; GFX940-NEXT: v_mov_b32_e32 v0, v8 2111; GFX940-NEXT: v_mov_b32_e32 v1, v9 2112; GFX940-NEXT: v_mov_b32_e32 v2, v4 2113; GFX940-NEXT: v_mov_b32_e32 v3, v5 2114; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] sc0 sc1 2115; GFX940-NEXT: s_waitcnt vmcnt(0) 2116; GFX940-NEXT: s_setpc_b64 s[30:31] 2117 %vec0 = call <3 x i64> asm "; def $0", "=v"() 2118 %vec1 = call <3 x i64> asm "; def $0", "=v"() 2119 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 3, i32 1> 2120 store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 2121 ret void 2122} 2123 2124define void @v_shuffle_v3i64_v3i64__5_4_1(ptr addrspace(1) inreg %ptr) { 2125; GFX900-LABEL: v_shuffle_v3i64_v3i64__5_4_1: 2126; GFX900: ; %bb.0: 2127; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2128; GFX900-NEXT: ;;#ASMSTART 2129; GFX900-NEXT: ; def v[0:5] 2130; GFX900-NEXT: ;;#ASMEND 2131; GFX900-NEXT: ;;#ASMSTART 2132; GFX900-NEXT: ; def v[4:9] 2133; GFX900-NEXT: ;;#ASMEND 2134; GFX900-NEXT: v_mov_b32_e32 v10, 0 2135; GFX900-NEXT: v_mov_b32_e32 v4, v8 2136; GFX900-NEXT: v_mov_b32_e32 v5, v9 2137; GFX900-NEXT: global_store_dwordx2 v10, v[2:3], s[16:17] offset:16 2138; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17] 2139; GFX900-NEXT: s_waitcnt vmcnt(0) 2140; GFX900-NEXT: s_setpc_b64 s[30:31] 2141; 2142; GFX90A-LABEL: v_shuffle_v3i64_v3i64__5_4_1: 2143; GFX90A: ; %bb.0: 2144; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2145; GFX90A-NEXT: ;;#ASMSTART 2146; GFX90A-NEXT: ; def v[0:5] 2147; GFX90A-NEXT: ;;#ASMEND 2148; GFX90A-NEXT: ;;#ASMSTART 2149; GFX90A-NEXT: ; def v[4:9] 2150; GFX90A-NEXT: ;;#ASMEND 2151; 
GFX90A-NEXT: v_mov_b32_e32 v10, 0 2152; GFX90A-NEXT: v_mov_b32_e32 v4, v8 2153; GFX90A-NEXT: v_mov_b32_e32 v5, v9 2154; GFX90A-NEXT: global_store_dwordx2 v10, v[2:3], s[16:17] offset:16 2155; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17] 2156; GFX90A-NEXT: s_waitcnt vmcnt(0) 2157; GFX90A-NEXT: s_setpc_b64 s[30:31] 2158; 2159; GFX940-LABEL: v_shuffle_v3i64_v3i64__5_4_1: 2160; GFX940: ; %bb.0: 2161; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2162; GFX940-NEXT: ;;#ASMSTART 2163; GFX940-NEXT: ; def v[0:5] 2164; GFX940-NEXT: ;;#ASMEND 2165; GFX940-NEXT: v_mov_b32_e32 v10, 0 2166; GFX940-NEXT: ;;#ASMSTART 2167; GFX940-NEXT: ; def v[4:9] 2168; GFX940-NEXT: ;;#ASMEND 2169; GFX940-NEXT: global_store_dwordx2 v10, v[2:3], s[0:1] offset:16 sc0 sc1 2170; GFX940-NEXT: v_mov_b32_e32 v4, v8 2171; GFX940-NEXT: v_mov_b32_e32 v5, v9 2172; GFX940-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1] sc0 sc1 2173; GFX940-NEXT: s_waitcnt vmcnt(0) 2174; GFX940-NEXT: s_setpc_b64 s[30:31] 2175 %vec0 = call <3 x i64> asm "; def $0", "=v"() 2176 %vec1 = call <3 x i64> asm "; def $0", "=v"() 2177 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 4, i32 1> 2178 store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 2179 ret void 2180} 2181 2182define void @v_shuffle_v3i64_v3i64__u_2_2(ptr addrspace(1) inreg %ptr) { 2183; GFX900-LABEL: v_shuffle_v3i64_v3i64__u_2_2: 2184; GFX900: ; %bb.0: 2185; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2186; GFX900-NEXT: v_mov_b32_e32 v6, 0 2187; GFX900-NEXT: ;;#ASMSTART 2188; GFX900-NEXT: ; def v[0:5] 2189; GFX900-NEXT: ;;#ASMEND 2190; GFX900-NEXT: global_store_dwordx2 v6, v[4:5], s[16:17] offset:16 2191; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] 2192; GFX900-NEXT: s_waitcnt vmcnt(0) 2193; GFX900-NEXT: s_setpc_b64 s[30:31] 2194; 2195; GFX90A-LABEL: v_shuffle_v3i64_v3i64__u_2_2: 2196; GFX90A: ; %bb.0: 2197; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2198; GFX90A-NEXT: v_mov_b32_e32 v6, 0 
2199; GFX90A-NEXT: ;;#ASMSTART 2200; GFX90A-NEXT: ; def v[0:5] 2201; GFX90A-NEXT: ;;#ASMEND 2202; GFX90A-NEXT: global_store_dwordx2 v6, v[4:5], s[16:17] offset:16 2203; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] 2204; GFX90A-NEXT: s_waitcnt vmcnt(0) 2205; GFX90A-NEXT: s_setpc_b64 s[30:31] 2206; 2207; GFX940-LABEL: v_shuffle_v3i64_v3i64__u_2_2: 2208; GFX940: ; %bb.0: 2209; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2210; GFX940-NEXT: v_mov_b32_e32 v6, 0 2211; GFX940-NEXT: ;;#ASMSTART 2212; GFX940-NEXT: ; def v[0:5] 2213; GFX940-NEXT: ;;#ASMEND 2214; GFX940-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] offset:16 sc0 sc1 2215; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 2216; GFX940-NEXT: s_waitcnt vmcnt(0) 2217; GFX940-NEXT: s_setpc_b64 s[30:31] 2218 %vec0 = call <3 x i64> asm "; def $0", "=v"() 2219 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> <i32 poison, i32 2, i32 2> 2220 store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 2221 ret void 2222} 2223 2224define void @v_shuffle_v3i64_v3i64__0_2_2(ptr addrspace(1) inreg %ptr) { 2225; GFX900-LABEL: v_shuffle_v3i64_v3i64__0_2_2: 2226; GFX900: ; %bb.0: 2227; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2228; GFX900-NEXT: ;;#ASMSTART 2229; GFX900-NEXT: ; def v[0:5] 2230; GFX900-NEXT: ;;#ASMEND 2231; GFX900-NEXT: v_mov_b32_e32 v6, 0 2232; GFX900-NEXT: v_mov_b32_e32 v2, v4 2233; GFX900-NEXT: v_mov_b32_e32 v3, v5 2234; GFX900-NEXT: global_store_dwordx2 v6, v[4:5], s[16:17] offset:16 2235; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] 2236; GFX900-NEXT: s_waitcnt vmcnt(0) 2237; GFX900-NEXT: s_setpc_b64 s[30:31] 2238; 2239; GFX90A-LABEL: v_shuffle_v3i64_v3i64__0_2_2: 2240; GFX90A: ; %bb.0: 2241; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2242; GFX90A-NEXT: ;;#ASMSTART 2243; GFX90A-NEXT: ; def v[0:5] 2244; GFX90A-NEXT: ;;#ASMEND 2245; GFX90A-NEXT: v_mov_b32_e32 v6, 0 2246; GFX90A-NEXT: v_mov_b32_e32 v2, v4 2247; GFX90A-NEXT: v_mov_b32_e32 
v3, v5 2248; GFX90A-NEXT: global_store_dwordx2 v6, v[4:5], s[16:17] offset:16 2249; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] 2250; GFX90A-NEXT: s_waitcnt vmcnt(0) 2251; GFX90A-NEXT: s_setpc_b64 s[30:31] 2252; 2253; GFX940-LABEL: v_shuffle_v3i64_v3i64__0_2_2: 2254; GFX940: ; %bb.0: 2255; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2256; GFX940-NEXT: ;;#ASMSTART 2257; GFX940-NEXT: ; def v[0:5] 2258; GFX940-NEXT: ;;#ASMEND 2259; GFX940-NEXT: v_mov_b32_e32 v6, 0 2260; GFX940-NEXT: v_mov_b32_e32 v2, v4 2261; GFX940-NEXT: v_mov_b32_e32 v3, v5 2262; GFX940-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] offset:16 sc0 sc1 2263; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 2264; GFX940-NEXT: s_waitcnt vmcnt(0) 2265; GFX940-NEXT: s_setpc_b64 s[30:31] 2266 %vec0 = call <3 x i64> asm "; def $0", "=v"() 2267 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> <i32 0, i32 2, i32 2> 2268 store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 2269 ret void 2270} 2271 2272define void @v_shuffle_v3i64_v3i64__1_2_2(ptr addrspace(1) inreg %ptr) { 2273; GFX900-LABEL: v_shuffle_v3i64_v3i64__1_2_2: 2274; GFX900: ; %bb.0: 2275; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2276; GFX900-NEXT: v_mov_b32_e32 v6, 0 2277; GFX900-NEXT: ;;#ASMSTART 2278; GFX900-NEXT: ; def v[0:5] 2279; GFX900-NEXT: ;;#ASMEND 2280; GFX900-NEXT: global_store_dwordx2 v6, v[4:5], s[16:17] offset:16 2281; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] 2282; GFX900-NEXT: s_waitcnt vmcnt(0) 2283; GFX900-NEXT: s_setpc_b64 s[30:31] 2284; 2285; GFX90A-LABEL: v_shuffle_v3i64_v3i64__1_2_2: 2286; GFX90A: ; %bb.0: 2287; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2288; GFX90A-NEXT: v_mov_b32_e32 v6, 0 2289; GFX90A-NEXT: ;;#ASMSTART 2290; GFX90A-NEXT: ; def v[0:5] 2291; GFX90A-NEXT: ;;#ASMEND 2292; GFX90A-NEXT: global_store_dwordx2 v6, v[4:5], s[16:17] offset:16 2293; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] 2294; GFX90A-NEXT: s_waitcnt 
vmcnt(0) 2295; GFX90A-NEXT: s_setpc_b64 s[30:31] 2296; 2297; GFX940-LABEL: v_shuffle_v3i64_v3i64__1_2_2: 2298; GFX940: ; %bb.0: 2299; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2300; GFX940-NEXT: v_mov_b32_e32 v6, 0 2301; GFX940-NEXT: ;;#ASMSTART 2302; GFX940-NEXT: ; def v[0:5] 2303; GFX940-NEXT: ;;#ASMEND 2304; GFX940-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] offset:16 sc0 sc1 2305; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 2306; GFX940-NEXT: s_waitcnt vmcnt(0) 2307; GFX940-NEXT: s_setpc_b64 s[30:31] 2308 %vec0 = call <3 x i64> asm "; def $0", "=v"() 2309 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> <i32 1, i32 2, i32 2> 2310 store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 2311 ret void 2312} 2313 2314define void @v_shuffle_v3i64_v3i64__2_2_2(ptr addrspace(1) inreg %ptr) { 2315; GFX900-LABEL: v_shuffle_v3i64_v3i64__2_2_2: 2316; GFX900: ; %bb.0: 2317; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2318; GFX900-NEXT: ;;#ASMSTART 2319; GFX900-NEXT: ; def v[0:5] 2320; GFX900-NEXT: ;;#ASMEND 2321; GFX900-NEXT: v_mov_b32_e32 v6, 0 2322; GFX900-NEXT: v_mov_b32_e32 v2, v4 2323; GFX900-NEXT: v_mov_b32_e32 v3, v5 2324; GFX900-NEXT: global_store_dwordx2 v6, v[4:5], s[16:17] offset:16 2325; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] 2326; GFX900-NEXT: s_waitcnt vmcnt(0) 2327; GFX900-NEXT: s_setpc_b64 s[30:31] 2328; 2329; GFX90A-LABEL: v_shuffle_v3i64_v3i64__2_2_2: 2330; GFX90A: ; %bb.0: 2331; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2332; GFX90A-NEXT: ;;#ASMSTART 2333; GFX90A-NEXT: ; def v[0:5] 2334; GFX90A-NEXT: ;;#ASMEND 2335; GFX90A-NEXT: v_mov_b32_e32 v6, 0 2336; GFX90A-NEXT: v_mov_b32_e32 v2, v4 2337; GFX90A-NEXT: v_mov_b32_e32 v3, v5 2338; GFX90A-NEXT: global_store_dwordx2 v6, v[4:5], s[16:17] offset:16 2339; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] 2340; GFX90A-NEXT: s_waitcnt vmcnt(0) 2341; GFX90A-NEXT: s_setpc_b64 s[30:31] 2342; 2343; GFX940-LABEL: 
v_shuffle_v3i64_v3i64__2_2_2: 2344; GFX940: ; %bb.0: 2345; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2346; GFX940-NEXT: ;;#ASMSTART 2347; GFX940-NEXT: ; def v[0:5] 2348; GFX940-NEXT: ;;#ASMEND 2349; GFX940-NEXT: v_mov_b32_e32 v6, 0 2350; GFX940-NEXT: v_mov_b32_e32 v2, v4 2351; GFX940-NEXT: v_mov_b32_e32 v3, v5 2352; GFX940-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] offset:16 sc0 sc1 2353; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 2354; GFX940-NEXT: s_waitcnt vmcnt(0) 2355; GFX940-NEXT: s_setpc_b64 s[30:31] 2356 %vec0 = call <3 x i64> asm "; def $0", "=v"() 2357 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> <i32 2, i32 2, i32 2> 2358 store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 2359 ret void 2360} 2361 2362define void @v_shuffle_v3i64_v3i64__3_2_2(ptr addrspace(1) inreg %ptr) { 2363; GFX900-LABEL: v_shuffle_v3i64_v3i64__3_2_2: 2364; GFX900: ; %bb.0: 2365; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2366; GFX900-NEXT: v_mov_b32_e32 v6, 0 2367; GFX900-NEXT: ;;#ASMSTART 2368; GFX900-NEXT: ; def v[0:5] 2369; GFX900-NEXT: ;;#ASMEND 2370; GFX900-NEXT: global_store_dwordx2 v6, v[4:5], s[16:17] offset:16 2371; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] 2372; GFX900-NEXT: s_waitcnt vmcnt(0) 2373; GFX900-NEXT: s_setpc_b64 s[30:31] 2374; 2375; GFX90A-LABEL: v_shuffle_v3i64_v3i64__3_2_2: 2376; GFX90A: ; %bb.0: 2377; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2378; GFX90A-NEXT: v_mov_b32_e32 v6, 0 2379; GFX90A-NEXT: ;;#ASMSTART 2380; GFX90A-NEXT: ; def v[0:5] 2381; GFX90A-NEXT: ;;#ASMEND 2382; GFX90A-NEXT: global_store_dwordx2 v6, v[4:5], s[16:17] offset:16 2383; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] 2384; GFX90A-NEXT: s_waitcnt vmcnt(0) 2385; GFX90A-NEXT: s_setpc_b64 s[30:31] 2386; 2387; GFX940-LABEL: v_shuffle_v3i64_v3i64__3_2_2: 2388; GFX940: ; %bb.0: 2389; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2390; GFX940-NEXT: v_mov_b32_e32 v6, 0 2391; GFX940-NEXT: 
;;#ASMSTART 2392; GFX940-NEXT: ; def v[0:5] 2393; GFX940-NEXT: ;;#ASMEND 2394; GFX940-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] offset:16 sc0 sc1 2395; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 2396; GFX940-NEXT: s_waitcnt vmcnt(0) 2397; GFX940-NEXT: s_setpc_b64 s[30:31] 2398 %vec0 = call <3 x i64> asm "; def $0", "=v"() 2399 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> <i32 3, i32 2, i32 2> 2400 store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 2401 ret void 2402} 2403 2404define void @v_shuffle_v3i64_v3i64__4_2_2(ptr addrspace(1) inreg %ptr) { 2405; GFX900-LABEL: v_shuffle_v3i64_v3i64__4_2_2: 2406; GFX900: ; %bb.0: 2407; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2408; GFX900-NEXT: ;;#ASMSTART 2409; GFX900-NEXT: ; def v[6:11] 2410; GFX900-NEXT: ;;#ASMEND 2411; GFX900-NEXT: v_mov_b32_e32 v12, 0 2412; GFX900-NEXT: ;;#ASMSTART 2413; GFX900-NEXT: ; def v[0:5] 2414; GFX900-NEXT: ;;#ASMEND 2415; GFX900-NEXT: v_mov_b32_e32 v10, v4 2416; GFX900-NEXT: v_mov_b32_e32 v11, v5 2417; GFX900-NEXT: global_store_dwordx2 v12, v[4:5], s[16:17] offset:16 2418; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] 2419; GFX900-NEXT: s_waitcnt vmcnt(0) 2420; GFX900-NEXT: s_setpc_b64 s[30:31] 2421; 2422; GFX90A-LABEL: v_shuffle_v3i64_v3i64__4_2_2: 2423; GFX90A: ; %bb.0: 2424; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2425; GFX90A-NEXT: ;;#ASMSTART 2426; GFX90A-NEXT: ; def v[6:11] 2427; GFX90A-NEXT: ;;#ASMEND 2428; GFX90A-NEXT: v_mov_b32_e32 v12, 0 2429; GFX90A-NEXT: ;;#ASMSTART 2430; GFX90A-NEXT: ; def v[0:5] 2431; GFX90A-NEXT: ;;#ASMEND 2432; GFX90A-NEXT: v_mov_b32_e32 v10, v4 2433; GFX90A-NEXT: v_mov_b32_e32 v11, v5 2434; GFX90A-NEXT: global_store_dwordx2 v12, v[4:5], s[16:17] offset:16 2435; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] 2436; GFX90A-NEXT: s_waitcnt vmcnt(0) 2437; GFX90A-NEXT: s_setpc_b64 s[30:31] 2438; 2439; GFX940-LABEL: v_shuffle_v3i64_v3i64__4_2_2: 2440; GFX940: ; %bb.0: 2441; 
GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2442; GFX940-NEXT: ;;#ASMSTART 2443; GFX940-NEXT: ; def v[6:11] 2444; GFX940-NEXT: ;;#ASMEND 2445; GFX940-NEXT: v_mov_b32_e32 v12, 0 2446; GFX940-NEXT: ;;#ASMSTART 2447; GFX940-NEXT: ; def v[0:5] 2448; GFX940-NEXT: ;;#ASMEND 2449; GFX940-NEXT: global_store_dwordx2 v12, v[4:5], s[0:1] offset:16 sc0 sc1 2450; GFX940-NEXT: v_mov_b32_e32 v10, v4 2451; GFX940-NEXT: v_mov_b32_e32 v11, v5 2452; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1 2453; GFX940-NEXT: s_waitcnt vmcnt(0) 2454; GFX940-NEXT: s_setpc_b64 s[30:31] 2455 %vec0 = call <3 x i64> asm "; def $0", "=v"() 2456 %vec1 = call <3 x i64> asm "; def $0", "=v"() 2457 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 4, i32 2, i32 2> 2458 store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 2459 ret void 2460} 2461 2462define void @v_shuffle_v3i64_v3i64__5_2_2(ptr addrspace(1) inreg %ptr) { 2463; GFX900-LABEL: v_shuffle_v3i64_v3i64__5_2_2: 2464; GFX900: ; %bb.0: 2465; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2466; GFX900-NEXT: ;;#ASMSTART 2467; GFX900-NEXT: ; def v[0:5] 2468; GFX900-NEXT: ;;#ASMEND 2469; GFX900-NEXT: v_mov_b32_e32 v12, 0 2470; GFX900-NEXT: ;;#ASMSTART 2471; GFX900-NEXT: ; def v[6:11] 2472; GFX900-NEXT: ;;#ASMEND 2473; GFX900-NEXT: v_mov_b32_e32 v2, v10 2474; GFX900-NEXT: v_mov_b32_e32 v3, v11 2475; GFX900-NEXT: global_store_dwordx2 v12, v[4:5], s[16:17] offset:16 2476; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] 2477; GFX900-NEXT: s_waitcnt vmcnt(0) 2478; GFX900-NEXT: s_setpc_b64 s[30:31] 2479; 2480; GFX90A-LABEL: v_shuffle_v3i64_v3i64__5_2_2: 2481; GFX90A: ; %bb.0: 2482; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2483; GFX90A-NEXT: ;;#ASMSTART 2484; GFX90A-NEXT: ; def v[0:5] 2485; GFX90A-NEXT: ;;#ASMEND 2486; GFX90A-NEXT: v_mov_b32_e32 v12, 0 2487; GFX90A-NEXT: ;;#ASMSTART 2488; GFX90A-NEXT: ; def v[6:11] 2489; GFX90A-NEXT: ;;#ASMEND 2490; GFX90A-NEXT: v_mov_b32_e32 v2, v10 
2491; GFX90A-NEXT: v_mov_b32_e32 v3, v11 2492; GFX90A-NEXT: global_store_dwordx2 v12, v[4:5], s[16:17] offset:16 2493; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] 2494; GFX90A-NEXT: s_waitcnt vmcnt(0) 2495; GFX90A-NEXT: s_setpc_b64 s[30:31] 2496; 2497; GFX940-LABEL: v_shuffle_v3i64_v3i64__5_2_2: 2498; GFX940: ; %bb.0: 2499; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2500; GFX940-NEXT: ;;#ASMSTART 2501; GFX940-NEXT: ; def v[0:5] 2502; GFX940-NEXT: ;;#ASMEND 2503; GFX940-NEXT: v_mov_b32_e32 v12, 0 2504; GFX940-NEXT: ;;#ASMSTART 2505; GFX940-NEXT: ; def v[6:11] 2506; GFX940-NEXT: ;;#ASMEND 2507; GFX940-NEXT: global_store_dwordx2 v12, v[4:5], s[0:1] offset:16 sc0 sc1 2508; GFX940-NEXT: v_mov_b32_e32 v2, v10 2509; GFX940-NEXT: v_mov_b32_e32 v3, v11 2510; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] sc0 sc1 2511; GFX940-NEXT: s_waitcnt vmcnt(0) 2512; GFX940-NEXT: s_setpc_b64 s[30:31] 2513 %vec0 = call <3 x i64> asm "; def $0", "=v"() 2514 %vec1 = call <3 x i64> asm "; def $0", "=v"() 2515 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 2, i32 2> 2516 store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 2517 ret void 2518} 2519 2520define void @v_shuffle_v3i64_v3i64__5_u_2(ptr addrspace(1) inreg %ptr) { 2521; GFX900-LABEL: v_shuffle_v3i64_v3i64__5_u_2: 2522; GFX900: ; %bb.0: 2523; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2524; GFX900-NEXT: ;;#ASMSTART 2525; GFX900-NEXT: ; def v[0:5] 2526; GFX900-NEXT: ;;#ASMEND 2527; GFX900-NEXT: v_mov_b32_e32 v12, 0 2528; GFX900-NEXT: ;;#ASMSTART 2529; GFX900-NEXT: ; def v[6:11] 2530; GFX900-NEXT: ;;#ASMEND 2531; GFX900-NEXT: v_mov_b32_e32 v0, v10 2532; GFX900-NEXT: v_mov_b32_e32 v1, v11 2533; GFX900-NEXT: global_store_dwordx2 v12, v[4:5], s[16:17] offset:16 2534; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] 2535; GFX900-NEXT: s_waitcnt vmcnt(0) 2536; GFX900-NEXT: s_setpc_b64 s[30:31] 2537; 2538; GFX90A-LABEL: v_shuffle_v3i64_v3i64__5_u_2: 2539; GFX90A: 
; %bb.0: 2540; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2541; GFX90A-NEXT: ;;#ASMSTART 2542; GFX90A-NEXT: ; def v[0:5] 2543; GFX90A-NEXT: ;;#ASMEND 2544; GFX90A-NEXT: v_mov_b32_e32 v12, 0 2545; GFX90A-NEXT: ;;#ASMSTART 2546; GFX90A-NEXT: ; def v[6:11] 2547; GFX90A-NEXT: ;;#ASMEND 2548; GFX90A-NEXT: v_mov_b32_e32 v0, v10 2549; GFX90A-NEXT: v_mov_b32_e32 v1, v11 2550; GFX90A-NEXT: global_store_dwordx2 v12, v[4:5], s[16:17] offset:16 2551; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] 2552; GFX90A-NEXT: s_waitcnt vmcnt(0) 2553; GFX90A-NEXT: s_setpc_b64 s[30:31] 2554; 2555; GFX940-LABEL: v_shuffle_v3i64_v3i64__5_u_2: 2556; GFX940: ; %bb.0: 2557; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2558; GFX940-NEXT: ;;#ASMSTART 2559; GFX940-NEXT: ; def v[0:5] 2560; GFX940-NEXT: ;;#ASMEND 2561; GFX940-NEXT: v_mov_b32_e32 v12, 0 2562; GFX940-NEXT: ;;#ASMSTART 2563; GFX940-NEXT: ; def v[6:11] 2564; GFX940-NEXT: ;;#ASMEND 2565; GFX940-NEXT: global_store_dwordx2 v12, v[4:5], s[0:1] offset:16 sc0 sc1 2566; GFX940-NEXT: v_mov_b32_e32 v0, v10 2567; GFX940-NEXT: v_mov_b32_e32 v1, v11 2568; GFX940-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] sc0 sc1 2569; GFX940-NEXT: s_waitcnt vmcnt(0) 2570; GFX940-NEXT: s_setpc_b64 s[30:31] 2571 %vec0 = call <3 x i64> asm "; def $0", "=v"() 2572 %vec1 = call <3 x i64> asm "; def $0", "=v"() 2573 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 poison, i32 2> 2574 store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 2575 ret void 2576} 2577 2578define void @v_shuffle_v3i64_v3i64__5_0_2(ptr addrspace(1) inreg %ptr) { 2579; GFX900-LABEL: v_shuffle_v3i64_v3i64__5_0_2: 2580; GFX900: ; %bb.0: 2581; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2582; GFX900-NEXT: v_mov_b32_e32 v12, 0 2583; GFX900-NEXT: ;;#ASMSTART 2584; GFX900-NEXT: ; def v[0:5] 2585; GFX900-NEXT: ;;#ASMEND 2586; GFX900-NEXT: ;;#ASMSTART 2587; GFX900-NEXT: ; def v[6:11] 2588; GFX900-NEXT: ;;#ASMEND 2589; GFX900-NEXT: 
global_store_dwordx2 v12, v[4:5], s[16:17] offset:16 2590; GFX900-NEXT: v_mov_b32_e32 v2, v10 2591; GFX900-NEXT: v_mov_b32_e32 v3, v11 2592; GFX900-NEXT: v_mov_b32_e32 v4, v0 2593; GFX900-NEXT: v_mov_b32_e32 v5, v1 2594; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] 2595; GFX900-NEXT: s_waitcnt vmcnt(0) 2596; GFX900-NEXT: s_setpc_b64 s[30:31] 2597; 2598; GFX90A-LABEL: v_shuffle_v3i64_v3i64__5_0_2: 2599; GFX90A: ; %bb.0: 2600; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2601; GFX90A-NEXT: v_mov_b32_e32 v12, 0 2602; GFX90A-NEXT: ;;#ASMSTART 2603; GFX90A-NEXT: ; def v[0:5] 2604; GFX90A-NEXT: ;;#ASMEND 2605; GFX90A-NEXT: ;;#ASMSTART 2606; GFX90A-NEXT: ; def v[6:11] 2607; GFX90A-NEXT: ;;#ASMEND 2608; GFX90A-NEXT: global_store_dwordx2 v12, v[4:5], s[16:17] offset:16 2609; GFX90A-NEXT: v_mov_b32_e32 v2, v10 2610; GFX90A-NEXT: v_mov_b32_e32 v3, v11 2611; GFX90A-NEXT: v_mov_b32_e32 v4, v0 2612; GFX90A-NEXT: v_mov_b32_e32 v5, v1 2613; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] 2614; GFX90A-NEXT: s_waitcnt vmcnt(0) 2615; GFX90A-NEXT: s_setpc_b64 s[30:31] 2616; 2617; GFX940-LABEL: v_shuffle_v3i64_v3i64__5_0_2: 2618; GFX940: ; %bb.0: 2619; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2620; GFX940-NEXT: v_mov_b32_e32 v12, 0 2621; GFX940-NEXT: ;;#ASMSTART 2622; GFX940-NEXT: ; def v[0:5] 2623; GFX940-NEXT: ;;#ASMEND 2624; GFX940-NEXT: ;;#ASMSTART 2625; GFX940-NEXT: ; def v[6:11] 2626; GFX940-NEXT: ;;#ASMEND 2627; GFX940-NEXT: global_store_dwordx2 v12, v[4:5], s[0:1] offset:16 sc0 sc1 2628; GFX940-NEXT: v_mov_b32_e32 v2, v10 2629; GFX940-NEXT: v_mov_b32_e32 v3, v11 2630; GFX940-NEXT: v_mov_b32_e32 v4, v0 2631; GFX940-NEXT: v_mov_b32_e32 v5, v1 2632; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] sc0 sc1 2633; GFX940-NEXT: s_waitcnt vmcnt(0) 2634; GFX940-NEXT: s_setpc_b64 s[30:31] 2635 %vec0 = call <3 x i64> asm "; def $0", "=v"() 2636 %vec1 = call <3 x i64> asm "; def $0", "=v"() 2637 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> 
%vec1, <3 x i32> <i32 5, i32 0, i32 2> 2638 store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 2639 ret void 2640} 2641 2642define void @v_shuffle_v3i64_v3i64__5_1_2(ptr addrspace(1) inreg %ptr) { 2643; GFX900-LABEL: v_shuffle_v3i64_v3i64__5_1_2: 2644; GFX900: ; %bb.0: 2645; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2646; GFX900-NEXT: ;;#ASMSTART 2647; GFX900-NEXT: ; def v[0:5] 2648; GFX900-NEXT: ;;#ASMEND 2649; GFX900-NEXT: v_mov_b32_e32 v12, 0 2650; GFX900-NEXT: ;;#ASMSTART 2651; GFX900-NEXT: ; def v[6:11] 2652; GFX900-NEXT: ;;#ASMEND 2653; GFX900-NEXT: v_mov_b32_e32 v0, v10 2654; GFX900-NEXT: v_mov_b32_e32 v1, v11 2655; GFX900-NEXT: global_store_dwordx2 v12, v[4:5], s[16:17] offset:16 2656; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] 2657; GFX900-NEXT: s_waitcnt vmcnt(0) 2658; GFX900-NEXT: s_setpc_b64 s[30:31] 2659; 2660; GFX90A-LABEL: v_shuffle_v3i64_v3i64__5_1_2: 2661; GFX90A: ; %bb.0: 2662; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2663; GFX90A-NEXT: ;;#ASMSTART 2664; GFX90A-NEXT: ; def v[0:5] 2665; GFX90A-NEXT: ;;#ASMEND 2666; GFX90A-NEXT: v_mov_b32_e32 v12, 0 2667; GFX90A-NEXT: ;;#ASMSTART 2668; GFX90A-NEXT: ; def v[6:11] 2669; GFX90A-NEXT: ;;#ASMEND 2670; GFX90A-NEXT: v_mov_b32_e32 v0, v10 2671; GFX90A-NEXT: v_mov_b32_e32 v1, v11 2672; GFX90A-NEXT: global_store_dwordx2 v12, v[4:5], s[16:17] offset:16 2673; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] 2674; GFX90A-NEXT: s_waitcnt vmcnt(0) 2675; GFX90A-NEXT: s_setpc_b64 s[30:31] 2676; 2677; GFX940-LABEL: v_shuffle_v3i64_v3i64__5_1_2: 2678; GFX940: ; %bb.0: 2679; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2680; GFX940-NEXT: ;;#ASMSTART 2681; GFX940-NEXT: ; def v[0:5] 2682; GFX940-NEXT: ;;#ASMEND 2683; GFX940-NEXT: v_mov_b32_e32 v12, 0 2684; GFX940-NEXT: ;;#ASMSTART 2685; GFX940-NEXT: ; def v[6:11] 2686; GFX940-NEXT: ;;#ASMEND 2687; GFX940-NEXT: global_store_dwordx2 v12, v[4:5], s[0:1] offset:16 sc0 sc1 2688; GFX940-NEXT: v_mov_b32_e32 v0, v10 2689; 
GFX940-NEXT: v_mov_b32_e32 v1, v11 2690; GFX940-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] sc0 sc1 2691; GFX940-NEXT: s_waitcnt vmcnt(0) 2692; GFX940-NEXT: s_setpc_b64 s[30:31] 2693 %vec0 = call <3 x i64> asm "; def $0", "=v"() 2694 %vec1 = call <3 x i64> asm "; def $0", "=v"() 2695 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 1, i32 2> 2696 store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 2697 ret void 2698} 2699 2700define void @v_shuffle_v3i64_v3i64__5_3_2(ptr addrspace(1) inreg %ptr) { 2701; GFX900-LABEL: v_shuffle_v3i64_v3i64__5_3_2: 2702; GFX900: ; %bb.0: 2703; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2704; GFX900-NEXT: ;;#ASMSTART 2705; GFX900-NEXT: ; def v[0:5] 2706; GFX900-NEXT: ;;#ASMEND 2707; GFX900-NEXT: v_mov_b32_e32 v12, 0 2708; GFX900-NEXT: ;;#ASMSTART 2709; GFX900-NEXT: ; def v[6:11] 2710; GFX900-NEXT: ;;#ASMEND 2711; GFX900-NEXT: v_mov_b32_e32 v0, v10 2712; GFX900-NEXT: v_mov_b32_e32 v1, v11 2713; GFX900-NEXT: v_mov_b32_e32 v2, v6 2714; GFX900-NEXT: v_mov_b32_e32 v3, v7 2715; GFX900-NEXT: global_store_dwordx2 v12, v[4:5], s[16:17] offset:16 2716; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] 2717; GFX900-NEXT: s_waitcnt vmcnt(0) 2718; GFX900-NEXT: s_setpc_b64 s[30:31] 2719; 2720; GFX90A-LABEL: v_shuffle_v3i64_v3i64__5_3_2: 2721; GFX90A: ; %bb.0: 2722; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2723; GFX90A-NEXT: ;;#ASMSTART 2724; GFX90A-NEXT: ; def v[0:5] 2725; GFX90A-NEXT: ;;#ASMEND 2726; GFX90A-NEXT: v_mov_b32_e32 v12, 0 2727; GFX90A-NEXT: ;;#ASMSTART 2728; GFX90A-NEXT: ; def v[6:11] 2729; GFX90A-NEXT: ;;#ASMEND 2730; GFX90A-NEXT: v_mov_b32_e32 v0, v10 2731; GFX90A-NEXT: v_mov_b32_e32 v1, v11 2732; GFX90A-NEXT: v_mov_b32_e32 v2, v6 2733; GFX90A-NEXT: v_mov_b32_e32 v3, v7 2734; GFX90A-NEXT: global_store_dwordx2 v12, v[4:5], s[16:17] offset:16 2735; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] 2736; GFX90A-NEXT: s_waitcnt vmcnt(0) 2737; GFX90A-NEXT: s_setpc_b64 
s[30:31] 2738; 2739; GFX940-LABEL: v_shuffle_v3i64_v3i64__5_3_2: 2740; GFX940: ; %bb.0: 2741; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2742; GFX940-NEXT: ;;#ASMSTART 2743; GFX940-NEXT: ; def v[0:5] 2744; GFX940-NEXT: ;;#ASMEND 2745; GFX940-NEXT: v_mov_b32_e32 v12, 0 2746; GFX940-NEXT: ;;#ASMSTART 2747; GFX940-NEXT: ; def v[6:11] 2748; GFX940-NEXT: ;;#ASMEND 2749; GFX940-NEXT: global_store_dwordx2 v12, v[4:5], s[0:1] offset:16 sc0 sc1 2750; GFX940-NEXT: v_mov_b32_e32 v0, v10 2751; GFX940-NEXT: v_mov_b32_e32 v1, v11 2752; GFX940-NEXT: v_mov_b32_e32 v2, v6 2753; GFX940-NEXT: v_mov_b32_e32 v3, v7 2754; GFX940-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] sc0 sc1 2755; GFX940-NEXT: s_waitcnt vmcnt(0) 2756; GFX940-NEXT: s_setpc_b64 s[30:31] 2757 %vec0 = call <3 x i64> asm "; def $0", "=v"() 2758 %vec1 = call <3 x i64> asm "; def $0", "=v"() 2759 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 3, i32 2> 2760 store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 2761 ret void 2762} 2763 2764define void @v_shuffle_v3i64_v3i64__5_4_2(ptr addrspace(1) inreg %ptr) { 2765; GFX900-LABEL: v_shuffle_v3i64_v3i64__5_4_2: 2766; GFX900: ; %bb.0: 2767; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2768; GFX900-NEXT: ;;#ASMSTART 2769; GFX900-NEXT: ; def v[6:11] 2770; GFX900-NEXT: ;;#ASMEND 2771; GFX900-NEXT: v_mov_b32_e32 v12, 0 2772; GFX900-NEXT: v_mov_b32_e32 v6, v10 2773; GFX900-NEXT: v_mov_b32_e32 v7, v11 2774; GFX900-NEXT: ;;#ASMSTART 2775; GFX900-NEXT: ; def v[0:5] 2776; GFX900-NEXT: ;;#ASMEND 2777; GFX900-NEXT: global_store_dwordx2 v12, v[4:5], s[16:17] offset:16 2778; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] 2779; GFX900-NEXT: s_waitcnt vmcnt(0) 2780; GFX900-NEXT: s_setpc_b64 s[30:31] 2781; 2782; GFX90A-LABEL: v_shuffle_v3i64_v3i64__5_4_2: 2783; GFX90A: ; %bb.0: 2784; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2785; GFX90A-NEXT: ;;#ASMSTART 2786; GFX90A-NEXT: ; def v[6:11] 2787; GFX90A-NEXT: ;;#ASMEND 
2788; GFX90A-NEXT: v_mov_b32_e32 v12, 0 2789; GFX90A-NEXT: v_mov_b32_e32 v6, v10 2790; GFX90A-NEXT: v_mov_b32_e32 v7, v11 2791; GFX90A-NEXT: ;;#ASMSTART 2792; GFX90A-NEXT: ; def v[0:5] 2793; GFX90A-NEXT: ;;#ASMEND 2794; GFX90A-NEXT: global_store_dwordx2 v12, v[4:5], s[16:17] offset:16 2795; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] 2796; GFX90A-NEXT: s_waitcnt vmcnt(0) 2797; GFX90A-NEXT: s_setpc_b64 s[30:31] 2798; 2799; GFX940-LABEL: v_shuffle_v3i64_v3i64__5_4_2: 2800; GFX940: ; %bb.0: 2801; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2802; GFX940-NEXT: ;;#ASMSTART 2803; GFX940-NEXT: ; def v[6:11] 2804; GFX940-NEXT: ;;#ASMEND 2805; GFX940-NEXT: v_mov_b32_e32 v12, 0 2806; GFX940-NEXT: v_mov_b32_e32 v6, v10 2807; GFX940-NEXT: v_mov_b32_e32 v7, v11 2808; GFX940-NEXT: ;;#ASMSTART 2809; GFX940-NEXT: ; def v[0:5] 2810; GFX940-NEXT: ;;#ASMEND 2811; GFX940-NEXT: global_store_dwordx2 v12, v[4:5], s[0:1] offset:16 sc0 sc1 2812; GFX940-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] sc0 sc1 2813; GFX940-NEXT: s_waitcnt vmcnt(0) 2814; GFX940-NEXT: s_setpc_b64 s[30:31] 2815 %vec0 = call <3 x i64> asm "; def $0", "=v"() 2816 %vec1 = call <3 x i64> asm "; def $0", "=v"() 2817 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 4, i32 2> 2818 store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 2819 ret void 2820} 2821 2822define void @v_shuffle_v3i64_v3i64__u_3_3(ptr addrspace(1) inreg %ptr) { 2823; GFX9-LABEL: v_shuffle_v3i64_v3i64__u_3_3: 2824; GFX9: ; %bb.0: 2825; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2826; GFX9-NEXT: s_setpc_b64 s[30:31] 2827 %vec0 = call <3 x i64> asm "; def $0", "=v"() 2828 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> <i32 poison, i32 3, i32 3> 2829 store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 2830 ret void 2831} 2832 2833define void @v_shuffle_v3i64_v3i64__0_3_3(ptr addrspace(1) inreg %ptr) { 2834; GFX900-LABEL: v_shuffle_v3i64_v3i64__0_3_3: 2835; GFX900: ; 
%bb.0: 2836; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2837; GFX900-NEXT: v_mov_b32_e32 v6, 0 2838; GFX900-NEXT: ;;#ASMSTART 2839; GFX900-NEXT: ; def v[0:5] 2840; GFX900-NEXT: ;;#ASMEND 2841; GFX900-NEXT: global_store_dwordx2 v6, v[4:5], s[16:17] offset:16 2842; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] 2843; GFX900-NEXT: s_waitcnt vmcnt(0) 2844; GFX900-NEXT: s_setpc_b64 s[30:31] 2845; 2846; GFX90A-LABEL: v_shuffle_v3i64_v3i64__0_3_3: 2847; GFX90A: ; %bb.0: 2848; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2849; GFX90A-NEXT: v_mov_b32_e32 v6, 0 2850; GFX90A-NEXT: ;;#ASMSTART 2851; GFX90A-NEXT: ; def v[0:5] 2852; GFX90A-NEXT: ;;#ASMEND 2853; GFX90A-NEXT: global_store_dwordx2 v6, v[4:5], s[16:17] offset:16 2854; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] 2855; GFX90A-NEXT: s_waitcnt vmcnt(0) 2856; GFX90A-NEXT: s_setpc_b64 s[30:31] 2857; 2858; GFX940-LABEL: v_shuffle_v3i64_v3i64__0_3_3: 2859; GFX940: ; %bb.0: 2860; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2861; GFX940-NEXT: v_mov_b32_e32 v6, 0 2862; GFX940-NEXT: ;;#ASMSTART 2863; GFX940-NEXT: ; def v[0:5] 2864; GFX940-NEXT: ;;#ASMEND 2865; GFX940-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] offset:16 sc0 sc1 2866; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 2867; GFX940-NEXT: s_waitcnt vmcnt(0) 2868; GFX940-NEXT: s_setpc_b64 s[30:31] 2869 %vec0 = call <3 x i64> asm "; def $0", "=v"() 2870 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> <i32 0, i32 3, i32 3> 2871 store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 2872 ret void 2873} 2874 2875define void @v_shuffle_v3i64_v3i64__1_3_3(ptr addrspace(1) inreg %ptr) { 2876; GFX900-LABEL: v_shuffle_v3i64_v3i64__1_3_3: 2877; GFX900: ; %bb.0: 2878; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2879; GFX900-NEXT: v_mov_b32_e32 v6, 0 2880; GFX900-NEXT: ;;#ASMSTART 2881; GFX900-NEXT: ; def v[0:5] 2882; GFX900-NEXT: ;;#ASMEND 2883; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], 
s[16:17] 2884; GFX900-NEXT: s_waitcnt vmcnt(0) 2885; GFX900-NEXT: s_setpc_b64 s[30:31] 2886; 2887; GFX90A-LABEL: v_shuffle_v3i64_v3i64__1_3_3: 2888; GFX90A: ; %bb.0: 2889; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2890; GFX90A-NEXT: v_mov_b32_e32 v6, 0 2891; GFX90A-NEXT: ;;#ASMSTART 2892; GFX90A-NEXT: ; def v[0:5] 2893; GFX90A-NEXT: ;;#ASMEND 2894; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] 2895; GFX90A-NEXT: s_waitcnt vmcnt(0) 2896; GFX90A-NEXT: s_setpc_b64 s[30:31] 2897; 2898; GFX940-LABEL: v_shuffle_v3i64_v3i64__1_3_3: 2899; GFX940: ; %bb.0: 2900; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2901; GFX940-NEXT: v_mov_b32_e32 v6, 0 2902; GFX940-NEXT: ;;#ASMSTART 2903; GFX940-NEXT: ; def v[0:5] 2904; GFX940-NEXT: ;;#ASMEND 2905; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 2906; GFX940-NEXT: s_waitcnt vmcnt(0) 2907; GFX940-NEXT: s_setpc_b64 s[30:31] 2908 %vec0 = call <3 x i64> asm "; def $0", "=v"() 2909 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> <i32 1, i32 3, i32 3> 2910 store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 2911 ret void 2912} 2913 2914define void @v_shuffle_v3i64_v3i64__2_3_3(ptr addrspace(1) inreg %ptr) { 2915; GFX900-LABEL: v_shuffle_v3i64_v3i64__2_3_3: 2916; GFX900: ; %bb.0: 2917; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2918; GFX900-NEXT: ;;#ASMSTART 2919; GFX900-NEXT: ; def v[0:5] 2920; GFX900-NEXT: ;;#ASMEND 2921; GFX900-NEXT: v_mov_b32_e32 v6, 0 2922; GFX900-NEXT: v_mov_b32_e32 v0, v4 2923; GFX900-NEXT: v_mov_b32_e32 v1, v5 2924; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] 2925; GFX900-NEXT: s_waitcnt vmcnt(0) 2926; GFX900-NEXT: s_setpc_b64 s[30:31] 2927; 2928; GFX90A-LABEL: v_shuffle_v3i64_v3i64__2_3_3: 2929; GFX90A: ; %bb.0: 2930; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2931; GFX90A-NEXT: ;;#ASMSTART 2932; GFX90A-NEXT: ; def v[0:5] 2933; GFX90A-NEXT: ;;#ASMEND 2934; GFX90A-NEXT: v_mov_b32_e32 v6, 0 2935; GFX90A-NEXT: v_mov_b32_e32 
v0, v4 2936; GFX90A-NEXT: v_mov_b32_e32 v1, v5 2937; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] 2938; GFX90A-NEXT: s_waitcnt vmcnt(0) 2939; GFX90A-NEXT: s_setpc_b64 s[30:31] 2940; 2941; GFX940-LABEL: v_shuffle_v3i64_v3i64__2_3_3: 2942; GFX940: ; %bb.0: 2943; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2944; GFX940-NEXT: ;;#ASMSTART 2945; GFX940-NEXT: ; def v[0:5] 2946; GFX940-NEXT: ;;#ASMEND 2947; GFX940-NEXT: v_mov_b32_e32 v6, 0 2948; GFX940-NEXT: v_mov_b32_e32 v0, v4 2949; GFX940-NEXT: v_mov_b32_e32 v1, v5 2950; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 2951; GFX940-NEXT: s_waitcnt vmcnt(0) 2952; GFX940-NEXT: s_setpc_b64 s[30:31] 2953 %vec0 = call <3 x i64> asm "; def $0", "=v"() 2954 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> <i32 2, i32 3, i32 3> 2955 store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 2956 ret void 2957} 2958 2959define void @v_shuffle_v3i64_v3i64__3_3_3(ptr addrspace(1) inreg %ptr) { 2960; GFX9-LABEL: v_shuffle_v3i64_v3i64__3_3_3: 2961; GFX9: ; %bb.0: 2962; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2963; GFX9-NEXT: s_setpc_b64 s[30:31] 2964 %vec0 = call <3 x i64> asm "; def $0", "=v"() 2965 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> <i32 3, i32 3, i32 3> 2966 store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 2967 ret void 2968} 2969 2970define void @v_shuffle_v3i64_v3i64__4_3_3(ptr addrspace(1) inreg %ptr) { 2971; GFX900-LABEL: v_shuffle_v3i64_v3i64__4_3_3: 2972; GFX900: ; %bb.0: 2973; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2974; GFX900-NEXT: ;;#ASMSTART 2975; GFX900-NEXT: ; def v[0:5] 2976; GFX900-NEXT: ;;#ASMEND 2977; GFX900-NEXT: v_mov_b32_e32 v6, 0 2978; GFX900-NEXT: v_mov_b32_e32 v4, v0 2979; GFX900-NEXT: v_mov_b32_e32 v5, v1 2980; GFX900-NEXT: global_store_dwordx2 v6, v[0:1], s[16:17] offset:16 2981; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] 2982; GFX900-NEXT: s_waitcnt vmcnt(0) 2983; GFX900-NEXT: 
s_setpc_b64 s[30:31] 2984; 2985; GFX90A-LABEL: v_shuffle_v3i64_v3i64__4_3_3: 2986; GFX90A: ; %bb.0: 2987; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2988; GFX90A-NEXT: ;;#ASMSTART 2989; GFX90A-NEXT: ; def v[0:5] 2990; GFX90A-NEXT: ;;#ASMEND 2991; GFX90A-NEXT: v_mov_b32_e32 v6, 0 2992; GFX90A-NEXT: v_mov_b32_e32 v4, v0 2993; GFX90A-NEXT: v_mov_b32_e32 v5, v1 2994; GFX90A-NEXT: global_store_dwordx2 v6, v[0:1], s[16:17] offset:16 2995; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] 2996; GFX90A-NEXT: s_waitcnt vmcnt(0) 2997; GFX90A-NEXT: s_setpc_b64 s[30:31] 2998; 2999; GFX940-LABEL: v_shuffle_v3i64_v3i64__4_3_3: 3000; GFX940: ; %bb.0: 3001; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3002; GFX940-NEXT: ;;#ASMSTART 3003; GFX940-NEXT: ; def v[0:5] 3004; GFX940-NEXT: ;;#ASMEND 3005; GFX940-NEXT: v_mov_b32_e32 v6, 0 3006; GFX940-NEXT: v_mov_b32_e32 v4, v0 3007; GFX940-NEXT: v_mov_b32_e32 v5, v1 3008; GFX940-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16 sc0 sc1 3009; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 3010; GFX940-NEXT: s_waitcnt vmcnt(0) 3011; GFX940-NEXT: s_setpc_b64 s[30:31] 3012 %vec0 = call <3 x i64> asm "; def $0", "=v"() 3013 %vec1 = call <3 x i64> asm "; def $0", "=v"() 3014 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 4, i32 3, i32 3> 3015 store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 3016 ret void 3017} 3018 3019define void @v_shuffle_v3i64_v3i64__5_3_3(ptr addrspace(1) inreg %ptr) { 3020; GFX900-LABEL: v_shuffle_v3i64_v3i64__5_3_3: 3021; GFX900: ; %bb.0: 3022; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3023; GFX900-NEXT: ;;#ASMSTART 3024; GFX900-NEXT: ; def v[0:5] 3025; GFX900-NEXT: ;;#ASMEND 3026; GFX900-NEXT: v_mov_b32_e32 v6, 0 3027; GFX900-NEXT: v_mov_b32_e32 v2, v4 3028; GFX900-NEXT: v_mov_b32_e32 v3, v5 3029; GFX900-NEXT: v_mov_b32_e32 v4, v0 3030; GFX900-NEXT: v_mov_b32_e32 v5, v1 3031; GFX900-NEXT: global_store_dwordx2 v6, v[0:1], s[16:17] 
offset:16 3032; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] 3033; GFX900-NEXT: s_waitcnt vmcnt(0) 3034; GFX900-NEXT: s_setpc_b64 s[30:31] 3035; 3036; GFX90A-LABEL: v_shuffle_v3i64_v3i64__5_3_3: 3037; GFX90A: ; %bb.0: 3038; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3039; GFX90A-NEXT: ;;#ASMSTART 3040; GFX90A-NEXT: ; def v[0:5] 3041; GFX90A-NEXT: ;;#ASMEND 3042; GFX90A-NEXT: v_mov_b32_e32 v6, 0 3043; GFX90A-NEXT: v_mov_b32_e32 v2, v4 3044; GFX90A-NEXT: v_mov_b32_e32 v3, v5 3045; GFX90A-NEXT: v_mov_b32_e32 v4, v0 3046; GFX90A-NEXT: v_mov_b32_e32 v5, v1 3047; GFX90A-NEXT: global_store_dwordx2 v6, v[0:1], s[16:17] offset:16 3048; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] 3049; GFX90A-NEXT: s_waitcnt vmcnt(0) 3050; GFX90A-NEXT: s_setpc_b64 s[30:31] 3051; 3052; GFX940-LABEL: v_shuffle_v3i64_v3i64__5_3_3: 3053; GFX940: ; %bb.0: 3054; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3055; GFX940-NEXT: ;;#ASMSTART 3056; GFX940-NEXT: ; def v[0:5] 3057; GFX940-NEXT: ;;#ASMEND 3058; GFX940-NEXT: v_mov_b32_e32 v6, 0 3059; GFX940-NEXT: v_mov_b32_e32 v2, v4 3060; GFX940-NEXT: v_mov_b32_e32 v3, v5 3061; GFX940-NEXT: v_mov_b32_e32 v4, v0 3062; GFX940-NEXT: v_mov_b32_e32 v5, v1 3063; GFX940-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16 sc0 sc1 3064; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 3065; GFX940-NEXT: s_waitcnt vmcnt(0) 3066; GFX940-NEXT: s_setpc_b64 s[30:31] 3067 %vec0 = call <3 x i64> asm "; def $0", "=v"() 3068 %vec1 = call <3 x i64> asm "; def $0", "=v"() 3069 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 3, i32 3> 3070 store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 3071 ret void 3072} 3073 3074define void @v_shuffle_v3i64_v3i64__5_u_3(ptr addrspace(1) inreg %ptr) { 3075; GFX900-LABEL: v_shuffle_v3i64_v3i64__5_u_3: 3076; GFX900: ; %bb.0: 3077; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3078; GFX900-NEXT: v_mov_b32_e32 v6, 0 3079; GFX900-NEXT: ;;#ASMSTART 
3080; GFX900-NEXT: ; def v[0:5] 3081; GFX900-NEXT: ;;#ASMEND 3082; GFX900-NEXT: global_store_dwordx2 v6, v[0:1], s[16:17] offset:16 3083; GFX900-NEXT: v_mov_b32_e32 v0, v4 3084; GFX900-NEXT: v_mov_b32_e32 v1, v5 3085; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] 3086; GFX900-NEXT: s_waitcnt vmcnt(0) 3087; GFX900-NEXT: s_setpc_b64 s[30:31] 3088; 3089; GFX90A-LABEL: v_shuffle_v3i64_v3i64__5_u_3: 3090; GFX90A: ; %bb.0: 3091; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3092; GFX90A-NEXT: v_mov_b32_e32 v6, 0 3093; GFX90A-NEXT: ;;#ASMSTART 3094; GFX90A-NEXT: ; def v[0:5] 3095; GFX90A-NEXT: ;;#ASMEND 3096; GFX90A-NEXT: global_store_dwordx2 v6, v[0:1], s[16:17] offset:16 3097; GFX90A-NEXT: v_mov_b32_e32 v0, v4 3098; GFX90A-NEXT: v_mov_b32_e32 v1, v5 3099; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] 3100; GFX90A-NEXT: s_waitcnt vmcnt(0) 3101; GFX90A-NEXT: s_setpc_b64 s[30:31] 3102; 3103; GFX940-LABEL: v_shuffle_v3i64_v3i64__5_u_3: 3104; GFX940: ; %bb.0: 3105; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3106; GFX940-NEXT: v_mov_b32_e32 v6, 0 3107; GFX940-NEXT: ;;#ASMSTART 3108; GFX940-NEXT: ; def v[0:5] 3109; GFX940-NEXT: ;;#ASMEND 3110; GFX940-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16 sc0 sc1 3111; GFX940-NEXT: v_mov_b32_e32 v0, v4 3112; GFX940-NEXT: v_mov_b32_e32 v1, v5 3113; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 3114; GFX940-NEXT: s_waitcnt vmcnt(0) 3115; GFX940-NEXT: s_setpc_b64 s[30:31] 3116 %vec0 = call <3 x i64> asm "; def $0", "=v"() 3117 %vec1 = call <3 x i64> asm "; def $0", "=v"() 3118 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 poison, i32 3> 3119 store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 3120 ret void 3121} 3122 3123define void @v_shuffle_v3i64_v3i64__5_0_3(ptr addrspace(1) inreg %ptr) { 3124; GFX900-LABEL: v_shuffle_v3i64_v3i64__5_0_3: 3125; GFX900: ; %bb.0: 3126; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3127; GFX900-NEXT: 
;;#ASMSTART 3128; GFX900-NEXT: ; def v[0:5] 3129; GFX900-NEXT: ;;#ASMEND 3130; GFX900-NEXT: v_mov_b32_e32 v8, 0 3131; GFX900-NEXT: ;;#ASMSTART 3132; GFX900-NEXT: ; def v[2:7] 3133; GFX900-NEXT: ;;#ASMEND 3134; GFX900-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17] offset:16 3135; GFX900-NEXT: v_mov_b32_e32 v2, v6 3136; GFX900-NEXT: v_mov_b32_e32 v3, v7 3137; GFX900-NEXT: v_mov_b32_e32 v4, v0 3138; GFX900-NEXT: v_mov_b32_e32 v5, v1 3139; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] 3140; GFX900-NEXT: s_waitcnt vmcnt(0) 3141; GFX900-NEXT: s_setpc_b64 s[30:31] 3142; 3143; GFX90A-LABEL: v_shuffle_v3i64_v3i64__5_0_3: 3144; GFX90A: ; %bb.0: 3145; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3146; GFX90A-NEXT: ;;#ASMSTART 3147; GFX90A-NEXT: ; def v[0:5] 3148; GFX90A-NEXT: ;;#ASMEND 3149; GFX90A-NEXT: v_mov_b32_e32 v8, 0 3150; GFX90A-NEXT: ;;#ASMSTART 3151; GFX90A-NEXT: ; def v[2:7] 3152; GFX90A-NEXT: ;;#ASMEND 3153; GFX90A-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17] offset:16 3154; GFX90A-NEXT: v_mov_b32_e32 v2, v6 3155; GFX90A-NEXT: v_mov_b32_e32 v3, v7 3156; GFX90A-NEXT: v_mov_b32_e32 v4, v0 3157; GFX90A-NEXT: v_mov_b32_e32 v5, v1 3158; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] 3159; GFX90A-NEXT: s_waitcnt vmcnt(0) 3160; GFX90A-NEXT: s_setpc_b64 s[30:31] 3161; 3162; GFX940-LABEL: v_shuffle_v3i64_v3i64__5_0_3: 3163; GFX940: ; %bb.0: 3164; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3165; GFX940-NEXT: ;;#ASMSTART 3166; GFX940-NEXT: ; def v[0:5] 3167; GFX940-NEXT: ;;#ASMEND 3168; GFX940-NEXT: v_mov_b32_e32 v8, 0 3169; GFX940-NEXT: ;;#ASMSTART 3170; GFX940-NEXT: ; def v[2:7] 3171; GFX940-NEXT: ;;#ASMEND 3172; GFX940-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16 sc0 sc1 3173; GFX940-NEXT: v_mov_b32_e32 v2, v6 3174; GFX940-NEXT: v_mov_b32_e32 v3, v7 3175; GFX940-NEXT: v_mov_b32_e32 v4, v0 3176; GFX940-NEXT: v_mov_b32_e32 v5, v1 3177; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 3178; GFX940-NEXT: s_waitcnt 
vmcnt(0) 3179; GFX940-NEXT: s_setpc_b64 s[30:31] 3180 %vec0 = call <3 x i64> asm "; def $0", "=v"() 3181 %vec1 = call <3 x i64> asm "; def $0", "=v"() 3182 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 0, i32 3> 3183 store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 3184 ret void 3185} 3186 3187define void @v_shuffle_v3i64_v3i64__5_1_3(ptr addrspace(1) inreg %ptr) { 3188; GFX900-LABEL: v_shuffle_v3i64_v3i64__5_1_3: 3189; GFX900: ; %bb.0: 3190; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3191; GFX900-NEXT: ;;#ASMSTART 3192; GFX900-NEXT: ; def v[0:5] 3193; GFX900-NEXT: ;;#ASMEND 3194; GFX900-NEXT: v_mov_b32_e32 v10, 0 3195; GFX900-NEXT: ;;#ASMSTART 3196; GFX900-NEXT: ; def v[4:9] 3197; GFX900-NEXT: ;;#ASMEND 3198; GFX900-NEXT: v_mov_b32_e32 v0, v8 3199; GFX900-NEXT: v_mov_b32_e32 v1, v9 3200; GFX900-NEXT: global_store_dwordx2 v10, v[4:5], s[16:17] offset:16 3201; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] 3202; GFX900-NEXT: s_waitcnt vmcnt(0) 3203; GFX900-NEXT: s_setpc_b64 s[30:31] 3204; 3205; GFX90A-LABEL: v_shuffle_v3i64_v3i64__5_1_3: 3206; GFX90A: ; %bb.0: 3207; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3208; GFX90A-NEXT: ;;#ASMSTART 3209; GFX90A-NEXT: ; def v[0:5] 3210; GFX90A-NEXT: ;;#ASMEND 3211; GFX90A-NEXT: v_mov_b32_e32 v10, 0 3212; GFX90A-NEXT: ;;#ASMSTART 3213; GFX90A-NEXT: ; def v[4:9] 3214; GFX90A-NEXT: ;;#ASMEND 3215; GFX90A-NEXT: v_mov_b32_e32 v0, v8 3216; GFX90A-NEXT: v_mov_b32_e32 v1, v9 3217; GFX90A-NEXT: global_store_dwordx2 v10, v[4:5], s[16:17] offset:16 3218; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] 3219; GFX90A-NEXT: s_waitcnt vmcnt(0) 3220; GFX90A-NEXT: s_setpc_b64 s[30:31] 3221; 3222; GFX940-LABEL: v_shuffle_v3i64_v3i64__5_1_3: 3223; GFX940: ; %bb.0: 3224; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3225; GFX940-NEXT: ;;#ASMSTART 3226; GFX940-NEXT: ; def v[0:5] 3227; GFX940-NEXT: ;;#ASMEND 3228; GFX940-NEXT: v_mov_b32_e32 v10, 0 3229; GFX940-NEXT: 
;;#ASMSTART 3230; GFX940-NEXT: ; def v[4:9] 3231; GFX940-NEXT: ;;#ASMEND 3232; GFX940-NEXT: global_store_dwordx2 v10, v[4:5], s[0:1] offset:16 sc0 sc1 3233; GFX940-NEXT: v_mov_b32_e32 v0, v8 3234; GFX940-NEXT: v_mov_b32_e32 v1, v9 3235; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] sc0 sc1 3236; GFX940-NEXT: s_waitcnt vmcnt(0) 3237; GFX940-NEXT: s_setpc_b64 s[30:31] 3238 %vec0 = call <3 x i64> asm "; def $0", "=v"() 3239 %vec1 = call <3 x i64> asm "; def $0", "=v"() 3240 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 1, i32 3> 3241 store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 3242 ret void 3243} 3244 3245define void @v_shuffle_v3i64_v3i64__5_2_3(ptr addrspace(1) inreg %ptr) { 3246; GFX900-LABEL: v_shuffle_v3i64_v3i64__5_2_3: 3247; GFX900: ; %bb.0: 3248; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3249; GFX900-NEXT: ;;#ASMSTART 3250; GFX900-NEXT: ; def v[0:5] 3251; GFX900-NEXT: ;;#ASMEND 3252; GFX900-NEXT: v_mov_b32_e32 v12, 0 3253; GFX900-NEXT: ;;#ASMSTART 3254; GFX900-NEXT: ; def v[6:11] 3255; GFX900-NEXT: ;;#ASMEND 3256; GFX900-NEXT: v_mov_b32_e32 v2, v10 3257; GFX900-NEXT: v_mov_b32_e32 v3, v11 3258; GFX900-NEXT: global_store_dwordx2 v12, v[6:7], s[16:17] offset:16 3259; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] 3260; GFX900-NEXT: s_waitcnt vmcnt(0) 3261; GFX900-NEXT: s_setpc_b64 s[30:31] 3262; 3263; GFX90A-LABEL: v_shuffle_v3i64_v3i64__5_2_3: 3264; GFX90A: ; %bb.0: 3265; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3266; GFX90A-NEXT: ;;#ASMSTART 3267; GFX90A-NEXT: ; def v[0:5] 3268; GFX90A-NEXT: ;;#ASMEND 3269; GFX90A-NEXT: v_mov_b32_e32 v12, 0 3270; GFX90A-NEXT: ;;#ASMSTART 3271; GFX90A-NEXT: ; def v[6:11] 3272; GFX90A-NEXT: ;;#ASMEND 3273; GFX90A-NEXT: v_mov_b32_e32 v2, v10 3274; GFX90A-NEXT: v_mov_b32_e32 v3, v11 3275; GFX90A-NEXT: global_store_dwordx2 v12, v[6:7], s[16:17] offset:16 3276; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] 3277; GFX90A-NEXT: s_waitcnt 
vmcnt(0) 3278; GFX90A-NEXT: s_setpc_b64 s[30:31] 3279; 3280; GFX940-LABEL: v_shuffle_v3i64_v3i64__5_2_3: 3281; GFX940: ; %bb.0: 3282; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3283; GFX940-NEXT: ;;#ASMSTART 3284; GFX940-NEXT: ; def v[0:5] 3285; GFX940-NEXT: ;;#ASMEND 3286; GFX940-NEXT: v_mov_b32_e32 v12, 0 3287; GFX940-NEXT: ;;#ASMSTART 3288; GFX940-NEXT: ; def v[6:11] 3289; GFX940-NEXT: ;;#ASMEND 3290; GFX940-NEXT: global_store_dwordx2 v12, v[6:7], s[0:1] offset:16 sc0 sc1 3291; GFX940-NEXT: v_mov_b32_e32 v2, v10 3292; GFX940-NEXT: v_mov_b32_e32 v3, v11 3293; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] sc0 sc1 3294; GFX940-NEXT: s_waitcnt vmcnt(0) 3295; GFX940-NEXT: s_setpc_b64 s[30:31] 3296 %vec0 = call <3 x i64> asm "; def $0", "=v"() 3297 %vec1 = call <3 x i64> asm "; def $0", "=v"() 3298 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 2, i32 3> 3299 store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 3300 ret void 3301} 3302 3303define void @v_shuffle_v3i64_v3i64__5_4_3(ptr addrspace(1) inreg %ptr) { 3304; GFX900-LABEL: v_shuffle_v3i64_v3i64__5_4_3: 3305; GFX900: ; %bb.0: 3306; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3307; GFX900-NEXT: v_mov_b32_e32 v6, 0 3308; GFX900-NEXT: ;;#ASMSTART 3309; GFX900-NEXT: ; def v[0:5] 3310; GFX900-NEXT: ;;#ASMEND 3311; GFX900-NEXT: global_store_dwordx2 v6, v[0:1], s[16:17] offset:16 3312; GFX900-NEXT: v_mov_b32_e32 v0, v4 3313; GFX900-NEXT: v_mov_b32_e32 v1, v5 3314; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] 3315; GFX900-NEXT: s_waitcnt vmcnt(0) 3316; GFX900-NEXT: s_setpc_b64 s[30:31] 3317; 3318; GFX90A-LABEL: v_shuffle_v3i64_v3i64__5_4_3: 3319; GFX90A: ; %bb.0: 3320; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3321; GFX90A-NEXT: v_mov_b32_e32 v6, 0 3322; GFX90A-NEXT: ;;#ASMSTART 3323; GFX90A-NEXT: ; def v[0:5] 3324; GFX90A-NEXT: ;;#ASMEND 3325; GFX90A-NEXT: global_store_dwordx2 v6, v[0:1], s[16:17] offset:16 3326; GFX90A-NEXT: 
v_mov_b32_e32 v0, v4 3327; GFX90A-NEXT: v_mov_b32_e32 v1, v5 3328; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] 3329; GFX90A-NEXT: s_waitcnt vmcnt(0) 3330; GFX90A-NEXT: s_setpc_b64 s[30:31] 3331; 3332; GFX940-LABEL: v_shuffle_v3i64_v3i64__5_4_3: 3333; GFX940: ; %bb.0: 3334; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3335; GFX940-NEXT: v_mov_b32_e32 v6, 0 3336; GFX940-NEXT: ;;#ASMSTART 3337; GFX940-NEXT: ; def v[0:5] 3338; GFX940-NEXT: ;;#ASMEND 3339; GFX940-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16 sc0 sc1 3340; GFX940-NEXT: v_mov_b32_e32 v0, v4 3341; GFX940-NEXT: v_mov_b32_e32 v1, v5 3342; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 3343; GFX940-NEXT: s_waitcnt vmcnt(0) 3344; GFX940-NEXT: s_setpc_b64 s[30:31] 3345 %vec0 = call <3 x i64> asm "; def $0", "=v"() 3346 %vec1 = call <3 x i64> asm "; def $0", "=v"() 3347 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 4, i32 3> 3348 store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 3349 ret void 3350} 3351 3352define void @v_shuffle_v3i64_v3i64__u_4_4(ptr addrspace(1) inreg %ptr) { 3353; GFX900-LABEL: v_shuffle_v3i64_v3i64__u_4_4: 3354; GFX900: ; %bb.0: 3355; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3356; GFX900-NEXT: v_mov_b32_e32 v6, 0 3357; GFX900-NEXT: ;;#ASMSTART 3358; GFX900-NEXT: ; def v[0:5] 3359; GFX900-NEXT: ;;#ASMEND 3360; GFX900-NEXT: global_store_dwordx2 v6, v[2:3], s[16:17] offset:16 3361; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] 3362; GFX900-NEXT: s_waitcnt vmcnt(0) 3363; GFX900-NEXT: s_setpc_b64 s[30:31] 3364; 3365; GFX90A-LABEL: v_shuffle_v3i64_v3i64__u_4_4: 3366; GFX90A: ; %bb.0: 3367; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3368; GFX90A-NEXT: v_mov_b32_e32 v6, 0 3369; GFX90A-NEXT: ;;#ASMSTART 3370; GFX90A-NEXT: ; def v[0:5] 3371; GFX90A-NEXT: ;;#ASMEND 3372; GFX90A-NEXT: global_store_dwordx2 v6, v[2:3], s[16:17] offset:16 3373; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], 
s[16:17] 3374; GFX90A-NEXT: s_waitcnt vmcnt(0) 3375; GFX90A-NEXT: s_setpc_b64 s[30:31] 3376; 3377; GFX940-LABEL: v_shuffle_v3i64_v3i64__u_4_4: 3378; GFX940: ; %bb.0: 3379; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3380; GFX940-NEXT: v_mov_b32_e32 v6, 0 3381; GFX940-NEXT: ;;#ASMSTART 3382; GFX940-NEXT: ; def v[0:5] 3383; GFX940-NEXT: ;;#ASMEND 3384; GFX940-NEXT: global_store_dwordx2 v6, v[2:3], s[0:1] offset:16 sc0 sc1 3385; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 3386; GFX940-NEXT: s_waitcnt vmcnt(0) 3387; GFX940-NEXT: s_setpc_b64 s[30:31] 3388 %vec0 = call <3 x i64> asm "; def $0", "=v"() 3389 %vec1 = call <3 x i64> asm "; def $0", "=v"() 3390 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 poison, i32 4, i32 4> 3391 store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 3392 ret void 3393} 3394 3395define void @v_shuffle_v3i64_v3i64__0_4_4(ptr addrspace(1) inreg %ptr) { 3396; GFX900-LABEL: v_shuffle_v3i64_v3i64__0_4_4: 3397; GFX900: ; %bb.0: 3398; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3399; GFX900-NEXT: ;;#ASMSTART 3400; GFX900-NEXT: ; def v[0:5] 3401; GFX900-NEXT: ;;#ASMEND 3402; GFX900-NEXT: ;;#ASMSTART 3403; GFX900-NEXT: ; def v[2:7] 3404; GFX900-NEXT: ;;#ASMEND 3405; GFX900-NEXT: v_mov_b32_e32 v8, 0 3406; GFX900-NEXT: v_mov_b32_e32 v2, v4 3407; GFX900-NEXT: v_mov_b32_e32 v3, v5 3408; GFX900-NEXT: global_store_dwordx2 v8, v[4:5], s[16:17] offset:16 3409; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] 3410; GFX900-NEXT: s_waitcnt vmcnt(0) 3411; GFX900-NEXT: s_setpc_b64 s[30:31] 3412; 3413; GFX90A-LABEL: v_shuffle_v3i64_v3i64__0_4_4: 3414; GFX90A: ; %bb.0: 3415; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3416; GFX90A-NEXT: ;;#ASMSTART 3417; GFX90A-NEXT: ; def v[0:5] 3418; GFX90A-NEXT: ;;#ASMEND 3419; GFX90A-NEXT: ;;#ASMSTART 3420; GFX90A-NEXT: ; def v[2:7] 3421; GFX90A-NEXT: ;;#ASMEND 3422; GFX90A-NEXT: v_mov_b32_e32 v8, 0 3423; GFX90A-NEXT: v_mov_b32_e32 v2, v4 3424; 
GFX90A-NEXT: v_mov_b32_e32 v3, v5 3425; GFX90A-NEXT: global_store_dwordx2 v8, v[4:5], s[16:17] offset:16 3426; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] 3427; GFX90A-NEXT: s_waitcnt vmcnt(0) 3428; GFX90A-NEXT: s_setpc_b64 s[30:31] 3429; 3430; GFX940-LABEL: v_shuffle_v3i64_v3i64__0_4_4: 3431; GFX940: ; %bb.0: 3432; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3433; GFX940-NEXT: ;;#ASMSTART 3434; GFX940-NEXT: ; def v[0:5] 3435; GFX940-NEXT: ;;#ASMEND 3436; GFX940-NEXT: v_mov_b32_e32 v8, 0 3437; GFX940-NEXT: ;;#ASMSTART 3438; GFX940-NEXT: ; def v[2:7] 3439; GFX940-NEXT: ;;#ASMEND 3440; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16 sc0 sc1 3441; GFX940-NEXT: v_mov_b32_e32 v2, v4 3442; GFX940-NEXT: v_mov_b32_e32 v3, v5 3443; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 3444; GFX940-NEXT: s_waitcnt vmcnt(0) 3445; GFX940-NEXT: s_setpc_b64 s[30:31] 3446 %vec0 = call <3 x i64> asm "; def $0", "=v"() 3447 %vec1 = call <3 x i64> asm "; def $0", "=v"() 3448 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 0, i32 4, i32 4> 3449 store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 3450 ret void 3451} 3452 3453define void @v_shuffle_v3i64_v3i64__1_4_4(ptr addrspace(1) inreg %ptr) { 3454; GFX900-LABEL: v_shuffle_v3i64_v3i64__1_4_4: 3455; GFX900: ; %bb.0: 3456; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3457; GFX900-NEXT: ;;#ASMSTART 3458; GFX900-NEXT: ; def v[0:5] 3459; GFX900-NEXT: ;;#ASMEND 3460; GFX900-NEXT: ;;#ASMSTART 3461; GFX900-NEXT: ; def v[4:9] 3462; GFX900-NEXT: ;;#ASMEND 3463; GFX900-NEXT: v_mov_b32_e32 v10, 0 3464; GFX900-NEXT: v_mov_b32_e32 v4, v6 3465; GFX900-NEXT: v_mov_b32_e32 v5, v7 3466; GFX900-NEXT: global_store_dwordx2 v10, v[6:7], s[16:17] offset:16 3467; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] 3468; GFX900-NEXT: s_waitcnt vmcnt(0) 3469; GFX900-NEXT: s_setpc_b64 s[30:31] 3470; 3471; GFX90A-LABEL: v_shuffle_v3i64_v3i64__1_4_4: 3472; GFX90A: ; %bb.0: 3473; 
GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3474; GFX90A-NEXT: ;;#ASMSTART 3475; GFX90A-NEXT: ; def v[0:5] 3476; GFX90A-NEXT: ;;#ASMEND 3477; GFX90A-NEXT: ;;#ASMSTART 3478; GFX90A-NEXT: ; def v[4:9] 3479; GFX90A-NEXT: ;;#ASMEND 3480; GFX90A-NEXT: v_mov_b32_e32 v10, 0 3481; GFX90A-NEXT: v_mov_b32_e32 v4, v6 3482; GFX90A-NEXT: v_mov_b32_e32 v5, v7 3483; GFX90A-NEXT: global_store_dwordx2 v10, v[6:7], s[16:17] offset:16 3484; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] 3485; GFX90A-NEXT: s_waitcnt vmcnt(0) 3486; GFX90A-NEXT: s_setpc_b64 s[30:31] 3487; 3488; GFX940-LABEL: v_shuffle_v3i64_v3i64__1_4_4: 3489; GFX940: ; %bb.0: 3490; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3491; GFX940-NEXT: ;;#ASMSTART 3492; GFX940-NEXT: ; def v[0:5] 3493; GFX940-NEXT: ;;#ASMEND 3494; GFX940-NEXT: v_mov_b32_e32 v10, 0 3495; GFX940-NEXT: ;;#ASMSTART 3496; GFX940-NEXT: ; def v[4:9] 3497; GFX940-NEXT: ;;#ASMEND 3498; GFX940-NEXT: global_store_dwordx2 v10, v[6:7], s[0:1] offset:16 sc0 sc1 3499; GFX940-NEXT: v_mov_b32_e32 v4, v6 3500; GFX940-NEXT: v_mov_b32_e32 v5, v7 3501; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] sc0 sc1 3502; GFX940-NEXT: s_waitcnt vmcnt(0) 3503; GFX940-NEXT: s_setpc_b64 s[30:31] 3504 %vec0 = call <3 x i64> asm "; def $0", "=v"() 3505 %vec1 = call <3 x i64> asm "; def $0", "=v"() 3506 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 1, i32 4, i32 4> 3507 store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 3508 ret void 3509} 3510 3511define void @v_shuffle_v3i64_v3i64__2_4_4(ptr addrspace(1) inreg %ptr) { 3512; GFX900-LABEL: v_shuffle_v3i64_v3i64__2_4_4: 3513; GFX900: ; %bb.0: 3514; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3515; GFX900-NEXT: ;;#ASMSTART 3516; GFX900-NEXT: ; def v[6:11] 3517; GFX900-NEXT: ;;#ASMEND 3518; GFX900-NEXT: v_mov_b32_e32 v12, 0 3519; GFX900-NEXT: ;;#ASMSTART 3520; GFX900-NEXT: ; def v[0:5] 3521; GFX900-NEXT: ;;#ASMEND 3522; GFX900-NEXT: v_mov_b32_e32 v6, v4 3523; 
GFX900-NEXT: v_mov_b32_e32 v7, v5 3524; GFX900-NEXT: global_store_dwordx2 v12, v[8:9], s[16:17] offset:16 3525; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] 3526; GFX900-NEXT: s_waitcnt vmcnt(0) 3527; GFX900-NEXT: s_setpc_b64 s[30:31] 3528; 3529; GFX90A-LABEL: v_shuffle_v3i64_v3i64__2_4_4: 3530; GFX90A: ; %bb.0: 3531; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3532; GFX90A-NEXT: ;;#ASMSTART 3533; GFX90A-NEXT: ; def v[6:11] 3534; GFX90A-NEXT: ;;#ASMEND 3535; GFX90A-NEXT: v_mov_b32_e32 v12, 0 3536; GFX90A-NEXT: ;;#ASMSTART 3537; GFX90A-NEXT: ; def v[0:5] 3538; GFX90A-NEXT: ;;#ASMEND 3539; GFX90A-NEXT: v_mov_b32_e32 v6, v4 3540; GFX90A-NEXT: v_mov_b32_e32 v7, v5 3541; GFX90A-NEXT: global_store_dwordx2 v12, v[8:9], s[16:17] offset:16 3542; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] 3543; GFX90A-NEXT: s_waitcnt vmcnt(0) 3544; GFX90A-NEXT: s_setpc_b64 s[30:31] 3545; 3546; GFX940-LABEL: v_shuffle_v3i64_v3i64__2_4_4: 3547; GFX940: ; %bb.0: 3548; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3549; GFX940-NEXT: ;;#ASMSTART 3550; GFX940-NEXT: ; def v[6:11] 3551; GFX940-NEXT: ;;#ASMEND 3552; GFX940-NEXT: v_mov_b32_e32 v12, 0 3553; GFX940-NEXT: ;;#ASMSTART 3554; GFX940-NEXT: ; def v[0:5] 3555; GFX940-NEXT: ;;#ASMEND 3556; GFX940-NEXT: global_store_dwordx2 v12, v[8:9], s[0:1] offset:16 sc0 sc1 3557; GFX940-NEXT: v_mov_b32_e32 v6, v4 3558; GFX940-NEXT: v_mov_b32_e32 v7, v5 3559; GFX940-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] sc0 sc1 3560; GFX940-NEXT: s_waitcnt vmcnt(0) 3561; GFX940-NEXT: s_setpc_b64 s[30:31] 3562 %vec0 = call <3 x i64> asm "; def $0", "=v"() 3563 %vec1 = call <3 x i64> asm "; def $0", "=v"() 3564 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 2, i32 4, i32 4> 3565 store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 3566 ret void 3567} 3568 3569define void @v_shuffle_v3i64_v3i64__3_4_4(ptr addrspace(1) inreg %ptr) { 3570; GFX900-LABEL: v_shuffle_v3i64_v3i64__3_4_4: 3571; GFX900: ; %bb.0: 
3572; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3573; GFX900-NEXT: v_mov_b32_e32 v6, 0 3574; GFX900-NEXT: ;;#ASMSTART 3575; GFX900-NEXT: ; def v[0:5] 3576; GFX900-NEXT: ;;#ASMEND 3577; GFX900-NEXT: global_store_dwordx2 v6, v[2:3], s[16:17] offset:16 3578; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] 3579; GFX900-NEXT: s_waitcnt vmcnt(0) 3580; GFX900-NEXT: s_setpc_b64 s[30:31] 3581; 3582; GFX90A-LABEL: v_shuffle_v3i64_v3i64__3_4_4: 3583; GFX90A: ; %bb.0: 3584; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3585; GFX90A-NEXT: v_mov_b32_e32 v6, 0 3586; GFX90A-NEXT: ;;#ASMSTART 3587; GFX90A-NEXT: ; def v[0:5] 3588; GFX90A-NEXT: ;;#ASMEND 3589; GFX90A-NEXT: global_store_dwordx2 v6, v[2:3], s[16:17] offset:16 3590; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] 3591; GFX90A-NEXT: s_waitcnt vmcnt(0) 3592; GFX90A-NEXT: s_setpc_b64 s[30:31] 3593; 3594; GFX940-LABEL: v_shuffle_v3i64_v3i64__3_4_4: 3595; GFX940: ; %bb.0: 3596; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3597; GFX940-NEXT: v_mov_b32_e32 v6, 0 3598; GFX940-NEXT: ;;#ASMSTART 3599; GFX940-NEXT: ; def v[0:5] 3600; GFX940-NEXT: ;;#ASMEND 3601; GFX940-NEXT: global_store_dwordx2 v6, v[2:3], s[0:1] offset:16 sc0 sc1 3602; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 3603; GFX940-NEXT: s_waitcnt vmcnt(0) 3604; GFX940-NEXT: s_setpc_b64 s[30:31] 3605 %vec0 = call <3 x i64> asm "; def $0", "=v"() 3606 %vec1 = call <3 x i64> asm "; def $0", "=v"() 3607 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 3, i32 4, i32 4> 3608 store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 3609 ret void 3610} 3611 3612define void @v_shuffle_v3i64_v3i64__4_4_4(ptr addrspace(1) inreg %ptr) { 3613; GFX900-LABEL: v_shuffle_v3i64_v3i64__4_4_4: 3614; GFX900: ; %bb.0: 3615; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3616; GFX900-NEXT: ;;#ASMSTART 3617; GFX900-NEXT: ; def v[0:5] 3618; GFX900-NEXT: ;;#ASMEND 3619; GFX900-NEXT: v_mov_b32_e32 v6, 0 3620; 
GFX900-NEXT: v_mov_b32_e32 v4, v2 3621; GFX900-NEXT: v_mov_b32_e32 v5, v3 3622; GFX900-NEXT: global_store_dwordx2 v6, v[2:3], s[16:17] offset:16 3623; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] 3624; GFX900-NEXT: s_waitcnt vmcnt(0) 3625; GFX900-NEXT: s_setpc_b64 s[30:31] 3626; 3627; GFX90A-LABEL: v_shuffle_v3i64_v3i64__4_4_4: 3628; GFX90A: ; %bb.0: 3629; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3630; GFX90A-NEXT: ;;#ASMSTART 3631; GFX90A-NEXT: ; def v[0:5] 3632; GFX90A-NEXT: ;;#ASMEND 3633; GFX90A-NEXT: v_mov_b32_e32 v6, 0 3634; GFX90A-NEXT: v_mov_b32_e32 v4, v2 3635; GFX90A-NEXT: v_mov_b32_e32 v5, v3 3636; GFX90A-NEXT: global_store_dwordx2 v6, v[2:3], s[16:17] offset:16 3637; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] 3638; GFX90A-NEXT: s_waitcnt vmcnt(0) 3639; GFX90A-NEXT: s_setpc_b64 s[30:31] 3640; 3641; GFX940-LABEL: v_shuffle_v3i64_v3i64__4_4_4: 3642; GFX940: ; %bb.0: 3643; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3644; GFX940-NEXT: ;;#ASMSTART 3645; GFX940-NEXT: ; def v[0:5] 3646; GFX940-NEXT: ;;#ASMEND 3647; GFX940-NEXT: v_mov_b32_e32 v6, 0 3648; GFX940-NEXT: v_mov_b32_e32 v4, v2 3649; GFX940-NEXT: v_mov_b32_e32 v5, v3 3650; GFX940-NEXT: global_store_dwordx2 v6, v[2:3], s[0:1] offset:16 sc0 sc1 3651; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 3652; GFX940-NEXT: s_waitcnt vmcnt(0) 3653; GFX940-NEXT: s_setpc_b64 s[30:31] 3654 %vec0 = call <3 x i64> asm "; def $0", "=v"() 3655 %vec1 = call <3 x i64> asm "; def $0", "=v"() 3656 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 4, i32 4, i32 4> 3657 store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 3658 ret void 3659} 3660 3661define void @v_shuffle_v3i64_v3i64__5_4_4(ptr addrspace(1) inreg %ptr) { 3662; GFX900-LABEL: v_shuffle_v3i64_v3i64__5_4_4: 3663; GFX900: ; %bb.0: 3664; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3665; GFX900-NEXT: ;;#ASMSTART 3666; GFX900-NEXT: ; def v[0:5] 3667; GFX900-NEXT: ;;#ASMEND 
3668; GFX900-NEXT: v_mov_b32_e32 v6, 0 3669; GFX900-NEXT: v_mov_b32_e32 v0, v4 3670; GFX900-NEXT: v_mov_b32_e32 v1, v5 3671; GFX900-NEXT: global_store_dwordx2 v6, v[2:3], s[16:17] offset:16 3672; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] 3673; GFX900-NEXT: s_waitcnt vmcnt(0) 3674; GFX900-NEXT: s_setpc_b64 s[30:31] 3675; 3676; GFX90A-LABEL: v_shuffle_v3i64_v3i64__5_4_4: 3677; GFX90A: ; %bb.0: 3678; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3679; GFX90A-NEXT: ;;#ASMSTART 3680; GFX90A-NEXT: ; def v[0:5] 3681; GFX90A-NEXT: ;;#ASMEND 3682; GFX90A-NEXT: v_mov_b32_e32 v6, 0 3683; GFX90A-NEXT: v_mov_b32_e32 v0, v4 3684; GFX90A-NEXT: v_mov_b32_e32 v1, v5 3685; GFX90A-NEXT: global_store_dwordx2 v6, v[2:3], s[16:17] offset:16 3686; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] 3687; GFX90A-NEXT: s_waitcnt vmcnt(0) 3688; GFX90A-NEXT: s_setpc_b64 s[30:31] 3689; 3690; GFX940-LABEL: v_shuffle_v3i64_v3i64__5_4_4: 3691; GFX940: ; %bb.0: 3692; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3693; GFX940-NEXT: ;;#ASMSTART 3694; GFX940-NEXT: ; def v[0:5] 3695; GFX940-NEXT: ;;#ASMEND 3696; GFX940-NEXT: v_mov_b32_e32 v6, 0 3697; GFX940-NEXT: v_mov_b32_e32 v0, v4 3698; GFX940-NEXT: v_mov_b32_e32 v1, v5 3699; GFX940-NEXT: global_store_dwordx2 v6, v[2:3], s[0:1] offset:16 sc0 sc1 3700; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 3701; GFX940-NEXT: s_waitcnt vmcnt(0) 3702; GFX940-NEXT: s_setpc_b64 s[30:31] 3703 %vec0 = call <3 x i64> asm "; def $0", "=v"() 3704 %vec1 = call <3 x i64> asm "; def $0", "=v"() 3705 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 4, i32 4> 3706 store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 3707 ret void 3708} 3709 3710define void @v_shuffle_v3i64_v3i64__5_u_4(ptr addrspace(1) inreg %ptr) { 3711; GFX900-LABEL: v_shuffle_v3i64_v3i64__5_u_4: 3712; GFX900: ; %bb.0: 3713; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3714; GFX900-NEXT: ;;#ASMSTART 3715; GFX900-NEXT: ; 
def v[0:5] 3716; GFX900-NEXT: ;;#ASMEND 3717; GFX900-NEXT: v_mov_b32_e32 v6, 0 3718; GFX900-NEXT: v_mov_b32_e32 v0, v4 3719; GFX900-NEXT: v_mov_b32_e32 v1, v5 3720; GFX900-NEXT: global_store_dwordx2 v6, v[2:3], s[16:17] offset:16 3721; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] 3722; GFX900-NEXT: s_waitcnt vmcnt(0) 3723; GFX900-NEXT: s_setpc_b64 s[30:31] 3724; 3725; GFX90A-LABEL: v_shuffle_v3i64_v3i64__5_u_4: 3726; GFX90A: ; %bb.0: 3727; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3728; GFX90A-NEXT: ;;#ASMSTART 3729; GFX90A-NEXT: ; def v[0:5] 3730; GFX90A-NEXT: ;;#ASMEND 3731; GFX90A-NEXT: v_mov_b32_e32 v6, 0 3732; GFX90A-NEXT: v_mov_b32_e32 v0, v4 3733; GFX90A-NEXT: v_mov_b32_e32 v1, v5 3734; GFX90A-NEXT: global_store_dwordx2 v6, v[2:3], s[16:17] offset:16 3735; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] 3736; GFX90A-NEXT: s_waitcnt vmcnt(0) 3737; GFX90A-NEXT: s_setpc_b64 s[30:31] 3738; 3739; GFX940-LABEL: v_shuffle_v3i64_v3i64__5_u_4: 3740; GFX940: ; %bb.0: 3741; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3742; GFX940-NEXT: ;;#ASMSTART 3743; GFX940-NEXT: ; def v[0:5] 3744; GFX940-NEXT: ;;#ASMEND 3745; GFX940-NEXT: v_mov_b32_e32 v6, 0 3746; GFX940-NEXT: v_mov_b32_e32 v0, v4 3747; GFX940-NEXT: v_mov_b32_e32 v1, v5 3748; GFX940-NEXT: global_store_dwordx2 v6, v[2:3], s[0:1] offset:16 sc0 sc1 3749; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 3750; GFX940-NEXT: s_waitcnt vmcnt(0) 3751; GFX940-NEXT: s_setpc_b64 s[30:31] 3752 %vec0 = call <3 x i64> asm "; def $0", "=v"() 3753 %vec1 = call <3 x i64> asm "; def $0", "=v"() 3754 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 poison, i32 4> 3755 store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 3756 ret void 3757} 3758 3759define void @v_shuffle_v3i64_v3i64__5_0_4(ptr addrspace(1) inreg %ptr) { 3760; GFX900-LABEL: v_shuffle_v3i64_v3i64__5_0_4: 3761; GFX900: ; %bb.0: 3762; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3763; 
GFX900-NEXT: ;;#ASMSTART 3764; GFX900-NEXT: ; def v[0:5] 3765; GFX900-NEXT: ;;#ASMEND 3766; GFX900-NEXT: v_mov_b32_e32 v8, 0 3767; GFX900-NEXT: ;;#ASMSTART 3768; GFX900-NEXT: ; def v[2:7] 3769; GFX900-NEXT: ;;#ASMEND 3770; GFX900-NEXT: global_store_dwordx2 v8, v[4:5], s[16:17] offset:16 3771; GFX900-NEXT: v_mov_b32_e32 v2, v6 3772; GFX900-NEXT: v_mov_b32_e32 v3, v7 3773; GFX900-NEXT: v_mov_b32_e32 v4, v0 3774; GFX900-NEXT: v_mov_b32_e32 v5, v1 3775; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] 3776; GFX900-NEXT: s_waitcnt vmcnt(0) 3777; GFX900-NEXT: s_setpc_b64 s[30:31] 3778; 3779; GFX90A-LABEL: v_shuffle_v3i64_v3i64__5_0_4: 3780; GFX90A: ; %bb.0: 3781; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3782; GFX90A-NEXT: ;;#ASMSTART 3783; GFX90A-NEXT: ; def v[0:5] 3784; GFX90A-NEXT: ;;#ASMEND 3785; GFX90A-NEXT: v_mov_b32_e32 v8, 0 3786; GFX90A-NEXT: ;;#ASMSTART 3787; GFX90A-NEXT: ; def v[2:7] 3788; GFX90A-NEXT: ;;#ASMEND 3789; GFX90A-NEXT: global_store_dwordx2 v8, v[4:5], s[16:17] offset:16 3790; GFX90A-NEXT: v_mov_b32_e32 v2, v6 3791; GFX90A-NEXT: v_mov_b32_e32 v3, v7 3792; GFX90A-NEXT: v_mov_b32_e32 v4, v0 3793; GFX90A-NEXT: v_mov_b32_e32 v5, v1 3794; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] 3795; GFX90A-NEXT: s_waitcnt vmcnt(0) 3796; GFX90A-NEXT: s_setpc_b64 s[30:31] 3797; 3798; GFX940-LABEL: v_shuffle_v3i64_v3i64__5_0_4: 3799; GFX940: ; %bb.0: 3800; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3801; GFX940-NEXT: ;;#ASMSTART 3802; GFX940-NEXT: ; def v[0:5] 3803; GFX940-NEXT: ;;#ASMEND 3804; GFX940-NEXT: v_mov_b32_e32 v8, 0 3805; GFX940-NEXT: ;;#ASMSTART 3806; GFX940-NEXT: ; def v[2:7] 3807; GFX940-NEXT: ;;#ASMEND 3808; GFX940-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16 sc0 sc1 3809; GFX940-NEXT: v_mov_b32_e32 v2, v6 3810; GFX940-NEXT: v_mov_b32_e32 v3, v7 3811; GFX940-NEXT: v_mov_b32_e32 v4, v0 3812; GFX940-NEXT: v_mov_b32_e32 v5, v1 3813; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 3814; 
GFX940-NEXT: s_waitcnt vmcnt(0) 3815; GFX940-NEXT: s_setpc_b64 s[30:31] 3816 %vec0 = call <3 x i64> asm "; def $0", "=v"() 3817 %vec1 = call <3 x i64> asm "; def $0", "=v"() 3818 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 0, i32 4> 3819 store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 3820 ret void 3821} 3822 3823define void @v_shuffle_v3i64_v3i64__5_1_4(ptr addrspace(1) inreg %ptr) { 3824; GFX900-LABEL: v_shuffle_v3i64_v3i64__5_1_4: 3825; GFX900: ; %bb.0: 3826; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3827; GFX900-NEXT: ;;#ASMSTART 3828; GFX900-NEXT: ; def v[0:5] 3829; GFX900-NEXT: ;;#ASMEND 3830; GFX900-NEXT: v_mov_b32_e32 v10, 0 3831; GFX900-NEXT: ;;#ASMSTART 3832; GFX900-NEXT: ; def v[4:9] 3833; GFX900-NEXT: ;;#ASMEND 3834; GFX900-NEXT: v_mov_b32_e32 v0, v8 3835; GFX900-NEXT: v_mov_b32_e32 v1, v9 3836; GFX900-NEXT: global_store_dwordx2 v10, v[6:7], s[16:17] offset:16 3837; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] 3838; GFX900-NEXT: s_waitcnt vmcnt(0) 3839; GFX900-NEXT: s_setpc_b64 s[30:31] 3840; 3841; GFX90A-LABEL: v_shuffle_v3i64_v3i64__5_1_4: 3842; GFX90A: ; %bb.0: 3843; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3844; GFX90A-NEXT: ;;#ASMSTART 3845; GFX90A-NEXT: ; def v[0:5] 3846; GFX90A-NEXT: ;;#ASMEND 3847; GFX90A-NEXT: v_mov_b32_e32 v10, 0 3848; GFX90A-NEXT: ;;#ASMSTART 3849; GFX90A-NEXT: ; def v[4:9] 3850; GFX90A-NEXT: ;;#ASMEND 3851; GFX90A-NEXT: v_mov_b32_e32 v0, v8 3852; GFX90A-NEXT: v_mov_b32_e32 v1, v9 3853; GFX90A-NEXT: global_store_dwordx2 v10, v[6:7], s[16:17] offset:16 3854; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] 3855; GFX90A-NEXT: s_waitcnt vmcnt(0) 3856; GFX90A-NEXT: s_setpc_b64 s[30:31] 3857; 3858; GFX940-LABEL: v_shuffle_v3i64_v3i64__5_1_4: 3859; GFX940: ; %bb.0: 3860; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3861; GFX940-NEXT: ;;#ASMSTART 3862; GFX940-NEXT: ; def v[0:5] 3863; GFX940-NEXT: ;;#ASMEND 3864; GFX940-NEXT: v_mov_b32_e32 v10, 
0 3865; GFX940-NEXT: ;;#ASMSTART 3866; GFX940-NEXT: ; def v[4:9] 3867; GFX940-NEXT: ;;#ASMEND 3868; GFX940-NEXT: global_store_dwordx2 v10, v[6:7], s[0:1] offset:16 sc0 sc1 3869; GFX940-NEXT: v_mov_b32_e32 v0, v8 3870; GFX940-NEXT: v_mov_b32_e32 v1, v9 3871; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] sc0 sc1 3872; GFX940-NEXT: s_waitcnt vmcnt(0) 3873; GFX940-NEXT: s_setpc_b64 s[30:31] 3874 %vec0 = call <3 x i64> asm "; def $0", "=v"() 3875 %vec1 = call <3 x i64> asm "; def $0", "=v"() 3876 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 1, i32 4> 3877 store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 3878 ret void 3879} 3880 3881define void @v_shuffle_v3i64_v3i64__5_2_4(ptr addrspace(1) inreg %ptr) { 3882; GFX900-LABEL: v_shuffle_v3i64_v3i64__5_2_4: 3883; GFX900: ; %bb.0: 3884; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3885; GFX900-NEXT: ;;#ASMSTART 3886; GFX900-NEXT: ; def v[0:5] 3887; GFX900-NEXT: ;;#ASMEND 3888; GFX900-NEXT: v_mov_b32_e32 v12, 0 3889; GFX900-NEXT: ;;#ASMSTART 3890; GFX900-NEXT: ; def v[6:11] 3891; GFX900-NEXT: ;;#ASMEND 3892; GFX900-NEXT: v_mov_b32_e32 v2, v10 3893; GFX900-NEXT: v_mov_b32_e32 v3, v11 3894; GFX900-NEXT: global_store_dwordx2 v12, v[8:9], s[16:17] offset:16 3895; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] 3896; GFX900-NEXT: s_waitcnt vmcnt(0) 3897; GFX900-NEXT: s_setpc_b64 s[30:31] 3898; 3899; GFX90A-LABEL: v_shuffle_v3i64_v3i64__5_2_4: 3900; GFX90A: ; %bb.0: 3901; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3902; GFX90A-NEXT: ;;#ASMSTART 3903; GFX90A-NEXT: ; def v[0:5] 3904; GFX90A-NEXT: ;;#ASMEND 3905; GFX90A-NEXT: v_mov_b32_e32 v12, 0 3906; GFX90A-NEXT: ;;#ASMSTART 3907; GFX90A-NEXT: ; def v[6:11] 3908; GFX90A-NEXT: ;;#ASMEND 3909; GFX90A-NEXT: v_mov_b32_e32 v2, v10 3910; GFX90A-NEXT: v_mov_b32_e32 v3, v11 3911; GFX90A-NEXT: global_store_dwordx2 v12, v[8:9], s[16:17] offset:16 3912; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] 3913; 
GFX90A-NEXT: s_waitcnt vmcnt(0) 3914; GFX90A-NEXT: s_setpc_b64 s[30:31] 3915; 3916; GFX940-LABEL: v_shuffle_v3i64_v3i64__5_2_4: 3917; GFX940: ; %bb.0: 3918; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3919; GFX940-NEXT: ;;#ASMSTART 3920; GFX940-NEXT: ; def v[0:5] 3921; GFX940-NEXT: ;;#ASMEND 3922; GFX940-NEXT: v_mov_b32_e32 v12, 0 3923; GFX940-NEXT: ;;#ASMSTART 3924; GFX940-NEXT: ; def v[6:11] 3925; GFX940-NEXT: ;;#ASMEND 3926; GFX940-NEXT: global_store_dwordx2 v12, v[8:9], s[0:1] offset:16 sc0 sc1 3927; GFX940-NEXT: v_mov_b32_e32 v2, v10 3928; GFX940-NEXT: v_mov_b32_e32 v3, v11 3929; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] sc0 sc1 3930; GFX940-NEXT: s_waitcnt vmcnt(0) 3931; GFX940-NEXT: s_setpc_b64 s[30:31] 3932 %vec0 = call <3 x i64> asm "; def $0", "=v"() 3933 %vec1 = call <3 x i64> asm "; def $0", "=v"() 3934 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 2, i32 4> 3935 store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 3936 ret void 3937} 3938 3939define void @v_shuffle_v3i64_v3i64__5_3_4(ptr addrspace(1) inreg %ptr) { 3940; GFX900-LABEL: v_shuffle_v3i64_v3i64__5_3_4: 3941; GFX900: ; %bb.0: 3942; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3943; GFX900-NEXT: v_mov_b32_e32 v6, 0 3944; GFX900-NEXT: ;;#ASMSTART 3945; GFX900-NEXT: ; def v[0:5] 3946; GFX900-NEXT: ;;#ASMEND 3947; GFX900-NEXT: global_store_dwordx2 v6, v[2:3], s[16:17] offset:16 3948; GFX900-NEXT: v_mov_b32_e32 v2, v4 3949; GFX900-NEXT: v_mov_b32_e32 v3, v5 3950; GFX900-NEXT: v_mov_b32_e32 v4, v0 3951; GFX900-NEXT: v_mov_b32_e32 v5, v1 3952; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] 3953; GFX900-NEXT: s_waitcnt vmcnt(0) 3954; GFX900-NEXT: s_setpc_b64 s[30:31] 3955; 3956; GFX90A-LABEL: v_shuffle_v3i64_v3i64__5_3_4: 3957; GFX90A: ; %bb.0: 3958; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3959; GFX90A-NEXT: v_mov_b32_e32 v6, 0 3960; GFX90A-NEXT: ;;#ASMSTART 3961; GFX90A-NEXT: ; def v[0:5] 3962; GFX90A-NEXT: ;;#ASMEND 
3963; GFX90A-NEXT: global_store_dwordx2 v6, v[2:3], s[16:17] offset:16 3964; GFX90A-NEXT: v_mov_b32_e32 v2, v4 3965; GFX90A-NEXT: v_mov_b32_e32 v3, v5 3966; GFX90A-NEXT: v_mov_b32_e32 v4, v0 3967; GFX90A-NEXT: v_mov_b32_e32 v5, v1 3968; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] 3969; GFX90A-NEXT: s_waitcnt vmcnt(0) 3970; GFX90A-NEXT: s_setpc_b64 s[30:31] 3971; 3972; GFX940-LABEL: v_shuffle_v3i64_v3i64__5_3_4: 3973; GFX940: ; %bb.0: 3974; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3975; GFX940-NEXT: v_mov_b32_e32 v6, 0 3976; GFX940-NEXT: ;;#ASMSTART 3977; GFX940-NEXT: ; def v[0:5] 3978; GFX940-NEXT: ;;#ASMEND 3979; GFX940-NEXT: global_store_dwordx2 v6, v[2:3], s[0:1] offset:16 sc0 sc1 3980; GFX940-NEXT: v_mov_b32_e32 v2, v4 3981; GFX940-NEXT: v_mov_b32_e32 v3, v5 3982; GFX940-NEXT: v_mov_b32_e32 v4, v0 3983; GFX940-NEXT: v_mov_b32_e32 v5, v1 3984; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 3985; GFX940-NEXT: s_waitcnt vmcnt(0) 3986; GFX940-NEXT: s_setpc_b64 s[30:31] 3987 %vec0 = call <3 x i64> asm "; def $0", "=v"() 3988 %vec1 = call <3 x i64> asm "; def $0", "=v"() 3989 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 3, i32 4> 3990 store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 3991 ret void 3992} 3993 3994define void @v_shuffle_v3i64_v3i64__u_5_5(ptr addrspace(1) inreg %ptr) { 3995; GFX900-LABEL: v_shuffle_v3i64_v3i64__u_5_5: 3996; GFX900: ; %bb.0: 3997; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3998; GFX900-NEXT: v_mov_b32_e32 v6, 0 3999; GFX900-NEXT: ;;#ASMSTART 4000; GFX900-NEXT: ; def v[0:5] 4001; GFX900-NEXT: ;;#ASMEND 4002; GFX900-NEXT: global_store_dwordx2 v6, v[4:5], s[16:17] offset:16 4003; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] 4004; GFX900-NEXT: s_waitcnt vmcnt(0) 4005; GFX900-NEXT: s_setpc_b64 s[30:31] 4006; 4007; GFX90A-LABEL: v_shuffle_v3i64_v3i64__u_5_5: 4008; GFX90A: ; %bb.0: 4009; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4010; 
GFX90A-NEXT: v_mov_b32_e32 v6, 0 4011; GFX90A-NEXT: ;;#ASMSTART 4012; GFX90A-NEXT: ; def v[0:5] 4013; GFX90A-NEXT: ;;#ASMEND 4014; GFX90A-NEXT: global_store_dwordx2 v6, v[4:5], s[16:17] offset:16 4015; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] 4016; GFX90A-NEXT: s_waitcnt vmcnt(0) 4017; GFX90A-NEXT: s_setpc_b64 s[30:31] 4018; 4019; GFX940-LABEL: v_shuffle_v3i64_v3i64__u_5_5: 4020; GFX940: ; %bb.0: 4021; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4022; GFX940-NEXT: v_mov_b32_e32 v6, 0 4023; GFX940-NEXT: ;;#ASMSTART 4024; GFX940-NEXT: ; def v[0:5] 4025; GFX940-NEXT: ;;#ASMEND 4026; GFX940-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] offset:16 sc0 sc1 4027; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 4028; GFX940-NEXT: s_waitcnt vmcnt(0) 4029; GFX940-NEXT: s_setpc_b64 s[30:31] 4030 %vec0 = call <3 x i64> asm "; def $0", "=v"() 4031 %vec1 = call <3 x i64> asm "; def $0", "=v"() 4032 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 poison, i32 5, i32 5> 4033 store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 4034 ret void 4035} 4036 4037define void @v_shuffle_v3i64_v3i64__0_5_5(ptr addrspace(1) inreg %ptr) { 4038; GFX900-LABEL: v_shuffle_v3i64_v3i64__0_5_5: 4039; GFX900: ; %bb.0: 4040; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4041; GFX900-NEXT: ;;#ASMSTART 4042; GFX900-NEXT: ; def v[0:5] 4043; GFX900-NEXT: ;;#ASMEND 4044; GFX900-NEXT: ;;#ASMSTART 4045; GFX900-NEXT: ; def v[2:7] 4046; GFX900-NEXT: ;;#ASMEND 4047; GFX900-NEXT: v_mov_b32_e32 v8, 0 4048; GFX900-NEXT: v_mov_b32_e32 v2, v6 4049; GFX900-NEXT: v_mov_b32_e32 v3, v7 4050; GFX900-NEXT: global_store_dwordx2 v8, v[6:7], s[16:17] offset:16 4051; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] 4052; GFX900-NEXT: s_waitcnt vmcnt(0) 4053; GFX900-NEXT: s_setpc_b64 s[30:31] 4054; 4055; GFX90A-LABEL: v_shuffle_v3i64_v3i64__0_5_5: 4056; GFX90A: ; %bb.0: 4057; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4058; GFX90A-NEXT: 
;;#ASMSTART 4059; GFX90A-NEXT: ; def v[0:5] 4060; GFX90A-NEXT: ;;#ASMEND 4061; GFX90A-NEXT: ;;#ASMSTART 4062; GFX90A-NEXT: ; def v[2:7] 4063; GFX90A-NEXT: ;;#ASMEND 4064; GFX90A-NEXT: v_mov_b32_e32 v8, 0 4065; GFX90A-NEXT: v_mov_b32_e32 v2, v6 4066; GFX90A-NEXT: v_mov_b32_e32 v3, v7 4067; GFX90A-NEXT: global_store_dwordx2 v8, v[6:7], s[16:17] offset:16 4068; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] 4069; GFX90A-NEXT: s_waitcnt vmcnt(0) 4070; GFX90A-NEXT: s_setpc_b64 s[30:31] 4071; 4072; GFX940-LABEL: v_shuffle_v3i64_v3i64__0_5_5: 4073; GFX940: ; %bb.0: 4074; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4075; GFX940-NEXT: ;;#ASMSTART 4076; GFX940-NEXT: ; def v[0:5] 4077; GFX940-NEXT: ;;#ASMEND 4078; GFX940-NEXT: v_mov_b32_e32 v8, 0 4079; GFX940-NEXT: ;;#ASMSTART 4080; GFX940-NEXT: ; def v[2:7] 4081; GFX940-NEXT: ;;#ASMEND 4082; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] offset:16 sc0 sc1 4083; GFX940-NEXT: v_mov_b32_e32 v2, v6 4084; GFX940-NEXT: v_mov_b32_e32 v3, v7 4085; GFX940-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1 4086; GFX940-NEXT: s_waitcnt vmcnt(0) 4087; GFX940-NEXT: s_setpc_b64 s[30:31] 4088 %vec0 = call <3 x i64> asm "; def $0", "=v"() 4089 %vec1 = call <3 x i64> asm "; def $0", "=v"() 4090 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 0, i32 5, i32 5> 4091 store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 4092 ret void 4093} 4094 4095define void @v_shuffle_v3i64_v3i64__1_5_5(ptr addrspace(1) inreg %ptr) { 4096; GFX900-LABEL: v_shuffle_v3i64_v3i64__1_5_5: 4097; GFX900: ; %bb.0: 4098; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4099; GFX900-NEXT: ;;#ASMSTART 4100; GFX900-NEXT: ; def v[0:5] 4101; GFX900-NEXT: ;;#ASMEND 4102; GFX900-NEXT: ;;#ASMSTART 4103; GFX900-NEXT: ; def v[4:9] 4104; GFX900-NEXT: ;;#ASMEND 4105; GFX900-NEXT: v_mov_b32_e32 v10, 0 4106; GFX900-NEXT: v_mov_b32_e32 v4, v8 4107; GFX900-NEXT: v_mov_b32_e32 v5, v9 4108; GFX900-NEXT: global_store_dwordx2 v10, 
v[8:9], s[16:17] offset:16 4109; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] 4110; GFX900-NEXT: s_waitcnt vmcnt(0) 4111; GFX900-NEXT: s_setpc_b64 s[30:31] 4112; 4113; GFX90A-LABEL: v_shuffle_v3i64_v3i64__1_5_5: 4114; GFX90A: ; %bb.0: 4115; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4116; GFX90A-NEXT: ;;#ASMSTART 4117; GFX90A-NEXT: ; def v[0:5] 4118; GFX90A-NEXT: ;;#ASMEND 4119; GFX90A-NEXT: ;;#ASMSTART 4120; GFX90A-NEXT: ; def v[4:9] 4121; GFX90A-NEXT: ;;#ASMEND 4122; GFX90A-NEXT: v_mov_b32_e32 v10, 0 4123; GFX90A-NEXT: v_mov_b32_e32 v4, v8 4124; GFX90A-NEXT: v_mov_b32_e32 v5, v9 4125; GFX90A-NEXT: global_store_dwordx2 v10, v[8:9], s[16:17] offset:16 4126; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] 4127; GFX90A-NEXT: s_waitcnt vmcnt(0) 4128; GFX90A-NEXT: s_setpc_b64 s[30:31] 4129; 4130; GFX940-LABEL: v_shuffle_v3i64_v3i64__1_5_5: 4131; GFX940: ; %bb.0: 4132; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4133; GFX940-NEXT: ;;#ASMSTART 4134; GFX940-NEXT: ; def v[0:5] 4135; GFX940-NEXT: ;;#ASMEND 4136; GFX940-NEXT: v_mov_b32_e32 v10, 0 4137; GFX940-NEXT: ;;#ASMSTART 4138; GFX940-NEXT: ; def v[4:9] 4139; GFX940-NEXT: ;;#ASMEND 4140; GFX940-NEXT: global_store_dwordx2 v10, v[8:9], s[0:1] offset:16 sc0 sc1 4141; GFX940-NEXT: v_mov_b32_e32 v4, v8 4142; GFX940-NEXT: v_mov_b32_e32 v5, v9 4143; GFX940-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] sc0 sc1 4144; GFX940-NEXT: s_waitcnt vmcnt(0) 4145; GFX940-NEXT: s_setpc_b64 s[30:31] 4146 %vec0 = call <3 x i64> asm "; def $0", "=v"() 4147 %vec1 = call <3 x i64> asm "; def $0", "=v"() 4148 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 1, i32 5, i32 5> 4149 store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 4150 ret void 4151} 4152 4153define void @v_shuffle_v3i64_v3i64__2_5_5(ptr addrspace(1) inreg %ptr) { 4154; GFX900-LABEL: v_shuffle_v3i64_v3i64__2_5_5: 4155; GFX900: ; %bb.0: 4156; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4157; GFX900-NEXT: 
;;#ASMSTART 4158; GFX900-NEXT: ; def v[6:11] 4159; GFX900-NEXT: ;;#ASMEND 4160; GFX900-NEXT: v_mov_b32_e32 v12, 0 4161; GFX900-NEXT: ;;#ASMSTART 4162; GFX900-NEXT: ; def v[0:5] 4163; GFX900-NEXT: ;;#ASMEND 4164; GFX900-NEXT: v_mov_b32_e32 v8, v4 4165; GFX900-NEXT: v_mov_b32_e32 v9, v5 4166; GFX900-NEXT: global_store_dwordx2 v12, v[10:11], s[16:17] offset:16 4167; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] 4168; GFX900-NEXT: s_waitcnt vmcnt(0) 4169; GFX900-NEXT: s_setpc_b64 s[30:31] 4170; 4171; GFX90A-LABEL: v_shuffle_v3i64_v3i64__2_5_5: 4172; GFX90A: ; %bb.0: 4173; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4174; GFX90A-NEXT: ;;#ASMSTART 4175; GFX90A-NEXT: ; def v[6:11] 4176; GFX90A-NEXT: ;;#ASMEND 4177; GFX90A-NEXT: v_mov_b32_e32 v12, 0 4178; GFX90A-NEXT: ;;#ASMSTART 4179; GFX90A-NEXT: ; def v[0:5] 4180; GFX90A-NEXT: ;;#ASMEND 4181; GFX90A-NEXT: v_mov_b32_e32 v8, v4 4182; GFX90A-NEXT: v_mov_b32_e32 v9, v5 4183; GFX90A-NEXT: global_store_dwordx2 v12, v[10:11], s[16:17] offset:16 4184; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] 4185; GFX90A-NEXT: s_waitcnt vmcnt(0) 4186; GFX90A-NEXT: s_setpc_b64 s[30:31] 4187; 4188; GFX940-LABEL: v_shuffle_v3i64_v3i64__2_5_5: 4189; GFX940: ; %bb.0: 4190; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4191; GFX940-NEXT: ;;#ASMSTART 4192; GFX940-NEXT: ; def v[6:11] 4193; GFX940-NEXT: ;;#ASMEND 4194; GFX940-NEXT: v_mov_b32_e32 v12, 0 4195; GFX940-NEXT: ;;#ASMSTART 4196; GFX940-NEXT: ; def v[0:5] 4197; GFX940-NEXT: ;;#ASMEND 4198; GFX940-NEXT: global_store_dwordx2 v12, v[10:11], s[0:1] offset:16 sc0 sc1 4199; GFX940-NEXT: v_mov_b32_e32 v8, v4 4200; GFX940-NEXT: v_mov_b32_e32 v9, v5 4201; GFX940-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1 4202; GFX940-NEXT: s_waitcnt vmcnt(0) 4203; GFX940-NEXT: s_setpc_b64 s[30:31] 4204 %vec0 = call <3 x i64> asm "; def $0", "=v"() 4205 %vec1 = call <3 x i64> asm "; def $0", "=v"() 4206 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x 
i32> <i32 2, i32 5, i32 5> 4207 store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 4208 ret void 4209} 4210 4211define void @v_shuffle_v3i64_v3i64__3_5_5(ptr addrspace(1) inreg %ptr) { 4212; GFX900-LABEL: v_shuffle_v3i64_v3i64__3_5_5: 4213; GFX900: ; %bb.0: 4214; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4215; GFX900-NEXT: ;;#ASMSTART 4216; GFX900-NEXT: ; def v[0:5] 4217; GFX900-NEXT: ;;#ASMEND 4218; GFX900-NEXT: v_mov_b32_e32 v6, 0 4219; GFX900-NEXT: v_mov_b32_e32 v2, v4 4220; GFX900-NEXT: v_mov_b32_e32 v3, v5 4221; GFX900-NEXT: global_store_dwordx2 v6, v[4:5], s[16:17] offset:16 4222; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] 4223; GFX900-NEXT: s_waitcnt vmcnt(0) 4224; GFX900-NEXT: s_setpc_b64 s[30:31] 4225; 4226; GFX90A-LABEL: v_shuffle_v3i64_v3i64__3_5_5: 4227; GFX90A: ; %bb.0: 4228; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4229; GFX90A-NEXT: ;;#ASMSTART 4230; GFX90A-NEXT: ; def v[0:5] 4231; GFX90A-NEXT: ;;#ASMEND 4232; GFX90A-NEXT: v_mov_b32_e32 v6, 0 4233; GFX90A-NEXT: v_mov_b32_e32 v2, v4 4234; GFX90A-NEXT: v_mov_b32_e32 v3, v5 4235; GFX90A-NEXT: global_store_dwordx2 v6, v[4:5], s[16:17] offset:16 4236; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] 4237; GFX90A-NEXT: s_waitcnt vmcnt(0) 4238; GFX90A-NEXT: s_setpc_b64 s[30:31] 4239; 4240; GFX940-LABEL: v_shuffle_v3i64_v3i64__3_5_5: 4241; GFX940: ; %bb.0: 4242; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4243; GFX940-NEXT: ;;#ASMSTART 4244; GFX940-NEXT: ; def v[0:5] 4245; GFX940-NEXT: ;;#ASMEND 4246; GFX940-NEXT: v_mov_b32_e32 v6, 0 4247; GFX940-NEXT: v_mov_b32_e32 v2, v4 4248; GFX940-NEXT: v_mov_b32_e32 v3, v5 4249; GFX940-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] offset:16 sc0 sc1 4250; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 4251; GFX940-NEXT: s_waitcnt vmcnt(0) 4252; GFX940-NEXT: s_setpc_b64 s[30:31] 4253 %vec0 = call <3 x i64> asm "; def $0", "=v"() 4254 %vec1 = call <3 x i64> asm "; def $0", "=v"() 4255 %shuf = 
shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 3, i32 5, i32 5> 4256 store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 4257 ret void 4258} 4259 4260define void @v_shuffle_v3i64_v3i64__4_5_5(ptr addrspace(1) inreg %ptr) { 4261; GFX900-LABEL: v_shuffle_v3i64_v3i64__4_5_5: 4262; GFX900: ; %bb.0: 4263; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4264; GFX900-NEXT: v_mov_b32_e32 v6, 0 4265; GFX900-NEXT: ;;#ASMSTART 4266; GFX900-NEXT: ; def v[0:5] 4267; GFX900-NEXT: ;;#ASMEND 4268; GFX900-NEXT: global_store_dwordx2 v6, v[4:5], s[16:17] offset:16 4269; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] 4270; GFX900-NEXT: s_waitcnt vmcnt(0) 4271; GFX900-NEXT: s_setpc_b64 s[30:31] 4272; 4273; GFX90A-LABEL: v_shuffle_v3i64_v3i64__4_5_5: 4274; GFX90A: ; %bb.0: 4275; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4276; GFX90A-NEXT: v_mov_b32_e32 v6, 0 4277; GFX90A-NEXT: ;;#ASMSTART 4278; GFX90A-NEXT: ; def v[0:5] 4279; GFX90A-NEXT: ;;#ASMEND 4280; GFX90A-NEXT: global_store_dwordx2 v6, v[4:5], s[16:17] offset:16 4281; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] 4282; GFX90A-NEXT: s_waitcnt vmcnt(0) 4283; GFX90A-NEXT: s_setpc_b64 s[30:31] 4284; 4285; GFX940-LABEL: v_shuffle_v3i64_v3i64__4_5_5: 4286; GFX940: ; %bb.0: 4287; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4288; GFX940-NEXT: v_mov_b32_e32 v6, 0 4289; GFX940-NEXT: ;;#ASMSTART 4290; GFX940-NEXT: ; def v[0:5] 4291; GFX940-NEXT: ;;#ASMEND 4292; GFX940-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] offset:16 sc0 sc1 4293; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 4294; GFX940-NEXT: s_waitcnt vmcnt(0) 4295; GFX940-NEXT: s_setpc_b64 s[30:31] 4296 %vec0 = call <3 x i64> asm "; def $0", "=v"() 4297 %vec1 = call <3 x i64> asm "; def $0", "=v"() 4298 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 4, i32 5, i32 5> 4299 store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 4300 ret void 4301} 4302 4303define void 
@v_shuffle_v3i64_v3i64__5_u_5(ptr addrspace(1) inreg %ptr) { 4304; GFX900-LABEL: v_shuffle_v3i64_v3i64__5_u_5: 4305; GFX900: ; %bb.0: 4306; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4307; GFX900-NEXT: ;;#ASMSTART 4308; GFX900-NEXT: ; def v[0:5] 4309; GFX900-NEXT: ;;#ASMEND 4310; GFX900-NEXT: v_mov_b32_e32 v6, 0 4311; GFX900-NEXT: v_mov_b32_e32 v0, v4 4312; GFX900-NEXT: v_mov_b32_e32 v1, v5 4313; GFX900-NEXT: global_store_dwordx2 v6, v[4:5], s[16:17] offset:16 4314; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] 4315; GFX900-NEXT: s_waitcnt vmcnt(0) 4316; GFX900-NEXT: s_setpc_b64 s[30:31] 4317; 4318; GFX90A-LABEL: v_shuffle_v3i64_v3i64__5_u_5: 4319; GFX90A: ; %bb.0: 4320; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4321; GFX90A-NEXT: ;;#ASMSTART 4322; GFX90A-NEXT: ; def v[0:5] 4323; GFX90A-NEXT: ;;#ASMEND 4324; GFX90A-NEXT: v_mov_b32_e32 v6, 0 4325; GFX90A-NEXT: v_mov_b32_e32 v0, v4 4326; GFX90A-NEXT: v_mov_b32_e32 v1, v5 4327; GFX90A-NEXT: global_store_dwordx2 v6, v[4:5], s[16:17] offset:16 4328; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] 4329; GFX90A-NEXT: s_waitcnt vmcnt(0) 4330; GFX90A-NEXT: s_setpc_b64 s[30:31] 4331; 4332; GFX940-LABEL: v_shuffle_v3i64_v3i64__5_u_5: 4333; GFX940: ; %bb.0: 4334; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4335; GFX940-NEXT: ;;#ASMSTART 4336; GFX940-NEXT: ; def v[0:5] 4337; GFX940-NEXT: ;;#ASMEND 4338; GFX940-NEXT: v_mov_b32_e32 v6, 0 4339; GFX940-NEXT: v_mov_b32_e32 v0, v4 4340; GFX940-NEXT: v_mov_b32_e32 v1, v5 4341; GFX940-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] offset:16 sc0 sc1 4342; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 4343; GFX940-NEXT: s_waitcnt vmcnt(0) 4344; GFX940-NEXT: s_setpc_b64 s[30:31] 4345 %vec0 = call <3 x i64> asm "; def $0", "=v"() 4346 %vec1 = call <3 x i64> asm "; def $0", "=v"() 4347 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 poison, i32 5> 4348 store <3 x i64> %shuf, ptr addrspace(1) %ptr, 
align 32 4349 ret void 4350} 4351 4352define void @v_shuffle_v3i64_v3i64__5_0_5(ptr addrspace(1) inreg %ptr) { 4353; GFX900-LABEL: v_shuffle_v3i64_v3i64__5_0_5: 4354; GFX900: ; %bb.0: 4355; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4356; GFX900-NEXT: ;;#ASMSTART 4357; GFX900-NEXT: ; def v[0:5] 4358; GFX900-NEXT: ;;#ASMEND 4359; GFX900-NEXT: ;;#ASMSTART 4360; GFX900-NEXT: ; def v[2:7] 4361; GFX900-NEXT: ;;#ASMEND 4362; GFX900-NEXT: v_mov_b32_e32 v8, 0 4363; GFX900-NEXT: v_mov_b32_e32 v2, v6 4364; GFX900-NEXT: v_mov_b32_e32 v3, v7 4365; GFX900-NEXT: v_mov_b32_e32 v4, v0 4366; GFX900-NEXT: v_mov_b32_e32 v5, v1 4367; GFX900-NEXT: global_store_dwordx2 v8, v[6:7], s[16:17] offset:16 4368; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] 4369; GFX900-NEXT: s_waitcnt vmcnt(0) 4370; GFX900-NEXT: s_setpc_b64 s[30:31] 4371; 4372; GFX90A-LABEL: v_shuffle_v3i64_v3i64__5_0_5: 4373; GFX90A: ; %bb.0: 4374; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4375; GFX90A-NEXT: ;;#ASMSTART 4376; GFX90A-NEXT: ; def v[0:5] 4377; GFX90A-NEXT: ;;#ASMEND 4378; GFX90A-NEXT: ;;#ASMSTART 4379; GFX90A-NEXT: ; def v[2:7] 4380; GFX90A-NEXT: ;;#ASMEND 4381; GFX90A-NEXT: v_mov_b32_e32 v8, 0 4382; GFX90A-NEXT: v_mov_b32_e32 v2, v6 4383; GFX90A-NEXT: v_mov_b32_e32 v3, v7 4384; GFX90A-NEXT: v_mov_b32_e32 v4, v0 4385; GFX90A-NEXT: v_mov_b32_e32 v5, v1 4386; GFX90A-NEXT: global_store_dwordx2 v8, v[6:7], s[16:17] offset:16 4387; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] 4388; GFX90A-NEXT: s_waitcnt vmcnt(0) 4389; GFX90A-NEXT: s_setpc_b64 s[30:31] 4390; 4391; GFX940-LABEL: v_shuffle_v3i64_v3i64__5_0_5: 4392; GFX940: ; %bb.0: 4393; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4394; GFX940-NEXT: ;;#ASMSTART 4395; GFX940-NEXT: ; def v[0:5] 4396; GFX940-NEXT: ;;#ASMEND 4397; GFX940-NEXT: v_mov_b32_e32 v8, 0 4398; GFX940-NEXT: ;;#ASMSTART 4399; GFX940-NEXT: ; def v[2:7] 4400; GFX940-NEXT: ;;#ASMEND 4401; GFX940-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] offset:16 
sc0 sc1 4402; GFX940-NEXT: v_mov_b32_e32 v2, v6 4403; GFX940-NEXT: v_mov_b32_e32 v3, v7 4404; GFX940-NEXT: v_mov_b32_e32 v4, v0 4405; GFX940-NEXT: v_mov_b32_e32 v5, v1 4406; GFX940-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1 4407; GFX940-NEXT: s_waitcnt vmcnt(0) 4408; GFX940-NEXT: s_setpc_b64 s[30:31] 4409 %vec0 = call <3 x i64> asm "; def $0", "=v"() 4410 %vec1 = call <3 x i64> asm "; def $0", "=v"() 4411 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 0, i32 5> 4412 store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 4413 ret void 4414} 4415 4416define void @v_shuffle_v3i64_v3i64__5_1_5(ptr addrspace(1) inreg %ptr) { 4417; GFX900-LABEL: v_shuffle_v3i64_v3i64__5_1_5: 4418; GFX900: ; %bb.0: 4419; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4420; GFX900-NEXT: ;;#ASMSTART 4421; GFX900-NEXT: ; def v[0:5] 4422; GFX900-NEXT: ;;#ASMEND 4423; GFX900-NEXT: v_mov_b32_e32 v10, 0 4424; GFX900-NEXT: ;;#ASMSTART 4425; GFX900-NEXT: ; def v[4:9] 4426; GFX900-NEXT: ;;#ASMEND 4427; GFX900-NEXT: v_mov_b32_e32 v0, v8 4428; GFX900-NEXT: v_mov_b32_e32 v1, v9 4429; GFX900-NEXT: global_store_dwordx2 v10, v[8:9], s[16:17] offset:16 4430; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] 4431; GFX900-NEXT: s_waitcnt vmcnt(0) 4432; GFX900-NEXT: s_setpc_b64 s[30:31] 4433; 4434; GFX90A-LABEL: v_shuffle_v3i64_v3i64__5_1_5: 4435; GFX90A: ; %bb.0: 4436; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4437; GFX90A-NEXT: ;;#ASMSTART 4438; GFX90A-NEXT: ; def v[0:5] 4439; GFX90A-NEXT: ;;#ASMEND 4440; GFX90A-NEXT: v_mov_b32_e32 v10, 0 4441; GFX90A-NEXT: ;;#ASMSTART 4442; GFX90A-NEXT: ; def v[4:9] 4443; GFX90A-NEXT: ;;#ASMEND 4444; GFX90A-NEXT: v_mov_b32_e32 v0, v8 4445; GFX90A-NEXT: v_mov_b32_e32 v1, v9 4446; GFX90A-NEXT: global_store_dwordx2 v10, v[8:9], s[16:17] offset:16 4447; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] 4448; GFX90A-NEXT: s_waitcnt vmcnt(0) 4449; GFX90A-NEXT: s_setpc_b64 s[30:31] 4450; 4451; GFX940-LABEL: 
v_shuffle_v3i64_v3i64__5_1_5: 4452; GFX940: ; %bb.0: 4453; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4454; GFX940-NEXT: ;;#ASMSTART 4455; GFX940-NEXT: ; def v[0:5] 4456; GFX940-NEXT: ;;#ASMEND 4457; GFX940-NEXT: v_mov_b32_e32 v10, 0 4458; GFX940-NEXT: ;;#ASMSTART 4459; GFX940-NEXT: ; def v[4:9] 4460; GFX940-NEXT: ;;#ASMEND 4461; GFX940-NEXT: global_store_dwordx2 v10, v[8:9], s[0:1] offset:16 sc0 sc1 4462; GFX940-NEXT: v_mov_b32_e32 v0, v8 4463; GFX940-NEXT: v_mov_b32_e32 v1, v9 4464; GFX940-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] sc0 sc1 4465; GFX940-NEXT: s_waitcnt vmcnt(0) 4466; GFX940-NEXT: s_setpc_b64 s[30:31] 4467 %vec0 = call <3 x i64> asm "; def $0", "=v"() 4468 %vec1 = call <3 x i64> asm "; def $0", "=v"() 4469 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 1, i32 5> 4470 store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 4471 ret void 4472} 4473 4474define void @v_shuffle_v3i64_v3i64__5_2_5(ptr addrspace(1) inreg %ptr) { 4475; GFX900-LABEL: v_shuffle_v3i64_v3i64__5_2_5: 4476; GFX900: ; %bb.0: 4477; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4478; GFX900-NEXT: ;;#ASMSTART 4479; GFX900-NEXT: ; def v[0:5] 4480; GFX900-NEXT: ;;#ASMEND 4481; GFX900-NEXT: v_mov_b32_e32 v12, 0 4482; GFX900-NEXT: ;;#ASMSTART 4483; GFX900-NEXT: ; def v[6:11] 4484; GFX900-NEXT: ;;#ASMEND 4485; GFX900-NEXT: v_mov_b32_e32 v2, v10 4486; GFX900-NEXT: v_mov_b32_e32 v3, v11 4487; GFX900-NEXT: global_store_dwordx2 v12, v[10:11], s[16:17] offset:16 4488; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] 4489; GFX900-NEXT: s_waitcnt vmcnt(0) 4490; GFX900-NEXT: s_setpc_b64 s[30:31] 4491; 4492; GFX90A-LABEL: v_shuffle_v3i64_v3i64__5_2_5: 4493; GFX90A: ; %bb.0: 4494; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4495; GFX90A-NEXT: ;;#ASMSTART 4496; GFX90A-NEXT: ; def v[0:5] 4497; GFX90A-NEXT: ;;#ASMEND 4498; GFX90A-NEXT: v_mov_b32_e32 v12, 0 4499; GFX90A-NEXT: ;;#ASMSTART 4500; GFX90A-NEXT: ; def v[6:11] 4501; 
GFX90A-NEXT: ;;#ASMEND 4502; GFX90A-NEXT: v_mov_b32_e32 v2, v10 4503; GFX90A-NEXT: v_mov_b32_e32 v3, v11 4504; GFX90A-NEXT: global_store_dwordx2 v12, v[10:11], s[16:17] offset:16 4505; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] 4506; GFX90A-NEXT: s_waitcnt vmcnt(0) 4507; GFX90A-NEXT: s_setpc_b64 s[30:31] 4508; 4509; GFX940-LABEL: v_shuffle_v3i64_v3i64__5_2_5: 4510; GFX940: ; %bb.0: 4511; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4512; GFX940-NEXT: ;;#ASMSTART 4513; GFX940-NEXT: ; def v[0:5] 4514; GFX940-NEXT: ;;#ASMEND 4515; GFX940-NEXT: v_mov_b32_e32 v12, 0 4516; GFX940-NEXT: ;;#ASMSTART 4517; GFX940-NEXT: ; def v[6:11] 4518; GFX940-NEXT: ;;#ASMEND 4519; GFX940-NEXT: global_store_dwordx2 v12, v[10:11], s[0:1] offset:16 sc0 sc1 4520; GFX940-NEXT: v_mov_b32_e32 v2, v10 4521; GFX940-NEXT: v_mov_b32_e32 v3, v11 4522; GFX940-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] sc0 sc1 4523; GFX940-NEXT: s_waitcnt vmcnt(0) 4524; GFX940-NEXT: s_setpc_b64 s[30:31] 4525 %vec0 = call <3 x i64> asm "; def $0", "=v"() 4526 %vec1 = call <3 x i64> asm "; def $0", "=v"() 4527 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 2, i32 5> 4528 store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 4529 ret void 4530} 4531 4532define void @v_shuffle_v3i64_v3i64__5_3_5(ptr addrspace(1) inreg %ptr) { 4533; GFX900-LABEL: v_shuffle_v3i64_v3i64__5_3_5: 4534; GFX900: ; %bb.0: 4535; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4536; GFX900-NEXT: v_mov_b32_e32 v6, 0 4537; GFX900-NEXT: ;;#ASMSTART 4538; GFX900-NEXT: ; def v[0:5] 4539; GFX900-NEXT: ;;#ASMEND 4540; GFX900-NEXT: global_store_dwordx2 v6, v[4:5], s[16:17] offset:16 4541; GFX900-NEXT: v_mov_b32_e32 v2, v4 4542; GFX900-NEXT: v_mov_b32_e32 v3, v5 4543; GFX900-NEXT: v_mov_b32_e32 v4, v0 4544; GFX900-NEXT: v_mov_b32_e32 v5, v1 4545; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] 4546; GFX900-NEXT: s_waitcnt vmcnt(0) 4547; GFX900-NEXT: s_setpc_b64 s[30:31] 4548; 4549; 
GFX90A-LABEL: v_shuffle_v3i64_v3i64__5_3_5: 4550; GFX90A: ; %bb.0: 4551; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4552; GFX90A-NEXT: v_mov_b32_e32 v6, 0 4553; GFX90A-NEXT: ;;#ASMSTART 4554; GFX90A-NEXT: ; def v[0:5] 4555; GFX90A-NEXT: ;;#ASMEND 4556; GFX90A-NEXT: global_store_dwordx2 v6, v[4:5], s[16:17] offset:16 4557; GFX90A-NEXT: v_mov_b32_e32 v2, v4 4558; GFX90A-NEXT: v_mov_b32_e32 v3, v5 4559; GFX90A-NEXT: v_mov_b32_e32 v4, v0 4560; GFX90A-NEXT: v_mov_b32_e32 v5, v1 4561; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] 4562; GFX90A-NEXT: s_waitcnt vmcnt(0) 4563; GFX90A-NEXT: s_setpc_b64 s[30:31] 4564; 4565; GFX940-LABEL: v_shuffle_v3i64_v3i64__5_3_5: 4566; GFX940: ; %bb.0: 4567; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4568; GFX940-NEXT: v_mov_b32_e32 v6, 0 4569; GFX940-NEXT: ;;#ASMSTART 4570; GFX940-NEXT: ; def v[0:5] 4571; GFX940-NEXT: ;;#ASMEND 4572; GFX940-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] offset:16 sc0 sc1 4573; GFX940-NEXT: v_mov_b32_e32 v2, v4 4574; GFX940-NEXT: v_mov_b32_e32 v3, v5 4575; GFX940-NEXT: v_mov_b32_e32 v4, v0 4576; GFX940-NEXT: v_mov_b32_e32 v5, v1 4577; GFX940-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1 4578; GFX940-NEXT: s_waitcnt vmcnt(0) 4579; GFX940-NEXT: s_setpc_b64 s[30:31] 4580 %vec0 = call <3 x i64> asm "; def $0", "=v"() 4581 %vec1 = call <3 x i64> asm "; def $0", "=v"() 4582 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 3, i32 5> 4583 store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 4584 ret void 4585} 4586 4587define void @v_shuffle_v3i64_v3i64__5_4_5(ptr addrspace(1) inreg %ptr) { 4588; GFX900-LABEL: v_shuffle_v3i64_v3i64__5_4_5: 4589; GFX900: ; %bb.0: 4590; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4591; GFX900-NEXT: ;;#ASMSTART 4592; GFX900-NEXT: ; def v[0:5] 4593; GFX900-NEXT: ;;#ASMEND 4594; GFX900-NEXT: v_mov_b32_e32 v6, 0 4595; GFX900-NEXT: v_mov_b32_e32 v0, v4 4596; GFX900-NEXT: v_mov_b32_e32 v1, v5 4597; GFX900-NEXT: 
global_store_dwordx2 v6, v[4:5], s[16:17] offset:16 4598; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] 4599; GFX900-NEXT: s_waitcnt vmcnt(0) 4600; GFX900-NEXT: s_setpc_b64 s[30:31] 4601; 4602; GFX90A-LABEL: v_shuffle_v3i64_v3i64__5_4_5: 4603; GFX90A: ; %bb.0: 4604; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4605; GFX90A-NEXT: ;;#ASMSTART 4606; GFX90A-NEXT: ; def v[0:5] 4607; GFX90A-NEXT: ;;#ASMEND 4608; GFX90A-NEXT: v_mov_b32_e32 v6, 0 4609; GFX90A-NEXT: v_mov_b32_e32 v0, v4 4610; GFX90A-NEXT: v_mov_b32_e32 v1, v5 4611; GFX90A-NEXT: global_store_dwordx2 v6, v[4:5], s[16:17] offset:16 4612; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] 4613; GFX90A-NEXT: s_waitcnt vmcnt(0) 4614; GFX90A-NEXT: s_setpc_b64 s[30:31] 4615; 4616; GFX940-LABEL: v_shuffle_v3i64_v3i64__5_4_5: 4617; GFX940: ; %bb.0: 4618; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4619; GFX940-NEXT: ;;#ASMSTART 4620; GFX940-NEXT: ; def v[0:5] 4621; GFX940-NEXT: ;;#ASMEND 4622; GFX940-NEXT: v_mov_b32_e32 v6, 0 4623; GFX940-NEXT: v_mov_b32_e32 v0, v4 4624; GFX940-NEXT: v_mov_b32_e32 v1, v5 4625; GFX940-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] offset:16 sc0 sc1 4626; GFX940-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1 4627; GFX940-NEXT: s_waitcnt vmcnt(0) 4628; GFX940-NEXT: s_setpc_b64 s[30:31] 4629 %vec0 = call <3 x i64> asm "; def $0", "=v"() 4630 %vec1 = call <3 x i64> asm "; def $0", "=v"() 4631 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 4, i32 5> 4632 store <3 x i64> %shuf, ptr addrspace(1) %ptr, align 32 4633 ret void 4634} 4635 4636define void @s_shuffle_v3i64_v3i64__u_u_u() { 4637; GFX9-LABEL: s_shuffle_v3i64_v3i64__u_u_u: 4638; GFX9: ; %bb.0: 4639; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4640; GFX9-NEXT: ;;#ASMSTART 4641; GFX9-NEXT: ; use s[8:13] 4642; GFX9-NEXT: ;;#ASMEND 4643; GFX9-NEXT: s_setpc_b64 s[30:31] 4644 %vec0 = call <3 x i64> asm "; def $0", "=s"() 4645 %shuf = shufflevector <3 x i64> %vec0, <3 
x i64> poison, <3 x i32> poison 4646 call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) 4647 ret void 4648} 4649 4650define void @s_shuffle_v3i64_v3i64__0_u_u() { 4651; GFX900-LABEL: s_shuffle_v3i64_v3i64__0_u_u: 4652; GFX900: ; %bb.0: 4653; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4654; GFX900-NEXT: ;;#ASMSTART 4655; GFX900-NEXT: ; def s[8:13] 4656; GFX900-NEXT: ;;#ASMEND 4657; GFX900-NEXT: ;;#ASMSTART 4658; GFX900-NEXT: ; use s[8:13] 4659; GFX900-NEXT: ;;#ASMEND 4660; GFX900-NEXT: s_setpc_b64 s[30:31] 4661; 4662; GFX90A-LABEL: s_shuffle_v3i64_v3i64__0_u_u: 4663; GFX90A: ; %bb.0: 4664; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4665; GFX90A-NEXT: ;;#ASMSTART 4666; GFX90A-NEXT: ; def s[8:13] 4667; GFX90A-NEXT: ;;#ASMEND 4668; GFX90A-NEXT: ;;#ASMSTART 4669; GFX90A-NEXT: ; use s[8:13] 4670; GFX90A-NEXT: ;;#ASMEND 4671; GFX90A-NEXT: s_setpc_b64 s[30:31] 4672; 4673; GFX940-LABEL: s_shuffle_v3i64_v3i64__0_u_u: 4674; GFX940: ; %bb.0: 4675; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4676; GFX940-NEXT: ;;#ASMSTART 4677; GFX940-NEXT: ; def s[8:13] 4678; GFX940-NEXT: ;;#ASMEND 4679; GFX940-NEXT: s_nop 0 4680; GFX940-NEXT: ;;#ASMSTART 4681; GFX940-NEXT: ; use s[8:13] 4682; GFX940-NEXT: ;;#ASMEND 4683; GFX940-NEXT: s_setpc_b64 s[30:31] 4684 %vec0 = call <3 x i64> asm "; def $0", "=s"() 4685 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> <i32 0, i32 poison, i32 poison> 4686 call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) 4687 ret void 4688} 4689 4690define void @s_shuffle_v3i64_v3i64__1_u_u() { 4691; GFX900-LABEL: s_shuffle_v3i64_v3i64__1_u_u: 4692; GFX900: ; %bb.0: 4693; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4694; GFX900-NEXT: ;;#ASMSTART 4695; GFX900-NEXT: ; def s[4:9] 4696; GFX900-NEXT: ;;#ASMEND 4697; GFX900-NEXT: s_mov_b32 s8, s6 4698; GFX900-NEXT: s_mov_b32 s9, s7 4699; GFX900-NEXT: ;;#ASMSTART 4700; GFX900-NEXT: ; use s[8:13] 4701; GFX900-NEXT: ;;#ASMEND 4702; GFX900-NEXT: 
s_setpc_b64 s[30:31] 4703; 4704; GFX90A-LABEL: s_shuffle_v3i64_v3i64__1_u_u: 4705; GFX90A: ; %bb.0: 4706; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4707; GFX90A-NEXT: ;;#ASMSTART 4708; GFX90A-NEXT: ; def s[4:9] 4709; GFX90A-NEXT: ;;#ASMEND 4710; GFX90A-NEXT: s_mov_b32 s8, s6 4711; GFX90A-NEXT: s_mov_b32 s9, s7 4712; GFX90A-NEXT: ;;#ASMSTART 4713; GFX90A-NEXT: ; use s[8:13] 4714; GFX90A-NEXT: ;;#ASMEND 4715; GFX90A-NEXT: s_setpc_b64 s[30:31] 4716; 4717; GFX940-LABEL: s_shuffle_v3i64_v3i64__1_u_u: 4718; GFX940: ; %bb.0: 4719; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4720; GFX940-NEXT: ;;#ASMSTART 4721; GFX940-NEXT: ; def s[0:5] 4722; GFX940-NEXT: ;;#ASMEND 4723; GFX940-NEXT: s_mov_b32 s8, s2 4724; GFX940-NEXT: s_mov_b32 s9, s3 4725; GFX940-NEXT: ;;#ASMSTART 4726; GFX940-NEXT: ; use s[8:13] 4727; GFX940-NEXT: ;;#ASMEND 4728; GFX940-NEXT: s_setpc_b64 s[30:31] 4729 %vec0 = call <3 x i64> asm "; def $0", "=s"() 4730 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> <i32 1, i32 poison, i32 poison> 4731 call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) 4732 ret void 4733} 4734 4735define void @s_shuffle_v3i64_v3i64__2_u_u() { 4736; GFX900-LABEL: s_shuffle_v3i64_v3i64__2_u_u: 4737; GFX900: ; %bb.0: 4738; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4739; GFX900-NEXT: ;;#ASMSTART 4740; GFX900-NEXT: ; def s[4:9] 4741; GFX900-NEXT: ;;#ASMEND 4742; GFX900-NEXT: ;;#ASMSTART 4743; GFX900-NEXT: ; use s[8:13] 4744; GFX900-NEXT: ;;#ASMEND 4745; GFX900-NEXT: s_setpc_b64 s[30:31] 4746; 4747; GFX90A-LABEL: s_shuffle_v3i64_v3i64__2_u_u: 4748; GFX90A: ; %bb.0: 4749; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4750; GFX90A-NEXT: ;;#ASMSTART 4751; GFX90A-NEXT: ; def s[4:9] 4752; GFX90A-NEXT: ;;#ASMEND 4753; GFX90A-NEXT: ;;#ASMSTART 4754; GFX90A-NEXT: ; use s[8:13] 4755; GFX90A-NEXT: ;;#ASMEND 4756; GFX90A-NEXT: s_setpc_b64 s[30:31] 4757; 4758; GFX940-LABEL: s_shuffle_v3i64_v3i64__2_u_u: 4759; GFX940: ; %bb.0: 4760; 
GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4761; GFX940-NEXT: ;;#ASMSTART 4762; GFX940-NEXT: ; def s[0:5] 4763; GFX940-NEXT: ;;#ASMEND 4764; GFX940-NEXT: s_mov_b32 s8, s4 4765; GFX940-NEXT: s_mov_b32 s9, s5 4766; GFX940-NEXT: ;;#ASMSTART 4767; GFX940-NEXT: ; use s[8:13] 4768; GFX940-NEXT: ;;#ASMEND 4769; GFX940-NEXT: s_setpc_b64 s[30:31] 4770 %vec0 = call <3 x i64> asm "; def $0", "=s"() 4771 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> <i32 2, i32 poison, i32 poison> 4772 call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) 4773 ret void 4774} 4775 4776define void @s_shuffle_v3i64_v3i64__3_u_u() { 4777; GFX9-LABEL: s_shuffle_v3i64_v3i64__3_u_u: 4778; GFX9: ; %bb.0: 4779; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4780; GFX9-NEXT: ;;#ASMSTART 4781; GFX9-NEXT: ; use s[8:13] 4782; GFX9-NEXT: ;;#ASMEND 4783; GFX9-NEXT: s_setpc_b64 s[30:31] 4784 %vec0 = call <3 x i64> asm "; def $0", "=s"() 4785 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> <i32 3, i32 poison, i32 poison> 4786 call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) 4787 ret void 4788} 4789 4790define void @s_shuffle_v3i64_v3i64__4_u_u() { 4791; GFX900-LABEL: s_shuffle_v3i64_v3i64__4_u_u: 4792; GFX900: ; %bb.0: 4793; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4794; GFX900-NEXT: ;;#ASMSTART 4795; GFX900-NEXT: ; def s[4:9] 4796; GFX900-NEXT: ;;#ASMEND 4797; GFX900-NEXT: s_mov_b32 s8, s6 4798; GFX900-NEXT: s_mov_b32 s9, s7 4799; GFX900-NEXT: ;;#ASMSTART 4800; GFX900-NEXT: ; use s[8:13] 4801; GFX900-NEXT: ;;#ASMEND 4802; GFX900-NEXT: s_setpc_b64 s[30:31] 4803; 4804; GFX90A-LABEL: s_shuffle_v3i64_v3i64__4_u_u: 4805; GFX90A: ; %bb.0: 4806; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4807; GFX90A-NEXT: ;;#ASMSTART 4808; GFX90A-NEXT: ; def s[4:9] 4809; GFX90A-NEXT: ;;#ASMEND 4810; GFX90A-NEXT: s_mov_b32 s8, s6 4811; GFX90A-NEXT: s_mov_b32 s9, s7 4812; GFX90A-NEXT: ;;#ASMSTART 4813; GFX90A-NEXT: ; use s[8:13] 
4814; GFX90A-NEXT: ;;#ASMEND 4815; GFX90A-NEXT: s_setpc_b64 s[30:31] 4816; 4817; GFX940-LABEL: s_shuffle_v3i64_v3i64__4_u_u: 4818; GFX940: ; %bb.0: 4819; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4820; GFX940-NEXT: ;;#ASMSTART 4821; GFX940-NEXT: ; def s[0:5] 4822; GFX940-NEXT: ;;#ASMEND 4823; GFX940-NEXT: s_mov_b32 s8, s2 4824; GFX940-NEXT: s_mov_b32 s9, s3 4825; GFX940-NEXT: ;;#ASMSTART 4826; GFX940-NEXT: ; use s[8:13] 4827; GFX940-NEXT: ;;#ASMEND 4828; GFX940-NEXT: s_setpc_b64 s[30:31] 4829 %vec0 = call <3 x i64> asm "; def $0", "=s"() 4830 %vec1 = call <3 x i64> asm "; def $0", "=s"() 4831 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 4, i32 poison, i32 poison> 4832 call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) 4833 ret void 4834} 4835 4836define void @s_shuffle_v3i64_v3i64__5_u_u() { 4837; GFX900-LABEL: s_shuffle_v3i64_v3i64__5_u_u: 4838; GFX900: ; %bb.0: 4839; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4840; GFX900-NEXT: ;;#ASMSTART 4841; GFX900-NEXT: ; def s[4:9] 4842; GFX900-NEXT: ;;#ASMEND 4843; GFX900-NEXT: ;;#ASMSTART 4844; GFX900-NEXT: ; use s[8:13] 4845; GFX900-NEXT: ;;#ASMEND 4846; GFX900-NEXT: s_setpc_b64 s[30:31] 4847; 4848; GFX90A-LABEL: s_shuffle_v3i64_v3i64__5_u_u: 4849; GFX90A: ; %bb.0: 4850; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4851; GFX90A-NEXT: ;;#ASMSTART 4852; GFX90A-NEXT: ; def s[4:9] 4853; GFX90A-NEXT: ;;#ASMEND 4854; GFX90A-NEXT: ;;#ASMSTART 4855; GFX90A-NEXT: ; use s[8:13] 4856; GFX90A-NEXT: ;;#ASMEND 4857; GFX90A-NEXT: s_setpc_b64 s[30:31] 4858; 4859; GFX940-LABEL: s_shuffle_v3i64_v3i64__5_u_u: 4860; GFX940: ; %bb.0: 4861; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4862; GFX940-NEXT: ;;#ASMSTART 4863; GFX940-NEXT: ; def s[0:5] 4864; GFX940-NEXT: ;;#ASMEND 4865; GFX940-NEXT: s_mov_b32 s8, s4 4866; GFX940-NEXT: s_mov_b32 s9, s5 4867; GFX940-NEXT: ;;#ASMSTART 4868; GFX940-NEXT: ; use s[8:13] 4869; GFX940-NEXT: ;;#ASMEND 4870; GFX940-NEXT: 
s_setpc_b64 s[30:31] 4871 %vec0 = call <3 x i64> asm "; def $0", "=s"() 4872 %vec1 = call <3 x i64> asm "; def $0", "=s"() 4873 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 poison, i32 poison> 4874 call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) 4875 ret void 4876} 4877 4878define void @s_shuffle_v3i64_v3i64__5_0_u() { 4879; GFX900-LABEL: s_shuffle_v3i64_v3i64__5_0_u: 4880; GFX900: ; %bb.0: 4881; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4882; GFX900-NEXT: ;;#ASMSTART 4883; GFX900-NEXT: ; def s[4:9] 4884; GFX900-NEXT: ;;#ASMEND 4885; GFX900-NEXT: ;;#ASMSTART 4886; GFX900-NEXT: ; def s[8:13] 4887; GFX900-NEXT: ;;#ASMEND 4888; GFX900-NEXT: s_mov_b32 s8, s12 4889; GFX900-NEXT: s_mov_b32 s9, s13 4890; GFX900-NEXT: s_mov_b32 s10, s4 4891; GFX900-NEXT: s_mov_b32 s11, s5 4892; GFX900-NEXT: ;;#ASMSTART 4893; GFX900-NEXT: ; use s[8:13] 4894; GFX900-NEXT: ;;#ASMEND 4895; GFX900-NEXT: s_setpc_b64 s[30:31] 4896; 4897; GFX90A-LABEL: s_shuffle_v3i64_v3i64__5_0_u: 4898; GFX90A: ; %bb.0: 4899; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4900; GFX90A-NEXT: ;;#ASMSTART 4901; GFX90A-NEXT: ; def s[4:9] 4902; GFX90A-NEXT: ;;#ASMEND 4903; GFX90A-NEXT: ;;#ASMSTART 4904; GFX90A-NEXT: ; def s[8:13] 4905; GFX90A-NEXT: ;;#ASMEND 4906; GFX90A-NEXT: s_mov_b32 s8, s12 4907; GFX90A-NEXT: s_mov_b32 s9, s13 4908; GFX90A-NEXT: s_mov_b32 s10, s4 4909; GFX90A-NEXT: s_mov_b32 s11, s5 4910; GFX90A-NEXT: ;;#ASMSTART 4911; GFX90A-NEXT: ; use s[8:13] 4912; GFX90A-NEXT: ;;#ASMEND 4913; GFX90A-NEXT: s_setpc_b64 s[30:31] 4914; 4915; GFX940-LABEL: s_shuffle_v3i64_v3i64__5_0_u: 4916; GFX940: ; %bb.0: 4917; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4918; GFX940-NEXT: ;;#ASMSTART 4919; GFX940-NEXT: ; def s[0:5] 4920; GFX940-NEXT: ;;#ASMEND 4921; GFX940-NEXT: s_mov_b32 s10, s0 4922; GFX940-NEXT: ;;#ASMSTART 4923; GFX940-NEXT: ; def s[4:9] 4924; GFX940-NEXT: ;;#ASMEND 4925; GFX940-NEXT: s_mov_b32 s11, s1 4926; GFX940-NEXT: 
;;#ASMSTART 4927; GFX940-NEXT: ; use s[8:13] 4928; GFX940-NEXT: ;;#ASMEND 4929; GFX940-NEXT: s_setpc_b64 s[30:31] 4930 %vec0 = call <3 x i64> asm "; def $0", "=s"() 4931 %vec1 = call <3 x i64> asm "; def $0", "=s"() 4932 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 0, i32 poison> 4933 call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) 4934 ret void 4935} 4936 4937define void @s_shuffle_v3i64_v3i64__5_1_u() { 4938; GFX900-LABEL: s_shuffle_v3i64_v3i64__5_1_u: 4939; GFX900: ; %bb.0: 4940; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4941; GFX900-NEXT: ;;#ASMSTART 4942; GFX900-NEXT: ; def s[8:13] 4943; GFX900-NEXT: ;;#ASMEND 4944; GFX900-NEXT: ;;#ASMSTART 4945; GFX900-NEXT: ; def s[4:9] 4946; GFX900-NEXT: ;;#ASMEND 4947; GFX900-NEXT: ;;#ASMSTART 4948; GFX900-NEXT: ; use s[8:13] 4949; GFX900-NEXT: ;;#ASMEND 4950; GFX900-NEXT: s_setpc_b64 s[30:31] 4951; 4952; GFX90A-LABEL: s_shuffle_v3i64_v3i64__5_1_u: 4953; GFX90A: ; %bb.0: 4954; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4955; GFX90A-NEXT: ;;#ASMSTART 4956; GFX90A-NEXT: ; def s[8:13] 4957; GFX90A-NEXT: ;;#ASMEND 4958; GFX90A-NEXT: ;;#ASMSTART 4959; GFX90A-NEXT: ; def s[4:9] 4960; GFX90A-NEXT: ;;#ASMEND 4961; GFX90A-NEXT: ;;#ASMSTART 4962; GFX90A-NEXT: ; use s[8:13] 4963; GFX90A-NEXT: ;;#ASMEND 4964; GFX90A-NEXT: s_setpc_b64 s[30:31] 4965; 4966; GFX940-LABEL: s_shuffle_v3i64_v3i64__5_1_u: 4967; GFX940: ; %bb.0: 4968; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4969; GFX940-NEXT: ;;#ASMSTART 4970; GFX940-NEXT: ; def s[8:13] 4971; GFX940-NEXT: ;;#ASMEND 4972; GFX940-NEXT: ;;#ASMSTART 4973; GFX940-NEXT: ; def s[0:5] 4974; GFX940-NEXT: ;;#ASMEND 4975; GFX940-NEXT: s_mov_b32 s8, s4 4976; GFX940-NEXT: s_mov_b32 s9, s5 4977; GFX940-NEXT: ;;#ASMSTART 4978; GFX940-NEXT: ; use s[8:13] 4979; GFX940-NEXT: ;;#ASMEND 4980; GFX940-NEXT: s_setpc_b64 s[30:31] 4981 %vec0 = call <3 x i64> asm "; def $0", "=s"() 4982 %vec1 = call <3 x i64> asm "; def $0", "=s"() 
4983 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 1, i32 poison> 4984 call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) 4985 ret void 4986} 4987 4988define void @s_shuffle_v3i64_v3i64__5_2_u() { 4989; GFX900-LABEL: s_shuffle_v3i64_v3i64__5_2_u: 4990; GFX900: ; %bb.0: 4991; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4992; GFX900-NEXT: ;;#ASMSTART 4993; GFX900-NEXT: ; def s[8:13] 4994; GFX900-NEXT: ;;#ASMEND 4995; GFX900-NEXT: ;;#ASMSTART 4996; GFX900-NEXT: ; def s[4:9] 4997; GFX900-NEXT: ;;#ASMEND 4998; GFX900-NEXT: s_mov_b32 s10, s12 4999; GFX900-NEXT: s_mov_b32 s11, s13 5000; GFX900-NEXT: ;;#ASMSTART 5001; GFX900-NEXT: ; use s[8:13] 5002; GFX900-NEXT: ;;#ASMEND 5003; GFX900-NEXT: s_setpc_b64 s[30:31] 5004; 5005; GFX90A-LABEL: s_shuffle_v3i64_v3i64__5_2_u: 5006; GFX90A: ; %bb.0: 5007; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5008; GFX90A-NEXT: ;;#ASMSTART 5009; GFX90A-NEXT: ; def s[8:13] 5010; GFX90A-NEXT: ;;#ASMEND 5011; GFX90A-NEXT: ;;#ASMSTART 5012; GFX90A-NEXT: ; def s[4:9] 5013; GFX90A-NEXT: ;;#ASMEND 5014; GFX90A-NEXT: s_mov_b32 s10, s12 5015; GFX90A-NEXT: s_mov_b32 s11, s13 5016; GFX90A-NEXT: ;;#ASMSTART 5017; GFX90A-NEXT: ; use s[8:13] 5018; GFX90A-NEXT: ;;#ASMEND 5019; GFX90A-NEXT: s_setpc_b64 s[30:31] 5020; 5021; GFX940-LABEL: s_shuffle_v3i64_v3i64__5_2_u: 5022; GFX940: ; %bb.0: 5023; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5024; GFX940-NEXT: ;;#ASMSTART 5025; GFX940-NEXT: ; def s[8:13] 5026; GFX940-NEXT: ;;#ASMEND 5027; GFX940-NEXT: ;;#ASMSTART 5028; GFX940-NEXT: ; def s[0:5] 5029; GFX940-NEXT: ;;#ASMEND 5030; GFX940-NEXT: s_mov_b32 s8, s12 5031; GFX940-NEXT: s_mov_b32 s9, s13 5032; GFX940-NEXT: s_mov_b32 s10, s4 5033; GFX940-NEXT: s_mov_b32 s11, s5 5034; GFX940-NEXT: ;;#ASMSTART 5035; GFX940-NEXT: ; use s[8:13] 5036; GFX940-NEXT: ;;#ASMEND 5037; GFX940-NEXT: s_setpc_b64 s[30:31] 5038 %vec0 = call <3 x i64> asm "; def $0", "=s"() 5039 %vec1 = call <3 x i64> asm "; def 
$0", "=s"() 5040 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 2, i32 poison> 5041 call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) 5042 ret void 5043} 5044 5045define void @s_shuffle_v3i64_v3i64__5_3_u() { 5046; GFX900-LABEL: s_shuffle_v3i64_v3i64__5_3_u: 5047; GFX900: ; %bb.0: 5048; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5049; GFX900-NEXT: ;;#ASMSTART 5050; GFX900-NEXT: ; def s[4:9] 5051; GFX900-NEXT: ;;#ASMEND 5052; GFX900-NEXT: s_mov_b32 s10, s4 5053; GFX900-NEXT: s_mov_b32 s11, s5 5054; GFX900-NEXT: ;;#ASMSTART 5055; GFX900-NEXT: ; use s[8:13] 5056; GFX900-NEXT: ;;#ASMEND 5057; GFX900-NEXT: s_setpc_b64 s[30:31] 5058; 5059; GFX90A-LABEL: s_shuffle_v3i64_v3i64__5_3_u: 5060; GFX90A: ; %bb.0: 5061; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5062; GFX90A-NEXT: ;;#ASMSTART 5063; GFX90A-NEXT: ; def s[4:9] 5064; GFX90A-NEXT: ;;#ASMEND 5065; GFX90A-NEXT: s_mov_b32 s10, s4 5066; GFX90A-NEXT: s_mov_b32 s11, s5 5067; GFX90A-NEXT: ;;#ASMSTART 5068; GFX90A-NEXT: ; use s[8:13] 5069; GFX90A-NEXT: ;;#ASMEND 5070; GFX90A-NEXT: s_setpc_b64 s[30:31] 5071; 5072; GFX940-LABEL: s_shuffle_v3i64_v3i64__5_3_u: 5073; GFX940: ; %bb.0: 5074; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5075; GFX940-NEXT: ;;#ASMSTART 5076; GFX940-NEXT: ; def s[0:5] 5077; GFX940-NEXT: ;;#ASMEND 5078; GFX940-NEXT: s_mov_b32 s8, s4 5079; GFX940-NEXT: s_mov_b32 s9, s5 5080; GFX940-NEXT: s_mov_b32 s10, s0 5081; GFX940-NEXT: s_mov_b32 s11, s1 5082; GFX940-NEXT: ;;#ASMSTART 5083; GFX940-NEXT: ; use s[8:13] 5084; GFX940-NEXT: ;;#ASMEND 5085; GFX940-NEXT: s_setpc_b64 s[30:31] 5086 %vec0 = call <3 x i64> asm "; def $0", "=s"() 5087 %vec1 = call <3 x i64> asm "; def $0", "=s"() 5088 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 3, i32 poison> 5089 call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) 5090 ret void 5091} 5092 5093define void @s_shuffle_v3i64_v3i64__5_4_u() { 5094; GFX9-LABEL: 
s_shuffle_v3i64_v3i64__5_4_u: 5095; GFX9: ; %bb.0: 5096; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5097; GFX9-NEXT: ;;#ASMSTART 5098; GFX9-NEXT: ; def s[8:13] 5099; GFX9-NEXT: ;;#ASMEND 5100; GFX9-NEXT: s_mov_b32 s8, s12 5101; GFX9-NEXT: s_mov_b32 s9, s13 5102; GFX9-NEXT: ;;#ASMSTART 5103; GFX9-NEXT: ; use s[8:13] 5104; GFX9-NEXT: ;;#ASMEND 5105; GFX9-NEXT: s_setpc_b64 s[30:31] 5106 %vec0 = call <3 x i64> asm "; def $0", "=s"() 5107 %vec1 = call <3 x i64> asm "; def $0", "=s"() 5108 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 4, i32 poison> 5109 call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) 5110 ret void 5111} 5112 5113define void @s_shuffle_v3i64_v3i64__5_5_u() { 5114; GFX900-LABEL: s_shuffle_v3i64_v3i64__5_5_u: 5115; GFX900: ; %bb.0: 5116; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5117; GFX900-NEXT: ;;#ASMSTART 5118; GFX900-NEXT: ; def s[8:13] 5119; GFX900-NEXT: ;;#ASMEND 5120; GFX900-NEXT: s_mov_b32 s8, s12 5121; GFX900-NEXT: s_mov_b32 s9, s13 5122; GFX900-NEXT: s_mov_b32 s10, s12 5123; GFX900-NEXT: s_mov_b32 s11, s13 5124; GFX900-NEXT: ;;#ASMSTART 5125; GFX900-NEXT: ; use s[8:13] 5126; GFX900-NEXT: ;;#ASMEND 5127; GFX900-NEXT: s_setpc_b64 s[30:31] 5128; 5129; GFX90A-LABEL: s_shuffle_v3i64_v3i64__5_5_u: 5130; GFX90A: ; %bb.0: 5131; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5132; GFX90A-NEXT: ;;#ASMSTART 5133; GFX90A-NEXT: ; def s[8:13] 5134; GFX90A-NEXT: ;;#ASMEND 5135; GFX90A-NEXT: s_mov_b32 s8, s12 5136; GFX90A-NEXT: s_mov_b32 s9, s13 5137; GFX90A-NEXT: s_mov_b32 s10, s12 5138; GFX90A-NEXT: s_mov_b32 s11, s13 5139; GFX90A-NEXT: ;;#ASMSTART 5140; GFX90A-NEXT: ; use s[8:13] 5141; GFX90A-NEXT: ;;#ASMEND 5142; GFX90A-NEXT: s_setpc_b64 s[30:31] 5143; 5144; GFX940-LABEL: s_shuffle_v3i64_v3i64__5_5_u: 5145; GFX940: ; %bb.0: 5146; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5147; GFX940-NEXT: ;;#ASMSTART 5148; GFX940-NEXT: ; def s[0:5] 5149; GFX940-NEXT: ;;#ASMEND 5150; 
GFX940-NEXT: s_mov_b32 s8, s4 5151; GFX940-NEXT: s_mov_b32 s9, s5 5152; GFX940-NEXT: s_mov_b32 s10, s4 5153; GFX940-NEXT: s_mov_b32 s11, s5 5154; GFX940-NEXT: ;;#ASMSTART 5155; GFX940-NEXT: ; use s[8:13] 5156; GFX940-NEXT: ;;#ASMEND 5157; GFX940-NEXT: s_setpc_b64 s[30:31] 5158 %vec0 = call <3 x i64> asm "; def $0", "=s"() 5159 %vec1 = call <3 x i64> asm "; def $0", "=s"() 5160 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 5, i32 poison> 5161 call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) 5162 ret void 5163} 5164 5165define void @s_shuffle_v3i64_v3i64__5_5_0() { 5166; GFX900-LABEL: s_shuffle_v3i64_v3i64__5_5_0: 5167; GFX900: ; %bb.0: 5168; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5169; GFX900-NEXT: ;;#ASMSTART 5170; GFX900-NEXT: ; def s[4:9] 5171; GFX900-NEXT: ;;#ASMEND 5172; GFX900-NEXT: ;;#ASMSTART 5173; GFX900-NEXT: ; def s[8:13] 5174; GFX900-NEXT: ;;#ASMEND 5175; GFX900-NEXT: s_mov_b32 s8, s12 5176; GFX900-NEXT: s_mov_b32 s9, s13 5177; GFX900-NEXT: s_mov_b32 s10, s12 5178; GFX900-NEXT: s_mov_b32 s11, s13 5179; GFX900-NEXT: s_mov_b32 s12, s4 5180; GFX900-NEXT: s_mov_b32 s13, s5 5181; GFX900-NEXT: ;;#ASMSTART 5182; GFX900-NEXT: ; use s[8:13] 5183; GFX900-NEXT: ;;#ASMEND 5184; GFX900-NEXT: s_setpc_b64 s[30:31] 5185; 5186; GFX90A-LABEL: s_shuffle_v3i64_v3i64__5_5_0: 5187; GFX90A: ; %bb.0: 5188; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5189; GFX90A-NEXT: ;;#ASMSTART 5190; GFX90A-NEXT: ; def s[4:9] 5191; GFX90A-NEXT: ;;#ASMEND 5192; GFX90A-NEXT: ;;#ASMSTART 5193; GFX90A-NEXT: ; def s[8:13] 5194; GFX90A-NEXT: ;;#ASMEND 5195; GFX90A-NEXT: s_mov_b32 s8, s12 5196; GFX90A-NEXT: s_mov_b32 s9, s13 5197; GFX90A-NEXT: s_mov_b32 s10, s12 5198; GFX90A-NEXT: s_mov_b32 s11, s13 5199; GFX90A-NEXT: s_mov_b32 s12, s4 5200; GFX90A-NEXT: s_mov_b32 s13, s5 5201; GFX90A-NEXT: ;;#ASMSTART 5202; GFX90A-NEXT: ; use s[8:13] 5203; GFX90A-NEXT: ;;#ASMEND 5204; GFX90A-NEXT: s_setpc_b64 s[30:31] 5205; 5206; GFX940-LABEL: 
s_shuffle_v3i64_v3i64__5_5_0: 5207; GFX940: ; %bb.0: 5208; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5209; GFX940-NEXT: ;;#ASMSTART 5210; GFX940-NEXT: ; def s[8:13] 5211; GFX940-NEXT: ;;#ASMEND 5212; GFX940-NEXT: ;;#ASMSTART 5213; GFX940-NEXT: ; def s[0:5] 5214; GFX940-NEXT: ;;#ASMEND 5215; GFX940-NEXT: s_mov_b32 s8, s12 5216; GFX940-NEXT: s_mov_b32 s9, s13 5217; GFX940-NEXT: s_mov_b32 s10, s12 5218; GFX940-NEXT: s_mov_b32 s11, s13 5219; GFX940-NEXT: s_mov_b32 s12, s0 5220; GFX940-NEXT: s_mov_b32 s13, s1 5221; GFX940-NEXT: ;;#ASMSTART 5222; GFX940-NEXT: ; use s[8:13] 5223; GFX940-NEXT: ;;#ASMEND 5224; GFX940-NEXT: s_setpc_b64 s[30:31] 5225 %vec0 = call <3 x i64> asm "; def $0", "=s"() 5226 %vec1 = call <3 x i64> asm "; def $0", "=s"() 5227 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 5, i32 0> 5228 call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) 5229 ret void 5230} 5231 5232define void @s_shuffle_v3i64_v3i64__5_5_1() { 5233; GFX900-LABEL: s_shuffle_v3i64_v3i64__5_5_1: 5234; GFX900: ; %bb.0: 5235; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5236; GFX900-NEXT: ;;#ASMSTART 5237; GFX900-NEXT: ; def s[4:9] 5238; GFX900-NEXT: ;;#ASMEND 5239; GFX900-NEXT: ;;#ASMSTART 5240; GFX900-NEXT: ; def s[8:13] 5241; GFX900-NEXT: ;;#ASMEND 5242; GFX900-NEXT: s_mov_b32 s8, s12 5243; GFX900-NEXT: s_mov_b32 s9, s13 5244; GFX900-NEXT: s_mov_b32 s10, s12 5245; GFX900-NEXT: s_mov_b32 s11, s13 5246; GFX900-NEXT: s_mov_b32 s12, s6 5247; GFX900-NEXT: s_mov_b32 s13, s7 5248; GFX900-NEXT: ;;#ASMSTART 5249; GFX900-NEXT: ; use s[8:13] 5250; GFX900-NEXT: ;;#ASMEND 5251; GFX900-NEXT: s_setpc_b64 s[30:31] 5252; 5253; GFX90A-LABEL: s_shuffle_v3i64_v3i64__5_5_1: 5254; GFX90A: ; %bb.0: 5255; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5256; GFX90A-NEXT: ;;#ASMSTART 5257; GFX90A-NEXT: ; def s[4:9] 5258; GFX90A-NEXT: ;;#ASMEND 5259; GFX90A-NEXT: ;;#ASMSTART 5260; GFX90A-NEXT: ; def s[8:13] 5261; GFX90A-NEXT: ;;#ASMEND 5262; 
GFX90A-NEXT: s_mov_b32 s8, s12 5263; GFX90A-NEXT: s_mov_b32 s9, s13 5264; GFX90A-NEXT: s_mov_b32 s10, s12 5265; GFX90A-NEXT: s_mov_b32 s11, s13 5266; GFX90A-NEXT: s_mov_b32 s12, s6 5267; GFX90A-NEXT: s_mov_b32 s13, s7 5268; GFX90A-NEXT: ;;#ASMSTART 5269; GFX90A-NEXT: ; use s[8:13] 5270; GFX90A-NEXT: ;;#ASMEND 5271; GFX90A-NEXT: s_setpc_b64 s[30:31] 5272; 5273; GFX940-LABEL: s_shuffle_v3i64_v3i64__5_5_1: 5274; GFX940: ; %bb.0: 5275; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5276; GFX940-NEXT: ;;#ASMSTART 5277; GFX940-NEXT: ; def s[8:13] 5278; GFX940-NEXT: ;;#ASMEND 5279; GFX940-NEXT: ;;#ASMSTART 5280; GFX940-NEXT: ; def s[0:5] 5281; GFX940-NEXT: ;;#ASMEND 5282; GFX940-NEXT: s_mov_b32 s8, s12 5283; GFX940-NEXT: s_mov_b32 s9, s13 5284; GFX940-NEXT: s_mov_b32 s10, s12 5285; GFX940-NEXT: s_mov_b32 s11, s13 5286; GFX940-NEXT: s_mov_b32 s12, s2 5287; GFX940-NEXT: s_mov_b32 s13, s3 5288; GFX940-NEXT: ;;#ASMSTART 5289; GFX940-NEXT: ; use s[8:13] 5290; GFX940-NEXT: ;;#ASMEND 5291; GFX940-NEXT: s_setpc_b64 s[30:31] 5292 %vec0 = call <3 x i64> asm "; def $0", "=s"() 5293 %vec1 = call <3 x i64> asm "; def $0", "=s"() 5294 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 5, i32 1> 5295 call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) 5296 ret void 5297} 5298 5299define void @s_shuffle_v3i64_v3i64__5_5_2() { 5300; GFX900-LABEL: s_shuffle_v3i64_v3i64__5_5_2: 5301; GFX900: ; %bb.0: 5302; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5303; GFX900-NEXT: ;;#ASMSTART 5304; GFX900-NEXT: ; def s[8:13] 5305; GFX900-NEXT: ;;#ASMEND 5306; GFX900-NEXT: ;;#ASMSTART 5307; GFX900-NEXT: ; def s[16:21] 5308; GFX900-NEXT: ;;#ASMEND 5309; GFX900-NEXT: s_mov_b32 s8, s20 5310; GFX900-NEXT: s_mov_b32 s9, s21 5311; GFX900-NEXT: s_mov_b32 s10, s20 5312; GFX900-NEXT: s_mov_b32 s11, s21 5313; GFX900-NEXT: ;;#ASMSTART 5314; GFX900-NEXT: ; use s[8:13] 5315; GFX900-NEXT: ;;#ASMEND 5316; GFX900-NEXT: s_setpc_b64 s[30:31] 5317; 5318; GFX90A-LABEL: 
s_shuffle_v3i64_v3i64__5_5_2: 5319; GFX90A: ; %bb.0: 5320; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5321; GFX90A-NEXT: ;;#ASMSTART 5322; GFX90A-NEXT: ; def s[8:13] 5323; GFX90A-NEXT: ;;#ASMEND 5324; GFX90A-NEXT: ;;#ASMSTART 5325; GFX90A-NEXT: ; def s[16:21] 5326; GFX90A-NEXT: ;;#ASMEND 5327; GFX90A-NEXT: s_mov_b32 s8, s20 5328; GFX90A-NEXT: s_mov_b32 s9, s21 5329; GFX90A-NEXT: s_mov_b32 s10, s20 5330; GFX90A-NEXT: s_mov_b32 s11, s21 5331; GFX90A-NEXT: ;;#ASMSTART 5332; GFX90A-NEXT: ; use s[8:13] 5333; GFX90A-NEXT: ;;#ASMEND 5334; GFX90A-NEXT: s_setpc_b64 s[30:31] 5335; 5336; GFX940-LABEL: s_shuffle_v3i64_v3i64__5_5_2: 5337; GFX940: ; %bb.0: 5338; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5339; GFX940-NEXT: ;;#ASMSTART 5340; GFX940-NEXT: ; def s[8:13] 5341; GFX940-NEXT: ;;#ASMEND 5342; GFX940-NEXT: ;;#ASMSTART 5343; GFX940-NEXT: ; def s[0:5] 5344; GFX940-NEXT: ;;#ASMEND 5345; GFX940-NEXT: s_mov_b32 s8, s4 5346; GFX940-NEXT: s_mov_b32 s9, s5 5347; GFX940-NEXT: s_mov_b32 s10, s4 5348; GFX940-NEXT: s_mov_b32 s11, s5 5349; GFX940-NEXT: ;;#ASMSTART 5350; GFX940-NEXT: ; use s[8:13] 5351; GFX940-NEXT: ;;#ASMEND 5352; GFX940-NEXT: s_setpc_b64 s[30:31] 5353 %vec0 = call <3 x i64> asm "; def $0", "=s"() 5354 %vec1 = call <3 x i64> asm "; def $0", "=s"() 5355 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 5, i32 2> 5356 call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) 5357 ret void 5358} 5359 5360define void @s_shuffle_v3i64_v3i64__5_5_3() { 5361; GFX900-LABEL: s_shuffle_v3i64_v3i64__5_5_3: 5362; GFX900: ; %bb.0: 5363; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5364; GFX900-NEXT: ;;#ASMSTART 5365; GFX900-NEXT: ; def s[12:17] 5366; GFX900-NEXT: ;;#ASMEND 5367; GFX900-NEXT: s_mov_b32 s8, s16 5368; GFX900-NEXT: s_mov_b32 s9, s17 5369; GFX900-NEXT: s_mov_b32 s10, s16 5370; GFX900-NEXT: s_mov_b32 s11, s17 5371; GFX900-NEXT: ;;#ASMSTART 5372; GFX900-NEXT: ; use s[8:13] 5373; GFX900-NEXT: ;;#ASMEND 5374; 
GFX900-NEXT: s_setpc_b64 s[30:31] 5375; 5376; GFX90A-LABEL: s_shuffle_v3i64_v3i64__5_5_3: 5377; GFX90A: ; %bb.0: 5378; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5379; GFX90A-NEXT: ;;#ASMSTART 5380; GFX90A-NEXT: ; def s[12:17] 5381; GFX90A-NEXT: ;;#ASMEND 5382; GFX90A-NEXT: s_mov_b32 s8, s16 5383; GFX90A-NEXT: s_mov_b32 s9, s17 5384; GFX90A-NEXT: s_mov_b32 s10, s16 5385; GFX90A-NEXT: s_mov_b32 s11, s17 5386; GFX90A-NEXT: ;;#ASMSTART 5387; GFX90A-NEXT: ; use s[8:13] 5388; GFX90A-NEXT: ;;#ASMEND 5389; GFX90A-NEXT: s_setpc_b64 s[30:31] 5390; 5391; GFX940-LABEL: s_shuffle_v3i64_v3i64__5_5_3: 5392; GFX940: ; %bb.0: 5393; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5394; GFX940-NEXT: ;;#ASMSTART 5395; GFX940-NEXT: ; def s[0:5] 5396; GFX940-NEXT: ;;#ASMEND 5397; GFX940-NEXT: s_mov_b32 s8, s4 5398; GFX940-NEXT: s_mov_b32 s9, s5 5399; GFX940-NEXT: s_mov_b32 s10, s4 5400; GFX940-NEXT: s_mov_b32 s11, s5 5401; GFX940-NEXT: s_mov_b32 s12, s0 5402; GFX940-NEXT: s_mov_b32 s13, s1 5403; GFX940-NEXT: ;;#ASMSTART 5404; GFX940-NEXT: ; use s[8:13] 5405; GFX940-NEXT: ;;#ASMEND 5406; GFX940-NEXT: s_setpc_b64 s[30:31] 5407 %vec0 = call <3 x i64> asm "; def $0", "=s"() 5408 %vec1 = call <3 x i64> asm "; def $0", "=s"() 5409 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 5, i32 3> 5410 call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) 5411 ret void 5412} 5413 5414define void @s_shuffle_v3i64_v3i64__5_5_4() { 5415; GFX900-LABEL: s_shuffle_v3i64_v3i64__5_5_4: 5416; GFX900: ; %bb.0: 5417; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5418; GFX900-NEXT: ;;#ASMSTART 5419; GFX900-NEXT: ; def s[12:17] 5420; GFX900-NEXT: ;;#ASMEND 5421; GFX900-NEXT: s_mov_b32 s8, s16 5422; GFX900-NEXT: s_mov_b32 s9, s17 5423; GFX900-NEXT: s_mov_b32 s10, s16 5424; GFX900-NEXT: s_mov_b32 s11, s17 5425; GFX900-NEXT: s_mov_b32 s12, s14 5426; GFX900-NEXT: s_mov_b32 s13, s15 5427; GFX900-NEXT: ;;#ASMSTART 5428; GFX900-NEXT: ; use s[8:13] 5429; 
GFX900-NEXT: ;;#ASMEND 5430; GFX900-NEXT: s_setpc_b64 s[30:31] 5431; 5432; GFX90A-LABEL: s_shuffle_v3i64_v3i64__5_5_4: 5433; GFX90A: ; %bb.0: 5434; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5435; GFX90A-NEXT: ;;#ASMSTART 5436; GFX90A-NEXT: ; def s[12:17] 5437; GFX90A-NEXT: ;;#ASMEND 5438; GFX90A-NEXT: s_mov_b32 s8, s16 5439; GFX90A-NEXT: s_mov_b32 s9, s17 5440; GFX90A-NEXT: s_mov_b32 s10, s16 5441; GFX90A-NEXT: s_mov_b32 s11, s17 5442; GFX90A-NEXT: s_mov_b32 s12, s14 5443; GFX90A-NEXT: s_mov_b32 s13, s15 5444; GFX90A-NEXT: ;;#ASMSTART 5445; GFX90A-NEXT: ; use s[8:13] 5446; GFX90A-NEXT: ;;#ASMEND 5447; GFX90A-NEXT: s_setpc_b64 s[30:31] 5448; 5449; GFX940-LABEL: s_shuffle_v3i64_v3i64__5_5_4: 5450; GFX940: ; %bb.0: 5451; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5452; GFX940-NEXT: ;;#ASMSTART 5453; GFX940-NEXT: ; def s[0:5] 5454; GFX940-NEXT: ;;#ASMEND 5455; GFX940-NEXT: s_mov_b32 s8, s4 5456; GFX940-NEXT: s_mov_b32 s9, s5 5457; GFX940-NEXT: s_mov_b32 s10, s4 5458; GFX940-NEXT: s_mov_b32 s11, s5 5459; GFX940-NEXT: s_mov_b32 s12, s2 5460; GFX940-NEXT: s_mov_b32 s13, s3 5461; GFX940-NEXT: ;;#ASMSTART 5462; GFX940-NEXT: ; use s[8:13] 5463; GFX940-NEXT: ;;#ASMEND 5464; GFX940-NEXT: s_setpc_b64 s[30:31] 5465 %vec0 = call <3 x i64> asm "; def $0", "=s"() 5466 %vec1 = call <3 x i64> asm "; def $0", "=s"() 5467 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 5, i32 4> 5468 call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) 5469 ret void 5470} 5471 5472define void @s_shuffle_v3i64_v3i64__5_5_5() { 5473; GFX9-LABEL: s_shuffle_v3i64_v3i64__5_5_5: 5474; GFX9: ; %bb.0: 5475; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5476; GFX9-NEXT: ;;#ASMSTART 5477; GFX9-NEXT: ; def s[8:13] 5478; GFX9-NEXT: ;;#ASMEND 5479; GFX9-NEXT: s_mov_b32 s8, s12 5480; GFX9-NEXT: s_mov_b32 s9, s13 5481; GFX9-NEXT: s_mov_b32 s10, s12 5482; GFX9-NEXT: s_mov_b32 s11, s13 5483; GFX9-NEXT: ;;#ASMSTART 5484; GFX9-NEXT: ; use s[8:13] 5485; 
GFX9-NEXT: ;;#ASMEND 5486; GFX9-NEXT: s_setpc_b64 s[30:31] 5487 %vec0 = call <3 x i64> asm "; def $0", "=s"() 5488 %vec1 = call <3 x i64> asm "; def $0", "=s"() 5489 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 5, i32 5> 5490 call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) 5491 ret void 5492} 5493 5494define void @s_shuffle_v3i64_v3i64__u_0_0() { 5495; GFX900-LABEL: s_shuffle_v3i64_v3i64__u_0_0: 5496; GFX900: ; %bb.0: 5497; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5498; GFX900-NEXT: ;;#ASMSTART 5499; GFX900-NEXT: ; def s[4:9] 5500; GFX900-NEXT: ;;#ASMEND 5501; GFX900-NEXT: s_mov_b32 s10, s4 5502; GFX900-NEXT: s_mov_b32 s11, s5 5503; GFX900-NEXT: s_mov_b32 s12, s4 5504; GFX900-NEXT: s_mov_b32 s13, s5 5505; GFX900-NEXT: ;;#ASMSTART 5506; GFX900-NEXT: ; use s[8:13] 5507; GFX900-NEXT: ;;#ASMEND 5508; GFX900-NEXT: s_setpc_b64 s[30:31] 5509; 5510; GFX90A-LABEL: s_shuffle_v3i64_v3i64__u_0_0: 5511; GFX90A: ; %bb.0: 5512; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5513; GFX90A-NEXT: ;;#ASMSTART 5514; GFX90A-NEXT: ; def s[4:9] 5515; GFX90A-NEXT: ;;#ASMEND 5516; GFX90A-NEXT: s_mov_b32 s10, s4 5517; GFX90A-NEXT: s_mov_b32 s11, s5 5518; GFX90A-NEXT: s_mov_b32 s12, s4 5519; GFX90A-NEXT: s_mov_b32 s13, s5 5520; GFX90A-NEXT: ;;#ASMSTART 5521; GFX90A-NEXT: ; use s[8:13] 5522; GFX90A-NEXT: ;;#ASMEND 5523; GFX90A-NEXT: s_setpc_b64 s[30:31] 5524; 5525; GFX940-LABEL: s_shuffle_v3i64_v3i64__u_0_0: 5526; GFX940: ; %bb.0: 5527; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5528; GFX940-NEXT: ;;#ASMSTART 5529; GFX940-NEXT: ; def s[0:5] 5530; GFX940-NEXT: ;;#ASMEND 5531; GFX940-NEXT: s_mov_b32 s10, s0 5532; GFX940-NEXT: s_mov_b32 s11, s1 5533; GFX940-NEXT: s_mov_b32 s12, s0 5534; GFX940-NEXT: s_mov_b32 s13, s1 5535; GFX940-NEXT: ;;#ASMSTART 5536; GFX940-NEXT: ; use s[8:13] 5537; GFX940-NEXT: ;;#ASMEND 5538; GFX940-NEXT: s_setpc_b64 s[30:31] 5539 %vec0 = call <3 x i64> asm "; def $0", "=s"() 5540 %shuf = 
shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> <i32 poison, i32 0, i32 0> 5541 call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) 5542 ret void 5543} 5544 5545define void @s_shuffle_v3i64_v3i64__0_0_0() { 5546; GFX9-LABEL: s_shuffle_v3i64_v3i64__0_0_0: 5547; GFX9: ; %bb.0: 5548; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5549; GFX9-NEXT: ;;#ASMSTART 5550; GFX9-NEXT: ; def s[8:13] 5551; GFX9-NEXT: ;;#ASMEND 5552; GFX9-NEXT: s_mov_b32 s10, s8 5553; GFX9-NEXT: s_mov_b32 s11, s9 5554; GFX9-NEXT: s_mov_b32 s12, s8 5555; GFX9-NEXT: s_mov_b32 s13, s9 5556; GFX9-NEXT: ;;#ASMSTART 5557; GFX9-NEXT: ; use s[8:13] 5558; GFX9-NEXT: ;;#ASMEND 5559; GFX9-NEXT: s_setpc_b64 s[30:31] 5560 %vec0 = call <3 x i64> asm "; def $0", "=s"() 5561 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> zeroinitializer 5562 call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) 5563 ret void 5564} 5565 5566define void @s_shuffle_v3i64_v3i64__1_0_0() { 5567; GFX900-LABEL: s_shuffle_v3i64_v3i64__1_0_0: 5568; GFX900: ; %bb.0: 5569; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5570; GFX900-NEXT: ;;#ASMSTART 5571; GFX900-NEXT: ; def s[4:9] 5572; GFX900-NEXT: ;;#ASMEND 5573; GFX900-NEXT: s_mov_b32 s8, s6 5574; GFX900-NEXT: s_mov_b32 s9, s7 5575; GFX900-NEXT: s_mov_b32 s10, s4 5576; GFX900-NEXT: s_mov_b32 s11, s5 5577; GFX900-NEXT: s_mov_b32 s12, s4 5578; GFX900-NEXT: s_mov_b32 s13, s5 5579; GFX900-NEXT: ;;#ASMSTART 5580; GFX900-NEXT: ; use s[8:13] 5581; GFX900-NEXT: ;;#ASMEND 5582; GFX900-NEXT: s_setpc_b64 s[30:31] 5583; 5584; GFX90A-LABEL: s_shuffle_v3i64_v3i64__1_0_0: 5585; GFX90A: ; %bb.0: 5586; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5587; GFX90A-NEXT: ;;#ASMSTART 5588; GFX90A-NEXT: ; def s[4:9] 5589; GFX90A-NEXT: ;;#ASMEND 5590; GFX90A-NEXT: s_mov_b32 s8, s6 5591; GFX90A-NEXT: s_mov_b32 s9, s7 5592; GFX90A-NEXT: s_mov_b32 s10, s4 5593; GFX90A-NEXT: s_mov_b32 s11, s5 5594; GFX90A-NEXT: s_mov_b32 s12, s4 5595; 
GFX90A-NEXT: s_mov_b32 s13, s5 5596; GFX90A-NEXT: ;;#ASMSTART 5597; GFX90A-NEXT: ; use s[8:13] 5598; GFX90A-NEXT: ;;#ASMEND 5599; GFX90A-NEXT: s_setpc_b64 s[30:31] 5600; 5601; GFX940-LABEL: s_shuffle_v3i64_v3i64__1_0_0: 5602; GFX940: ; %bb.0: 5603; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5604; GFX940-NEXT: ;;#ASMSTART 5605; GFX940-NEXT: ; def s[0:5] 5606; GFX940-NEXT: ;;#ASMEND 5607; GFX940-NEXT: s_mov_b32 s8, s2 5608; GFX940-NEXT: s_mov_b32 s9, s3 5609; GFX940-NEXT: s_mov_b32 s10, s0 5610; GFX940-NEXT: s_mov_b32 s11, s1 5611; GFX940-NEXT: s_mov_b32 s12, s0 5612; GFX940-NEXT: s_mov_b32 s13, s1 5613; GFX940-NEXT: ;;#ASMSTART 5614; GFX940-NEXT: ; use s[8:13] 5615; GFX940-NEXT: ;;#ASMEND 5616; GFX940-NEXT: s_setpc_b64 s[30:31] 5617 %vec0 = call <3 x i64> asm "; def $0", "=s"() 5618 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> <i32 1, i32 0, i32 0> 5619 call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) 5620 ret void 5621} 5622 5623define void @s_shuffle_v3i64_v3i64__2_0_0() { 5624; GFX900-LABEL: s_shuffle_v3i64_v3i64__2_0_0: 5625; GFX900: ; %bb.0: 5626; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5627; GFX900-NEXT: ;;#ASMSTART 5628; GFX900-NEXT: ; def s[4:9] 5629; GFX900-NEXT: ;;#ASMEND 5630; GFX900-NEXT: s_mov_b32 s10, s4 5631; GFX900-NEXT: s_mov_b32 s11, s5 5632; GFX900-NEXT: s_mov_b32 s12, s4 5633; GFX900-NEXT: s_mov_b32 s13, s5 5634; GFX900-NEXT: ;;#ASMSTART 5635; GFX900-NEXT: ; use s[8:13] 5636; GFX900-NEXT: ;;#ASMEND 5637; GFX900-NEXT: s_setpc_b64 s[30:31] 5638; 5639; GFX90A-LABEL: s_shuffle_v3i64_v3i64__2_0_0: 5640; GFX90A: ; %bb.0: 5641; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5642; GFX90A-NEXT: ;;#ASMSTART 5643; GFX90A-NEXT: ; def s[4:9] 5644; GFX90A-NEXT: ;;#ASMEND 5645; GFX90A-NEXT: s_mov_b32 s10, s4 5646; GFX90A-NEXT: s_mov_b32 s11, s5 5647; GFX90A-NEXT: s_mov_b32 s12, s4 5648; GFX90A-NEXT: s_mov_b32 s13, s5 5649; GFX90A-NEXT: ;;#ASMSTART 5650; GFX90A-NEXT: ; use s[8:13] 5651; 
GFX90A-NEXT: ;;#ASMEND 5652; GFX90A-NEXT: s_setpc_b64 s[30:31] 5653; 5654; GFX940-LABEL: s_shuffle_v3i64_v3i64__2_0_0: 5655; GFX940: ; %bb.0: 5656; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5657; GFX940-NEXT: ;;#ASMSTART 5658; GFX940-NEXT: ; def s[0:5] 5659; GFX940-NEXT: ;;#ASMEND 5660; GFX940-NEXT: s_mov_b32 s8, s4 5661; GFX940-NEXT: s_mov_b32 s9, s5 5662; GFX940-NEXT: s_mov_b32 s10, s0 5663; GFX940-NEXT: s_mov_b32 s11, s1 5664; GFX940-NEXT: s_mov_b32 s12, s0 5665; GFX940-NEXT: s_mov_b32 s13, s1 5666; GFX940-NEXT: ;;#ASMSTART 5667; GFX940-NEXT: ; use s[8:13] 5668; GFX940-NEXT: ;;#ASMEND 5669; GFX940-NEXT: s_setpc_b64 s[30:31] 5670 %vec0 = call <3 x i64> asm "; def $0", "=s"() 5671 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> <i32 2, i32 0, i32 0> 5672 call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) 5673 ret void 5674} 5675 5676define void @s_shuffle_v3i64_v3i64__3_0_0() { 5677; GFX900-LABEL: s_shuffle_v3i64_v3i64__3_0_0: 5678; GFX900: ; %bb.0: 5679; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5680; GFX900-NEXT: ;;#ASMSTART 5681; GFX900-NEXT: ; def s[4:9] 5682; GFX900-NEXT: ;;#ASMEND 5683; GFX900-NEXT: s_mov_b32 s10, s4 5684; GFX900-NEXT: s_mov_b32 s11, s5 5685; GFX900-NEXT: s_mov_b32 s12, s4 5686; GFX900-NEXT: s_mov_b32 s13, s5 5687; GFX900-NEXT: ;;#ASMSTART 5688; GFX900-NEXT: ; use s[8:13] 5689; GFX900-NEXT: ;;#ASMEND 5690; GFX900-NEXT: s_setpc_b64 s[30:31] 5691; 5692; GFX90A-LABEL: s_shuffle_v3i64_v3i64__3_0_0: 5693; GFX90A: ; %bb.0: 5694; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5695; GFX90A-NEXT: ;;#ASMSTART 5696; GFX90A-NEXT: ; def s[4:9] 5697; GFX90A-NEXT: ;;#ASMEND 5698; GFX90A-NEXT: s_mov_b32 s10, s4 5699; GFX90A-NEXT: s_mov_b32 s11, s5 5700; GFX90A-NEXT: s_mov_b32 s12, s4 5701; GFX90A-NEXT: s_mov_b32 s13, s5 5702; GFX90A-NEXT: ;;#ASMSTART 5703; GFX90A-NEXT: ; use s[8:13] 5704; GFX90A-NEXT: ;;#ASMEND 5705; GFX90A-NEXT: s_setpc_b64 s[30:31] 5706; 5707; GFX940-LABEL: 
s_shuffle_v3i64_v3i64__3_0_0: 5708; GFX940: ; %bb.0: 5709; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5710; GFX940-NEXT: ;;#ASMSTART 5711; GFX940-NEXT: ; def s[0:5] 5712; GFX940-NEXT: ;;#ASMEND 5713; GFX940-NEXT: s_mov_b32 s10, s0 5714; GFX940-NEXT: s_mov_b32 s11, s1 5715; GFX940-NEXT: s_mov_b32 s12, s0 5716; GFX940-NEXT: s_mov_b32 s13, s1 5717; GFX940-NEXT: ;;#ASMSTART 5718; GFX940-NEXT: ; use s[8:13] 5719; GFX940-NEXT: ;;#ASMEND 5720; GFX940-NEXT: s_setpc_b64 s[30:31] 5721 %vec0 = call <3 x i64> asm "; def $0", "=s"() 5722 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> <i32 3, i32 0, i32 0> 5723 call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) 5724 ret void 5725} 5726 5727define void @s_shuffle_v3i64_v3i64__4_0_0() { 5728; GFX900-LABEL: s_shuffle_v3i64_v3i64__4_0_0: 5729; GFX900: ; %bb.0: 5730; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5731; GFX900-NEXT: ;;#ASMSTART 5732; GFX900-NEXT: ; def s[4:9] 5733; GFX900-NEXT: ;;#ASMEND 5734; GFX900-NEXT: ;;#ASMSTART 5735; GFX900-NEXT: ; def s[8:13] 5736; GFX900-NEXT: ;;#ASMEND 5737; GFX900-NEXT: s_mov_b32 s8, s10 5738; GFX900-NEXT: s_mov_b32 s9, s11 5739; GFX900-NEXT: s_mov_b32 s10, s4 5740; GFX900-NEXT: s_mov_b32 s11, s5 5741; GFX900-NEXT: s_mov_b32 s12, s4 5742; GFX900-NEXT: s_mov_b32 s13, s5 5743; GFX900-NEXT: ;;#ASMSTART 5744; GFX900-NEXT: ; use s[8:13] 5745; GFX900-NEXT: ;;#ASMEND 5746; GFX900-NEXT: s_setpc_b64 s[30:31] 5747; 5748; GFX90A-LABEL: s_shuffle_v3i64_v3i64__4_0_0: 5749; GFX90A: ; %bb.0: 5750; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5751; GFX90A-NEXT: ;;#ASMSTART 5752; GFX90A-NEXT: ; def s[4:9] 5753; GFX90A-NEXT: ;;#ASMEND 5754; GFX90A-NEXT: ;;#ASMSTART 5755; GFX90A-NEXT: ; def s[8:13] 5756; GFX90A-NEXT: ;;#ASMEND 5757; GFX90A-NEXT: s_mov_b32 s8, s10 5758; GFX90A-NEXT: s_mov_b32 s9, s11 5759; GFX90A-NEXT: s_mov_b32 s10, s4 5760; GFX90A-NEXT: s_mov_b32 s11, s5 5761; GFX90A-NEXT: s_mov_b32 s12, s4 5762; GFX90A-NEXT: s_mov_b32 s13, s5 5763; 
GFX90A-NEXT: ;;#ASMSTART 5764; GFX90A-NEXT: ; use s[8:13] 5765; GFX90A-NEXT: ;;#ASMEND 5766; GFX90A-NEXT: s_setpc_b64 s[30:31] 5767; 5768; GFX940-LABEL: s_shuffle_v3i64_v3i64__4_0_0: 5769; GFX940: ; %bb.0: 5770; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5771; GFX940-NEXT: ;;#ASMSTART 5772; GFX940-NEXT: ; def s[0:5] 5773; GFX940-NEXT: ;;#ASMEND 5774; GFX940-NEXT: s_mov_b32 s10, s0 5775; GFX940-NEXT: ;;#ASMSTART 5776; GFX940-NEXT: ; def s[4:9] 5777; GFX940-NEXT: ;;#ASMEND 5778; GFX940-NEXT: s_mov_b32 s8, s6 5779; GFX940-NEXT: s_mov_b32 s9, s7 5780; GFX940-NEXT: s_mov_b32 s11, s1 5781; GFX940-NEXT: s_mov_b32 s12, s0 5782; GFX940-NEXT: s_mov_b32 s13, s1 5783; GFX940-NEXT: ;;#ASMSTART 5784; GFX940-NEXT: ; use s[8:13] 5785; GFX940-NEXT: ;;#ASMEND 5786; GFX940-NEXT: s_setpc_b64 s[30:31] 5787 %vec0 = call <3 x i64> asm "; def $0", "=s"() 5788 %vec1 = call <3 x i64> asm "; def $0", "=s"() 5789 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 4, i32 0, i32 0> 5790 call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) 5791 ret void 5792} 5793 5794define void @s_shuffle_v3i64_v3i64__5_0_0() { 5795; GFX900-LABEL: s_shuffle_v3i64_v3i64__5_0_0: 5796; GFX900: ; %bb.0: 5797; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5798; GFX900-NEXT: ;;#ASMSTART 5799; GFX900-NEXT: ; def s[4:9] 5800; GFX900-NEXT: ;;#ASMEND 5801; GFX900-NEXT: ;;#ASMSTART 5802; GFX900-NEXT: ; def s[8:13] 5803; GFX900-NEXT: ;;#ASMEND 5804; GFX900-NEXT: s_mov_b32 s8, s12 5805; GFX900-NEXT: s_mov_b32 s9, s13 5806; GFX900-NEXT: s_mov_b32 s10, s4 5807; GFX900-NEXT: s_mov_b32 s11, s5 5808; GFX900-NEXT: s_mov_b32 s12, s4 5809; GFX900-NEXT: s_mov_b32 s13, s5 5810; GFX900-NEXT: ;;#ASMSTART 5811; GFX900-NEXT: ; use s[8:13] 5812; GFX900-NEXT: ;;#ASMEND 5813; GFX900-NEXT: s_setpc_b64 s[30:31] 5814; 5815; GFX90A-LABEL: s_shuffle_v3i64_v3i64__5_0_0: 5816; GFX90A: ; %bb.0: 5817; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5818; GFX90A-NEXT: ;;#ASMSTART 5819; 
GFX90A-NEXT: ; def s[4:9] 5820; GFX90A-NEXT: ;;#ASMEND 5821; GFX90A-NEXT: ;;#ASMSTART 5822; GFX90A-NEXT: ; def s[8:13] 5823; GFX90A-NEXT: ;;#ASMEND 5824; GFX90A-NEXT: s_mov_b32 s8, s12 5825; GFX90A-NEXT: s_mov_b32 s9, s13 5826; GFX90A-NEXT: s_mov_b32 s10, s4 5827; GFX90A-NEXT: s_mov_b32 s11, s5 5828; GFX90A-NEXT: s_mov_b32 s12, s4 5829; GFX90A-NEXT: s_mov_b32 s13, s5 5830; GFX90A-NEXT: ;;#ASMSTART 5831; GFX90A-NEXT: ; use s[8:13] 5832; GFX90A-NEXT: ;;#ASMEND 5833; GFX90A-NEXT: s_setpc_b64 s[30:31] 5834; 5835; GFX940-LABEL: s_shuffle_v3i64_v3i64__5_0_0: 5836; GFX940: ; %bb.0: 5837; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5838; GFX940-NEXT: ;;#ASMSTART 5839; GFX940-NEXT: ; def s[0:5] 5840; GFX940-NEXT: ;;#ASMEND 5841; GFX940-NEXT: s_mov_b32 s10, s0 5842; GFX940-NEXT: ;;#ASMSTART 5843; GFX940-NEXT: ; def s[4:9] 5844; GFX940-NEXT: ;;#ASMEND 5845; GFX940-NEXT: s_mov_b32 s11, s1 5846; GFX940-NEXT: s_mov_b32 s12, s0 5847; GFX940-NEXT: s_mov_b32 s13, s1 5848; GFX940-NEXT: ;;#ASMSTART 5849; GFX940-NEXT: ; use s[8:13] 5850; GFX940-NEXT: ;;#ASMEND 5851; GFX940-NEXT: s_setpc_b64 s[30:31] 5852 %vec0 = call <3 x i64> asm "; def $0", "=s"() 5853 %vec1 = call <3 x i64> asm "; def $0", "=s"() 5854 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 0, i32 0> 5855 call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) 5856 ret void 5857} 5858 5859define void @s_shuffle_v3i64_v3i64__5_u_0() { 5860; GFX900-LABEL: s_shuffle_v3i64_v3i64__5_u_0: 5861; GFX900: ; %bb.0: 5862; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5863; GFX900-NEXT: ;;#ASMSTART 5864; GFX900-NEXT: ; def s[4:9] 5865; GFX900-NEXT: ;;#ASMEND 5866; GFX900-NEXT: ;;#ASMSTART 5867; GFX900-NEXT: ; def s[8:13] 5868; GFX900-NEXT: ;;#ASMEND 5869; GFX900-NEXT: s_mov_b32 s8, s12 5870; GFX900-NEXT: s_mov_b32 s9, s13 5871; GFX900-NEXT: s_mov_b32 s12, s4 5872; GFX900-NEXT: s_mov_b32 s13, s5 5873; GFX900-NEXT: ;;#ASMSTART 5874; GFX900-NEXT: ; use s[8:13] 5875; GFX900-NEXT: 
;;#ASMEND 5876; GFX900-NEXT: s_setpc_b64 s[30:31] 5877; 5878; GFX90A-LABEL: s_shuffle_v3i64_v3i64__5_u_0: 5879; GFX90A: ; %bb.0: 5880; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5881; GFX90A-NEXT: ;;#ASMSTART 5882; GFX90A-NEXT: ; def s[4:9] 5883; GFX90A-NEXT: ;;#ASMEND 5884; GFX90A-NEXT: ;;#ASMSTART 5885; GFX90A-NEXT: ; def s[8:13] 5886; GFX90A-NEXT: ;;#ASMEND 5887; GFX90A-NEXT: s_mov_b32 s8, s12 5888; GFX90A-NEXT: s_mov_b32 s9, s13 5889; GFX90A-NEXT: s_mov_b32 s12, s4 5890; GFX90A-NEXT: s_mov_b32 s13, s5 5891; GFX90A-NEXT: ;;#ASMSTART 5892; GFX90A-NEXT: ; use s[8:13] 5893; GFX90A-NEXT: ;;#ASMEND 5894; GFX90A-NEXT: s_setpc_b64 s[30:31] 5895; 5896; GFX940-LABEL: s_shuffle_v3i64_v3i64__5_u_0: 5897; GFX940: ; %bb.0: 5898; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5899; GFX940-NEXT: ;;#ASMSTART 5900; GFX940-NEXT: ; def s[0:5] 5901; GFX940-NEXT: ;;#ASMEND 5902; GFX940-NEXT: s_mov_b32 s12, s0 5903; GFX940-NEXT: ;;#ASMSTART 5904; GFX940-NEXT: ; def s[4:9] 5905; GFX940-NEXT: ;;#ASMEND 5906; GFX940-NEXT: s_mov_b32 s13, s1 5907; GFX940-NEXT: ;;#ASMSTART 5908; GFX940-NEXT: ; use s[8:13] 5909; GFX940-NEXT: ;;#ASMEND 5910; GFX940-NEXT: s_setpc_b64 s[30:31] 5911 %vec0 = call <3 x i64> asm "; def $0", "=s"() 5912 %vec1 = call <3 x i64> asm "; def $0", "=s"() 5913 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 poison, i32 0> 5914 call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) 5915 ret void 5916} 5917 5918define void @s_shuffle_v3i64_v3i64__5_1_0() { 5919; GFX900-LABEL: s_shuffle_v3i64_v3i64__5_1_0: 5920; GFX900: ; %bb.0: 5921; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5922; GFX900-NEXT: ;;#ASMSTART 5923; GFX900-NEXT: ; def s[4:9] 5924; GFX900-NEXT: ;;#ASMEND 5925; GFX900-NEXT: ;;#ASMSTART 5926; GFX900-NEXT: ; def s[8:13] 5927; GFX900-NEXT: ;;#ASMEND 5928; GFX900-NEXT: s_mov_b32 s8, s12 5929; GFX900-NEXT: s_mov_b32 s9, s13 5930; GFX900-NEXT: s_mov_b32 s10, s6 5931; GFX900-NEXT: s_mov_b32 s11, s7 5932; 
GFX900-NEXT: s_mov_b32 s12, s4 5933; GFX900-NEXT: s_mov_b32 s13, s5 5934; GFX900-NEXT: ;;#ASMSTART 5935; GFX900-NEXT: ; use s[8:13] 5936; GFX900-NEXT: ;;#ASMEND 5937; GFX900-NEXT: s_setpc_b64 s[30:31] 5938; 5939; GFX90A-LABEL: s_shuffle_v3i64_v3i64__5_1_0: 5940; GFX90A: ; %bb.0: 5941; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5942; GFX90A-NEXT: ;;#ASMSTART 5943; GFX90A-NEXT: ; def s[4:9] 5944; GFX90A-NEXT: ;;#ASMEND 5945; GFX90A-NEXT: ;;#ASMSTART 5946; GFX90A-NEXT: ; def s[8:13] 5947; GFX90A-NEXT: ;;#ASMEND 5948; GFX90A-NEXT: s_mov_b32 s8, s12 5949; GFX90A-NEXT: s_mov_b32 s9, s13 5950; GFX90A-NEXT: s_mov_b32 s10, s6 5951; GFX90A-NEXT: s_mov_b32 s11, s7 5952; GFX90A-NEXT: s_mov_b32 s12, s4 5953; GFX90A-NEXT: s_mov_b32 s13, s5 5954; GFX90A-NEXT: ;;#ASMSTART 5955; GFX90A-NEXT: ; use s[8:13] 5956; GFX90A-NEXT: ;;#ASMEND 5957; GFX90A-NEXT: s_setpc_b64 s[30:31] 5958; 5959; GFX940-LABEL: s_shuffle_v3i64_v3i64__5_1_0: 5960; GFX940: ; %bb.0: 5961; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5962; GFX940-NEXT: ;;#ASMSTART 5963; GFX940-NEXT: ; def s[0:5] 5964; GFX940-NEXT: ;;#ASMEND 5965; GFX940-NEXT: s_mov_b32 s10, s2 5966; GFX940-NEXT: ;;#ASMSTART 5967; GFX940-NEXT: ; def s[4:9] 5968; GFX940-NEXT: ;;#ASMEND 5969; GFX940-NEXT: s_mov_b32 s11, s3 5970; GFX940-NEXT: s_mov_b32 s12, s0 5971; GFX940-NEXT: s_mov_b32 s13, s1 5972; GFX940-NEXT: ;;#ASMSTART 5973; GFX940-NEXT: ; use s[8:13] 5974; GFX940-NEXT: ;;#ASMEND 5975; GFX940-NEXT: s_setpc_b64 s[30:31] 5976 %vec0 = call <3 x i64> asm "; def $0", "=s"() 5977 %vec1 = call <3 x i64> asm "; def $0", "=s"() 5978 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 1, i32 0> 5979 call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) 5980 ret void 5981} 5982 5983define void @s_shuffle_v3i64_v3i64__5_2_0() { 5984; GFX900-LABEL: s_shuffle_v3i64_v3i64__5_2_0: 5985; GFX900: ; %bb.0: 5986; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5987; GFX900-NEXT: ;;#ASMSTART 5988; 
GFX900-NEXT: ; def s[12:17] 5989; GFX900-NEXT: ;;#ASMEND 5990; GFX900-NEXT: ;;#ASMSTART 5991; GFX900-NEXT: ; def s[4:9] 5992; GFX900-NEXT: ;;#ASMEND 5993; GFX900-NEXT: s_mov_b32 s10, s16 5994; GFX900-NEXT: s_mov_b32 s11, s17 5995; GFX900-NEXT: ;;#ASMSTART 5996; GFX900-NEXT: ; use s[8:13] 5997; GFX900-NEXT: ;;#ASMEND 5998; GFX900-NEXT: s_setpc_b64 s[30:31] 5999; 6000; GFX90A-LABEL: s_shuffle_v3i64_v3i64__5_2_0: 6001; GFX90A: ; %bb.0: 6002; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6003; GFX90A-NEXT: ;;#ASMSTART 6004; GFX90A-NEXT: ; def s[12:17] 6005; GFX90A-NEXT: ;;#ASMEND 6006; GFX90A-NEXT: ;;#ASMSTART 6007; GFX90A-NEXT: ; def s[4:9] 6008; GFX90A-NEXT: ;;#ASMEND 6009; GFX90A-NEXT: s_mov_b32 s10, s16 6010; GFX90A-NEXT: s_mov_b32 s11, s17 6011; GFX90A-NEXT: ;;#ASMSTART 6012; GFX90A-NEXT: ; use s[8:13] 6013; GFX90A-NEXT: ;;#ASMEND 6014; GFX90A-NEXT: s_setpc_b64 s[30:31] 6015; 6016; GFX940-LABEL: s_shuffle_v3i64_v3i64__5_2_0: 6017; GFX940: ; %bb.0: 6018; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6019; GFX940-NEXT: ;;#ASMSTART 6020; GFX940-NEXT: ; def s[8:13] 6021; GFX940-NEXT: ;;#ASMEND 6022; GFX940-NEXT: ;;#ASMSTART 6023; GFX940-NEXT: ; def s[0:5] 6024; GFX940-NEXT: ;;#ASMEND 6025; GFX940-NEXT: s_mov_b32 s8, s12 6026; GFX940-NEXT: s_mov_b32 s9, s13 6027; GFX940-NEXT: s_mov_b32 s10, s4 6028; GFX940-NEXT: s_mov_b32 s11, s5 6029; GFX940-NEXT: s_mov_b32 s12, s0 6030; GFX940-NEXT: s_mov_b32 s13, s1 6031; GFX940-NEXT: ;;#ASMSTART 6032; GFX940-NEXT: ; use s[8:13] 6033; GFX940-NEXT: ;;#ASMEND 6034; GFX940-NEXT: s_setpc_b64 s[30:31] 6035 %vec0 = call <3 x i64> asm "; def $0", "=s"() 6036 %vec1 = call <3 x i64> asm "; def $0", "=s"() 6037 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 2, i32 0> 6038 call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) 6039 ret void 6040} 6041 6042define void @s_shuffle_v3i64_v3i64__5_3_0() { 6043; GFX900-LABEL: s_shuffle_v3i64_v3i64__5_3_0: 6044; GFX900: ; %bb.0: 6045; 
GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6046; GFX900-NEXT: ;;#ASMSTART 6047; GFX900-NEXT: ; def s[4:9] 6048; GFX900-NEXT: ;;#ASMEND 6049; GFX900-NEXT: ;;#ASMSTART 6050; GFX900-NEXT: ; def s[12:17] 6051; GFX900-NEXT: ;;#ASMEND 6052; GFX900-NEXT: s_mov_b32 s8, s16 6053; GFX900-NEXT: s_mov_b32 s9, s17 6054; GFX900-NEXT: s_mov_b32 s10, s12 6055; GFX900-NEXT: s_mov_b32 s11, s13 6056; GFX900-NEXT: s_mov_b32 s12, s4 6057; GFX900-NEXT: s_mov_b32 s13, s5 6058; GFX900-NEXT: ;;#ASMSTART 6059; GFX900-NEXT: ; use s[8:13] 6060; GFX900-NEXT: ;;#ASMEND 6061; GFX900-NEXT: s_setpc_b64 s[30:31] 6062; 6063; GFX90A-LABEL: s_shuffle_v3i64_v3i64__5_3_0: 6064; GFX90A: ; %bb.0: 6065; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6066; GFX90A-NEXT: ;;#ASMSTART 6067; GFX90A-NEXT: ; def s[4:9] 6068; GFX90A-NEXT: ;;#ASMEND 6069; GFX90A-NEXT: ;;#ASMSTART 6070; GFX90A-NEXT: ; def s[12:17] 6071; GFX90A-NEXT: ;;#ASMEND 6072; GFX90A-NEXT: s_mov_b32 s8, s16 6073; GFX90A-NEXT: s_mov_b32 s9, s17 6074; GFX90A-NEXT: s_mov_b32 s10, s12 6075; GFX90A-NEXT: s_mov_b32 s11, s13 6076; GFX90A-NEXT: s_mov_b32 s12, s4 6077; GFX90A-NEXT: s_mov_b32 s13, s5 6078; GFX90A-NEXT: ;;#ASMSTART 6079; GFX90A-NEXT: ; use s[8:13] 6080; GFX90A-NEXT: ;;#ASMEND 6081; GFX90A-NEXT: s_setpc_b64 s[30:31] 6082; 6083; GFX940-LABEL: s_shuffle_v3i64_v3i64__5_3_0: 6084; GFX940: ; %bb.0: 6085; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6086; GFX940-NEXT: ;;#ASMSTART 6087; GFX940-NEXT: ; def s[0:5] 6088; GFX940-NEXT: ;;#ASMEND 6089; GFX940-NEXT: s_mov_b32 s12, s0 6090; GFX940-NEXT: ;;#ASMSTART 6091; GFX940-NEXT: ; def s[4:9] 6092; GFX940-NEXT: ;;#ASMEND 6093; GFX940-NEXT: s_mov_b32 s10, s4 6094; GFX940-NEXT: s_mov_b32 s11, s5 6095; GFX940-NEXT: s_mov_b32 s13, s1 6096; GFX940-NEXT: ;;#ASMSTART 6097; GFX940-NEXT: ; use s[8:13] 6098; GFX940-NEXT: ;;#ASMEND 6099; GFX940-NEXT: s_setpc_b64 s[30:31] 6100 %vec0 = call <3 x i64> asm "; def $0", "=s"() 6101 %vec1 = call <3 x i64> asm "; def $0", "=s"() 6102 %shuf = 
shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 3, i32 0> 6103 call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) 6104 ret void 6105} 6106 6107define void @s_shuffle_v3i64_v3i64__5_4_0() { 6108; GFX900-LABEL: s_shuffle_v3i64_v3i64__5_4_0: 6109; GFX900: ; %bb.0: 6110; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6111; GFX900-NEXT: ;;#ASMSTART 6112; GFX900-NEXT: ; def s[4:9] 6113; GFX900-NEXT: ;;#ASMEND 6114; GFX900-NEXT: ;;#ASMSTART 6115; GFX900-NEXT: ; def s[8:13] 6116; GFX900-NEXT: ;;#ASMEND 6117; GFX900-NEXT: s_mov_b32 s8, s12 6118; GFX900-NEXT: s_mov_b32 s9, s13 6119; GFX900-NEXT: s_mov_b32 s12, s4 6120; GFX900-NEXT: s_mov_b32 s13, s5 6121; GFX900-NEXT: ;;#ASMSTART 6122; GFX900-NEXT: ; use s[8:13] 6123; GFX900-NEXT: ;;#ASMEND 6124; GFX900-NEXT: s_setpc_b64 s[30:31] 6125; 6126; GFX90A-LABEL: s_shuffle_v3i64_v3i64__5_4_0: 6127; GFX90A: ; %bb.0: 6128; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6129; GFX90A-NEXT: ;;#ASMSTART 6130; GFX90A-NEXT: ; def s[4:9] 6131; GFX90A-NEXT: ;;#ASMEND 6132; GFX90A-NEXT: ;;#ASMSTART 6133; GFX90A-NEXT: ; def s[8:13] 6134; GFX90A-NEXT: ;;#ASMEND 6135; GFX90A-NEXT: s_mov_b32 s8, s12 6136; GFX90A-NEXT: s_mov_b32 s9, s13 6137; GFX90A-NEXT: s_mov_b32 s12, s4 6138; GFX90A-NEXT: s_mov_b32 s13, s5 6139; GFX90A-NEXT: ;;#ASMSTART 6140; GFX90A-NEXT: ; use s[8:13] 6141; GFX90A-NEXT: ;;#ASMEND 6142; GFX90A-NEXT: s_setpc_b64 s[30:31] 6143; 6144; GFX940-LABEL: s_shuffle_v3i64_v3i64__5_4_0: 6145; GFX940: ; %bb.0: 6146; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6147; GFX940-NEXT: ;;#ASMSTART 6148; GFX940-NEXT: ; def s[8:13] 6149; GFX940-NEXT: ;;#ASMEND 6150; GFX940-NEXT: ;;#ASMSTART 6151; GFX940-NEXT: ; def s[0:5] 6152; GFX940-NEXT: ;;#ASMEND 6153; GFX940-NEXT: s_mov_b32 s8, s12 6154; GFX940-NEXT: s_mov_b32 s9, s13 6155; GFX940-NEXT: s_mov_b32 s12, s0 6156; GFX940-NEXT: s_mov_b32 s13, s1 6157; GFX940-NEXT: ;;#ASMSTART 6158; GFX940-NEXT: ; use s[8:13] 6159; GFX940-NEXT: ;;#ASMEND 6160; 
GFX940-NEXT: s_setpc_b64 s[30:31] 6161 %vec0 = call <3 x i64> asm "; def $0", "=s"() 6162 %vec1 = call <3 x i64> asm "; def $0", "=s"() 6163 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 4, i32 0> 6164 call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) 6165 ret void 6166} 6167 6168define void @s_shuffle_v3i64_v3i64__u_1_1() { 6169; GFX9-LABEL: s_shuffle_v3i64_v3i64__u_1_1: 6170; GFX9: ; %bb.0: 6171; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6172; GFX9-NEXT: ;;#ASMSTART 6173; GFX9-NEXT: ; def s[8:13] 6174; GFX9-NEXT: ;;#ASMEND 6175; GFX9-NEXT: s_mov_b32 s12, s10 6176; GFX9-NEXT: s_mov_b32 s13, s11 6177; GFX9-NEXT: ;;#ASMSTART 6178; GFX9-NEXT: ; use s[8:13] 6179; GFX9-NEXT: ;;#ASMEND 6180; GFX9-NEXT: s_setpc_b64 s[30:31] 6181 %vec0 = call <3 x i64> asm "; def $0", "=s"() 6182 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> <i32 poison, i32 1, i32 1> 6183 call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) 6184 ret void 6185} 6186 6187define void @s_shuffle_v3i64_v3i64__0_1_1() { 6188; GFX9-LABEL: s_shuffle_v3i64_v3i64__0_1_1: 6189; GFX9: ; %bb.0: 6190; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6191; GFX9-NEXT: ;;#ASMSTART 6192; GFX9-NEXT: ; def s[8:13] 6193; GFX9-NEXT: ;;#ASMEND 6194; GFX9-NEXT: s_mov_b32 s12, s10 6195; GFX9-NEXT: s_mov_b32 s13, s11 6196; GFX9-NEXT: ;;#ASMSTART 6197; GFX9-NEXT: ; use s[8:13] 6198; GFX9-NEXT: ;;#ASMEND 6199; GFX9-NEXT: s_setpc_b64 s[30:31] 6200 %vec0 = call <3 x i64> asm "; def $0", "=s"() 6201 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> <i32 0, i32 1, i32 1> 6202 call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) 6203 ret void 6204} 6205 6206define void @s_shuffle_v3i64_v3i64__1_1_1() { 6207; GFX9-LABEL: s_shuffle_v3i64_v3i64__1_1_1: 6208; GFX9: ; %bb.0: 6209; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6210; GFX9-NEXT: ;;#ASMSTART 6211; GFX9-NEXT: ; def s[8:13] 6212; GFX9-NEXT: ;;#ASMEND 
6213; GFX9-NEXT: s_mov_b32 s8, s10 6214; GFX9-NEXT: s_mov_b32 s9, s11 6215; GFX9-NEXT: s_mov_b32 s12, s10 6216; GFX9-NEXT: s_mov_b32 s13, s11 6217; GFX9-NEXT: ;;#ASMSTART 6218; GFX9-NEXT: ; use s[8:13] 6219; GFX9-NEXT: ;;#ASMEND 6220; GFX9-NEXT: s_setpc_b64 s[30:31] 6221 %vec0 = call <3 x i64> asm "; def $0", "=s"() 6222 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> <i32 1, i32 1, i32 1> 6223 call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) 6224 ret void 6225} 6226 6227define void @s_shuffle_v3i64_v3i64__2_1_1() { 6228; GFX9-LABEL: s_shuffle_v3i64_v3i64__2_1_1: 6229; GFX9: ; %bb.0: 6230; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6231; GFX9-NEXT: ;;#ASMSTART 6232; GFX9-NEXT: ; def s[8:13] 6233; GFX9-NEXT: ;;#ASMEND 6234; GFX9-NEXT: s_mov_b32 s8, s12 6235; GFX9-NEXT: s_mov_b32 s9, s13 6236; GFX9-NEXT: s_mov_b32 s12, s10 6237; GFX9-NEXT: s_mov_b32 s13, s11 6238; GFX9-NEXT: ;;#ASMSTART 6239; GFX9-NEXT: ; use s[8:13] 6240; GFX9-NEXT: ;;#ASMEND 6241; GFX9-NEXT: s_setpc_b64 s[30:31] 6242 %vec0 = call <3 x i64> asm "; def $0", "=s"() 6243 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> <i32 2, i32 1, i32 1> 6244 call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) 6245 ret void 6246} 6247 6248define void @s_shuffle_v3i64_v3i64__3_1_1() { 6249; GFX9-LABEL: s_shuffle_v3i64_v3i64__3_1_1: 6250; GFX9: ; %bb.0: 6251; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6252; GFX9-NEXT: ;;#ASMSTART 6253; GFX9-NEXT: ; def s[8:13] 6254; GFX9-NEXT: ;;#ASMEND 6255; GFX9-NEXT: s_mov_b32 s12, s10 6256; GFX9-NEXT: s_mov_b32 s13, s11 6257; GFX9-NEXT: ;;#ASMSTART 6258; GFX9-NEXT: ; use s[8:13] 6259; GFX9-NEXT: ;;#ASMEND 6260; GFX9-NEXT: s_setpc_b64 s[30:31] 6261 %vec0 = call <3 x i64> asm "; def $0", "=s"() 6262 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> <i32 3, i32 1, i32 1> 6263 call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) 6264 ret void 6265} 6266 6267define void 
@s_shuffle_v3i64_v3i64__4_1_1() { 6268; GFX900-LABEL: s_shuffle_v3i64_v3i64__4_1_1: 6269; GFX900: ; %bb.0: 6270; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6271; GFX900-NEXT: ;;#ASMSTART 6272; GFX900-NEXT: ; def s[8:13] 6273; GFX900-NEXT: ;;#ASMEND 6274; GFX900-NEXT: ;;#ASMSTART 6275; GFX900-NEXT: ; def s[4:9] 6276; GFX900-NEXT: ;;#ASMEND 6277; GFX900-NEXT: s_mov_b32 s8, s6 6278; GFX900-NEXT: s_mov_b32 s9, s7 6279; GFX900-NEXT: s_mov_b32 s12, s10 6280; GFX900-NEXT: s_mov_b32 s13, s11 6281; GFX900-NEXT: ;;#ASMSTART 6282; GFX900-NEXT: ; use s[8:13] 6283; GFX900-NEXT: ;;#ASMEND 6284; GFX900-NEXT: s_setpc_b64 s[30:31] 6285; 6286; GFX90A-LABEL: s_shuffle_v3i64_v3i64__4_1_1: 6287; GFX90A: ; %bb.0: 6288; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6289; GFX90A-NEXT: ;;#ASMSTART 6290; GFX90A-NEXT: ; def s[8:13] 6291; GFX90A-NEXT: ;;#ASMEND 6292; GFX90A-NEXT: ;;#ASMSTART 6293; GFX90A-NEXT: ; def s[4:9] 6294; GFX90A-NEXT: ;;#ASMEND 6295; GFX90A-NEXT: s_mov_b32 s8, s6 6296; GFX90A-NEXT: s_mov_b32 s9, s7 6297; GFX90A-NEXT: s_mov_b32 s12, s10 6298; GFX90A-NEXT: s_mov_b32 s13, s11 6299; GFX90A-NEXT: ;;#ASMSTART 6300; GFX90A-NEXT: ; use s[8:13] 6301; GFX90A-NEXT: ;;#ASMEND 6302; GFX90A-NEXT: s_setpc_b64 s[30:31] 6303; 6304; GFX940-LABEL: s_shuffle_v3i64_v3i64__4_1_1: 6305; GFX940: ; %bb.0: 6306; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6307; GFX940-NEXT: ;;#ASMSTART 6308; GFX940-NEXT: ; def s[8:13] 6309; GFX940-NEXT: ;;#ASMEND 6310; GFX940-NEXT: ;;#ASMSTART 6311; GFX940-NEXT: ; def s[0:5] 6312; GFX940-NEXT: ;;#ASMEND 6313; GFX940-NEXT: s_mov_b32 s8, s2 6314; GFX940-NEXT: s_mov_b32 s9, s3 6315; GFX940-NEXT: s_mov_b32 s12, s10 6316; GFX940-NEXT: s_mov_b32 s13, s11 6317; GFX940-NEXT: ;;#ASMSTART 6318; GFX940-NEXT: ; use s[8:13] 6319; GFX940-NEXT: ;;#ASMEND 6320; GFX940-NEXT: s_setpc_b64 s[30:31] 6321 %vec0 = call <3 x i64> asm "; def $0", "=s"() 6322 %vec1 = call <3 x i64> asm "; def $0", "=s"() 6323 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> 
%vec1, <3 x i32> <i32 4, i32 1, i32 1> 6324 call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) 6325 ret void 6326} 6327 6328define void @s_shuffle_v3i64_v3i64__5_1_1() { 6329; GFX900-LABEL: s_shuffle_v3i64_v3i64__5_1_1: 6330; GFX900: ; %bb.0: 6331; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6332; GFX900-NEXT: ;;#ASMSTART 6333; GFX900-NEXT: ; def s[8:13] 6334; GFX900-NEXT: ;;#ASMEND 6335; GFX900-NEXT: ;;#ASMSTART 6336; GFX900-NEXT: ; def s[4:9] 6337; GFX900-NEXT: ;;#ASMEND 6338; GFX900-NEXT: s_mov_b32 s12, s10 6339; GFX900-NEXT: s_mov_b32 s13, s11 6340; GFX900-NEXT: ;;#ASMSTART 6341; GFX900-NEXT: ; use s[8:13] 6342; GFX900-NEXT: ;;#ASMEND 6343; GFX900-NEXT: s_setpc_b64 s[30:31] 6344; 6345; GFX90A-LABEL: s_shuffle_v3i64_v3i64__5_1_1: 6346; GFX90A: ; %bb.0: 6347; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6348; GFX90A-NEXT: ;;#ASMSTART 6349; GFX90A-NEXT: ; def s[8:13] 6350; GFX90A-NEXT: ;;#ASMEND 6351; GFX90A-NEXT: ;;#ASMSTART 6352; GFX90A-NEXT: ; def s[4:9] 6353; GFX90A-NEXT: ;;#ASMEND 6354; GFX90A-NEXT: s_mov_b32 s12, s10 6355; GFX90A-NEXT: s_mov_b32 s13, s11 6356; GFX90A-NEXT: ;;#ASMSTART 6357; GFX90A-NEXT: ; use s[8:13] 6358; GFX90A-NEXT: ;;#ASMEND 6359; GFX90A-NEXT: s_setpc_b64 s[30:31] 6360; 6361; GFX940-LABEL: s_shuffle_v3i64_v3i64__5_1_1: 6362; GFX940: ; %bb.0: 6363; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6364; GFX940-NEXT: ;;#ASMSTART 6365; GFX940-NEXT: ; def s[8:13] 6366; GFX940-NEXT: ;;#ASMEND 6367; GFX940-NEXT: ;;#ASMSTART 6368; GFX940-NEXT: ; def s[0:5] 6369; GFX940-NEXT: ;;#ASMEND 6370; GFX940-NEXT: s_mov_b32 s8, s4 6371; GFX940-NEXT: s_mov_b32 s9, s5 6372; GFX940-NEXT: s_mov_b32 s12, s10 6373; GFX940-NEXT: s_mov_b32 s13, s11 6374; GFX940-NEXT: ;;#ASMSTART 6375; GFX940-NEXT: ; use s[8:13] 6376; GFX940-NEXT: ;;#ASMEND 6377; GFX940-NEXT: s_setpc_b64 s[30:31] 6378 %vec0 = call <3 x i64> asm "; def $0", "=s"() 6379 %vec1 = call <3 x i64> asm "; def $0", "=s"() 6380 %shuf = shufflevector <3 x i64> %vec0, <3 x 
i64> %vec1, <3 x i32> <i32 5, i32 1, i32 1> 6381 call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) 6382 ret void 6383} 6384 6385define void @s_shuffle_v3i64_v3i64__5_u_1() { 6386; GFX900-LABEL: s_shuffle_v3i64_v3i64__5_u_1: 6387; GFX900: ; %bb.0: 6388; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6389; GFX900-NEXT: ;;#ASMSTART 6390; GFX900-NEXT: ; def s[4:9] 6391; GFX900-NEXT: ;;#ASMEND 6392; GFX900-NEXT: ;;#ASMSTART 6393; GFX900-NEXT: ; def s[8:13] 6394; GFX900-NEXT: ;;#ASMEND 6395; GFX900-NEXT: s_mov_b32 s8, s12 6396; GFX900-NEXT: s_mov_b32 s9, s13 6397; GFX900-NEXT: s_mov_b32 s12, s6 6398; GFX900-NEXT: s_mov_b32 s13, s7 6399; GFX900-NEXT: ;;#ASMSTART 6400; GFX900-NEXT: ; use s[8:13] 6401; GFX900-NEXT: ;;#ASMEND 6402; GFX900-NEXT: s_setpc_b64 s[30:31] 6403; 6404; GFX90A-LABEL: s_shuffle_v3i64_v3i64__5_u_1: 6405; GFX90A: ; %bb.0: 6406; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6407; GFX90A-NEXT: ;;#ASMSTART 6408; GFX90A-NEXT: ; def s[4:9] 6409; GFX90A-NEXT: ;;#ASMEND 6410; GFX90A-NEXT: ;;#ASMSTART 6411; GFX90A-NEXT: ; def s[8:13] 6412; GFX90A-NEXT: ;;#ASMEND 6413; GFX90A-NEXT: s_mov_b32 s8, s12 6414; GFX90A-NEXT: s_mov_b32 s9, s13 6415; GFX90A-NEXT: s_mov_b32 s12, s6 6416; GFX90A-NEXT: s_mov_b32 s13, s7 6417; GFX90A-NEXT: ;;#ASMSTART 6418; GFX90A-NEXT: ; use s[8:13] 6419; GFX90A-NEXT: ;;#ASMEND 6420; GFX90A-NEXT: s_setpc_b64 s[30:31] 6421; 6422; GFX940-LABEL: s_shuffle_v3i64_v3i64__5_u_1: 6423; GFX940: ; %bb.0: 6424; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6425; GFX940-NEXT: ;;#ASMSTART 6426; GFX940-NEXT: ; def s[0:5] 6427; GFX940-NEXT: ;;#ASMEND 6428; GFX940-NEXT: s_mov_b32 s12, s2 6429; GFX940-NEXT: ;;#ASMSTART 6430; GFX940-NEXT: ; def s[4:9] 6431; GFX940-NEXT: ;;#ASMEND 6432; GFX940-NEXT: s_mov_b32 s13, s3 6433; GFX940-NEXT: ;;#ASMSTART 6434; GFX940-NEXT: ; use s[8:13] 6435; GFX940-NEXT: ;;#ASMEND 6436; GFX940-NEXT: s_setpc_b64 s[30:31] 6437 %vec0 = call <3 x i64> asm "; def $0", "=s"() 6438 %vec1 = call <3 x i64> 
asm "; def $0", "=s"() 6439 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 poison, i32 1> 6440 call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) 6441 ret void 6442} 6443 6444define void @s_shuffle_v3i64_v3i64__5_0_1() { 6445; GFX900-LABEL: s_shuffle_v3i64_v3i64__5_0_1: 6446; GFX900: ; %bb.0: 6447; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6448; GFX900-NEXT: ;;#ASMSTART 6449; GFX900-NEXT: ; def s[4:9] 6450; GFX900-NEXT: ;;#ASMEND 6451; GFX900-NEXT: ;;#ASMSTART 6452; GFX900-NEXT: ; def s[8:13] 6453; GFX900-NEXT: ;;#ASMEND 6454; GFX900-NEXT: s_mov_b32 s8, s12 6455; GFX900-NEXT: s_mov_b32 s9, s13 6456; GFX900-NEXT: s_mov_b32 s10, s4 6457; GFX900-NEXT: s_mov_b32 s11, s5 6458; GFX900-NEXT: s_mov_b32 s12, s6 6459; GFX900-NEXT: s_mov_b32 s13, s7 6460; GFX900-NEXT: ;;#ASMSTART 6461; GFX900-NEXT: ; use s[8:13] 6462; GFX900-NEXT: ;;#ASMEND 6463; GFX900-NEXT: s_setpc_b64 s[30:31] 6464; 6465; GFX90A-LABEL: s_shuffle_v3i64_v3i64__5_0_1: 6466; GFX90A: ; %bb.0: 6467; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6468; GFX90A-NEXT: ;;#ASMSTART 6469; GFX90A-NEXT: ; def s[4:9] 6470; GFX90A-NEXT: ;;#ASMEND 6471; GFX90A-NEXT: ;;#ASMSTART 6472; GFX90A-NEXT: ; def s[8:13] 6473; GFX90A-NEXT: ;;#ASMEND 6474; GFX90A-NEXT: s_mov_b32 s8, s12 6475; GFX90A-NEXT: s_mov_b32 s9, s13 6476; GFX90A-NEXT: s_mov_b32 s10, s4 6477; GFX90A-NEXT: s_mov_b32 s11, s5 6478; GFX90A-NEXT: s_mov_b32 s12, s6 6479; GFX90A-NEXT: s_mov_b32 s13, s7 6480; GFX90A-NEXT: ;;#ASMSTART 6481; GFX90A-NEXT: ; use s[8:13] 6482; GFX90A-NEXT: ;;#ASMEND 6483; GFX90A-NEXT: s_setpc_b64 s[30:31] 6484; 6485; GFX940-LABEL: s_shuffle_v3i64_v3i64__5_0_1: 6486; GFX940: ; %bb.0: 6487; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6488; GFX940-NEXT: ;;#ASMSTART 6489; GFX940-NEXT: ; def s[0:5] 6490; GFX940-NEXT: ;;#ASMEND 6491; GFX940-NEXT: s_mov_b32 s10, s0 6492; GFX940-NEXT: ;;#ASMSTART 6493; GFX940-NEXT: ; def s[4:9] 6494; GFX940-NEXT: ;;#ASMEND 6495; GFX940-NEXT: 
s_mov_b32 s11, s1 6496; GFX940-NEXT: s_mov_b32 s12, s2 6497; GFX940-NEXT: s_mov_b32 s13, s3 6498; GFX940-NEXT: ;;#ASMSTART 6499; GFX940-NEXT: ; use s[8:13] 6500; GFX940-NEXT: ;;#ASMEND 6501; GFX940-NEXT: s_setpc_b64 s[30:31] 6502 %vec0 = call <3 x i64> asm "; def $0", "=s"() 6503 %vec1 = call <3 x i64> asm "; def $0", "=s"() 6504 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 0, i32 1> 6505 call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) 6506 ret void 6507} 6508 6509define void @s_shuffle_v3i64_v3i64__5_2_1() { 6510; GFX900-LABEL: s_shuffle_v3i64_v3i64__5_2_1: 6511; GFX900: ; %bb.0: 6512; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6513; GFX900-NEXT: ;;#ASMSTART 6514; GFX900-NEXT: ; def s[12:17] 6515; GFX900-NEXT: ;;#ASMEND 6516; GFX900-NEXT: ;;#ASMSTART 6517; GFX900-NEXT: ; def s[4:9] 6518; GFX900-NEXT: ;;#ASMEND 6519; GFX900-NEXT: s_mov_b32 s10, s16 6520; GFX900-NEXT: s_mov_b32 s11, s17 6521; GFX900-NEXT: s_mov_b32 s12, s14 6522; GFX900-NEXT: s_mov_b32 s13, s15 6523; GFX900-NEXT: ;;#ASMSTART 6524; GFX900-NEXT: ; use s[8:13] 6525; GFX900-NEXT: ;;#ASMEND 6526; GFX900-NEXT: s_setpc_b64 s[30:31] 6527; 6528; GFX90A-LABEL: s_shuffle_v3i64_v3i64__5_2_1: 6529; GFX90A: ; %bb.0: 6530; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6531; GFX90A-NEXT: ;;#ASMSTART 6532; GFX90A-NEXT: ; def s[12:17] 6533; GFX90A-NEXT: ;;#ASMEND 6534; GFX90A-NEXT: ;;#ASMSTART 6535; GFX90A-NEXT: ; def s[4:9] 6536; GFX90A-NEXT: ;;#ASMEND 6537; GFX90A-NEXT: s_mov_b32 s10, s16 6538; GFX90A-NEXT: s_mov_b32 s11, s17 6539; GFX90A-NEXT: s_mov_b32 s12, s14 6540; GFX90A-NEXT: s_mov_b32 s13, s15 6541; GFX90A-NEXT: ;;#ASMSTART 6542; GFX90A-NEXT: ; use s[8:13] 6543; GFX90A-NEXT: ;;#ASMEND 6544; GFX90A-NEXT: s_setpc_b64 s[30:31] 6545; 6546; GFX940-LABEL: s_shuffle_v3i64_v3i64__5_2_1: 6547; GFX940: ; %bb.0: 6548; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6549; GFX940-NEXT: ;;#ASMSTART 6550; GFX940-NEXT: ; def s[8:13] 6551; GFX940-NEXT: 
;;#ASMEND 6552; GFX940-NEXT: ;;#ASMSTART 6553; GFX940-NEXT: ; def s[0:5] 6554; GFX940-NEXT: ;;#ASMEND 6555; GFX940-NEXT: s_mov_b32 s8, s12 6556; GFX940-NEXT: s_mov_b32 s9, s13 6557; GFX940-NEXT: s_mov_b32 s10, s4 6558; GFX940-NEXT: s_mov_b32 s11, s5 6559; GFX940-NEXT: s_mov_b32 s12, s2 6560; GFX940-NEXT: s_mov_b32 s13, s3 6561; GFX940-NEXT: ;;#ASMSTART 6562; GFX940-NEXT: ; use s[8:13] 6563; GFX940-NEXT: ;;#ASMEND 6564; GFX940-NEXT: s_setpc_b64 s[30:31] 6565 %vec0 = call <3 x i64> asm "; def $0", "=s"() 6566 %vec1 = call <3 x i64> asm "; def $0", "=s"() 6567 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 2, i32 1> 6568 call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) 6569 ret void 6570} 6571 6572define void @s_shuffle_v3i64_v3i64__5_3_1() { 6573; GFX900-LABEL: s_shuffle_v3i64_v3i64__5_3_1: 6574; GFX900: ; %bb.0: 6575; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6576; GFX900-NEXT: ;;#ASMSTART 6577; GFX900-NEXT: ; def s[4:9] 6578; GFX900-NEXT: ;;#ASMEND 6579; GFX900-NEXT: ;;#ASMSTART 6580; GFX900-NEXT: ; def s[12:17] 6581; GFX900-NEXT: ;;#ASMEND 6582; GFX900-NEXT: s_mov_b32 s8, s16 6583; GFX900-NEXT: s_mov_b32 s9, s17 6584; GFX900-NEXT: s_mov_b32 s10, s12 6585; GFX900-NEXT: s_mov_b32 s11, s13 6586; GFX900-NEXT: s_mov_b32 s12, s6 6587; GFX900-NEXT: s_mov_b32 s13, s7 6588; GFX900-NEXT: ;;#ASMSTART 6589; GFX900-NEXT: ; use s[8:13] 6590; GFX900-NEXT: ;;#ASMEND 6591; GFX900-NEXT: s_setpc_b64 s[30:31] 6592; 6593; GFX90A-LABEL: s_shuffle_v3i64_v3i64__5_3_1: 6594; GFX90A: ; %bb.0: 6595; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6596; GFX90A-NEXT: ;;#ASMSTART 6597; GFX90A-NEXT: ; def s[4:9] 6598; GFX90A-NEXT: ;;#ASMEND 6599; GFX90A-NEXT: ;;#ASMSTART 6600; GFX90A-NEXT: ; def s[12:17] 6601; GFX90A-NEXT: ;;#ASMEND 6602; GFX90A-NEXT: s_mov_b32 s8, s16 6603; GFX90A-NEXT: s_mov_b32 s9, s17 6604; GFX90A-NEXT: s_mov_b32 s10, s12 6605; GFX90A-NEXT: s_mov_b32 s11, s13 6606; GFX90A-NEXT: s_mov_b32 s12, s6 6607; 
GFX90A-NEXT: s_mov_b32 s13, s7 6608; GFX90A-NEXT: ;;#ASMSTART 6609; GFX90A-NEXT: ; use s[8:13] 6610; GFX90A-NEXT: ;;#ASMEND 6611; GFX90A-NEXT: s_setpc_b64 s[30:31] 6612; 6613; GFX940-LABEL: s_shuffle_v3i64_v3i64__5_3_1: 6614; GFX940: ; %bb.0: 6615; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6616; GFX940-NEXT: ;;#ASMSTART 6617; GFX940-NEXT: ; def s[0:5] 6618; GFX940-NEXT: ;;#ASMEND 6619; GFX940-NEXT: s_mov_b32 s12, s2 6620; GFX940-NEXT: ;;#ASMSTART 6621; GFX940-NEXT: ; def s[4:9] 6622; GFX940-NEXT: ;;#ASMEND 6623; GFX940-NEXT: s_mov_b32 s10, s4 6624; GFX940-NEXT: s_mov_b32 s11, s5 6625; GFX940-NEXT: s_mov_b32 s13, s3 6626; GFX940-NEXT: ;;#ASMSTART 6627; GFX940-NEXT: ; use s[8:13] 6628; GFX940-NEXT: ;;#ASMEND 6629; GFX940-NEXT: s_setpc_b64 s[30:31] 6630 %vec0 = call <3 x i64> asm "; def $0", "=s"() 6631 %vec1 = call <3 x i64> asm "; def $0", "=s"() 6632 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 3, i32 1> 6633 call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) 6634 ret void 6635} 6636 6637define void @s_shuffle_v3i64_v3i64__5_4_1() { 6638; GFX900-LABEL: s_shuffle_v3i64_v3i64__5_4_1: 6639; GFX900: ; %bb.0: 6640; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6641; GFX900-NEXT: ;;#ASMSTART 6642; GFX900-NEXT: ; def s[4:9] 6643; GFX900-NEXT: ;;#ASMEND 6644; GFX900-NEXT: ;;#ASMSTART 6645; GFX900-NEXT: ; def s[8:13] 6646; GFX900-NEXT: ;;#ASMEND 6647; GFX900-NEXT: s_mov_b32 s8, s12 6648; GFX900-NEXT: s_mov_b32 s9, s13 6649; GFX900-NEXT: s_mov_b32 s12, s6 6650; GFX900-NEXT: s_mov_b32 s13, s7 6651; GFX900-NEXT: ;;#ASMSTART 6652; GFX900-NEXT: ; use s[8:13] 6653; GFX900-NEXT: ;;#ASMEND 6654; GFX900-NEXT: s_setpc_b64 s[30:31] 6655; 6656; GFX90A-LABEL: s_shuffle_v3i64_v3i64__5_4_1: 6657; GFX90A: ; %bb.0: 6658; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6659; GFX90A-NEXT: ;;#ASMSTART 6660; GFX90A-NEXT: ; def s[4:9] 6661; GFX90A-NEXT: ;;#ASMEND 6662; GFX90A-NEXT: ;;#ASMSTART 6663; GFX90A-NEXT: ; def s[8:13] 
6664; GFX90A-NEXT: ;;#ASMEND 6665; GFX90A-NEXT: s_mov_b32 s8, s12 6666; GFX90A-NEXT: s_mov_b32 s9, s13 6667; GFX90A-NEXT: s_mov_b32 s12, s6 6668; GFX90A-NEXT: s_mov_b32 s13, s7 6669; GFX90A-NEXT: ;;#ASMSTART 6670; GFX90A-NEXT: ; use s[8:13] 6671; GFX90A-NEXT: ;;#ASMEND 6672; GFX90A-NEXT: s_setpc_b64 s[30:31] 6673; 6674; GFX940-LABEL: s_shuffle_v3i64_v3i64__5_4_1: 6675; GFX940: ; %bb.0: 6676; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6677; GFX940-NEXT: ;;#ASMSTART 6678; GFX940-NEXT: ; def s[8:13] 6679; GFX940-NEXT: ;;#ASMEND 6680; GFX940-NEXT: ;;#ASMSTART 6681; GFX940-NEXT: ; def s[0:5] 6682; GFX940-NEXT: ;;#ASMEND 6683; GFX940-NEXT: s_mov_b32 s8, s12 6684; GFX940-NEXT: s_mov_b32 s9, s13 6685; GFX940-NEXT: s_mov_b32 s12, s2 6686; GFX940-NEXT: s_mov_b32 s13, s3 6687; GFX940-NEXT: ;;#ASMSTART 6688; GFX940-NEXT: ; use s[8:13] 6689; GFX940-NEXT: ;;#ASMEND 6690; GFX940-NEXT: s_setpc_b64 s[30:31] 6691 %vec0 = call <3 x i64> asm "; def $0", "=s"() 6692 %vec1 = call <3 x i64> asm "; def $0", "=s"() 6693 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 4, i32 1> 6694 call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) 6695 ret void 6696} 6697 6698define void @s_shuffle_v3i64_v3i64__u_2_2() { 6699; GFX9-LABEL: s_shuffle_v3i64_v3i64__u_2_2: 6700; GFX9: ; %bb.0: 6701; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6702; GFX9-NEXT: ;;#ASMSTART 6703; GFX9-NEXT: ; def s[8:13] 6704; GFX9-NEXT: ;;#ASMEND 6705; GFX9-NEXT: s_mov_b32 s10, s12 6706; GFX9-NEXT: s_mov_b32 s11, s13 6707; GFX9-NEXT: ;;#ASMSTART 6708; GFX9-NEXT: ; use s[8:13] 6709; GFX9-NEXT: ;;#ASMEND 6710; GFX9-NEXT: s_setpc_b64 s[30:31] 6711 %vec0 = call <3 x i64> asm "; def $0", "=s"() 6712 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> <i32 poison, i32 2, i32 2> 6713 call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) 6714 ret void 6715} 6716 6717define void @s_shuffle_v3i64_v3i64__0_2_2() { 6718; GFX9-LABEL: 
s_shuffle_v3i64_v3i64__0_2_2: 6719; GFX9: ; %bb.0: 6720; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6721; GFX9-NEXT: ;;#ASMSTART 6722; GFX9-NEXT: ; def s[8:13] 6723; GFX9-NEXT: ;;#ASMEND 6724; GFX9-NEXT: s_mov_b32 s10, s12 6725; GFX9-NEXT: s_mov_b32 s11, s13 6726; GFX9-NEXT: ;;#ASMSTART 6727; GFX9-NEXT: ; use s[8:13] 6728; GFX9-NEXT: ;;#ASMEND 6729; GFX9-NEXT: s_setpc_b64 s[30:31] 6730 %vec0 = call <3 x i64> asm "; def $0", "=s"() 6731 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> <i32 0, i32 2, i32 2> 6732 call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) 6733 ret void 6734} 6735 6736define void @s_shuffle_v3i64_v3i64__1_2_2() { 6737; GFX9-LABEL: s_shuffle_v3i64_v3i64__1_2_2: 6738; GFX9: ; %bb.0: 6739; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6740; GFX9-NEXT: ;;#ASMSTART 6741; GFX9-NEXT: ; def s[8:13] 6742; GFX9-NEXT: ;;#ASMEND 6743; GFX9-NEXT: s_mov_b32 s8, s10 6744; GFX9-NEXT: s_mov_b32 s9, s11 6745; GFX9-NEXT: s_mov_b32 s10, s12 6746; GFX9-NEXT: s_mov_b32 s11, s13 6747; GFX9-NEXT: ;;#ASMSTART 6748; GFX9-NEXT: ; use s[8:13] 6749; GFX9-NEXT: ;;#ASMEND 6750; GFX9-NEXT: s_setpc_b64 s[30:31] 6751 %vec0 = call <3 x i64> asm "; def $0", "=s"() 6752 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> <i32 1, i32 2, i32 2> 6753 call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) 6754 ret void 6755} 6756 6757define void @s_shuffle_v3i64_v3i64__2_2_2() { 6758; GFX9-LABEL: s_shuffle_v3i64_v3i64__2_2_2: 6759; GFX9: ; %bb.0: 6760; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6761; GFX9-NEXT: ;;#ASMSTART 6762; GFX9-NEXT: ; def s[8:13] 6763; GFX9-NEXT: ;;#ASMEND 6764; GFX9-NEXT: s_mov_b32 s8, s12 6765; GFX9-NEXT: s_mov_b32 s9, s13 6766; GFX9-NEXT: s_mov_b32 s10, s12 6767; GFX9-NEXT: s_mov_b32 s11, s13 6768; GFX9-NEXT: ;;#ASMSTART 6769; GFX9-NEXT: ; use s[8:13] 6770; GFX9-NEXT: ;;#ASMEND 6771; GFX9-NEXT: s_setpc_b64 s[30:31] 6772 %vec0 = call <3 x i64> asm "; def $0", "=s"() 6773 %shuf = 
shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> <i32 2, i32 2, i32 2> 6774 call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) 6775 ret void 6776} 6777 6778define void @s_shuffle_v3i64_v3i64__3_2_2() { 6779; GFX9-LABEL: s_shuffle_v3i64_v3i64__3_2_2: 6780; GFX9: ; %bb.0: 6781; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6782; GFX9-NEXT: ;;#ASMSTART 6783; GFX9-NEXT: ; def s[8:13] 6784; GFX9-NEXT: ;;#ASMEND 6785; GFX9-NEXT: s_mov_b32 s10, s12 6786; GFX9-NEXT: s_mov_b32 s11, s13 6787; GFX9-NEXT: ;;#ASMSTART 6788; GFX9-NEXT: ; use s[8:13] 6789; GFX9-NEXT: ;;#ASMEND 6790; GFX9-NEXT: s_setpc_b64 s[30:31] 6791 %vec0 = call <3 x i64> asm "; def $0", "=s"() 6792 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> <i32 3, i32 2, i32 2> 6793 call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) 6794 ret void 6795} 6796 6797define void @s_shuffle_v3i64_v3i64__4_2_2() { 6798; GFX900-LABEL: s_shuffle_v3i64_v3i64__4_2_2: 6799; GFX900: ; %bb.0: 6800; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6801; GFX900-NEXT: ;;#ASMSTART 6802; GFX900-NEXT: ; def s[8:13] 6803; GFX900-NEXT: ;;#ASMEND 6804; GFX900-NEXT: ;;#ASMSTART 6805; GFX900-NEXT: ; def s[4:9] 6806; GFX900-NEXT: ;;#ASMEND 6807; GFX900-NEXT: s_mov_b32 s8, s6 6808; GFX900-NEXT: s_mov_b32 s9, s7 6809; GFX900-NEXT: s_mov_b32 s10, s12 6810; GFX900-NEXT: s_mov_b32 s11, s13 6811; GFX900-NEXT: ;;#ASMSTART 6812; GFX900-NEXT: ; use s[8:13] 6813; GFX900-NEXT: ;;#ASMEND 6814; GFX900-NEXT: s_setpc_b64 s[30:31] 6815; 6816; GFX90A-LABEL: s_shuffle_v3i64_v3i64__4_2_2: 6817; GFX90A: ; %bb.0: 6818; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6819; GFX90A-NEXT: ;;#ASMSTART 6820; GFX90A-NEXT: ; def s[8:13] 6821; GFX90A-NEXT: ;;#ASMEND 6822; GFX90A-NEXT: ;;#ASMSTART 6823; GFX90A-NEXT: ; def s[4:9] 6824; GFX90A-NEXT: ;;#ASMEND 6825; GFX90A-NEXT: s_mov_b32 s8, s6 6826; GFX90A-NEXT: s_mov_b32 s9, s7 6827; GFX90A-NEXT: s_mov_b32 s10, s12 6828; GFX90A-NEXT: s_mov_b32 s11, s13 
6829; GFX90A-NEXT: ;;#ASMSTART 6830; GFX90A-NEXT: ; use s[8:13] 6831; GFX90A-NEXT: ;;#ASMEND 6832; GFX90A-NEXT: s_setpc_b64 s[30:31] 6833; 6834; GFX940-LABEL: s_shuffle_v3i64_v3i64__4_2_2: 6835; GFX940: ; %bb.0: 6836; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6837; GFX940-NEXT: ;;#ASMSTART 6838; GFX940-NEXT: ; def s[8:13] 6839; GFX940-NEXT: ;;#ASMEND 6840; GFX940-NEXT: ;;#ASMSTART 6841; GFX940-NEXT: ; def s[0:5] 6842; GFX940-NEXT: ;;#ASMEND 6843; GFX940-NEXT: s_mov_b32 s8, s2 6844; GFX940-NEXT: s_mov_b32 s9, s3 6845; GFX940-NEXT: s_mov_b32 s10, s12 6846; GFX940-NEXT: s_mov_b32 s11, s13 6847; GFX940-NEXT: ;;#ASMSTART 6848; GFX940-NEXT: ; use s[8:13] 6849; GFX940-NEXT: ;;#ASMEND 6850; GFX940-NEXT: s_setpc_b64 s[30:31] 6851 %vec0 = call <3 x i64> asm "; def $0", "=s"() 6852 %vec1 = call <3 x i64> asm "; def $0", "=s"() 6853 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 4, i32 2, i32 2> 6854 call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) 6855 ret void 6856} 6857 6858define void @s_shuffle_v3i64_v3i64__5_2_2() { 6859; GFX900-LABEL: s_shuffle_v3i64_v3i64__5_2_2: 6860; GFX900: ; %bb.0: 6861; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6862; GFX900-NEXT: ;;#ASMSTART 6863; GFX900-NEXT: ; def s[8:13] 6864; GFX900-NEXT: ;;#ASMEND 6865; GFX900-NEXT: ;;#ASMSTART 6866; GFX900-NEXT: ; def s[4:9] 6867; GFX900-NEXT: ;;#ASMEND 6868; GFX900-NEXT: s_mov_b32 s10, s12 6869; GFX900-NEXT: s_mov_b32 s11, s13 6870; GFX900-NEXT: ;;#ASMSTART 6871; GFX900-NEXT: ; use s[8:13] 6872; GFX900-NEXT: ;;#ASMEND 6873; GFX900-NEXT: s_setpc_b64 s[30:31] 6874; 6875; GFX90A-LABEL: s_shuffle_v3i64_v3i64__5_2_2: 6876; GFX90A: ; %bb.0: 6877; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6878; GFX90A-NEXT: ;;#ASMSTART 6879; GFX90A-NEXT: ; def s[8:13] 6880; GFX90A-NEXT: ;;#ASMEND 6881; GFX90A-NEXT: ;;#ASMSTART 6882; GFX90A-NEXT: ; def s[4:9] 6883; GFX90A-NEXT: ;;#ASMEND 6884; GFX90A-NEXT: s_mov_b32 s10, s12 6885; GFX90A-NEXT: s_mov_b32 s11, 
s13 6886; GFX90A-NEXT: ;;#ASMSTART 6887; GFX90A-NEXT: ; use s[8:13] 6888; GFX90A-NEXT: ;;#ASMEND 6889; GFX90A-NEXT: s_setpc_b64 s[30:31] 6890; 6891; GFX940-LABEL: s_shuffle_v3i64_v3i64__5_2_2: 6892; GFX940: ; %bb.0: 6893; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6894; GFX940-NEXT: ;;#ASMSTART 6895; GFX940-NEXT: ; def s[8:13] 6896; GFX940-NEXT: ;;#ASMEND 6897; GFX940-NEXT: ;;#ASMSTART 6898; GFX940-NEXT: ; def s[0:5] 6899; GFX940-NEXT: ;;#ASMEND 6900; GFX940-NEXT: s_mov_b32 s8, s4 6901; GFX940-NEXT: s_mov_b32 s9, s5 6902; GFX940-NEXT: s_mov_b32 s10, s12 6903; GFX940-NEXT: s_mov_b32 s11, s13 6904; GFX940-NEXT: ;;#ASMSTART 6905; GFX940-NEXT: ; use s[8:13] 6906; GFX940-NEXT: ;;#ASMEND 6907; GFX940-NEXT: s_setpc_b64 s[30:31] 6908 %vec0 = call <3 x i64> asm "; def $0", "=s"() 6909 %vec1 = call <3 x i64> asm "; def $0", "=s"() 6910 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 2, i32 2> 6911 call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) 6912 ret void 6913} 6914 6915define void @s_shuffle_v3i64_v3i64__5_u_2() { 6916; GFX900-LABEL: s_shuffle_v3i64_v3i64__5_u_2: 6917; GFX900: ; %bb.0: 6918; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6919; GFX900-NEXT: ;;#ASMSTART 6920; GFX900-NEXT: ; def s[8:13] 6921; GFX900-NEXT: ;;#ASMEND 6922; GFX900-NEXT: ;;#ASMSTART 6923; GFX900-NEXT: ; def s[4:9] 6924; GFX900-NEXT: ;;#ASMEND 6925; GFX900-NEXT: ;;#ASMSTART 6926; GFX900-NEXT: ; use s[8:13] 6927; GFX900-NEXT: ;;#ASMEND 6928; GFX900-NEXT: s_setpc_b64 s[30:31] 6929; 6930; GFX90A-LABEL: s_shuffle_v3i64_v3i64__5_u_2: 6931; GFX90A: ; %bb.0: 6932; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6933; GFX90A-NEXT: ;;#ASMSTART 6934; GFX90A-NEXT: ; def s[8:13] 6935; GFX90A-NEXT: ;;#ASMEND 6936; GFX90A-NEXT: ;;#ASMSTART 6937; GFX90A-NEXT: ; def s[4:9] 6938; GFX90A-NEXT: ;;#ASMEND 6939; GFX90A-NEXT: ;;#ASMSTART 6940; GFX90A-NEXT: ; use s[8:13] 6941; GFX90A-NEXT: ;;#ASMEND 6942; GFX90A-NEXT: s_setpc_b64 s[30:31] 6943; 6944; 
GFX940-LABEL: s_shuffle_v3i64_v3i64__5_u_2: 6945; GFX940: ; %bb.0: 6946; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6947; GFX940-NEXT: ;;#ASMSTART 6948; GFX940-NEXT: ; def s[8:13] 6949; GFX940-NEXT: ;;#ASMEND 6950; GFX940-NEXT: ;;#ASMSTART 6951; GFX940-NEXT: ; def s[0:5] 6952; GFX940-NEXT: ;;#ASMEND 6953; GFX940-NEXT: s_mov_b32 s8, s4 6954; GFX940-NEXT: s_mov_b32 s9, s5 6955; GFX940-NEXT: ;;#ASMSTART 6956; GFX940-NEXT: ; use s[8:13] 6957; GFX940-NEXT: ;;#ASMEND 6958; GFX940-NEXT: s_setpc_b64 s[30:31] 6959 %vec0 = call <3 x i64> asm "; def $0", "=s"() 6960 %vec1 = call <3 x i64> asm "; def $0", "=s"() 6961 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 poison, i32 2> 6962 call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) 6963 ret void 6964} 6965 6966define void @s_shuffle_v3i64_v3i64__5_0_2() { 6967; GFX900-LABEL: s_shuffle_v3i64_v3i64__5_0_2: 6968; GFX900: ; %bb.0: 6969; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6970; GFX900-NEXT: ;;#ASMSTART 6971; GFX900-NEXT: ; def s[12:17] 6972; GFX900-NEXT: ;;#ASMEND 6973; GFX900-NEXT: ;;#ASMSTART 6974; GFX900-NEXT: ; def s[4:9] 6975; GFX900-NEXT: ;;#ASMEND 6976; GFX900-NEXT: s_mov_b32 s10, s12 6977; GFX900-NEXT: s_mov_b32 s11, s13 6978; GFX900-NEXT: s_mov_b32 s12, s16 6979; GFX900-NEXT: s_mov_b32 s13, s17 6980; GFX900-NEXT: ;;#ASMSTART 6981; GFX900-NEXT: ; use s[8:13] 6982; GFX900-NEXT: ;;#ASMEND 6983; GFX900-NEXT: s_setpc_b64 s[30:31] 6984; 6985; GFX90A-LABEL: s_shuffle_v3i64_v3i64__5_0_2: 6986; GFX90A: ; %bb.0: 6987; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6988; GFX90A-NEXT: ;;#ASMSTART 6989; GFX90A-NEXT: ; def s[12:17] 6990; GFX90A-NEXT: ;;#ASMEND 6991; GFX90A-NEXT: ;;#ASMSTART 6992; GFX90A-NEXT: ; def s[4:9] 6993; GFX90A-NEXT: ;;#ASMEND 6994; GFX90A-NEXT: s_mov_b32 s10, s12 6995; GFX90A-NEXT: s_mov_b32 s11, s13 6996; GFX90A-NEXT: s_mov_b32 s12, s16 6997; GFX90A-NEXT: s_mov_b32 s13, s17 6998; GFX90A-NEXT: ;;#ASMSTART 6999; GFX90A-NEXT: ; use 
s[8:13] 7000; GFX90A-NEXT: ;;#ASMEND 7001; GFX90A-NEXT: s_setpc_b64 s[30:31] 7002; 7003; GFX940-LABEL: s_shuffle_v3i64_v3i64__5_0_2: 7004; GFX940: ; %bb.0: 7005; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7006; GFX940-NEXT: ;;#ASMSTART 7007; GFX940-NEXT: ; def s[8:13] 7008; GFX940-NEXT: ;;#ASMEND 7009; GFX940-NEXT: ;;#ASMSTART 7010; GFX940-NEXT: ; def s[0:5] 7011; GFX940-NEXT: ;;#ASMEND 7012; GFX940-NEXT: s_mov_b32 s8, s12 7013; GFX940-NEXT: s_mov_b32 s9, s13 7014; GFX940-NEXT: s_mov_b32 s10, s0 7015; GFX940-NEXT: s_mov_b32 s11, s1 7016; GFX940-NEXT: s_mov_b32 s12, s4 7017; GFX940-NEXT: s_mov_b32 s13, s5 7018; GFX940-NEXT: ;;#ASMSTART 7019; GFX940-NEXT: ; use s[8:13] 7020; GFX940-NEXT: ;;#ASMEND 7021; GFX940-NEXT: s_setpc_b64 s[30:31] 7022 %vec0 = call <3 x i64> asm "; def $0", "=s"() 7023 %vec1 = call <3 x i64> asm "; def $0", "=s"() 7024 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 0, i32 2> 7025 call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) 7026 ret void 7027} 7028 7029define void @s_shuffle_v3i64_v3i64__5_1_2() { 7030; GFX900-LABEL: s_shuffle_v3i64_v3i64__5_1_2: 7031; GFX900: ; %bb.0: 7032; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7033; GFX900-NEXT: ;;#ASMSTART 7034; GFX900-NEXT: ; def s[8:13] 7035; GFX900-NEXT: ;;#ASMEND 7036; GFX900-NEXT: ;;#ASMSTART 7037; GFX900-NEXT: ; def s[4:9] 7038; GFX900-NEXT: ;;#ASMEND 7039; GFX900-NEXT: ;;#ASMSTART 7040; GFX900-NEXT: ; use s[8:13] 7041; GFX900-NEXT: ;;#ASMEND 7042; GFX900-NEXT: s_setpc_b64 s[30:31] 7043; 7044; GFX90A-LABEL: s_shuffle_v3i64_v3i64__5_1_2: 7045; GFX90A: ; %bb.0: 7046; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7047; GFX90A-NEXT: ;;#ASMSTART 7048; GFX90A-NEXT: ; def s[8:13] 7049; GFX90A-NEXT: ;;#ASMEND 7050; GFX90A-NEXT: ;;#ASMSTART 7051; GFX90A-NEXT: ; def s[4:9] 7052; GFX90A-NEXT: ;;#ASMEND 7053; GFX90A-NEXT: ;;#ASMSTART 7054; GFX90A-NEXT: ; use s[8:13] 7055; GFX90A-NEXT: ;;#ASMEND 7056; GFX90A-NEXT: s_setpc_b64 
s[30:31] 7057; 7058; GFX940-LABEL: s_shuffle_v3i64_v3i64__5_1_2: 7059; GFX940: ; %bb.0: 7060; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7061; GFX940-NEXT: ;;#ASMSTART 7062; GFX940-NEXT: ; def s[8:13] 7063; GFX940-NEXT: ;;#ASMEND 7064; GFX940-NEXT: ;;#ASMSTART 7065; GFX940-NEXT: ; def s[0:5] 7066; GFX940-NEXT: ;;#ASMEND 7067; GFX940-NEXT: s_mov_b32 s8, s4 7068; GFX940-NEXT: s_mov_b32 s9, s5 7069; GFX940-NEXT: ;;#ASMSTART 7070; GFX940-NEXT: ; use s[8:13] 7071; GFX940-NEXT: ;;#ASMEND 7072; GFX940-NEXT: s_setpc_b64 s[30:31] 7073 %vec0 = call <3 x i64> asm "; def $0", "=s"() 7074 %vec1 = call <3 x i64> asm "; def $0", "=s"() 7075 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 1, i32 2> 7076 call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) 7077 ret void 7078} 7079 7080define void @s_shuffle_v3i64_v3i64__5_3_2() { 7081; GFX900-LABEL: s_shuffle_v3i64_v3i64__5_3_2: 7082; GFX900: ; %bb.0: 7083; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7084; GFX900-NEXT: ;;#ASMSTART 7085; GFX900-NEXT: ; def s[8:13] 7086; GFX900-NEXT: ;;#ASMEND 7087; GFX900-NEXT: ;;#ASMSTART 7088; GFX900-NEXT: ; def s[4:9] 7089; GFX900-NEXT: ;;#ASMEND 7090; GFX900-NEXT: s_mov_b32 s10, s4 7091; GFX900-NEXT: s_mov_b32 s11, s5 7092; GFX900-NEXT: ;;#ASMSTART 7093; GFX900-NEXT: ; use s[8:13] 7094; GFX900-NEXT: ;;#ASMEND 7095; GFX900-NEXT: s_setpc_b64 s[30:31] 7096; 7097; GFX90A-LABEL: s_shuffle_v3i64_v3i64__5_3_2: 7098; GFX90A: ; %bb.0: 7099; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7100; GFX90A-NEXT: ;;#ASMSTART 7101; GFX90A-NEXT: ; def s[8:13] 7102; GFX90A-NEXT: ;;#ASMEND 7103; GFX90A-NEXT: ;;#ASMSTART 7104; GFX90A-NEXT: ; def s[4:9] 7105; GFX90A-NEXT: ;;#ASMEND 7106; GFX90A-NEXT: s_mov_b32 s10, s4 7107; GFX90A-NEXT: s_mov_b32 s11, s5 7108; GFX90A-NEXT: ;;#ASMSTART 7109; GFX90A-NEXT: ; use s[8:13] 7110; GFX90A-NEXT: ;;#ASMEND 7111; GFX90A-NEXT: s_setpc_b64 s[30:31] 7112; 7113; GFX940-LABEL: s_shuffle_v3i64_v3i64__5_3_2: 7114; 
GFX940: ; %bb.0: 7115; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7116; GFX940-NEXT: ;;#ASMSTART 7117; GFX940-NEXT: ; def s[8:13] 7118; GFX940-NEXT: ;;#ASMEND 7119; GFX940-NEXT: ;;#ASMSTART 7120; GFX940-NEXT: ; def s[0:5] 7121; GFX940-NEXT: ;;#ASMEND 7122; GFX940-NEXT: s_mov_b32 s8, s4 7123; GFX940-NEXT: s_mov_b32 s9, s5 7124; GFX940-NEXT: s_mov_b32 s10, s0 7125; GFX940-NEXT: s_mov_b32 s11, s1 7126; GFX940-NEXT: ;;#ASMSTART 7127; GFX940-NEXT: ; use s[8:13] 7128; GFX940-NEXT: ;;#ASMEND 7129; GFX940-NEXT: s_setpc_b64 s[30:31] 7130 %vec0 = call <3 x i64> asm "; def $0", "=s"() 7131 %vec1 = call <3 x i64> asm "; def $0", "=s"() 7132 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 3, i32 2> 7133 call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) 7134 ret void 7135} 7136 7137define void @s_shuffle_v3i64_v3i64__5_4_2() { 7138; GFX900-LABEL: s_shuffle_v3i64_v3i64__5_4_2: 7139; GFX900: ; %bb.0: 7140; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7141; GFX900-NEXT: ;;#ASMSTART 7142; GFX900-NEXT: ; def s[12:17] 7143; GFX900-NEXT: ;;#ASMEND 7144; GFX900-NEXT: ;;#ASMSTART 7145; GFX900-NEXT: ; def s[8:13] 7146; GFX900-NEXT: ;;#ASMEND 7147; GFX900-NEXT: s_mov_b32 s8, s12 7148; GFX900-NEXT: s_mov_b32 s9, s13 7149; GFX900-NEXT: s_mov_b32 s12, s16 7150; GFX900-NEXT: s_mov_b32 s13, s17 7151; GFX900-NEXT: ;;#ASMSTART 7152; GFX900-NEXT: ; use s[8:13] 7153; GFX900-NEXT: ;;#ASMEND 7154; GFX900-NEXT: s_setpc_b64 s[30:31] 7155; 7156; GFX90A-LABEL: s_shuffle_v3i64_v3i64__5_4_2: 7157; GFX90A: ; %bb.0: 7158; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7159; GFX90A-NEXT: ;;#ASMSTART 7160; GFX90A-NEXT: ; def s[12:17] 7161; GFX90A-NEXT: ;;#ASMEND 7162; GFX90A-NEXT: ;;#ASMSTART 7163; GFX90A-NEXT: ; def s[8:13] 7164; GFX90A-NEXT: ;;#ASMEND 7165; GFX90A-NEXT: s_mov_b32 s8, s12 7166; GFX90A-NEXT: s_mov_b32 s9, s13 7167; GFX90A-NEXT: s_mov_b32 s12, s16 7168; GFX90A-NEXT: s_mov_b32 s13, s17 7169; GFX90A-NEXT: ;;#ASMSTART 7170; 
GFX90A-NEXT: ; use s[8:13] 7171; GFX90A-NEXT: ;;#ASMEND 7172; GFX90A-NEXT: s_setpc_b64 s[30:31] 7173; 7174; GFX940-LABEL: s_shuffle_v3i64_v3i64__5_4_2: 7175; GFX940: ; %bb.0: 7176; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7177; GFX940-NEXT: ;;#ASMSTART 7178; GFX940-NEXT: ; def s[8:13] 7179; GFX940-NEXT: ;;#ASMEND 7180; GFX940-NEXT: ;;#ASMSTART 7181; GFX940-NEXT: ; def s[0:5] 7182; GFX940-NEXT: ;;#ASMEND 7183; GFX940-NEXT: s_mov_b32 s8, s12 7184; GFX940-NEXT: s_mov_b32 s9, s13 7185; GFX940-NEXT: s_mov_b32 s12, s4 7186; GFX940-NEXT: s_mov_b32 s13, s5 7187; GFX940-NEXT: ;;#ASMSTART 7188; GFX940-NEXT: ; use s[8:13] 7189; GFX940-NEXT: ;;#ASMEND 7190; GFX940-NEXT: s_setpc_b64 s[30:31] 7191 %vec0 = call <3 x i64> asm "; def $0", "=s"() 7192 %vec1 = call <3 x i64> asm "; def $0", "=s"() 7193 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 4, i32 2> 7194 call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) 7195 ret void 7196} 7197 7198define void @s_shuffle_v3i64_v3i64__u_3_3() { 7199; GFX9-LABEL: s_shuffle_v3i64_v3i64__u_3_3: 7200; GFX9: ; %bb.0: 7201; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7202; GFX9-NEXT: ;;#ASMSTART 7203; GFX9-NEXT: ; use s[8:13] 7204; GFX9-NEXT: ;;#ASMEND 7205; GFX9-NEXT: s_setpc_b64 s[30:31] 7206 %vec0 = call <3 x i64> asm "; def $0", "=s"() 7207 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> <i32 poison, i32 3, i32 3> 7208 call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) 7209 ret void 7210} 7211 7212define void @s_shuffle_v3i64_v3i64__0_3_3() { 7213; GFX900-LABEL: s_shuffle_v3i64_v3i64__0_3_3: 7214; GFX900: ; %bb.0: 7215; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7216; GFX900-NEXT: ;;#ASMSTART 7217; GFX900-NEXT: ; def s[8:13] 7218; GFX900-NEXT: ;;#ASMEND 7219; GFX900-NEXT: ;;#ASMSTART 7220; GFX900-NEXT: ; use s[8:13] 7221; GFX900-NEXT: ;;#ASMEND 7222; GFX900-NEXT: s_setpc_b64 s[30:31] 7223; 7224; GFX90A-LABEL: 
s_shuffle_v3i64_v3i64__0_3_3: 7225; GFX90A: ; %bb.0: 7226; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7227; GFX90A-NEXT: ;;#ASMSTART 7228; GFX90A-NEXT: ; def s[8:13] 7229; GFX90A-NEXT: ;;#ASMEND 7230; GFX90A-NEXT: ;;#ASMSTART 7231; GFX90A-NEXT: ; use s[8:13] 7232; GFX90A-NEXT: ;;#ASMEND 7233; GFX90A-NEXT: s_setpc_b64 s[30:31] 7234; 7235; GFX940-LABEL: s_shuffle_v3i64_v3i64__0_3_3: 7236; GFX940: ; %bb.0: 7237; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7238; GFX940-NEXT: ;;#ASMSTART 7239; GFX940-NEXT: ; def s[8:13] 7240; GFX940-NEXT: ;;#ASMEND 7241; GFX940-NEXT: s_nop 0 7242; GFX940-NEXT: ;;#ASMSTART 7243; GFX940-NEXT: ; use s[8:13] 7244; GFX940-NEXT: ;;#ASMEND 7245; GFX940-NEXT: s_setpc_b64 s[30:31] 7246 %vec0 = call <3 x i64> asm "; def $0", "=s"() 7247 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> <i32 0, i32 3, i32 3> 7248 call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) 7249 ret void 7250} 7251 7252define void @s_shuffle_v3i64_v3i64__1_3_3() { 7253; GFX900-LABEL: s_shuffle_v3i64_v3i64__1_3_3: 7254; GFX900: ; %bb.0: 7255; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7256; GFX900-NEXT: ;;#ASMSTART 7257; GFX900-NEXT: ; def s[4:9] 7258; GFX900-NEXT: ;;#ASMEND 7259; GFX900-NEXT: s_mov_b32 s8, s6 7260; GFX900-NEXT: s_mov_b32 s9, s7 7261; GFX900-NEXT: ;;#ASMSTART 7262; GFX900-NEXT: ; use s[8:13] 7263; GFX900-NEXT: ;;#ASMEND 7264; GFX900-NEXT: s_setpc_b64 s[30:31] 7265; 7266; GFX90A-LABEL: s_shuffle_v3i64_v3i64__1_3_3: 7267; GFX90A: ; %bb.0: 7268; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7269; GFX90A-NEXT: ;;#ASMSTART 7270; GFX90A-NEXT: ; def s[4:9] 7271; GFX90A-NEXT: ;;#ASMEND 7272; GFX90A-NEXT: s_mov_b32 s8, s6 7273; GFX90A-NEXT: s_mov_b32 s9, s7 7274; GFX90A-NEXT: ;;#ASMSTART 7275; GFX90A-NEXT: ; use s[8:13] 7276; GFX90A-NEXT: ;;#ASMEND 7277; GFX90A-NEXT: s_setpc_b64 s[30:31] 7278; 7279; GFX940-LABEL: s_shuffle_v3i64_v3i64__1_3_3: 7280; GFX940: ; %bb.0: 7281; GFX940-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) 7282; GFX940-NEXT: ;;#ASMSTART 7283; GFX940-NEXT: ; def s[0:5] 7284; GFX940-NEXT: ;;#ASMEND 7285; GFX940-NEXT: s_mov_b32 s8, s2 7286; GFX940-NEXT: s_mov_b32 s9, s3 7287; GFX940-NEXT: ;;#ASMSTART 7288; GFX940-NEXT: ; use s[8:13] 7289; GFX940-NEXT: ;;#ASMEND 7290; GFX940-NEXT: s_setpc_b64 s[30:31] 7291 %vec0 = call <3 x i64> asm "; def $0", "=s"() 7292 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> <i32 1, i32 3, i32 3> 7293 call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) 7294 ret void 7295} 7296 7297define void @s_shuffle_v3i64_v3i64__2_3_3() { 7298; GFX900-LABEL: s_shuffle_v3i64_v3i64__2_3_3: 7299; GFX900: ; %bb.0: 7300; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7301; GFX900-NEXT: ;;#ASMSTART 7302; GFX900-NEXT: ; def s[4:9] 7303; GFX900-NEXT: ;;#ASMEND 7304; GFX900-NEXT: ;;#ASMSTART 7305; GFX900-NEXT: ; use s[8:13] 7306; GFX900-NEXT: ;;#ASMEND 7307; GFX900-NEXT: s_setpc_b64 s[30:31] 7308; 7309; GFX90A-LABEL: s_shuffle_v3i64_v3i64__2_3_3: 7310; GFX90A: ; %bb.0: 7311; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7312; GFX90A-NEXT: ;;#ASMSTART 7313; GFX90A-NEXT: ; def s[4:9] 7314; GFX90A-NEXT: ;;#ASMEND 7315; GFX90A-NEXT: ;;#ASMSTART 7316; GFX90A-NEXT: ; use s[8:13] 7317; GFX90A-NEXT: ;;#ASMEND 7318; GFX90A-NEXT: s_setpc_b64 s[30:31] 7319; 7320; GFX940-LABEL: s_shuffle_v3i64_v3i64__2_3_3: 7321; GFX940: ; %bb.0: 7322; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7323; GFX940-NEXT: ;;#ASMSTART 7324; GFX940-NEXT: ; def s[0:5] 7325; GFX940-NEXT: ;;#ASMEND 7326; GFX940-NEXT: s_mov_b32 s8, s4 7327; GFX940-NEXT: s_mov_b32 s9, s5 7328; GFX940-NEXT: ;;#ASMSTART 7329; GFX940-NEXT: ; use s[8:13] 7330; GFX940-NEXT: ;;#ASMEND 7331; GFX940-NEXT: s_setpc_b64 s[30:31] 7332 %vec0 = call <3 x i64> asm "; def $0", "=s"() 7333 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> <i32 2, i32 3, i32 3> 7334 call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) 7335 ret 
void 7336} 7337 7338define void @s_shuffle_v3i64_v3i64__3_3_3() { 7339; GFX9-LABEL: s_shuffle_v3i64_v3i64__3_3_3: 7340; GFX9: ; %bb.0: 7341; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7342; GFX9-NEXT: ;;#ASMSTART 7343; GFX9-NEXT: ; use s[8:13] 7344; GFX9-NEXT: ;;#ASMEND 7345; GFX9-NEXT: s_setpc_b64 s[30:31] 7346 %vec0 = call <3 x i64> asm "; def $0", "=s"() 7347 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> <i32 3, i32 3, i32 3> 7348 call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) 7349 ret void 7350} 7351 7352define void @s_shuffle_v3i64_v3i64__4_3_3() { 7353; GFX900-LABEL: s_shuffle_v3i64_v3i64__4_3_3: 7354; GFX900: ; %bb.0: 7355; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7356; GFX900-NEXT: ;;#ASMSTART 7357; GFX900-NEXT: ; def s[4:9] 7358; GFX900-NEXT: ;;#ASMEND 7359; GFX900-NEXT: s_mov_b32 s8, s6 7360; GFX900-NEXT: s_mov_b32 s9, s7 7361; GFX900-NEXT: s_mov_b32 s10, s4 7362; GFX900-NEXT: s_mov_b32 s11, s5 7363; GFX900-NEXT: s_mov_b32 s12, s4 7364; GFX900-NEXT: s_mov_b32 s13, s5 7365; GFX900-NEXT: ;;#ASMSTART 7366; GFX900-NEXT: ; use s[8:13] 7367; GFX900-NEXT: ;;#ASMEND 7368; GFX900-NEXT: s_setpc_b64 s[30:31] 7369; 7370; GFX90A-LABEL: s_shuffle_v3i64_v3i64__4_3_3: 7371; GFX90A: ; %bb.0: 7372; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7373; GFX90A-NEXT: ;;#ASMSTART 7374; GFX90A-NEXT: ; def s[4:9] 7375; GFX90A-NEXT: ;;#ASMEND 7376; GFX90A-NEXT: s_mov_b32 s8, s6 7377; GFX90A-NEXT: s_mov_b32 s9, s7 7378; GFX90A-NEXT: s_mov_b32 s10, s4 7379; GFX90A-NEXT: s_mov_b32 s11, s5 7380; GFX90A-NEXT: s_mov_b32 s12, s4 7381; GFX90A-NEXT: s_mov_b32 s13, s5 7382; GFX90A-NEXT: ;;#ASMSTART 7383; GFX90A-NEXT: ; use s[8:13] 7384; GFX90A-NEXT: ;;#ASMEND 7385; GFX90A-NEXT: s_setpc_b64 s[30:31] 7386; 7387; GFX940-LABEL: s_shuffle_v3i64_v3i64__4_3_3: 7388; GFX940: ; %bb.0: 7389; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7390; GFX940-NEXT: ;;#ASMSTART 7391; GFX940-NEXT: ; def s[0:5] 7392; GFX940-NEXT: ;;#ASMEND 
7393; GFX940-NEXT: s_mov_b32 s8, s2 7394; GFX940-NEXT: s_mov_b32 s9, s3 7395; GFX940-NEXT: s_mov_b32 s10, s0 7396; GFX940-NEXT: s_mov_b32 s11, s1 7397; GFX940-NEXT: s_mov_b32 s12, s0 7398; GFX940-NEXT: s_mov_b32 s13, s1 7399; GFX940-NEXT: ;;#ASMSTART 7400; GFX940-NEXT: ; use s[8:13] 7401; GFX940-NEXT: ;;#ASMEND 7402; GFX940-NEXT: s_setpc_b64 s[30:31] 7403 %vec0 = call <3 x i64> asm "; def $0", "=s"() 7404 %vec1 = call <3 x i64> asm "; def $0", "=s"() 7405 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 4, i32 3, i32 3> 7406 call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) 7407 ret void 7408} 7409 7410define void @s_shuffle_v3i64_v3i64__5_3_3() { 7411; GFX900-LABEL: s_shuffle_v3i64_v3i64__5_3_3: 7412; GFX900: ; %bb.0: 7413; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7414; GFX900-NEXT: ;;#ASMSTART 7415; GFX900-NEXT: ; def s[4:9] 7416; GFX900-NEXT: ;;#ASMEND 7417; GFX900-NEXT: s_mov_b32 s10, s4 7418; GFX900-NEXT: s_mov_b32 s11, s5 7419; GFX900-NEXT: s_mov_b32 s12, s4 7420; GFX900-NEXT: s_mov_b32 s13, s5 7421; GFX900-NEXT: ;;#ASMSTART 7422; GFX900-NEXT: ; use s[8:13] 7423; GFX900-NEXT: ;;#ASMEND 7424; GFX900-NEXT: s_setpc_b64 s[30:31] 7425; 7426; GFX90A-LABEL: s_shuffle_v3i64_v3i64__5_3_3: 7427; GFX90A: ; %bb.0: 7428; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7429; GFX90A-NEXT: ;;#ASMSTART 7430; GFX90A-NEXT: ; def s[4:9] 7431; GFX90A-NEXT: ;;#ASMEND 7432; GFX90A-NEXT: s_mov_b32 s10, s4 7433; GFX90A-NEXT: s_mov_b32 s11, s5 7434; GFX90A-NEXT: s_mov_b32 s12, s4 7435; GFX90A-NEXT: s_mov_b32 s13, s5 7436; GFX90A-NEXT: ;;#ASMSTART 7437; GFX90A-NEXT: ; use s[8:13] 7438; GFX90A-NEXT: ;;#ASMEND 7439; GFX90A-NEXT: s_setpc_b64 s[30:31] 7440; 7441; GFX940-LABEL: s_shuffle_v3i64_v3i64__5_3_3: 7442; GFX940: ; %bb.0: 7443; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7444; GFX940-NEXT: ;;#ASMSTART 7445; GFX940-NEXT: ; def s[0:5] 7446; GFX940-NEXT: ;;#ASMEND 7447; GFX940-NEXT: s_mov_b32 s8, s4 7448; GFX940-NEXT: 
s_mov_b32 s9, s5 7449; GFX940-NEXT: s_mov_b32 s10, s0 7450; GFX940-NEXT: s_mov_b32 s11, s1 7451; GFX940-NEXT: s_mov_b32 s12, s0 7452; GFX940-NEXT: s_mov_b32 s13, s1 7453; GFX940-NEXT: ;;#ASMSTART 7454; GFX940-NEXT: ; use s[8:13] 7455; GFX940-NEXT: ;;#ASMEND 7456; GFX940-NEXT: s_setpc_b64 s[30:31] 7457 %vec0 = call <3 x i64> asm "; def $0", "=s"() 7458 %vec1 = call <3 x i64> asm "; def $0", "=s"() 7459 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 3, i32 3> 7460 call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) 7461 ret void 7462} 7463 7464define void @s_shuffle_v3i64_v3i64__5_u_3() { 7465; GFX900-LABEL: s_shuffle_v3i64_v3i64__5_u_3: 7466; GFX900: ; %bb.0: 7467; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7468; GFX900-NEXT: ;;#ASMSTART 7469; GFX900-NEXT: ; def s[4:9] 7470; GFX900-NEXT: ;;#ASMEND 7471; GFX900-NEXT: s_mov_b32 s12, s4 7472; GFX900-NEXT: s_mov_b32 s13, s5 7473; GFX900-NEXT: ;;#ASMSTART 7474; GFX900-NEXT: ; use s[8:13] 7475; GFX900-NEXT: ;;#ASMEND 7476; GFX900-NEXT: s_setpc_b64 s[30:31] 7477; 7478; GFX90A-LABEL: s_shuffle_v3i64_v3i64__5_u_3: 7479; GFX90A: ; %bb.0: 7480; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7481; GFX90A-NEXT: ;;#ASMSTART 7482; GFX90A-NEXT: ; def s[4:9] 7483; GFX90A-NEXT: ;;#ASMEND 7484; GFX90A-NEXT: s_mov_b32 s12, s4 7485; GFX90A-NEXT: s_mov_b32 s13, s5 7486; GFX90A-NEXT: ;;#ASMSTART 7487; GFX90A-NEXT: ; use s[8:13] 7488; GFX90A-NEXT: ;;#ASMEND 7489; GFX90A-NEXT: s_setpc_b64 s[30:31] 7490; 7491; GFX940-LABEL: s_shuffle_v3i64_v3i64__5_u_3: 7492; GFX940: ; %bb.0: 7493; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7494; GFX940-NEXT: ;;#ASMSTART 7495; GFX940-NEXT: ; def s[0:5] 7496; GFX940-NEXT: ;;#ASMEND 7497; GFX940-NEXT: s_mov_b32 s8, s4 7498; GFX940-NEXT: s_mov_b32 s9, s5 7499; GFX940-NEXT: s_mov_b32 s12, s0 7500; GFX940-NEXT: s_mov_b32 s13, s1 7501; GFX940-NEXT: ;;#ASMSTART 7502; GFX940-NEXT: ; use s[8:13] 7503; GFX940-NEXT: ;;#ASMEND 7504; GFX940-NEXT: 
s_setpc_b64 s[30:31] 7505 %vec0 = call <3 x i64> asm "; def $0", "=s"() 7506 %vec1 = call <3 x i64> asm "; def $0", "=s"() 7507 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 poison, i32 3> 7508 call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) 7509 ret void 7510} 7511 7512define void @s_shuffle_v3i64_v3i64__5_0_3() { 7513; GFX900-LABEL: s_shuffle_v3i64_v3i64__5_0_3: 7514; GFX900: ; %bb.0: 7515; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7516; GFX900-NEXT: ;;#ASMSTART 7517; GFX900-NEXT: ; def s[4:9] 7518; GFX900-NEXT: ;;#ASMEND 7519; GFX900-NEXT: ;;#ASMSTART 7520; GFX900-NEXT: ; def s[12:17] 7521; GFX900-NEXT: ;;#ASMEND 7522; GFX900-NEXT: s_mov_b32 s8, s16 7523; GFX900-NEXT: s_mov_b32 s9, s17 7524; GFX900-NEXT: s_mov_b32 s10, s4 7525; GFX900-NEXT: s_mov_b32 s11, s5 7526; GFX900-NEXT: ;;#ASMSTART 7527; GFX900-NEXT: ; use s[8:13] 7528; GFX900-NEXT: ;;#ASMEND 7529; GFX900-NEXT: s_setpc_b64 s[30:31] 7530; 7531; GFX90A-LABEL: s_shuffle_v3i64_v3i64__5_0_3: 7532; GFX90A: ; %bb.0: 7533; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7534; GFX90A-NEXT: ;;#ASMSTART 7535; GFX90A-NEXT: ; def s[4:9] 7536; GFX90A-NEXT: ;;#ASMEND 7537; GFX90A-NEXT: ;;#ASMSTART 7538; GFX90A-NEXT: ; def s[12:17] 7539; GFX90A-NEXT: ;;#ASMEND 7540; GFX90A-NEXT: s_mov_b32 s8, s16 7541; GFX90A-NEXT: s_mov_b32 s9, s17 7542; GFX90A-NEXT: s_mov_b32 s10, s4 7543; GFX90A-NEXT: s_mov_b32 s11, s5 7544; GFX90A-NEXT: ;;#ASMSTART 7545; GFX90A-NEXT: ; use s[8:13] 7546; GFX90A-NEXT: ;;#ASMEND 7547; GFX90A-NEXT: s_setpc_b64 s[30:31] 7548; 7549; GFX940-LABEL: s_shuffle_v3i64_v3i64__5_0_3: 7550; GFX940: ; %bb.0: 7551; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7552; GFX940-NEXT: ;;#ASMSTART 7553; GFX940-NEXT: ; def s[0:5] 7554; GFX940-NEXT: ;;#ASMEND 7555; GFX940-NEXT: s_mov_b32 s10, s0 7556; GFX940-NEXT: ;;#ASMSTART 7557; GFX940-NEXT: ; def s[4:9] 7558; GFX940-NEXT: ;;#ASMEND 7559; GFX940-NEXT: s_mov_b32 s11, s1 7560; GFX940-NEXT: s_mov_b32 
s12, s4 7561; GFX940-NEXT: s_mov_b32 s13, s5 7562; GFX940-NEXT: ;;#ASMSTART 7563; GFX940-NEXT: ; use s[8:13] 7564; GFX940-NEXT: ;;#ASMEND 7565; GFX940-NEXT: s_setpc_b64 s[30:31] 7566 %vec0 = call <3 x i64> asm "; def $0", "=s"() 7567 %vec1 = call <3 x i64> asm "; def $0", "=s"() 7568 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 0, i32 3> 7569 call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) 7570 ret void 7571} 7572 7573define void @s_shuffle_v3i64_v3i64__5_1_3() { 7574; GFX900-LABEL: s_shuffle_v3i64_v3i64__5_1_3: 7575; GFX900: ; %bb.0: 7576; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7577; GFX900-NEXT: ;;#ASMSTART 7578; GFX900-NEXT: ; def s[8:13] 7579; GFX900-NEXT: ;;#ASMEND 7580; GFX900-NEXT: ;;#ASMSTART 7581; GFX900-NEXT: ; def s[4:9] 7582; GFX900-NEXT: ;;#ASMEND 7583; GFX900-NEXT: s_mov_b32 s12, s4 7584; GFX900-NEXT: s_mov_b32 s13, s5 7585; GFX900-NEXT: ;;#ASMSTART 7586; GFX900-NEXT: ; use s[8:13] 7587; GFX900-NEXT: ;;#ASMEND 7588; GFX900-NEXT: s_setpc_b64 s[30:31] 7589; 7590; GFX90A-LABEL: s_shuffle_v3i64_v3i64__5_1_3: 7591; GFX90A: ; %bb.0: 7592; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7593; GFX90A-NEXT: ;;#ASMSTART 7594; GFX90A-NEXT: ; def s[8:13] 7595; GFX90A-NEXT: ;;#ASMEND 7596; GFX90A-NEXT: ;;#ASMSTART 7597; GFX90A-NEXT: ; def s[4:9] 7598; GFX90A-NEXT: ;;#ASMEND 7599; GFX90A-NEXT: s_mov_b32 s12, s4 7600; GFX90A-NEXT: s_mov_b32 s13, s5 7601; GFX90A-NEXT: ;;#ASMSTART 7602; GFX90A-NEXT: ; use s[8:13] 7603; GFX90A-NEXT: ;;#ASMEND 7604; GFX90A-NEXT: s_setpc_b64 s[30:31] 7605; 7606; GFX940-LABEL: s_shuffle_v3i64_v3i64__5_1_3: 7607; GFX940: ; %bb.0: 7608; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7609; GFX940-NEXT: ;;#ASMSTART 7610; GFX940-NEXT: ; def s[8:13] 7611; GFX940-NEXT: ;;#ASMEND 7612; GFX940-NEXT: ;;#ASMSTART 7613; GFX940-NEXT: ; def s[0:5] 7614; GFX940-NEXT: ;;#ASMEND 7615; GFX940-NEXT: s_mov_b32 s8, s4 7616; GFX940-NEXT: s_mov_b32 s9, s5 7617; GFX940-NEXT: s_mov_b32 
s12, s0 7618; GFX940-NEXT: s_mov_b32 s13, s1 7619; GFX940-NEXT: ;;#ASMSTART 7620; GFX940-NEXT: ; use s[8:13] 7621; GFX940-NEXT: ;;#ASMEND 7622; GFX940-NEXT: s_setpc_b64 s[30:31] 7623 %vec0 = call <3 x i64> asm "; def $0", "=s"() 7624 %vec1 = call <3 x i64> asm "; def $0", "=s"() 7625 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 1, i32 3> 7626 call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) 7627 ret void 7628} 7629 7630define void @s_shuffle_v3i64_v3i64__5_2_3() { 7631; GFX900-LABEL: s_shuffle_v3i64_v3i64__5_2_3: 7632; GFX900: ; %bb.0: 7633; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7634; GFX900-NEXT: ;;#ASMSTART 7635; GFX900-NEXT: ; def s[8:13] 7636; GFX900-NEXT: ;;#ASMEND 7637; GFX900-NEXT: ;;#ASMSTART 7638; GFX900-NEXT: ; def s[4:9] 7639; GFX900-NEXT: ;;#ASMEND 7640; GFX900-NEXT: s_mov_b32 s10, s12 7641; GFX900-NEXT: s_mov_b32 s11, s13 7642; GFX900-NEXT: s_mov_b32 s12, s4 7643; GFX900-NEXT: s_mov_b32 s13, s5 7644; GFX900-NEXT: ;;#ASMSTART 7645; GFX900-NEXT: ; use s[8:13] 7646; GFX900-NEXT: ;;#ASMEND 7647; GFX900-NEXT: s_setpc_b64 s[30:31] 7648; 7649; GFX90A-LABEL: s_shuffle_v3i64_v3i64__5_2_3: 7650; GFX90A: ; %bb.0: 7651; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7652; GFX90A-NEXT: ;;#ASMSTART 7653; GFX90A-NEXT: ; def s[8:13] 7654; GFX90A-NEXT: ;;#ASMEND 7655; GFX90A-NEXT: ;;#ASMSTART 7656; GFX90A-NEXT: ; def s[4:9] 7657; GFX90A-NEXT: ;;#ASMEND 7658; GFX90A-NEXT: s_mov_b32 s10, s12 7659; GFX90A-NEXT: s_mov_b32 s11, s13 7660; GFX90A-NEXT: s_mov_b32 s12, s4 7661; GFX90A-NEXT: s_mov_b32 s13, s5 7662; GFX90A-NEXT: ;;#ASMSTART 7663; GFX90A-NEXT: ; use s[8:13] 7664; GFX90A-NEXT: ;;#ASMEND 7665; GFX90A-NEXT: s_setpc_b64 s[30:31] 7666; 7667; GFX940-LABEL: s_shuffle_v3i64_v3i64__5_2_3: 7668; GFX940: ; %bb.0: 7669; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7670; GFX940-NEXT: ;;#ASMSTART 7671; GFX940-NEXT: ; def s[0:5] 7672; GFX940-NEXT: ;;#ASMEND 7673; GFX940-NEXT: ;;#ASMSTART 7674; 
GFX940-NEXT: ; def s[12:17] 7675; GFX940-NEXT: ;;#ASMEND 7676; GFX940-NEXT: s_mov_b32 s8, s16 7677; GFX940-NEXT: s_mov_b32 s9, s17 7678; GFX940-NEXT: s_mov_b32 s10, s4 7679; GFX940-NEXT: s_mov_b32 s11, s5 7680; GFX940-NEXT: ;;#ASMSTART 7681; GFX940-NEXT: ; use s[8:13] 7682; GFX940-NEXT: ;;#ASMEND 7683; GFX940-NEXT: s_setpc_b64 s[30:31] 7684 %vec0 = call <3 x i64> asm "; def $0", "=s"() 7685 %vec1 = call <3 x i64> asm "; def $0", "=s"() 7686 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 2, i32 3> 7687 call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) 7688 ret void 7689} 7690 7691define void @s_shuffle_v3i64_v3i64__5_4_3() { 7692; GFX900-LABEL: s_shuffle_v3i64_v3i64__5_4_3: 7693; GFX900: ; %bb.0: 7694; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7695; GFX900-NEXT: ;;#ASMSTART 7696; GFX900-NEXT: ; def s[4:9] 7697; GFX900-NEXT: ;;#ASMEND 7698; GFX900-NEXT: s_mov_b32 s10, s6 7699; GFX900-NEXT: s_mov_b32 s11, s7 7700; GFX900-NEXT: s_mov_b32 s12, s4 7701; GFX900-NEXT: s_mov_b32 s13, s5 7702; GFX900-NEXT: ;;#ASMSTART 7703; GFX900-NEXT: ; use s[8:13] 7704; GFX900-NEXT: ;;#ASMEND 7705; GFX900-NEXT: s_setpc_b64 s[30:31] 7706; 7707; GFX90A-LABEL: s_shuffle_v3i64_v3i64__5_4_3: 7708; GFX90A: ; %bb.0: 7709; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7710; GFX90A-NEXT: ;;#ASMSTART 7711; GFX90A-NEXT: ; def s[4:9] 7712; GFX90A-NEXT: ;;#ASMEND 7713; GFX90A-NEXT: s_mov_b32 s10, s6 7714; GFX90A-NEXT: s_mov_b32 s11, s7 7715; GFX90A-NEXT: s_mov_b32 s12, s4 7716; GFX90A-NEXT: s_mov_b32 s13, s5 7717; GFX90A-NEXT: ;;#ASMSTART 7718; GFX90A-NEXT: ; use s[8:13] 7719; GFX90A-NEXT: ;;#ASMEND 7720; GFX90A-NEXT: s_setpc_b64 s[30:31] 7721; 7722; GFX940-LABEL: s_shuffle_v3i64_v3i64__5_4_3: 7723; GFX940: ; %bb.0: 7724; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7725; GFX940-NEXT: ;;#ASMSTART 7726; GFX940-NEXT: ; def s[0:5] 7727; GFX940-NEXT: ;;#ASMEND 7728; GFX940-NEXT: s_mov_b32 s8, s4 7729; GFX940-NEXT: s_mov_b32 s9, s5 
7730; GFX940-NEXT: s_mov_b32 s10, s2 7731; GFX940-NEXT: s_mov_b32 s11, s3 7732; GFX940-NEXT: s_mov_b32 s12, s0 7733; GFX940-NEXT: s_mov_b32 s13, s1 7734; GFX940-NEXT: ;;#ASMSTART 7735; GFX940-NEXT: ; use s[8:13] 7736; GFX940-NEXT: ;;#ASMEND 7737; GFX940-NEXT: s_setpc_b64 s[30:31] 7738 %vec0 = call <3 x i64> asm "; def $0", "=s"() 7739 %vec1 = call <3 x i64> asm "; def $0", "=s"() 7740 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 4, i32 3> 7741 call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) 7742 ret void 7743} 7744 7745define void @s_shuffle_v3i64_v3i64__u_4_4() { 7746; GFX9-LABEL: s_shuffle_v3i64_v3i64__u_4_4: 7747; GFX9: ; %bb.0: 7748; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7749; GFX9-NEXT: ;;#ASMSTART 7750; GFX9-NEXT: ; def s[8:13] 7751; GFX9-NEXT: ;;#ASMEND 7752; GFX9-NEXT: s_mov_b32 s12, s10 7753; GFX9-NEXT: s_mov_b32 s13, s11 7754; GFX9-NEXT: ;;#ASMSTART 7755; GFX9-NEXT: ; use s[8:13] 7756; GFX9-NEXT: ;;#ASMEND 7757; GFX9-NEXT: s_setpc_b64 s[30:31] 7758 %vec0 = call <3 x i64> asm "; def $0", "=s"() 7759 %vec1 = call <3 x i64> asm "; def $0", "=s"() 7760 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 poison, i32 4, i32 4> 7761 call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) 7762 ret void 7763} 7764 7765define void @s_shuffle_v3i64_v3i64__0_4_4() { 7766; GFX900-LABEL: s_shuffle_v3i64_v3i64__0_4_4: 7767; GFX900: ; %bb.0: 7768; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7769; GFX900-NEXT: ;;#ASMSTART 7770; GFX900-NEXT: ; def s[8:13] 7771; GFX900-NEXT: ;;#ASMEND 7772; GFX900-NEXT: ;;#ASMSTART 7773; GFX900-NEXT: ; def s[12:17] 7774; GFX900-NEXT: ;;#ASMEND 7775; GFX900-NEXT: s_mov_b32 s10, s14 7776; GFX900-NEXT: s_mov_b32 s11, s15 7777; GFX900-NEXT: s_mov_b32 s12, s14 7778; GFX900-NEXT: s_mov_b32 s13, s15 7779; GFX900-NEXT: ;;#ASMSTART 7780; GFX900-NEXT: ; use s[8:13] 7781; GFX900-NEXT: ;;#ASMEND 7782; GFX900-NEXT: s_setpc_b64 s[30:31] 7783; 7784; 
GFX90A-LABEL: s_shuffle_v3i64_v3i64__0_4_4: 7785; GFX90A: ; %bb.0: 7786; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7787; GFX90A-NEXT: ;;#ASMSTART 7788; GFX90A-NEXT: ; def s[8:13] 7789; GFX90A-NEXT: ;;#ASMEND 7790; GFX90A-NEXT: ;;#ASMSTART 7791; GFX90A-NEXT: ; def s[12:17] 7792; GFX90A-NEXT: ;;#ASMEND 7793; GFX90A-NEXT: s_mov_b32 s10, s14 7794; GFX90A-NEXT: s_mov_b32 s11, s15 7795; GFX90A-NEXT: s_mov_b32 s12, s14 7796; GFX90A-NEXT: s_mov_b32 s13, s15 7797; GFX90A-NEXT: ;;#ASMSTART 7798; GFX90A-NEXT: ; use s[8:13] 7799; GFX90A-NEXT: ;;#ASMEND 7800; GFX90A-NEXT: s_setpc_b64 s[30:31] 7801; 7802; GFX940-LABEL: s_shuffle_v3i64_v3i64__0_4_4: 7803; GFX940: ; %bb.0: 7804; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7805; GFX940-NEXT: ;;#ASMSTART 7806; GFX940-NEXT: ; def s[8:13] 7807; GFX940-NEXT: ;;#ASMEND 7808; GFX940-NEXT: ;;#ASMSTART 7809; GFX940-NEXT: ; def s[0:5] 7810; GFX940-NEXT: ;;#ASMEND 7811; GFX940-NEXT: s_mov_b32 s10, s2 7812; GFX940-NEXT: s_mov_b32 s11, s3 7813; GFX940-NEXT: s_mov_b32 s12, s2 7814; GFX940-NEXT: s_mov_b32 s13, s3 7815; GFX940-NEXT: ;;#ASMSTART 7816; GFX940-NEXT: ; use s[8:13] 7817; GFX940-NEXT: ;;#ASMEND 7818; GFX940-NEXT: s_setpc_b64 s[30:31] 7819 %vec0 = call <3 x i64> asm "; def $0", "=s"() 7820 %vec1 = call <3 x i64> asm "; def $0", "=s"() 7821 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 0, i32 4, i32 4> 7822 call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) 7823 ret void 7824} 7825 7826define void @s_shuffle_v3i64_v3i64__1_4_4() { 7827; GFX900-LABEL: s_shuffle_v3i64_v3i64__1_4_4: 7828; GFX900: ; %bb.0: 7829; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7830; GFX900-NEXT: ;;#ASMSTART 7831; GFX900-NEXT: ; def s[4:9] 7832; GFX900-NEXT: ;;#ASMEND 7833; GFX900-NEXT: ;;#ASMSTART 7834; GFX900-NEXT: ; def s[8:13] 7835; GFX900-NEXT: ;;#ASMEND 7836; GFX900-NEXT: s_mov_b32 s8, s6 7837; GFX900-NEXT: s_mov_b32 s9, s7 7838; GFX900-NEXT: s_mov_b32 s12, s10 7839; GFX900-NEXT: s_mov_b32 
s13, s11 7840; GFX900-NEXT: ;;#ASMSTART 7841; GFX900-NEXT: ; use s[8:13] 7842; GFX900-NEXT: ;;#ASMEND 7843; GFX900-NEXT: s_setpc_b64 s[30:31] 7844; 7845; GFX90A-LABEL: s_shuffle_v3i64_v3i64__1_4_4: 7846; GFX90A: ; %bb.0: 7847; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7848; GFX90A-NEXT: ;;#ASMSTART 7849; GFX90A-NEXT: ; def s[4:9] 7850; GFX90A-NEXT: ;;#ASMEND 7851; GFX90A-NEXT: ;;#ASMSTART 7852; GFX90A-NEXT: ; def s[8:13] 7853; GFX90A-NEXT: ;;#ASMEND 7854; GFX90A-NEXT: s_mov_b32 s8, s6 7855; GFX90A-NEXT: s_mov_b32 s9, s7 7856; GFX90A-NEXT: s_mov_b32 s12, s10 7857; GFX90A-NEXT: s_mov_b32 s13, s11 7858; GFX90A-NEXT: ;;#ASMSTART 7859; GFX90A-NEXT: ; use s[8:13] 7860; GFX90A-NEXT: ;;#ASMEND 7861; GFX90A-NEXT: s_setpc_b64 s[30:31] 7862; 7863; GFX940-LABEL: s_shuffle_v3i64_v3i64__1_4_4: 7864; GFX940: ; %bb.0: 7865; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7866; GFX940-NEXT: ;;#ASMSTART 7867; GFX940-NEXT: ; def s[8:13] 7868; GFX940-NEXT: ;;#ASMEND 7869; GFX940-NEXT: ;;#ASMSTART 7870; GFX940-NEXT: ; def s[0:5] 7871; GFX940-NEXT: ;;#ASMEND 7872; GFX940-NEXT: s_mov_b32 s8, s2 7873; GFX940-NEXT: s_mov_b32 s9, s3 7874; GFX940-NEXT: s_mov_b32 s12, s10 7875; GFX940-NEXT: s_mov_b32 s13, s11 7876; GFX940-NEXT: ;;#ASMSTART 7877; GFX940-NEXT: ; use s[8:13] 7878; GFX940-NEXT: ;;#ASMEND 7879; GFX940-NEXT: s_setpc_b64 s[30:31] 7880 %vec0 = call <3 x i64> asm "; def $0", "=s"() 7881 %vec1 = call <3 x i64> asm "; def $0", "=s"() 7882 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 1, i32 4, i32 4> 7883 call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) 7884 ret void 7885} 7886 7887define void @s_shuffle_v3i64_v3i64__2_4_4() { 7888; GFX900-LABEL: s_shuffle_v3i64_v3i64__2_4_4: 7889; GFX900: ; %bb.0: 7890; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7891; GFX900-NEXT: ;;#ASMSTART 7892; GFX900-NEXT: ; def s[12:17] 7893; GFX900-NEXT: ;;#ASMEND 7894; GFX900-NEXT: ;;#ASMSTART 7895; GFX900-NEXT: ; def s[8:13] 7896; GFX900-NEXT: 
;;#ASMEND 7897; GFX900-NEXT: s_mov_b32 s8, s16 7898; GFX900-NEXT: s_mov_b32 s9, s17 7899; GFX900-NEXT: s_mov_b32 s12, s10 7900; GFX900-NEXT: s_mov_b32 s13, s11 7901; GFX900-NEXT: ;;#ASMSTART 7902; GFX900-NEXT: ; use s[8:13] 7903; GFX900-NEXT: ;;#ASMEND 7904; GFX900-NEXT: s_setpc_b64 s[30:31] 7905; 7906; GFX90A-LABEL: s_shuffle_v3i64_v3i64__2_4_4: 7907; GFX90A: ; %bb.0: 7908; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7909; GFX90A-NEXT: ;;#ASMSTART 7910; GFX90A-NEXT: ; def s[12:17] 7911; GFX90A-NEXT: ;;#ASMEND 7912; GFX90A-NEXT: ;;#ASMSTART 7913; GFX90A-NEXT: ; def s[8:13] 7914; GFX90A-NEXT: ;;#ASMEND 7915; GFX90A-NEXT: s_mov_b32 s8, s16 7916; GFX90A-NEXT: s_mov_b32 s9, s17 7917; GFX90A-NEXT: s_mov_b32 s12, s10 7918; GFX90A-NEXT: s_mov_b32 s13, s11 7919; GFX90A-NEXT: ;;#ASMSTART 7920; GFX90A-NEXT: ; use s[8:13] 7921; GFX90A-NEXT: ;;#ASMEND 7922; GFX90A-NEXT: s_setpc_b64 s[30:31] 7923; 7924; GFX940-LABEL: s_shuffle_v3i64_v3i64__2_4_4: 7925; GFX940: ; %bb.0: 7926; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7927; GFX940-NEXT: ;;#ASMSTART 7928; GFX940-NEXT: ; def s[8:13] 7929; GFX940-NEXT: ;;#ASMEND 7930; GFX940-NEXT: ;;#ASMSTART 7931; GFX940-NEXT: ; def s[0:5] 7932; GFX940-NEXT: ;;#ASMEND 7933; GFX940-NEXT: s_mov_b32 s8, s4 7934; GFX940-NEXT: s_mov_b32 s9, s5 7935; GFX940-NEXT: s_mov_b32 s12, s10 7936; GFX940-NEXT: s_mov_b32 s13, s11 7937; GFX940-NEXT: ;;#ASMSTART 7938; GFX940-NEXT: ; use s[8:13] 7939; GFX940-NEXT: ;;#ASMEND 7940; GFX940-NEXT: s_setpc_b64 s[30:31] 7941 %vec0 = call <3 x i64> asm "; def $0", "=s"() 7942 %vec1 = call <3 x i64> asm "; def $0", "=s"() 7943 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 2, i32 4, i32 4> 7944 call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) 7945 ret void 7946} 7947 7948define void @s_shuffle_v3i64_v3i64__3_4_4() { 7949; GFX9-LABEL: s_shuffle_v3i64_v3i64__3_4_4: 7950; GFX9: ; %bb.0: 7951; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7952; GFX9-NEXT: 
;;#ASMSTART 7953; GFX9-NEXT: ; def s[8:13] 7954; GFX9-NEXT: ;;#ASMEND 7955; GFX9-NEXT: s_mov_b32 s12, s10 7956; GFX9-NEXT: s_mov_b32 s13, s11 7957; GFX9-NEXT: ;;#ASMSTART 7958; GFX9-NEXT: ; use s[8:13] 7959; GFX9-NEXT: ;;#ASMEND 7960; GFX9-NEXT: s_setpc_b64 s[30:31] 7961 %vec0 = call <3 x i64> asm "; def $0", "=s"() 7962 %vec1 = call <3 x i64> asm "; def $0", "=s"() 7963 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 3, i32 4, i32 4> 7964 call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) 7965 ret void 7966} 7967 7968define void @s_shuffle_v3i64_v3i64__4_4_4() { 7969; GFX9-LABEL: s_shuffle_v3i64_v3i64__4_4_4: 7970; GFX9: ; %bb.0: 7971; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7972; GFX9-NEXT: ;;#ASMSTART 7973; GFX9-NEXT: ; def s[8:13] 7974; GFX9-NEXT: ;;#ASMEND 7975; GFX9-NEXT: s_mov_b32 s8, s10 7976; GFX9-NEXT: s_mov_b32 s9, s11 7977; GFX9-NEXT: s_mov_b32 s12, s10 7978; GFX9-NEXT: s_mov_b32 s13, s11 7979; GFX9-NEXT: ;;#ASMSTART 7980; GFX9-NEXT: ; use s[8:13] 7981; GFX9-NEXT: ;;#ASMEND 7982; GFX9-NEXT: s_setpc_b64 s[30:31] 7983 %vec0 = call <3 x i64> asm "; def $0", "=s"() 7984 %vec1 = call <3 x i64> asm "; def $0", "=s"() 7985 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 4, i32 4, i32 4> 7986 call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) 7987 ret void 7988} 7989 7990define void @s_shuffle_v3i64_v3i64__5_4_4() { 7991; GFX9-LABEL: s_shuffle_v3i64_v3i64__5_4_4: 7992; GFX9: ; %bb.0: 7993; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7994; GFX9-NEXT: ;;#ASMSTART 7995; GFX9-NEXT: ; def s[8:13] 7996; GFX9-NEXT: ;;#ASMEND 7997; GFX9-NEXT: s_mov_b32 s8, s12 7998; GFX9-NEXT: s_mov_b32 s9, s13 7999; GFX9-NEXT: s_mov_b32 s12, s10 8000; GFX9-NEXT: s_mov_b32 s13, s11 8001; GFX9-NEXT: ;;#ASMSTART 8002; GFX9-NEXT: ; use s[8:13] 8003; GFX9-NEXT: ;;#ASMEND 8004; GFX9-NEXT: s_setpc_b64 s[30:31] 8005 %vec0 = call <3 x i64> asm "; def $0", "=s"() 8006 %vec1 = call <3 x i64> asm "; def 
$0", "=s"() 8007 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 4, i32 4> 8008 call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) 8009 ret void 8010} 8011 8012define void @s_shuffle_v3i64_v3i64__5_u_4() { 8013; GFX900-LABEL: s_shuffle_v3i64_v3i64__5_u_4: 8014; GFX900: ; %bb.0: 8015; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8016; GFX900-NEXT: ;;#ASMSTART 8017; GFX900-NEXT: ; def s[4:9] 8018; GFX900-NEXT: ;;#ASMEND 8019; GFX900-NEXT: s_mov_b32 s12, s6 8020; GFX900-NEXT: s_mov_b32 s13, s7 8021; GFX900-NEXT: ;;#ASMSTART 8022; GFX900-NEXT: ; use s[8:13] 8023; GFX900-NEXT: ;;#ASMEND 8024; GFX900-NEXT: s_setpc_b64 s[30:31] 8025; 8026; GFX90A-LABEL: s_shuffle_v3i64_v3i64__5_u_4: 8027; GFX90A: ; %bb.0: 8028; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8029; GFX90A-NEXT: ;;#ASMSTART 8030; GFX90A-NEXT: ; def s[4:9] 8031; GFX90A-NEXT: ;;#ASMEND 8032; GFX90A-NEXT: s_mov_b32 s12, s6 8033; GFX90A-NEXT: s_mov_b32 s13, s7 8034; GFX90A-NEXT: ;;#ASMSTART 8035; GFX90A-NEXT: ; use s[8:13] 8036; GFX90A-NEXT: ;;#ASMEND 8037; GFX90A-NEXT: s_setpc_b64 s[30:31] 8038; 8039; GFX940-LABEL: s_shuffle_v3i64_v3i64__5_u_4: 8040; GFX940: ; %bb.0: 8041; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8042; GFX940-NEXT: ;;#ASMSTART 8043; GFX940-NEXT: ; def s[0:5] 8044; GFX940-NEXT: ;;#ASMEND 8045; GFX940-NEXT: s_mov_b32 s8, s4 8046; GFX940-NEXT: s_mov_b32 s9, s5 8047; GFX940-NEXT: s_mov_b32 s12, s2 8048; GFX940-NEXT: s_mov_b32 s13, s3 8049; GFX940-NEXT: ;;#ASMSTART 8050; GFX940-NEXT: ; use s[8:13] 8051; GFX940-NEXT: ;;#ASMEND 8052; GFX940-NEXT: s_setpc_b64 s[30:31] 8053 %vec0 = call <3 x i64> asm "; def $0", "=s"() 8054 %vec1 = call <3 x i64> asm "; def $0", "=s"() 8055 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 poison, i32 4> 8056 call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) 8057 ret void 8058} 8059 8060define void @s_shuffle_v3i64_v3i64__5_0_4() { 8061; GFX900-LABEL: 
s_shuffle_v3i64_v3i64__5_0_4: 8062; GFX900: ; %bb.0: 8063; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8064; GFX900-NEXT: ;;#ASMSTART 8065; GFX900-NEXT: ; def s[4:9] 8066; GFX900-NEXT: ;;#ASMEND 8067; GFX900-NEXT: ;;#ASMSTART 8068; GFX900-NEXT: ; def s[12:17] 8069; GFX900-NEXT: ;;#ASMEND 8070; GFX900-NEXT: s_mov_b32 s8, s16 8071; GFX900-NEXT: s_mov_b32 s9, s17 8072; GFX900-NEXT: s_mov_b32 s10, s4 8073; GFX900-NEXT: s_mov_b32 s11, s5 8074; GFX900-NEXT: s_mov_b32 s12, s14 8075; GFX900-NEXT: s_mov_b32 s13, s15 8076; GFX900-NEXT: ;;#ASMSTART 8077; GFX900-NEXT: ; use s[8:13] 8078; GFX900-NEXT: ;;#ASMEND 8079; GFX900-NEXT: s_setpc_b64 s[30:31] 8080; 8081; GFX90A-LABEL: s_shuffle_v3i64_v3i64__5_0_4: 8082; GFX90A: ; %bb.0: 8083; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8084; GFX90A-NEXT: ;;#ASMSTART 8085; GFX90A-NEXT: ; def s[4:9] 8086; GFX90A-NEXT: ;;#ASMEND 8087; GFX90A-NEXT: ;;#ASMSTART 8088; GFX90A-NEXT: ; def s[12:17] 8089; GFX90A-NEXT: ;;#ASMEND 8090; GFX90A-NEXT: s_mov_b32 s8, s16 8091; GFX90A-NEXT: s_mov_b32 s9, s17 8092; GFX90A-NEXT: s_mov_b32 s10, s4 8093; GFX90A-NEXT: s_mov_b32 s11, s5 8094; GFX90A-NEXT: s_mov_b32 s12, s14 8095; GFX90A-NEXT: s_mov_b32 s13, s15 8096; GFX90A-NEXT: ;;#ASMSTART 8097; GFX90A-NEXT: ; use s[8:13] 8098; GFX90A-NEXT: ;;#ASMEND 8099; GFX90A-NEXT: s_setpc_b64 s[30:31] 8100; 8101; GFX940-LABEL: s_shuffle_v3i64_v3i64__5_0_4: 8102; GFX940: ; %bb.0: 8103; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8104; GFX940-NEXT: ;;#ASMSTART 8105; GFX940-NEXT: ; def s[0:5] 8106; GFX940-NEXT: ;;#ASMEND 8107; GFX940-NEXT: s_mov_b32 s10, s0 8108; GFX940-NEXT: ;;#ASMSTART 8109; GFX940-NEXT: ; def s[4:9] 8110; GFX940-NEXT: ;;#ASMEND 8111; GFX940-NEXT: s_mov_b32 s11, s1 8112; GFX940-NEXT: s_mov_b32 s12, s6 8113; GFX940-NEXT: s_mov_b32 s13, s7 8114; GFX940-NEXT: ;;#ASMSTART 8115; GFX940-NEXT: ; use s[8:13] 8116; GFX940-NEXT: ;;#ASMEND 8117; GFX940-NEXT: s_setpc_b64 s[30:31] 8118 %vec0 = call <3 x i64> asm "; def $0", "=s"() 8119 
%vec1 = call <3 x i64> asm "; def $0", "=s"() 8120 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 0, i32 4> 8121 call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) 8122 ret void 8123} 8124 8125define void @s_shuffle_v3i64_v3i64__5_1_4() { 8126; GFX900-LABEL: s_shuffle_v3i64_v3i64__5_1_4: 8127; GFX900: ; %bb.0: 8128; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8129; GFX900-NEXT: ;;#ASMSTART 8130; GFX900-NEXT: ; def s[8:13] 8131; GFX900-NEXT: ;;#ASMEND 8132; GFX900-NEXT: ;;#ASMSTART 8133; GFX900-NEXT: ; def s[4:9] 8134; GFX900-NEXT: ;;#ASMEND 8135; GFX900-NEXT: s_mov_b32 s12, s6 8136; GFX900-NEXT: s_mov_b32 s13, s7 8137; GFX900-NEXT: ;;#ASMSTART 8138; GFX900-NEXT: ; use s[8:13] 8139; GFX900-NEXT: ;;#ASMEND 8140; GFX900-NEXT: s_setpc_b64 s[30:31] 8141; 8142; GFX90A-LABEL: s_shuffle_v3i64_v3i64__5_1_4: 8143; GFX90A: ; %bb.0: 8144; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8145; GFX90A-NEXT: ;;#ASMSTART 8146; GFX90A-NEXT: ; def s[8:13] 8147; GFX90A-NEXT: ;;#ASMEND 8148; GFX90A-NEXT: ;;#ASMSTART 8149; GFX90A-NEXT: ; def s[4:9] 8150; GFX90A-NEXT: ;;#ASMEND 8151; GFX90A-NEXT: s_mov_b32 s12, s6 8152; GFX90A-NEXT: s_mov_b32 s13, s7 8153; GFX90A-NEXT: ;;#ASMSTART 8154; GFX90A-NEXT: ; use s[8:13] 8155; GFX90A-NEXT: ;;#ASMEND 8156; GFX90A-NEXT: s_setpc_b64 s[30:31] 8157; 8158; GFX940-LABEL: s_shuffle_v3i64_v3i64__5_1_4: 8159; GFX940: ; %bb.0: 8160; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8161; GFX940-NEXT: ;;#ASMSTART 8162; GFX940-NEXT: ; def s[8:13] 8163; GFX940-NEXT: ;;#ASMEND 8164; GFX940-NEXT: ;;#ASMSTART 8165; GFX940-NEXT: ; def s[0:5] 8166; GFX940-NEXT: ;;#ASMEND 8167; GFX940-NEXT: s_mov_b32 s8, s4 8168; GFX940-NEXT: s_mov_b32 s9, s5 8169; GFX940-NEXT: s_mov_b32 s12, s2 8170; GFX940-NEXT: s_mov_b32 s13, s3 8171; GFX940-NEXT: ;;#ASMSTART 8172; GFX940-NEXT: ; use s[8:13] 8173; GFX940-NEXT: ;;#ASMEND 8174; GFX940-NEXT: s_setpc_b64 s[30:31] 8175 %vec0 = call <3 x i64> asm "; def $0", "=s"() 8176 
%vec1 = call <3 x i64> asm "; def $0", "=s"() 8177 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 1, i32 4> 8178 call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) 8179 ret void 8180} 8181 8182define void @s_shuffle_v3i64_v3i64__5_2_4() { 8183; GFX900-LABEL: s_shuffle_v3i64_v3i64__5_2_4: 8184; GFX900: ; %bb.0: 8185; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8186; GFX900-NEXT: ;;#ASMSTART 8187; GFX900-NEXT: ; def s[8:13] 8188; GFX900-NEXT: ;;#ASMEND 8189; GFX900-NEXT: ;;#ASMSTART 8190; GFX900-NEXT: ; def s[4:9] 8191; GFX900-NEXT: ;;#ASMEND 8192; GFX900-NEXT: s_mov_b32 s10, s12 8193; GFX900-NEXT: s_mov_b32 s11, s13 8194; GFX900-NEXT: s_mov_b32 s12, s6 8195; GFX900-NEXT: s_mov_b32 s13, s7 8196; GFX900-NEXT: ;;#ASMSTART 8197; GFX900-NEXT: ; use s[8:13] 8198; GFX900-NEXT: ;;#ASMEND 8199; GFX900-NEXT: s_setpc_b64 s[30:31] 8200; 8201; GFX90A-LABEL: s_shuffle_v3i64_v3i64__5_2_4: 8202; GFX90A: ; %bb.0: 8203; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8204; GFX90A-NEXT: ;;#ASMSTART 8205; GFX90A-NEXT: ; def s[8:13] 8206; GFX90A-NEXT: ;;#ASMEND 8207; GFX90A-NEXT: ;;#ASMSTART 8208; GFX90A-NEXT: ; def s[4:9] 8209; GFX90A-NEXT: ;;#ASMEND 8210; GFX90A-NEXT: s_mov_b32 s10, s12 8211; GFX90A-NEXT: s_mov_b32 s11, s13 8212; GFX90A-NEXT: s_mov_b32 s12, s6 8213; GFX90A-NEXT: s_mov_b32 s13, s7 8214; GFX90A-NEXT: ;;#ASMSTART 8215; GFX90A-NEXT: ; use s[8:13] 8216; GFX90A-NEXT: ;;#ASMEND 8217; GFX90A-NEXT: s_setpc_b64 s[30:31] 8218; 8219; GFX940-LABEL: s_shuffle_v3i64_v3i64__5_2_4: 8220; GFX940: ; %bb.0: 8221; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8222; GFX940-NEXT: ;;#ASMSTART 8223; GFX940-NEXT: ; def s[12:17] 8224; GFX940-NEXT: ;;#ASMEND 8225; GFX940-NEXT: ;;#ASMSTART 8226; GFX940-NEXT: ; def s[0:5] 8227; GFX940-NEXT: ;;#ASMEND 8228; GFX940-NEXT: s_mov_b32 s8, s16 8229; GFX940-NEXT: s_mov_b32 s9, s17 8230; GFX940-NEXT: s_mov_b32 s10, s4 8231; GFX940-NEXT: s_mov_b32 s11, s5 8232; GFX940-NEXT: s_mov_b32 s12, 
s14 8233; GFX940-NEXT: s_mov_b32 s13, s15 8234; GFX940-NEXT: ;;#ASMSTART 8235; GFX940-NEXT: ; use s[8:13] 8236; GFX940-NEXT: ;;#ASMEND 8237; GFX940-NEXT: s_setpc_b64 s[30:31] 8238 %vec0 = call <3 x i64> asm "; def $0", "=s"() 8239 %vec1 = call <3 x i64> asm "; def $0", "=s"() 8240 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 2, i32 4> 8241 call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) 8242 ret void 8243} 8244 8245define void @s_shuffle_v3i64_v3i64__5_3_4() { 8246; GFX900-LABEL: s_shuffle_v3i64_v3i64__5_3_4: 8247; GFX900: ; %bb.0: 8248; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8249; GFX900-NEXT: ;;#ASMSTART 8250; GFX900-NEXT: ; def s[4:9] 8251; GFX900-NEXT: ;;#ASMEND 8252; GFX900-NEXT: s_mov_b32 s10, s4 8253; GFX900-NEXT: s_mov_b32 s11, s5 8254; GFX900-NEXT: s_mov_b32 s12, s6 8255; GFX900-NEXT: s_mov_b32 s13, s7 8256; GFX900-NEXT: ;;#ASMSTART 8257; GFX900-NEXT: ; use s[8:13] 8258; GFX900-NEXT: ;;#ASMEND 8259; GFX900-NEXT: s_setpc_b64 s[30:31] 8260; 8261; GFX90A-LABEL: s_shuffle_v3i64_v3i64__5_3_4: 8262; GFX90A: ; %bb.0: 8263; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8264; GFX90A-NEXT: ;;#ASMSTART 8265; GFX90A-NEXT: ; def s[4:9] 8266; GFX90A-NEXT: ;;#ASMEND 8267; GFX90A-NEXT: s_mov_b32 s10, s4 8268; GFX90A-NEXT: s_mov_b32 s11, s5 8269; GFX90A-NEXT: s_mov_b32 s12, s6 8270; GFX90A-NEXT: s_mov_b32 s13, s7 8271; GFX90A-NEXT: ;;#ASMSTART 8272; GFX90A-NEXT: ; use s[8:13] 8273; GFX90A-NEXT: ;;#ASMEND 8274; GFX90A-NEXT: s_setpc_b64 s[30:31] 8275; 8276; GFX940-LABEL: s_shuffle_v3i64_v3i64__5_3_4: 8277; GFX940: ; %bb.0: 8278; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8279; GFX940-NEXT: ;;#ASMSTART 8280; GFX940-NEXT: ; def s[0:5] 8281; GFX940-NEXT: ;;#ASMEND 8282; GFX940-NEXT: s_mov_b32 s8, s4 8283; GFX940-NEXT: s_mov_b32 s9, s5 8284; GFX940-NEXT: s_mov_b32 s10, s0 8285; GFX940-NEXT: s_mov_b32 s11, s1 8286; GFX940-NEXT: s_mov_b32 s12, s2 8287; GFX940-NEXT: s_mov_b32 s13, s3 8288; 
GFX940-NEXT: ;;#ASMSTART 8289; GFX940-NEXT: ; use s[8:13] 8290; GFX940-NEXT: ;;#ASMEND 8291; GFX940-NEXT: s_setpc_b64 s[30:31] 8292 %vec0 = call <3 x i64> asm "; def $0", "=s"() 8293 %vec1 = call <3 x i64> asm "; def $0", "=s"() 8294 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 3, i32 4> 8295 call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) 8296 ret void 8297} 8298 8299define void @s_shuffle_v3i64_v3i64__u_5_5() { 8300; GFX9-LABEL: s_shuffle_v3i64_v3i64__u_5_5: 8301; GFX9: ; %bb.0: 8302; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8303; GFX9-NEXT: ;;#ASMSTART 8304; GFX9-NEXT: ; def s[8:13] 8305; GFX9-NEXT: ;;#ASMEND 8306; GFX9-NEXT: s_mov_b32 s10, s12 8307; GFX9-NEXT: s_mov_b32 s11, s13 8308; GFX9-NEXT: ;;#ASMSTART 8309; GFX9-NEXT: ; use s[8:13] 8310; GFX9-NEXT: ;;#ASMEND 8311; GFX9-NEXT: s_setpc_b64 s[30:31] 8312 %vec0 = call <3 x i64> asm "; def $0", "=s"() 8313 %vec1 = call <3 x i64> asm "; def $0", "=s"() 8314 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 poison, i32 5, i32 5> 8315 call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) 8316 ret void 8317} 8318 8319define void @s_shuffle_v3i64_v3i64__0_5_5() { 8320; GFX900-LABEL: s_shuffle_v3i64_v3i64__0_5_5: 8321; GFX900: ; %bb.0: 8322; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8323; GFX900-NEXT: ;;#ASMSTART 8324; GFX900-NEXT: ; def s[8:13] 8325; GFX900-NEXT: ;;#ASMEND 8326; GFX900-NEXT: ;;#ASMSTART 8327; GFX900-NEXT: ; def s[12:17] 8328; GFX900-NEXT: ;;#ASMEND 8329; GFX900-NEXT: s_mov_b32 s10, s16 8330; GFX900-NEXT: s_mov_b32 s11, s17 8331; GFX900-NEXT: s_mov_b32 s12, s16 8332; GFX900-NEXT: s_mov_b32 s13, s17 8333; GFX900-NEXT: ;;#ASMSTART 8334; GFX900-NEXT: ; use s[8:13] 8335; GFX900-NEXT: ;;#ASMEND 8336; GFX900-NEXT: s_setpc_b64 s[30:31] 8337; 8338; GFX90A-LABEL: s_shuffle_v3i64_v3i64__0_5_5: 8339; GFX90A: ; %bb.0: 8340; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8341; GFX90A-NEXT: ;;#ASMSTART 
8342; GFX90A-NEXT: ; def s[8:13] 8343; GFX90A-NEXT: ;;#ASMEND 8344; GFX90A-NEXT: ;;#ASMSTART 8345; GFX90A-NEXT: ; def s[12:17] 8346; GFX90A-NEXT: ;;#ASMEND 8347; GFX90A-NEXT: s_mov_b32 s10, s16 8348; GFX90A-NEXT: s_mov_b32 s11, s17 8349; GFX90A-NEXT: s_mov_b32 s12, s16 8350; GFX90A-NEXT: s_mov_b32 s13, s17 8351; GFX90A-NEXT: ;;#ASMSTART 8352; GFX90A-NEXT: ; use s[8:13] 8353; GFX90A-NEXT: ;;#ASMEND 8354; GFX90A-NEXT: s_setpc_b64 s[30:31] 8355; 8356; GFX940-LABEL: s_shuffle_v3i64_v3i64__0_5_5: 8357; GFX940: ; %bb.0: 8358; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8359; GFX940-NEXT: ;;#ASMSTART 8360; GFX940-NEXT: ; def s[8:13] 8361; GFX940-NEXT: ;;#ASMEND 8362; GFX940-NEXT: ;;#ASMSTART 8363; GFX940-NEXT: ; def s[0:5] 8364; GFX940-NEXT: ;;#ASMEND 8365; GFX940-NEXT: s_mov_b32 s10, s4 8366; GFX940-NEXT: s_mov_b32 s11, s5 8367; GFX940-NEXT: s_mov_b32 s12, s4 8368; GFX940-NEXT: s_mov_b32 s13, s5 8369; GFX940-NEXT: ;;#ASMSTART 8370; GFX940-NEXT: ; use s[8:13] 8371; GFX940-NEXT: ;;#ASMEND 8372; GFX940-NEXT: s_setpc_b64 s[30:31] 8373 %vec0 = call <3 x i64> asm "; def $0", "=s"() 8374 %vec1 = call <3 x i64> asm "; def $0", "=s"() 8375 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 0, i32 5, i32 5> 8376 call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) 8377 ret void 8378} 8379 8380define void @s_shuffle_v3i64_v3i64__1_5_5() { 8381; GFX900-LABEL: s_shuffle_v3i64_v3i64__1_5_5: 8382; GFX900: ; %bb.0: 8383; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8384; GFX900-NEXT: ;;#ASMSTART 8385; GFX900-NEXT: ; def s[4:9] 8386; GFX900-NEXT: ;;#ASMEND 8387; GFX900-NEXT: ;;#ASMSTART 8388; GFX900-NEXT: ; def s[8:13] 8389; GFX900-NEXT: ;;#ASMEND 8390; GFX900-NEXT: s_mov_b32 s8, s6 8391; GFX900-NEXT: s_mov_b32 s9, s7 8392; GFX900-NEXT: s_mov_b32 s10, s12 8393; GFX900-NEXT: s_mov_b32 s11, s13 8394; GFX900-NEXT: ;;#ASMSTART 8395; GFX900-NEXT: ; use s[8:13] 8396; GFX900-NEXT: ;;#ASMEND 8397; GFX900-NEXT: s_setpc_b64 s[30:31] 8398; 8399; 
GFX90A-LABEL: s_shuffle_v3i64_v3i64__1_5_5: 8400; GFX90A: ; %bb.0: 8401; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8402; GFX90A-NEXT: ;;#ASMSTART 8403; GFX90A-NEXT: ; def s[4:9] 8404; GFX90A-NEXT: ;;#ASMEND 8405; GFX90A-NEXT: ;;#ASMSTART 8406; GFX90A-NEXT: ; def s[8:13] 8407; GFX90A-NEXT: ;;#ASMEND 8408; GFX90A-NEXT: s_mov_b32 s8, s6 8409; GFX90A-NEXT: s_mov_b32 s9, s7 8410; GFX90A-NEXT: s_mov_b32 s10, s12 8411; GFX90A-NEXT: s_mov_b32 s11, s13 8412; GFX90A-NEXT: ;;#ASMSTART 8413; GFX90A-NEXT: ; use s[8:13] 8414; GFX90A-NEXT: ;;#ASMEND 8415; GFX90A-NEXT: s_setpc_b64 s[30:31] 8416; 8417; GFX940-LABEL: s_shuffle_v3i64_v3i64__1_5_5: 8418; GFX940: ; %bb.0: 8419; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8420; GFX940-NEXT: ;;#ASMSTART 8421; GFX940-NEXT: ; def s[8:13] 8422; GFX940-NEXT: ;;#ASMEND 8423; GFX940-NEXT: ;;#ASMSTART 8424; GFX940-NEXT: ; def s[0:5] 8425; GFX940-NEXT: ;;#ASMEND 8426; GFX940-NEXT: s_mov_b32 s8, s2 8427; GFX940-NEXT: s_mov_b32 s9, s3 8428; GFX940-NEXT: s_mov_b32 s10, s12 8429; GFX940-NEXT: s_mov_b32 s11, s13 8430; GFX940-NEXT: ;;#ASMSTART 8431; GFX940-NEXT: ; use s[8:13] 8432; GFX940-NEXT: ;;#ASMEND 8433; GFX940-NEXT: s_setpc_b64 s[30:31] 8434 %vec0 = call <3 x i64> asm "; def $0", "=s"() 8435 %vec1 = call <3 x i64> asm "; def $0", "=s"() 8436 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 1, i32 5, i32 5> 8437 call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) 8438 ret void 8439} 8440 8441define void @s_shuffle_v3i64_v3i64__2_5_5() { 8442; GFX900-LABEL: s_shuffle_v3i64_v3i64__2_5_5: 8443; GFX900: ; %bb.0: 8444; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8445; GFX900-NEXT: ;;#ASMSTART 8446; GFX900-NEXT: ; def s[12:17] 8447; GFX900-NEXT: ;;#ASMEND 8448; GFX900-NEXT: ;;#ASMSTART 8449; GFX900-NEXT: ; def s[8:13] 8450; GFX900-NEXT: ;;#ASMEND 8451; GFX900-NEXT: s_mov_b32 s8, s16 8452; GFX900-NEXT: s_mov_b32 s9, s17 8453; GFX900-NEXT: s_mov_b32 s10, s12 8454; GFX900-NEXT: s_mov_b32 
s11, s13 8455; GFX900-NEXT: ;;#ASMSTART 8456; GFX900-NEXT: ; use s[8:13] 8457; GFX900-NEXT: ;;#ASMEND 8458; GFX900-NEXT: s_setpc_b64 s[30:31] 8459; 8460; GFX90A-LABEL: s_shuffle_v3i64_v3i64__2_5_5: 8461; GFX90A: ; %bb.0: 8462; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8463; GFX90A-NEXT: ;;#ASMSTART 8464; GFX90A-NEXT: ; def s[12:17] 8465; GFX90A-NEXT: ;;#ASMEND 8466; GFX90A-NEXT: ;;#ASMSTART 8467; GFX90A-NEXT: ; def s[8:13] 8468; GFX90A-NEXT: ;;#ASMEND 8469; GFX90A-NEXT: s_mov_b32 s8, s16 8470; GFX90A-NEXT: s_mov_b32 s9, s17 8471; GFX90A-NEXT: s_mov_b32 s10, s12 8472; GFX90A-NEXT: s_mov_b32 s11, s13 8473; GFX90A-NEXT: ;;#ASMSTART 8474; GFX90A-NEXT: ; use s[8:13] 8475; GFX90A-NEXT: ;;#ASMEND 8476; GFX90A-NEXT: s_setpc_b64 s[30:31] 8477; 8478; GFX940-LABEL: s_shuffle_v3i64_v3i64__2_5_5: 8479; GFX940: ; %bb.0: 8480; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8481; GFX940-NEXT: ;;#ASMSTART 8482; GFX940-NEXT: ; def s[8:13] 8483; GFX940-NEXT: ;;#ASMEND 8484; GFX940-NEXT: ;;#ASMSTART 8485; GFX940-NEXT: ; def s[0:5] 8486; GFX940-NEXT: ;;#ASMEND 8487; GFX940-NEXT: s_mov_b32 s8, s4 8488; GFX940-NEXT: s_mov_b32 s9, s5 8489; GFX940-NEXT: s_mov_b32 s10, s12 8490; GFX940-NEXT: s_mov_b32 s11, s13 8491; GFX940-NEXT: ;;#ASMSTART 8492; GFX940-NEXT: ; use s[8:13] 8493; GFX940-NEXT: ;;#ASMEND 8494; GFX940-NEXT: s_setpc_b64 s[30:31] 8495 %vec0 = call <3 x i64> asm "; def $0", "=s"() 8496 %vec1 = call <3 x i64> asm "; def $0", "=s"() 8497 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 2, i32 5, i32 5> 8498 call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) 8499 ret void 8500} 8501 8502define void @s_shuffle_v3i64_v3i64__3_5_5() { 8503; GFX9-LABEL: s_shuffle_v3i64_v3i64__3_5_5: 8504; GFX9: ; %bb.0: 8505; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8506; GFX9-NEXT: ;;#ASMSTART 8507; GFX9-NEXT: ; def s[8:13] 8508; GFX9-NEXT: ;;#ASMEND 8509; GFX9-NEXT: s_mov_b32 s10, s12 8510; GFX9-NEXT: s_mov_b32 s11, s13 8511; GFX9-NEXT: 
;;#ASMSTART 8512; GFX9-NEXT: ; use s[8:13] 8513; GFX9-NEXT: ;;#ASMEND 8514; GFX9-NEXT: s_setpc_b64 s[30:31] 8515 %vec0 = call <3 x i64> asm "; def $0", "=s"() 8516 %vec1 = call <3 x i64> asm "; def $0", "=s"() 8517 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 3, i32 5, i32 5> 8518 call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) 8519 ret void 8520} 8521 8522define void @s_shuffle_v3i64_v3i64__4_5_5() { 8523; GFX9-LABEL: s_shuffle_v3i64_v3i64__4_5_5: 8524; GFX9: ; %bb.0: 8525; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8526; GFX9-NEXT: ;;#ASMSTART 8527; GFX9-NEXT: ; def s[8:13] 8528; GFX9-NEXT: ;;#ASMEND 8529; GFX9-NEXT: s_mov_b32 s8, s10 8530; GFX9-NEXT: s_mov_b32 s9, s11 8531; GFX9-NEXT: s_mov_b32 s10, s12 8532; GFX9-NEXT: s_mov_b32 s11, s13 8533; GFX9-NEXT: ;;#ASMSTART 8534; GFX9-NEXT: ; use s[8:13] 8535; GFX9-NEXT: ;;#ASMEND 8536; GFX9-NEXT: s_setpc_b64 s[30:31] 8537 %vec0 = call <3 x i64> asm "; def $0", "=s"() 8538 %vec1 = call <3 x i64> asm "; def $0", "=s"() 8539 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 4, i32 5, i32 5> 8540 call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) 8541 ret void 8542} 8543 8544define void @s_shuffle_v3i64_v3i64__5_u_5() { 8545; GFX9-LABEL: s_shuffle_v3i64_v3i64__5_u_5: 8546; GFX9: ; %bb.0: 8547; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8548; GFX9-NEXT: ;;#ASMSTART 8549; GFX9-NEXT: ; def s[8:13] 8550; GFX9-NEXT: ;;#ASMEND 8551; GFX9-NEXT: s_mov_b32 s8, s12 8552; GFX9-NEXT: s_mov_b32 s9, s13 8553; GFX9-NEXT: ;;#ASMSTART 8554; GFX9-NEXT: ; use s[8:13] 8555; GFX9-NEXT: ;;#ASMEND 8556; GFX9-NEXT: s_setpc_b64 s[30:31] 8557 %vec0 = call <3 x i64> asm "; def $0", "=s"() 8558 %vec1 = call <3 x i64> asm "; def $0", "=s"() 8559 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 poison, i32 5> 8560 call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) 8561 ret void 8562} 8563 8564define void 
@s_shuffle_v3i64_v3i64__5_0_5() { 8565; GFX900-LABEL: s_shuffle_v3i64_v3i64__5_0_5: 8566; GFX900: ; %bb.0: 8567; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8568; GFX900-NEXT: ;;#ASMSTART 8569; GFX900-NEXT: ; def s[4:9] 8570; GFX900-NEXT: ;;#ASMEND 8571; GFX900-NEXT: ;;#ASMSTART 8572; GFX900-NEXT: ; def s[8:13] 8573; GFX900-NEXT: ;;#ASMEND 8574; GFX900-NEXT: s_mov_b32 s8, s12 8575; GFX900-NEXT: s_mov_b32 s9, s13 8576; GFX900-NEXT: s_mov_b32 s10, s4 8577; GFX900-NEXT: s_mov_b32 s11, s5 8578; GFX900-NEXT: ;;#ASMSTART 8579; GFX900-NEXT: ; use s[8:13] 8580; GFX900-NEXT: ;;#ASMEND 8581; GFX900-NEXT: s_setpc_b64 s[30:31] 8582; 8583; GFX90A-LABEL: s_shuffle_v3i64_v3i64__5_0_5: 8584; GFX90A: ; %bb.0: 8585; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8586; GFX90A-NEXT: ;;#ASMSTART 8587; GFX90A-NEXT: ; def s[4:9] 8588; GFX90A-NEXT: ;;#ASMEND 8589; GFX90A-NEXT: ;;#ASMSTART 8590; GFX90A-NEXT: ; def s[8:13] 8591; GFX90A-NEXT: ;;#ASMEND 8592; GFX90A-NEXT: s_mov_b32 s8, s12 8593; GFX90A-NEXT: s_mov_b32 s9, s13 8594; GFX90A-NEXT: s_mov_b32 s10, s4 8595; GFX90A-NEXT: s_mov_b32 s11, s5 8596; GFX90A-NEXT: ;;#ASMSTART 8597; GFX90A-NEXT: ; use s[8:13] 8598; GFX90A-NEXT: ;;#ASMEND 8599; GFX90A-NEXT: s_setpc_b64 s[30:31] 8600; 8601; GFX940-LABEL: s_shuffle_v3i64_v3i64__5_0_5: 8602; GFX940: ; %bb.0: 8603; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8604; GFX940-NEXT: ;;#ASMSTART 8605; GFX940-NEXT: ; def s[8:13] 8606; GFX940-NEXT: ;;#ASMEND 8607; GFX940-NEXT: ;;#ASMSTART 8608; GFX940-NEXT: ; def s[0:5] 8609; GFX940-NEXT: ;;#ASMEND 8610; GFX940-NEXT: s_mov_b32 s8, s12 8611; GFX940-NEXT: s_mov_b32 s9, s13 8612; GFX940-NEXT: s_mov_b32 s10, s0 8613; GFX940-NEXT: s_mov_b32 s11, s1 8614; GFX940-NEXT: ;;#ASMSTART 8615; GFX940-NEXT: ; use s[8:13] 8616; GFX940-NEXT: ;;#ASMEND 8617; GFX940-NEXT: s_setpc_b64 s[30:31] 8618 %vec0 = call <3 x i64> asm "; def $0", "=s"() 8619 %vec1 = call <3 x i64> asm "; def $0", "=s"() 8620 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> 
%vec1, <3 x i32> <i32 5, i32 0, i32 5> 8621 call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) 8622 ret void 8623} 8624 8625define void @s_shuffle_v3i64_v3i64__5_1_5() { 8626; GFX900-LABEL: s_shuffle_v3i64_v3i64__5_1_5: 8627; GFX900: ; %bb.0: 8628; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8629; GFX900-NEXT: ;;#ASMSTART 8630; GFX900-NEXT: ; def s[8:13] 8631; GFX900-NEXT: ;;#ASMEND 8632; GFX900-NEXT: ;;#ASMSTART 8633; GFX900-NEXT: ; def s[12:17] 8634; GFX900-NEXT: ;;#ASMEND 8635; GFX900-NEXT: s_mov_b32 s8, s16 8636; GFX900-NEXT: s_mov_b32 s9, s17 8637; GFX900-NEXT: s_mov_b32 s12, s16 8638; GFX900-NEXT: s_mov_b32 s13, s17 8639; GFX900-NEXT: ;;#ASMSTART 8640; GFX900-NEXT: ; use s[8:13] 8641; GFX900-NEXT: ;;#ASMEND 8642; GFX900-NEXT: s_setpc_b64 s[30:31] 8643; 8644; GFX90A-LABEL: s_shuffle_v3i64_v3i64__5_1_5: 8645; GFX90A: ; %bb.0: 8646; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8647; GFX90A-NEXT: ;;#ASMSTART 8648; GFX90A-NEXT: ; def s[8:13] 8649; GFX90A-NEXT: ;;#ASMEND 8650; GFX90A-NEXT: ;;#ASMSTART 8651; GFX90A-NEXT: ; def s[12:17] 8652; GFX90A-NEXT: ;;#ASMEND 8653; GFX90A-NEXT: s_mov_b32 s8, s16 8654; GFX90A-NEXT: s_mov_b32 s9, s17 8655; GFX90A-NEXT: s_mov_b32 s12, s16 8656; GFX90A-NEXT: s_mov_b32 s13, s17 8657; GFX90A-NEXT: ;;#ASMSTART 8658; GFX90A-NEXT: ; use s[8:13] 8659; GFX90A-NEXT: ;;#ASMEND 8660; GFX90A-NEXT: s_setpc_b64 s[30:31] 8661; 8662; GFX940-LABEL: s_shuffle_v3i64_v3i64__5_1_5: 8663; GFX940: ; %bb.0: 8664; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8665; GFX940-NEXT: ;;#ASMSTART 8666; GFX940-NEXT: ; def s[8:13] 8667; GFX940-NEXT: ;;#ASMEND 8668; GFX940-NEXT: ;;#ASMSTART 8669; GFX940-NEXT: ; def s[0:5] 8670; GFX940-NEXT: ;;#ASMEND 8671; GFX940-NEXT: s_mov_b32 s8, s4 8672; GFX940-NEXT: s_mov_b32 s9, s5 8673; GFX940-NEXT: s_mov_b32 s12, s4 8674; GFX940-NEXT: s_mov_b32 s13, s5 8675; GFX940-NEXT: ;;#ASMSTART 8676; GFX940-NEXT: ; use s[8:13] 8677; GFX940-NEXT: ;;#ASMEND 8678; GFX940-NEXT: s_setpc_b64 s[30:31] 
8679 %vec0 = call <3 x i64> asm "; def $0", "=s"() 8680 %vec1 = call <3 x i64> asm "; def $0", "=s"() 8681 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 1, i32 5> 8682 call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) 8683 ret void 8684} 8685 8686define void @s_shuffle_v3i64_v3i64__5_2_5() { 8687; GFX900-LABEL: s_shuffle_v3i64_v3i64__5_2_5: 8688; GFX900: ; %bb.0: 8689; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8690; GFX900-NEXT: ;;#ASMSTART 8691; GFX900-NEXT: ; def s[12:17] 8692; GFX900-NEXT: ;;#ASMEND 8693; GFX900-NEXT: ;;#ASMSTART 8694; GFX900-NEXT: ; def s[8:13] 8695; GFX900-NEXT: ;;#ASMEND 8696; GFX900-NEXT: s_mov_b32 s8, s12 8697; GFX900-NEXT: s_mov_b32 s9, s13 8698; GFX900-NEXT: s_mov_b32 s10, s16 8699; GFX900-NEXT: s_mov_b32 s11, s17 8700; GFX900-NEXT: ;;#ASMSTART 8701; GFX900-NEXT: ; use s[8:13] 8702; GFX900-NEXT: ;;#ASMEND 8703; GFX900-NEXT: s_setpc_b64 s[30:31] 8704; 8705; GFX90A-LABEL: s_shuffle_v3i64_v3i64__5_2_5: 8706; GFX90A: ; %bb.0: 8707; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8708; GFX90A-NEXT: ;;#ASMSTART 8709; GFX90A-NEXT: ; def s[12:17] 8710; GFX90A-NEXT: ;;#ASMEND 8711; GFX90A-NEXT: ;;#ASMSTART 8712; GFX90A-NEXT: ; def s[8:13] 8713; GFX90A-NEXT: ;;#ASMEND 8714; GFX90A-NEXT: s_mov_b32 s8, s12 8715; GFX90A-NEXT: s_mov_b32 s9, s13 8716; GFX90A-NEXT: s_mov_b32 s10, s16 8717; GFX90A-NEXT: s_mov_b32 s11, s17 8718; GFX90A-NEXT: ;;#ASMSTART 8719; GFX90A-NEXT: ; use s[8:13] 8720; GFX90A-NEXT: ;;#ASMEND 8721; GFX90A-NEXT: s_setpc_b64 s[30:31] 8722; 8723; GFX940-LABEL: s_shuffle_v3i64_v3i64__5_2_5: 8724; GFX940: ; %bb.0: 8725; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8726; GFX940-NEXT: ;;#ASMSTART 8727; GFX940-NEXT: ; def s[8:13] 8728; GFX940-NEXT: ;;#ASMEND 8729; GFX940-NEXT: ;;#ASMSTART 8730; GFX940-NEXT: ; def s[0:5] 8731; GFX940-NEXT: ;;#ASMEND 8732; GFX940-NEXT: s_mov_b32 s8, s12 8733; GFX940-NEXT: s_mov_b32 s9, s13 8734; GFX940-NEXT: s_mov_b32 s10, s4 8735; 
GFX940-NEXT: s_mov_b32 s11, s5 8736; GFX940-NEXT: ;;#ASMSTART 8737; GFX940-NEXT: ; use s[8:13] 8738; GFX940-NEXT: ;;#ASMEND 8739; GFX940-NEXT: s_setpc_b64 s[30:31] 8740 %vec0 = call <3 x i64> asm "; def $0", "=s"() 8741 %vec1 = call <3 x i64> asm "; def $0", "=s"() 8742 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 2, i32 5> 8743 call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) 8744 ret void 8745} 8746 8747define void @s_shuffle_v3i64_v3i64__5_3_5() { 8748; GFX900-LABEL: s_shuffle_v3i64_v3i64__5_3_5: 8749; GFX900: ; %bb.0: 8750; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8751; GFX900-NEXT: ;;#ASMSTART 8752; GFX900-NEXT: ; def s[12:17] 8753; GFX900-NEXT: ;;#ASMEND 8754; GFX900-NEXT: s_mov_b32 s8, s16 8755; GFX900-NEXT: s_mov_b32 s9, s17 8756; GFX900-NEXT: s_mov_b32 s10, s12 8757; GFX900-NEXT: s_mov_b32 s11, s13 8758; GFX900-NEXT: s_mov_b32 s12, s16 8759; GFX900-NEXT: s_mov_b32 s13, s17 8760; GFX900-NEXT: ;;#ASMSTART 8761; GFX900-NEXT: ; use s[8:13] 8762; GFX900-NEXT: ;;#ASMEND 8763; GFX900-NEXT: s_setpc_b64 s[30:31] 8764; 8765; GFX90A-LABEL: s_shuffle_v3i64_v3i64__5_3_5: 8766; GFX90A: ; %bb.0: 8767; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8768; GFX90A-NEXT: ;;#ASMSTART 8769; GFX90A-NEXT: ; def s[12:17] 8770; GFX90A-NEXT: ;;#ASMEND 8771; GFX90A-NEXT: s_mov_b32 s8, s16 8772; GFX90A-NEXT: s_mov_b32 s9, s17 8773; GFX90A-NEXT: s_mov_b32 s10, s12 8774; GFX90A-NEXT: s_mov_b32 s11, s13 8775; GFX90A-NEXT: s_mov_b32 s12, s16 8776; GFX90A-NEXT: s_mov_b32 s13, s17 8777; GFX90A-NEXT: ;;#ASMSTART 8778; GFX90A-NEXT: ; use s[8:13] 8779; GFX90A-NEXT: ;;#ASMEND 8780; GFX90A-NEXT: s_setpc_b64 s[30:31] 8781; 8782; GFX940-LABEL: s_shuffle_v3i64_v3i64__5_3_5: 8783; GFX940: ; %bb.0: 8784; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8785; GFX940-NEXT: ;;#ASMSTART 8786; GFX940-NEXT: ; def s[0:5] 8787; GFX940-NEXT: ;;#ASMEND 8788; GFX940-NEXT: s_mov_b32 s8, s4 8789; GFX940-NEXT: s_mov_b32 s9, s5 8790; 
GFX940-NEXT: s_mov_b32 s10, s0 8791; GFX940-NEXT: s_mov_b32 s11, s1 8792; GFX940-NEXT: s_mov_b32 s12, s4 8793; GFX940-NEXT: s_mov_b32 s13, s5 8794; GFX940-NEXT: ;;#ASMSTART 8795; GFX940-NEXT: ; use s[8:13] 8796; GFX940-NEXT: ;;#ASMEND 8797; GFX940-NEXT: s_setpc_b64 s[30:31] 8798 %vec0 = call <3 x i64> asm "; def $0", "=s"() 8799 %vec1 = call <3 x i64> asm "; def $0", "=s"() 8800 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 3, i32 5> 8801 call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) 8802 ret void 8803} 8804 8805define void @s_shuffle_v3i64_v3i64__5_4_5() { 8806; GFX9-LABEL: s_shuffle_v3i64_v3i64__5_4_5: 8807; GFX9: ; %bb.0: 8808; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8809; GFX9-NEXT: ;;#ASMSTART 8810; GFX9-NEXT: ; def s[8:13] 8811; GFX9-NEXT: ;;#ASMEND 8812; GFX9-NEXT: s_mov_b32 s8, s12 8813; GFX9-NEXT: s_mov_b32 s9, s13 8814; GFX9-NEXT: ;;#ASMSTART 8815; GFX9-NEXT: ; use s[8:13] 8816; GFX9-NEXT: ;;#ASMEND 8817; GFX9-NEXT: s_setpc_b64 s[30:31] 8818 %vec0 = call <3 x i64> asm "; def $0", "=s"() 8819 %vec1 = call <3 x i64> asm "; def $0", "=s"() 8820 %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 4, i32 5> 8821 call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) 8822 ret void 8823} 8824;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: 8825; GFX90APLUS: {{.*}} 8826