1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 2; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s 3; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX90A %s 4; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX940 %s 5 6 7define void @v_shuffle_v4i32_v3i32__u_u_u_u(ptr addrspace(1) inreg %ptr) { 8; GFX9-LABEL: v_shuffle_v4i32_v3i32__u_u_u_u: 9; GFX9: ; %bb.0: 10; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11; GFX9-NEXT: s_setpc_b64 s[30:31] 12 %vec0 = call <3 x i32> asm "; def $0", "=v"() 13 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> poison 14 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 15 ret void 16} 17 18define void @v_shuffle_v4i32_v3i32__0_u_u_u(ptr addrspace(1) inreg %ptr) { 19; GFX900-LABEL: v_shuffle_v4i32_v3i32__0_u_u_u: 20; GFX900: ; %bb.0: 21; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22; GFX900-NEXT: v_mov_b32_e32 v3, 0 23; GFX900-NEXT: ;;#ASMSTART 24; GFX900-NEXT: ; def v[0:2] 25; GFX900-NEXT: ;;#ASMEND 26; GFX900-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17] 27; GFX900-NEXT: s_waitcnt vmcnt(0) 28; GFX900-NEXT: s_setpc_b64 s[30:31] 29; 30; GFX90A-LABEL: v_shuffle_v4i32_v3i32__0_u_u_u: 31; GFX90A: ; %bb.0: 32; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 33; GFX90A-NEXT: v_mov_b32_e32 v3, 0 34; GFX90A-NEXT: ;;#ASMSTART 35; GFX90A-NEXT: ; def v[0:2] 36; GFX90A-NEXT: ;;#ASMEND 37; GFX90A-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17] 38; GFX90A-NEXT: s_waitcnt vmcnt(0) 39; GFX90A-NEXT: s_setpc_b64 s[30:31] 40; 41; GFX940-LABEL: v_shuffle_v4i32_v3i32__0_u_u_u: 42; GFX940: ; %bb.0: 43; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 44; GFX940-NEXT: v_mov_b32_e32 v3, 0 45; GFX940-NEXT: ;;#ASMSTART 46; GFX940-NEXT: ; def v[0:2] 47; GFX940-NEXT: ;;#ASMEND 48; GFX940-NEXT: 
global_store_dwordx4 v3, v[0:3], s[0:1] sc0 sc1 49; GFX940-NEXT: s_waitcnt vmcnt(0) 50; GFX940-NEXT: s_setpc_b64 s[30:31] 51 %vec0 = call <3 x i32> asm "; def $0", "=v"() 52 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison> 53 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 54 ret void 55} 56 57define void @v_shuffle_v4i32_v3i32__1_u_u_u(ptr addrspace(1) inreg %ptr) { 58; GFX900-LABEL: v_shuffle_v4i32_v3i32__1_u_u_u: 59; GFX900: ; %bb.0: 60; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 61; GFX900-NEXT: ;;#ASMSTART 62; GFX900-NEXT: ; def v[0:2] 63; GFX900-NEXT: ;;#ASMEND 64; GFX900-NEXT: v_mov_b32_e32 v3, 0 65; GFX900-NEXT: v_mov_b32_e32 v0, v1 66; GFX900-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17] 67; GFX900-NEXT: s_waitcnt vmcnt(0) 68; GFX900-NEXT: s_setpc_b64 s[30:31] 69; 70; GFX90A-LABEL: v_shuffle_v4i32_v3i32__1_u_u_u: 71; GFX90A: ; %bb.0: 72; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 73; GFX90A-NEXT: ;;#ASMSTART 74; GFX90A-NEXT: ; def v[0:2] 75; GFX90A-NEXT: ;;#ASMEND 76; GFX90A-NEXT: v_mov_b32_e32 v3, 0 77; GFX90A-NEXT: v_mov_b32_e32 v0, v1 78; GFX90A-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17] 79; GFX90A-NEXT: s_waitcnt vmcnt(0) 80; GFX90A-NEXT: s_setpc_b64 s[30:31] 81; 82; GFX940-LABEL: v_shuffle_v4i32_v3i32__1_u_u_u: 83; GFX940: ; %bb.0: 84; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 85; GFX940-NEXT: ;;#ASMSTART 86; GFX940-NEXT: ; def v[0:2] 87; GFX940-NEXT: ;;#ASMEND 88; GFX940-NEXT: v_mov_b32_e32 v3, 0 89; GFX940-NEXT: v_mov_b32_e32 v0, v1 90; GFX940-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1] sc0 sc1 91; GFX940-NEXT: s_waitcnt vmcnt(0) 92; GFX940-NEXT: s_setpc_b64 s[30:31] 93 %vec0 = call <3 x i32> asm "; def $0", "=v"() 94 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison> 95 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 96 ret void 97} 98 99define void 
@v_shuffle_v4i32_v3i32__2_u_u_u(ptr addrspace(1) inreg %ptr) { 100; GFX900-LABEL: v_shuffle_v4i32_v3i32__2_u_u_u: 101; GFX900: ; %bb.0: 102; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 103; GFX900-NEXT: ;;#ASMSTART 104; GFX900-NEXT: ; def v[0:2] 105; GFX900-NEXT: ;;#ASMEND 106; GFX900-NEXT: v_mov_b32_e32 v3, 0 107; GFX900-NEXT: v_mov_b32_e32 v0, v2 108; GFX900-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17] 109; GFX900-NEXT: s_waitcnt vmcnt(0) 110; GFX900-NEXT: s_setpc_b64 s[30:31] 111; 112; GFX90A-LABEL: v_shuffle_v4i32_v3i32__2_u_u_u: 113; GFX90A: ; %bb.0: 114; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 115; GFX90A-NEXT: ;;#ASMSTART 116; GFX90A-NEXT: ; def v[0:2] 117; GFX90A-NEXT: ;;#ASMEND 118; GFX90A-NEXT: v_mov_b32_e32 v3, 0 119; GFX90A-NEXT: v_mov_b32_e32 v0, v2 120; GFX90A-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17] 121; GFX90A-NEXT: s_waitcnt vmcnt(0) 122; GFX90A-NEXT: s_setpc_b64 s[30:31] 123; 124; GFX940-LABEL: v_shuffle_v4i32_v3i32__2_u_u_u: 125; GFX940: ; %bb.0: 126; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 127; GFX940-NEXT: ;;#ASMSTART 128; GFX940-NEXT: ; def v[0:2] 129; GFX940-NEXT: ;;#ASMEND 130; GFX940-NEXT: v_mov_b32_e32 v3, 0 131; GFX940-NEXT: v_mov_b32_e32 v0, v2 132; GFX940-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1] sc0 sc1 133; GFX940-NEXT: s_waitcnt vmcnt(0) 134; GFX940-NEXT: s_setpc_b64 s[30:31] 135 %vec0 = call <3 x i32> asm "; def $0", "=v"() 136 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> <i32 2, i32 poison, i32 poison, i32 poison> 137 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 138 ret void 139} 140 141define void @v_shuffle_v4i32_v3i32__3_u_u_u(ptr addrspace(1) inreg %ptr) { 142; GFX9-LABEL: v_shuffle_v4i32_v3i32__3_u_u_u: 143; GFX9: ; %bb.0: 144; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 145; GFX9-NEXT: s_setpc_b64 s[30:31] 146 %vec0 = call <3 x i32> asm "; def $0", "=v"() 147 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> <i32 3, i32 
poison, i32 poison, i32 poison> 148 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 149 ret void 150} 151 152define void @v_shuffle_v4i32_v3i32__4_u_u_u(ptr addrspace(1) inreg %ptr) { 153; GFX900-LABEL: v_shuffle_v4i32_v3i32__4_u_u_u: 154; GFX900: ; %bb.0: 155; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 156; GFX900-NEXT: ;;#ASMSTART 157; GFX900-NEXT: ; def v[0:2] 158; GFX900-NEXT: ;;#ASMEND 159; GFX900-NEXT: v_mov_b32_e32 v3, 0 160; GFX900-NEXT: v_mov_b32_e32 v0, v1 161; GFX900-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17] 162; GFX900-NEXT: s_waitcnt vmcnt(0) 163; GFX900-NEXT: s_setpc_b64 s[30:31] 164; 165; GFX90A-LABEL: v_shuffle_v4i32_v3i32__4_u_u_u: 166; GFX90A: ; %bb.0: 167; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 168; GFX90A-NEXT: ;;#ASMSTART 169; GFX90A-NEXT: ; def v[0:2] 170; GFX90A-NEXT: ;;#ASMEND 171; GFX90A-NEXT: v_mov_b32_e32 v3, 0 172; GFX90A-NEXT: v_mov_b32_e32 v0, v1 173; GFX90A-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17] 174; GFX90A-NEXT: s_waitcnt vmcnt(0) 175; GFX90A-NEXT: s_setpc_b64 s[30:31] 176; 177; GFX940-LABEL: v_shuffle_v4i32_v3i32__4_u_u_u: 178; GFX940: ; %bb.0: 179; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 180; GFX940-NEXT: ;;#ASMSTART 181; GFX940-NEXT: ; def v[0:2] 182; GFX940-NEXT: ;;#ASMEND 183; GFX940-NEXT: v_mov_b32_e32 v3, 0 184; GFX940-NEXT: v_mov_b32_e32 v0, v1 185; GFX940-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1] sc0 sc1 186; GFX940-NEXT: s_waitcnt vmcnt(0) 187; GFX940-NEXT: s_setpc_b64 s[30:31] 188 %vec0 = call <3 x i32> asm "; def $0", "=v"() 189 %vec1 = call <3 x i32> asm "; def $0", "=v"() 190 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 4, i32 poison, i32 poison, i32 poison> 191 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 192 ret void 193} 194 195define void @v_shuffle_v4i32_v3i32__5_u_u_u(ptr addrspace(1) inreg %ptr) { 196; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_u_u_u: 197; GFX900: ; %bb.0: 198; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) 199; GFX900-NEXT: ;;#ASMSTART 200; GFX900-NEXT: ; def v[0:2] 201; GFX900-NEXT: ;;#ASMEND 202; GFX900-NEXT: v_mov_b32_e32 v3, 0 203; GFX900-NEXT: v_mov_b32_e32 v0, v2 204; GFX900-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17] 205; GFX900-NEXT: s_waitcnt vmcnt(0) 206; GFX900-NEXT: s_setpc_b64 s[30:31] 207; 208; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_u_u_u: 209; GFX90A: ; %bb.0: 210; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 211; GFX90A-NEXT: ;;#ASMSTART 212; GFX90A-NEXT: ; def v[0:2] 213; GFX90A-NEXT: ;;#ASMEND 214; GFX90A-NEXT: v_mov_b32_e32 v3, 0 215; GFX90A-NEXT: v_mov_b32_e32 v0, v2 216; GFX90A-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17] 217; GFX90A-NEXT: s_waitcnt vmcnt(0) 218; GFX90A-NEXT: s_setpc_b64 s[30:31] 219; 220; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_u_u_u: 221; GFX940: ; %bb.0: 222; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 223; GFX940-NEXT: ;;#ASMSTART 224; GFX940-NEXT: ; def v[0:2] 225; GFX940-NEXT: ;;#ASMEND 226; GFX940-NEXT: v_mov_b32_e32 v3, 0 227; GFX940-NEXT: v_mov_b32_e32 v0, v2 228; GFX940-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1] sc0 sc1 229; GFX940-NEXT: s_waitcnt vmcnt(0) 230; GFX940-NEXT: s_setpc_b64 s[30:31] 231 %vec0 = call <3 x i32> asm "; def $0", "=v"() 232 %vec1 = call <3 x i32> asm "; def $0", "=v"() 233 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 poison, i32 poison, i32 poison> 234 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 235 ret void 236} 237 238define void @v_shuffle_v4i32_v3i32__5_0_u_u(ptr addrspace(1) inreg %ptr) { 239; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_0_u_u: 240; GFX900: ; %bb.0: 241; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 242; GFX900-NEXT: ;;#ASMSTART 243; GFX900-NEXT: ; def v[1:3] 244; GFX900-NEXT: ;;#ASMEND 245; GFX900-NEXT: v_mov_b32_e32 v5, 0 246; GFX900-NEXT: ;;#ASMSTART 247; GFX900-NEXT: ; def v[2:4] 248; GFX900-NEXT: ;;#ASMEND 249; GFX900-NEXT: v_mov_b32_e32 v0, v4 250; GFX900-NEXT: global_store_dwordx4 v5, 
v[0:3], s[16:17] 251; GFX900-NEXT: s_waitcnt vmcnt(0) 252; GFX900-NEXT: s_setpc_b64 s[30:31] 253; 254; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_0_u_u: 255; GFX90A: ; %bb.0: 256; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 257; GFX90A-NEXT: ;;#ASMSTART 258; GFX90A-NEXT: ; def v[2:4] 259; GFX90A-NEXT: ;;#ASMEND 260; GFX90A-NEXT: v_mov_b32_e32 v7, 0 261; GFX90A-NEXT: ;;#ASMSTART 262; GFX90A-NEXT: ; def v[4:6] 263; GFX90A-NEXT: ;;#ASMEND 264; GFX90A-NEXT: v_mov_b32_e32 v0, v6 265; GFX90A-NEXT: v_mov_b32_e32 v1, v2 266; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17] 267; GFX90A-NEXT: s_waitcnt vmcnt(0) 268; GFX90A-NEXT: s_setpc_b64 s[30:31] 269; 270; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_0_u_u: 271; GFX940: ; %bb.0: 272; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 273; GFX940-NEXT: ;;#ASMSTART 274; GFX940-NEXT: ; def v[2:4] 275; GFX940-NEXT: ;;#ASMEND 276; GFX940-NEXT: v_mov_b32_e32 v7, 0 277; GFX940-NEXT: ;;#ASMSTART 278; GFX940-NEXT: ; def v[4:6] 279; GFX940-NEXT: ;;#ASMEND 280; GFX940-NEXT: v_mov_b32_e32 v1, v2 281; GFX940-NEXT: v_mov_b32_e32 v0, v6 282; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 283; GFX940-NEXT: s_waitcnt vmcnt(0) 284; GFX940-NEXT: s_setpc_b64 s[30:31] 285 %vec0 = call <3 x i32> asm "; def $0", "=v"() 286 %vec1 = call <3 x i32> asm "; def $0", "=v"() 287 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 0, i32 poison, i32 poison> 288 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 289 ret void 290} 291 292define void @v_shuffle_v4i32_v3i32__5_1_u_u(ptr addrspace(1) inreg %ptr) { 293; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_1_u_u: 294; GFX900: ; %bb.0: 295; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 296; GFX900-NEXT: ;;#ASMSTART 297; GFX900-NEXT: ; def v[0:2] 298; GFX900-NEXT: ;;#ASMEND 299; GFX900-NEXT: v_mov_b32_e32 v5, 0 300; GFX900-NEXT: ;;#ASMSTART 301; GFX900-NEXT: ; def v[2:4] 302; GFX900-NEXT: ;;#ASMEND 303; GFX900-NEXT: v_mov_b32_e32 v0, v4 304; 
GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] 305; GFX900-NEXT: s_waitcnt vmcnt(0) 306; GFX900-NEXT: s_setpc_b64 s[30:31] 307; 308; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_1_u_u: 309; GFX90A: ; %bb.0: 310; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 311; GFX90A-NEXT: ;;#ASMSTART 312; GFX90A-NEXT: ; def v[0:2] 313; GFX90A-NEXT: ;;#ASMEND 314; GFX90A-NEXT: v_mov_b32_e32 v5, 0 315; GFX90A-NEXT: ;;#ASMSTART 316; GFX90A-NEXT: ; def v[2:4] 317; GFX90A-NEXT: ;;#ASMEND 318; GFX90A-NEXT: v_mov_b32_e32 v0, v4 319; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] 320; GFX90A-NEXT: s_waitcnt vmcnt(0) 321; GFX90A-NEXT: s_setpc_b64 s[30:31] 322; 323; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_1_u_u: 324; GFX940: ; %bb.0: 325; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 326; GFX940-NEXT: ;;#ASMSTART 327; GFX940-NEXT: ; def v[0:2] 328; GFX940-NEXT: ;;#ASMEND 329; GFX940-NEXT: v_mov_b32_e32 v5, 0 330; GFX940-NEXT: ;;#ASMSTART 331; GFX940-NEXT: ; def v[2:4] 332; GFX940-NEXT: ;;#ASMEND 333; GFX940-NEXT: s_nop 0 334; GFX940-NEXT: v_mov_b32_e32 v0, v4 335; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 336; GFX940-NEXT: s_waitcnt vmcnt(0) 337; GFX940-NEXT: s_setpc_b64 s[30:31] 338 %vec0 = call <3 x i32> asm "; def $0", "=v"() 339 %vec1 = call <3 x i32> asm "; def $0", "=v"() 340 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 1, i32 poison, i32 poison> 341 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 342 ret void 343} 344 345define void @v_shuffle_v4i32_v3i32__5_2_u_u(ptr addrspace(1) inreg %ptr) { 346; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_2_u_u: 347; GFX900: ; %bb.0: 348; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 349; GFX900-NEXT: ;;#ASMSTART 350; GFX900-NEXT: ; def v[0:2] 351; GFX900-NEXT: ;;#ASMEND 352; GFX900-NEXT: v_mov_b32_e32 v6, 0 353; GFX900-NEXT: ;;#ASMSTART 354; GFX900-NEXT: ; def v[3:5] 355; GFX900-NEXT: ;;#ASMEND 356; GFX900-NEXT: v_mov_b32_e32 v0, v5 357; GFX900-NEXT: 
v_mov_b32_e32 v1, v2 358; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] 359; GFX900-NEXT: s_waitcnt vmcnt(0) 360; GFX900-NEXT: s_setpc_b64 s[30:31] 361; 362; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_2_u_u: 363; GFX90A: ; %bb.0: 364; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 365; GFX90A-NEXT: ;;#ASMSTART 366; GFX90A-NEXT: ; def v[0:2] 367; GFX90A-NEXT: ;;#ASMEND 368; GFX90A-NEXT: v_mov_b32_e32 v3, 0 369; GFX90A-NEXT: ;;#ASMSTART 370; GFX90A-NEXT: ; def v[4:6] 371; GFX90A-NEXT: ;;#ASMEND 372; GFX90A-NEXT: v_mov_b32_e32 v0, v6 373; GFX90A-NEXT: v_mov_b32_e32 v1, v2 374; GFX90A-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17] 375; GFX90A-NEXT: s_waitcnt vmcnt(0) 376; GFX90A-NEXT: s_setpc_b64 s[30:31] 377; 378; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_2_u_u: 379; GFX940: ; %bb.0: 380; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 381; GFX940-NEXT: ;;#ASMSTART 382; GFX940-NEXT: ; def v[0:2] 383; GFX940-NEXT: ;;#ASMEND 384; GFX940-NEXT: v_mov_b32_e32 v3, 0 385; GFX940-NEXT: ;;#ASMSTART 386; GFX940-NEXT: ; def v[4:6] 387; GFX940-NEXT: ;;#ASMEND 388; GFX940-NEXT: v_mov_b32_e32 v1, v2 389; GFX940-NEXT: v_mov_b32_e32 v0, v6 390; GFX940-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1] sc0 sc1 391; GFX940-NEXT: s_waitcnt vmcnt(0) 392; GFX940-NEXT: s_setpc_b64 s[30:31] 393 %vec0 = call <3 x i32> asm "; def $0", "=v"() 394 %vec1 = call <3 x i32> asm "; def $0", "=v"() 395 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 2, i32 poison, i32 poison> 396 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 397 ret void 398} 399 400define void @v_shuffle_v4i32_v3i32__5_3_u_u(ptr addrspace(1) inreg %ptr) { 401; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_3_u_u: 402; GFX900: ; %bb.0: 403; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 404; GFX900-NEXT: v_mov_b32_e32 v4, 0 405; GFX900-NEXT: ;;#ASMSTART 406; GFX900-NEXT: ; def v[1:3] 407; GFX900-NEXT: ;;#ASMEND 408; GFX900-NEXT: v_mov_b32_e32 v0, v3 409; GFX900-NEXT: global_store_dwordx4 
v4, v[0:3], s[16:17] 410; GFX900-NEXT: s_waitcnt vmcnt(0) 411; GFX900-NEXT: s_setpc_b64 s[30:31] 412; 413; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_3_u_u: 414; GFX90A: ; %bb.0: 415; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 416; GFX90A-NEXT: v_mov_b32_e32 v5, 0 417; GFX90A-NEXT: ;;#ASMSTART 418; GFX90A-NEXT: ; def v[2:4] 419; GFX90A-NEXT: ;;#ASMEND 420; GFX90A-NEXT: v_mov_b32_e32 v0, v4 421; GFX90A-NEXT: v_mov_b32_e32 v1, v2 422; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] 423; GFX90A-NEXT: s_waitcnt vmcnt(0) 424; GFX90A-NEXT: s_setpc_b64 s[30:31] 425; 426; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_3_u_u: 427; GFX940: ; %bb.0: 428; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 429; GFX940-NEXT: v_mov_b32_e32 v5, 0 430; GFX940-NEXT: ;;#ASMSTART 431; GFX940-NEXT: ; def v[2:4] 432; GFX940-NEXT: ;;#ASMEND 433; GFX940-NEXT: s_nop 0 434; GFX940-NEXT: v_mov_b32_e32 v0, v4 435; GFX940-NEXT: v_mov_b32_e32 v1, v2 436; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 437; GFX940-NEXT: s_waitcnt vmcnt(0) 438; GFX940-NEXT: s_setpc_b64 s[30:31] 439 %vec0 = call <3 x i32> asm "; def $0", "=v"() 440 %vec1 = call <3 x i32> asm "; def $0", "=v"() 441 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 3, i32 poison, i32 poison> 442 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 443 ret void 444} 445 446define void @v_shuffle_v4i32_v3i32__5_4_u_u(ptr addrspace(1) inreg %ptr) { 447; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_4_u_u: 448; GFX900: ; %bb.0: 449; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 450; GFX900-NEXT: ;;#ASMSTART 451; GFX900-NEXT: ; def v[0:2] 452; GFX900-NEXT: ;;#ASMEND 453; GFX900-NEXT: v_mov_b32_e32 v3, 0 454; GFX900-NEXT: v_mov_b32_e32 v0, v2 455; GFX900-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17] 456; GFX900-NEXT: s_waitcnt vmcnt(0) 457; GFX900-NEXT: s_setpc_b64 s[30:31] 458; 459; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_4_u_u: 460; GFX90A: ; %bb.0: 461; GFX90A-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) 462; GFX90A-NEXT: ;;#ASMSTART 463; GFX90A-NEXT: ; def v[0:2] 464; GFX90A-NEXT: ;;#ASMEND 465; GFX90A-NEXT: v_mov_b32_e32 v3, 0 466; GFX90A-NEXT: v_mov_b32_e32 v0, v2 467; GFX90A-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17] 468; GFX90A-NEXT: s_waitcnt vmcnt(0) 469; GFX90A-NEXT: s_setpc_b64 s[30:31] 470; 471; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_4_u_u: 472; GFX940: ; %bb.0: 473; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 474; GFX940-NEXT: ;;#ASMSTART 475; GFX940-NEXT: ; def v[0:2] 476; GFX940-NEXT: ;;#ASMEND 477; GFX940-NEXT: v_mov_b32_e32 v3, 0 478; GFX940-NEXT: v_mov_b32_e32 v0, v2 479; GFX940-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1] sc0 sc1 480; GFX940-NEXT: s_waitcnt vmcnt(0) 481; GFX940-NEXT: s_setpc_b64 s[30:31] 482 %vec0 = call <3 x i32> asm "; def $0", "=v"() 483 %vec1 = call <3 x i32> asm "; def $0", "=v"() 484 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 4, i32 poison, i32 poison> 485 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 486 ret void 487} 488 489define void @v_shuffle_v4i32_v3i32__5_5_u_u(ptr addrspace(1) inreg %ptr) { 490; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_u_u: 491; GFX900: ; %bb.0: 492; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 493; GFX900-NEXT: ;;#ASMSTART 494; GFX900-NEXT: ; def v[0:2] 495; GFX900-NEXT: ;;#ASMEND 496; GFX900-NEXT: v_mov_b32_e32 v3, 0 497; GFX900-NEXT: v_mov_b32_e32 v0, v2 498; GFX900-NEXT: v_mov_b32_e32 v1, v2 499; GFX900-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17] 500; GFX900-NEXT: s_waitcnt vmcnt(0) 501; GFX900-NEXT: s_setpc_b64 s[30:31] 502; 503; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_u_u: 504; GFX90A: ; %bb.0: 505; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 506; GFX90A-NEXT: ;;#ASMSTART 507; GFX90A-NEXT: ; def v[0:2] 508; GFX90A-NEXT: ;;#ASMEND 509; GFX90A-NEXT: v_mov_b32_e32 v3, 0 510; GFX90A-NEXT: v_mov_b32_e32 v0, v2 511; GFX90A-NEXT: v_mov_b32_e32 v1, v2 512; GFX90A-NEXT: global_store_dwordx4 v3, v[0:3], 
s[16:17] 513; GFX90A-NEXT: s_waitcnt vmcnt(0) 514; GFX90A-NEXT: s_setpc_b64 s[30:31] 515; 516; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_u_u: 517; GFX940: ; %bb.0: 518; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 519; GFX940-NEXT: ;;#ASMSTART 520; GFX940-NEXT: ; def v[0:2] 521; GFX940-NEXT: ;;#ASMEND 522; GFX940-NEXT: v_mov_b32_e32 v3, 0 523; GFX940-NEXT: v_mov_b32_e32 v0, v2 524; GFX940-NEXT: v_mov_b32_e32 v1, v2 525; GFX940-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1] sc0 sc1 526; GFX940-NEXT: s_waitcnt vmcnt(0) 527; GFX940-NEXT: s_setpc_b64 s[30:31] 528 %vec0 = call <3 x i32> asm "; def $0", "=v"() 529 %vec1 = call <3 x i32> asm "; def $0", "=v"() 530 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 poison, i32 poison> 531 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 532 ret void 533} 534 535define void @v_shuffle_v4i32_v3i32__5_5_0_u(ptr addrspace(1) inreg %ptr) { 536; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_0_u: 537; GFX900: ; %bb.0: 538; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 539; GFX900-NEXT: ;;#ASMSTART 540; GFX900-NEXT: ; def v[2:4] 541; GFX900-NEXT: ;;#ASMEND 542; GFX900-NEXT: v_mov_b32_e32 v6, 0 543; GFX900-NEXT: ;;#ASMSTART 544; GFX900-NEXT: ; def v[3:5] 545; GFX900-NEXT: ;;#ASMEND 546; GFX900-NEXT: v_mov_b32_e32 v0, v5 547; GFX900-NEXT: v_mov_b32_e32 v1, v5 548; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] 549; GFX900-NEXT: s_waitcnt vmcnt(0) 550; GFX900-NEXT: s_setpc_b64 s[30:31] 551; 552; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_0_u: 553; GFX90A: ; %bb.0: 554; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 555; GFX90A-NEXT: ;;#ASMSTART 556; GFX90A-NEXT: ; def v[2:4] 557; GFX90A-NEXT: ;;#ASMEND 558; GFX90A-NEXT: v_mov_b32_e32 v7, 0 559; GFX90A-NEXT: ;;#ASMSTART 560; GFX90A-NEXT: ; def v[4:6] 561; GFX90A-NEXT: ;;#ASMEND 562; GFX90A-NEXT: v_mov_b32_e32 v0, v6 563; GFX90A-NEXT: v_mov_b32_e32 v1, v6 564; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17] 565; 
GFX90A-NEXT: s_waitcnt vmcnt(0) 566; GFX90A-NEXT: s_setpc_b64 s[30:31] 567; 568; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_0_u: 569; GFX940: ; %bb.0: 570; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 571; GFX940-NEXT: ;;#ASMSTART 572; GFX940-NEXT: ; def v[2:4] 573; GFX940-NEXT: ;;#ASMEND 574; GFX940-NEXT: v_mov_b32_e32 v7, 0 575; GFX940-NEXT: ;;#ASMSTART 576; GFX940-NEXT: ; def v[4:6] 577; GFX940-NEXT: ;;#ASMEND 578; GFX940-NEXT: s_nop 0 579; GFX940-NEXT: v_mov_b32_e32 v0, v6 580; GFX940-NEXT: v_mov_b32_e32 v1, v6 581; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 582; GFX940-NEXT: s_waitcnt vmcnt(0) 583; GFX940-NEXT: s_setpc_b64 s[30:31] 584 %vec0 = call <3 x i32> asm "; def $0", "=v"() 585 %vec1 = call <3 x i32> asm "; def $0", "=v"() 586 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 0, i32 poison> 587 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 588 ret void 589} 590 591define void @v_shuffle_v4i32_v3i32__5_5_1_u(ptr addrspace(1) inreg %ptr) { 592; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_1_u: 593; GFX900: ; %bb.0: 594; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 595; GFX900-NEXT: ;;#ASMSTART 596; GFX900-NEXT: ; def v[1:3] 597; GFX900-NEXT: ;;#ASMEND 598; GFX900-NEXT: v_mov_b32_e32 v6, 0 599; GFX900-NEXT: ;;#ASMSTART 600; GFX900-NEXT: ; def v[3:5] 601; GFX900-NEXT: ;;#ASMEND 602; GFX900-NEXT: v_mov_b32_e32 v0, v5 603; GFX900-NEXT: v_mov_b32_e32 v1, v5 604; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] 605; GFX900-NEXT: s_waitcnt vmcnt(0) 606; GFX900-NEXT: s_setpc_b64 s[30:31] 607; 608; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_1_u: 609; GFX90A: ; %bb.0: 610; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 611; GFX90A-NEXT: ;;#ASMSTART 612; GFX90A-NEXT: ; def v[2:4] 613; GFX90A-NEXT: ;;#ASMEND 614; GFX90A-NEXT: ;;#ASMSTART 615; GFX90A-NEXT: ; def v[0:2] 616; GFX90A-NEXT: ;;#ASMEND 617; GFX90A-NEXT: v_mov_b32_e32 v5, 0 618; GFX90A-NEXT: v_mov_b32_e32 v0, v2 619; 
GFX90A-NEXT: v_mov_b32_e32 v1, v2 620; GFX90A-NEXT: v_mov_b32_e32 v2, v3 621; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] 622; GFX90A-NEXT: s_waitcnt vmcnt(0) 623; GFX90A-NEXT: s_setpc_b64 s[30:31] 624; 625; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_1_u: 626; GFX940: ; %bb.0: 627; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 628; GFX940-NEXT: ;;#ASMSTART 629; GFX940-NEXT: ; def v[2:4] 630; GFX940-NEXT: ;;#ASMEND 631; GFX940-NEXT: v_mov_b32_e32 v5, 0 632; GFX940-NEXT: ;;#ASMSTART 633; GFX940-NEXT: ; def v[0:2] 634; GFX940-NEXT: ;;#ASMEND 635; GFX940-NEXT: s_nop 0 636; GFX940-NEXT: v_mov_b32_e32 v0, v2 637; GFX940-NEXT: v_mov_b32_e32 v1, v2 638; GFX940-NEXT: v_mov_b32_e32 v2, v3 639; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 640; GFX940-NEXT: s_waitcnt vmcnt(0) 641; GFX940-NEXT: s_setpc_b64 s[30:31] 642 %vec0 = call <3 x i32> asm "; def $0", "=v"() 643 %vec1 = call <3 x i32> asm "; def $0", "=v"() 644 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 1, i32 poison> 645 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 646 ret void 647} 648 649define void @v_shuffle_v4i32_v3i32__5_5_2_u(ptr addrspace(1) inreg %ptr) { 650; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_2_u: 651; GFX900: ; %bb.0: 652; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 653; GFX900-NEXT: ;;#ASMSTART 654; GFX900-NEXT: ; def v[0:2] 655; GFX900-NEXT: ;;#ASMEND 656; GFX900-NEXT: v_mov_b32_e32 v6, 0 657; GFX900-NEXT: ;;#ASMSTART 658; GFX900-NEXT: ; def v[3:5] 659; GFX900-NEXT: ;;#ASMEND 660; GFX900-NEXT: v_mov_b32_e32 v0, v5 661; GFX900-NEXT: v_mov_b32_e32 v1, v5 662; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] 663; GFX900-NEXT: s_waitcnt vmcnt(0) 664; GFX900-NEXT: s_setpc_b64 s[30:31] 665; 666; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_2_u: 667; GFX90A: ; %bb.0: 668; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 669; GFX90A-NEXT: ;;#ASMSTART 670; GFX90A-NEXT: ; def v[0:2] 671; GFX90A-NEXT: ;;#ASMEND 672; 
GFX90A-NEXT: v_mov_b32_e32 v3, 0 673; GFX90A-NEXT: ;;#ASMSTART 674; GFX90A-NEXT: ; def v[4:6] 675; GFX90A-NEXT: ;;#ASMEND 676; GFX90A-NEXT: v_mov_b32_e32 v0, v6 677; GFX90A-NEXT: v_mov_b32_e32 v1, v6 678; GFX90A-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17] 679; GFX90A-NEXT: s_waitcnt vmcnt(0) 680; GFX90A-NEXT: s_setpc_b64 s[30:31] 681; 682; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_2_u: 683; GFX940: ; %bb.0: 684; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 685; GFX940-NEXT: ;;#ASMSTART 686; GFX940-NEXT: ; def v[0:2] 687; GFX940-NEXT: ;;#ASMEND 688; GFX940-NEXT: v_mov_b32_e32 v3, 0 689; GFX940-NEXT: ;;#ASMSTART 690; GFX940-NEXT: ; def v[4:6] 691; GFX940-NEXT: ;;#ASMEND 692; GFX940-NEXT: s_nop 0 693; GFX940-NEXT: v_mov_b32_e32 v0, v6 694; GFX940-NEXT: v_mov_b32_e32 v1, v6 695; GFX940-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1] sc0 sc1 696; GFX940-NEXT: s_waitcnt vmcnt(0) 697; GFX940-NEXT: s_setpc_b64 s[30:31] 698 %vec0 = call <3 x i32> asm "; def $0", "=v"() 699 %vec1 = call <3 x i32> asm "; def $0", "=v"() 700 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 2, i32 poison> 701 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 702 ret void 703} 704 705define void @v_shuffle_v4i32_v3i32__5_5_3_u(ptr addrspace(1) inreg %ptr) { 706; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_3_u: 707; GFX900: ; %bb.0: 708; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 709; GFX900-NEXT: v_mov_b32_e32 v5, 0 710; GFX900-NEXT: ;;#ASMSTART 711; GFX900-NEXT: ; def v[2:4] 712; GFX900-NEXT: ;;#ASMEND 713; GFX900-NEXT: v_mov_b32_e32 v0, v4 714; GFX900-NEXT: v_mov_b32_e32 v1, v4 715; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] 716; GFX900-NEXT: s_waitcnt vmcnt(0) 717; GFX900-NEXT: s_setpc_b64 s[30:31] 718; 719; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_3_u: 720; GFX90A: ; %bb.0: 721; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 722; GFX90A-NEXT: v_mov_b32_e32 v5, 0 723; GFX90A-NEXT: ;;#ASMSTART 724; GFX90A-NEXT: ; def 
v[2:4] 725; GFX90A-NEXT: ;;#ASMEND 726; GFX90A-NEXT: v_mov_b32_e32 v0, v4 727; GFX90A-NEXT: v_mov_b32_e32 v1, v4 728; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] 729; GFX90A-NEXT: s_waitcnt vmcnt(0) 730; GFX90A-NEXT: s_setpc_b64 s[30:31] 731; 732; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_3_u: 733; GFX940: ; %bb.0: 734; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 735; GFX940-NEXT: v_mov_b32_e32 v5, 0 736; GFX940-NEXT: ;;#ASMSTART 737; GFX940-NEXT: ; def v[2:4] 738; GFX940-NEXT: ;;#ASMEND 739; GFX940-NEXT: s_nop 0 740; GFX940-NEXT: v_mov_b32_e32 v0, v4 741; GFX940-NEXT: v_mov_b32_e32 v1, v4 742; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 743; GFX940-NEXT: s_waitcnt vmcnt(0) 744; GFX940-NEXT: s_setpc_b64 s[30:31] 745 %vec0 = call <3 x i32> asm "; def $0", "=v"() 746 %vec1 = call <3 x i32> asm "; def $0", "=v"() 747 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 3, i32 poison> 748 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 749 ret void 750} 751 752define void @v_shuffle_v4i32_v3i32__5_5_4_u(ptr addrspace(1) inreg %ptr) { 753; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_4_u: 754; GFX900: ; %bb.0: 755; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 756; GFX900-NEXT: ;;#ASMSTART 757; GFX900-NEXT: ; def v[1:3] 758; GFX900-NEXT: ;;#ASMEND 759; GFX900-NEXT: v_mov_b32_e32 v4, 0 760; GFX900-NEXT: v_mov_b32_e32 v0, v3 761; GFX900-NEXT: v_mov_b32_e32 v1, v3 762; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] 763; GFX900-NEXT: s_waitcnt vmcnt(0) 764; GFX900-NEXT: s_setpc_b64 s[30:31] 765; 766; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_4_u: 767; GFX90A: ; %bb.0: 768; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 769; GFX90A-NEXT: ;;#ASMSTART 770; GFX90A-NEXT: ; def v[2:4] 771; GFX90A-NEXT: ;;#ASMEND 772; GFX90A-NEXT: v_mov_b32_e32 v5, 0 773; GFX90A-NEXT: v_mov_b32_e32 v0, v4 774; GFX90A-NEXT: v_mov_b32_e32 v1, v4 775; GFX90A-NEXT: v_mov_b32_e32 v2, v3 776; GFX90A-NEXT: 
global_store_dwordx4 v5, v[0:3], s[16:17] 777; GFX90A-NEXT: s_waitcnt vmcnt(0) 778; GFX90A-NEXT: s_setpc_b64 s[30:31] 779; 780; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_4_u: 781; GFX940: ; %bb.0: 782; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 783; GFX940-NEXT: ;;#ASMSTART 784; GFX940-NEXT: ; def v[2:4] 785; GFX940-NEXT: ;;#ASMEND 786; GFX940-NEXT: v_mov_b32_e32 v5, 0 787; GFX940-NEXT: v_mov_b32_e32 v0, v4 788; GFX940-NEXT: v_mov_b32_e32 v1, v4 789; GFX940-NEXT: v_mov_b32_e32 v2, v3 790; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 791; GFX940-NEXT: s_waitcnt vmcnt(0) 792; GFX940-NEXT: s_setpc_b64 s[30:31] 793 %vec0 = call <3 x i32> asm "; def $0", "=v"() 794 %vec1 = call <3 x i32> asm "; def $0", "=v"() 795 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 4, i32 poison> 796 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 797 ret void 798} 799 800define void @v_shuffle_v4i32_v3i32__5_5_5_u(ptr addrspace(1) inreg %ptr) { 801; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_5_u: 802; GFX900: ; %bb.0: 803; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 804; GFX900-NEXT: ;;#ASMSTART 805; GFX900-NEXT: ; def v[0:2] 806; GFX900-NEXT: ;;#ASMEND 807; GFX900-NEXT: v_mov_b32_e32 v3, 0 808; GFX900-NEXT: v_mov_b32_e32 v0, v2 809; GFX900-NEXT: v_mov_b32_e32 v1, v2 810; GFX900-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17] 811; GFX900-NEXT: s_waitcnt vmcnt(0) 812; GFX900-NEXT: s_setpc_b64 s[30:31] 813; 814; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_5_u: 815; GFX90A: ; %bb.0: 816; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 817; GFX90A-NEXT: ;;#ASMSTART 818; GFX90A-NEXT: ; def v[0:2] 819; GFX90A-NEXT: ;;#ASMEND 820; GFX90A-NEXT: v_mov_b32_e32 v3, 0 821; GFX90A-NEXT: v_mov_b32_e32 v0, v2 822; GFX90A-NEXT: v_mov_b32_e32 v1, v2 823; GFX90A-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17] 824; GFX90A-NEXT: s_waitcnt vmcnt(0) 825; GFX90A-NEXT: s_setpc_b64 s[30:31] 826; 827; GFX940-LABEL: 
v_shuffle_v4i32_v3i32__5_5_5_u: 828; GFX940: ; %bb.0: 829; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 830; GFX940-NEXT: ;;#ASMSTART 831; GFX940-NEXT: ; def v[0:2] 832; GFX940-NEXT: ;;#ASMEND 833; GFX940-NEXT: v_mov_b32_e32 v3, 0 834; GFX940-NEXT: v_mov_b32_e32 v0, v2 835; GFX940-NEXT: v_mov_b32_e32 v1, v2 836; GFX940-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1] sc0 sc1 837; GFX940-NEXT: s_waitcnt vmcnt(0) 838; GFX940-NEXT: s_setpc_b64 s[30:31] 839 %vec0 = call <3 x i32> asm "; def $0", "=v"() 840 %vec1 = call <3 x i32> asm "; def $0", "=v"() 841 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 5, i32 poison> 842 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 843 ret void 844} 845 846define void @v_shuffle_v4i32_v3i32__5_5_5_0(ptr addrspace(1) inreg %ptr) { 847; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_5_0: 848; GFX900: ; %bb.0: 849; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 850; GFX900-NEXT: ;;#ASMSTART 851; GFX900-NEXT: ; def v[0:2] 852; GFX900-NEXT: ;;#ASMEND 853; GFX900-NEXT: v_mov_b32_e32 v6, 0 854; GFX900-NEXT: v_mov_b32_e32 v0, v2 855; GFX900-NEXT: v_mov_b32_e32 v1, v2 856; GFX900-NEXT: ;;#ASMSTART 857; GFX900-NEXT: ; def v[3:5] 858; GFX900-NEXT: ;;#ASMEND 859; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] 860; GFX900-NEXT: s_waitcnt vmcnt(0) 861; GFX900-NEXT: s_setpc_b64 s[30:31] 862; 863; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_5_0: 864; GFX90A: ; %bb.0: 865; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 866; GFX90A-NEXT: ;;#ASMSTART 867; GFX90A-NEXT: ; def v[0:2] 868; GFX90A-NEXT: ;;#ASMEND 869; GFX90A-NEXT: v_mov_b32_e32 v7, 0 870; GFX90A-NEXT: ;;#ASMSTART 871; GFX90A-NEXT: ; def v[4:6] 872; GFX90A-NEXT: ;;#ASMEND 873; GFX90A-NEXT: v_mov_b32_e32 v0, v2 874; GFX90A-NEXT: v_mov_b32_e32 v1, v2 875; GFX90A-NEXT: v_mov_b32_e32 v3, v4 876; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17] 877; GFX90A-NEXT: s_waitcnt vmcnt(0) 878; GFX90A-NEXT: s_setpc_b64 s[30:31] 879; 880; 
GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_5_0: 881; GFX940: ; %bb.0: 882; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 883; GFX940-NEXT: ;;#ASMSTART 884; GFX940-NEXT: ; def v[0:2] 885; GFX940-NEXT: ;;#ASMEND 886; GFX940-NEXT: v_mov_b32_e32 v7, 0 887; GFX940-NEXT: ;;#ASMSTART 888; GFX940-NEXT: ; def v[4:6] 889; GFX940-NEXT: ;;#ASMEND 890; GFX940-NEXT: v_mov_b32_e32 v0, v2 891; GFX940-NEXT: v_mov_b32_e32 v1, v2 892; GFX940-NEXT: v_mov_b32_e32 v3, v4 893; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 894; GFX940-NEXT: s_waitcnt vmcnt(0) 895; GFX940-NEXT: s_setpc_b64 s[30:31] 896 %vec0 = call <3 x i32> asm "; def $0", "=v"() 897 %vec1 = call <3 x i32> asm "; def $0", "=v"() 898 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 5, i32 0> 899 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 900 ret void 901} 902 903define void @v_shuffle_v4i32_v3i32__5_5_5_1(ptr addrspace(1) inreg %ptr) { 904; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_5_1: 905; GFX900: ; %bb.0: 906; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 907; GFX900-NEXT: ;;#ASMSTART 908; GFX900-NEXT: ; def v[2:4] 909; GFX900-NEXT: ;;#ASMEND 910; GFX900-NEXT: ;;#ASMSTART 911; GFX900-NEXT: ; def v[0:2] 912; GFX900-NEXT: ;;#ASMEND 913; GFX900-NEXT: v_mov_b32_e32 v5, 0 914; GFX900-NEXT: v_mov_b32_e32 v0, v2 915; GFX900-NEXT: v_mov_b32_e32 v1, v2 916; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] 917; GFX900-NEXT: s_waitcnt vmcnt(0) 918; GFX900-NEXT: s_setpc_b64 s[30:31] 919; 920; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_5_1: 921; GFX90A: ; %bb.0: 922; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 923; GFX90A-NEXT: ;;#ASMSTART 924; GFX90A-NEXT: ; def v[2:4] 925; GFX90A-NEXT: ;;#ASMEND 926; GFX90A-NEXT: ;;#ASMSTART 927; GFX90A-NEXT: ; def v[0:2] 928; GFX90A-NEXT: ;;#ASMEND 929; GFX90A-NEXT: v_mov_b32_e32 v5, 0 930; GFX90A-NEXT: v_mov_b32_e32 v0, v2 931; GFX90A-NEXT: v_mov_b32_e32 v1, v2 932; GFX90A-NEXT: global_store_dwordx4 v5, 
v[0:3], s[16:17] 933; GFX90A-NEXT: s_waitcnt vmcnt(0) 934; GFX90A-NEXT: s_setpc_b64 s[30:31] 935; 936; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_5_1: 937; GFX940: ; %bb.0: 938; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 939; GFX940-NEXT: ;;#ASMSTART 940; GFX940-NEXT: ; def v[2:4] 941; GFX940-NEXT: ;;#ASMEND 942; GFX940-NEXT: v_mov_b32_e32 v5, 0 943; GFX940-NEXT: ;;#ASMSTART 944; GFX940-NEXT: ; def v[0:2] 945; GFX940-NEXT: ;;#ASMEND 946; GFX940-NEXT: s_nop 0 947; GFX940-NEXT: v_mov_b32_e32 v0, v2 948; GFX940-NEXT: v_mov_b32_e32 v1, v2 949; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 950; GFX940-NEXT: s_waitcnt vmcnt(0) 951; GFX940-NEXT: s_setpc_b64 s[30:31] 952 %vec0 = call <3 x i32> asm "; def $0", "=v"() 953 %vec1 = call <3 x i32> asm "; def $0", "=v"() 954 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 5, i32 1> 955 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 956 ret void 957} 958 959define void @v_shuffle_v4i32_v3i32__5_5_5_2(ptr addrspace(1) inreg %ptr) { 960; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_5_2: 961; GFX900: ; %bb.0: 962; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 963; GFX900-NEXT: ;;#ASMSTART 964; GFX900-NEXT: ; def v[1:3] 965; GFX900-NEXT: ;;#ASMEND 966; GFX900-NEXT: ;;#ASMSTART 967; GFX900-NEXT: ; def v[0:2] 968; GFX900-NEXT: ;;#ASMEND 969; GFX900-NEXT: v_mov_b32_e32 v4, 0 970; GFX900-NEXT: v_mov_b32_e32 v0, v2 971; GFX900-NEXT: v_mov_b32_e32 v1, v2 972; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] 973; GFX900-NEXT: s_waitcnt vmcnt(0) 974; GFX900-NEXT: s_setpc_b64 s[30:31] 975; 976; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_5_2: 977; GFX90A: ; %bb.0: 978; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 979; GFX90A-NEXT: ;;#ASMSTART 980; GFX90A-NEXT: ; def v[2:4] 981; GFX90A-NEXT: ;;#ASMEND 982; GFX90A-NEXT: ;;#ASMSTART 983; GFX90A-NEXT: ; def v[0:2] 984; GFX90A-NEXT: ;;#ASMEND 985; GFX90A-NEXT: v_mov_b32_e32 v5, 0 986; GFX90A-NEXT: v_mov_b32_e32 v0, 
v2 987; GFX90A-NEXT: v_mov_b32_e32 v1, v2 988; GFX90A-NEXT: v_mov_b32_e32 v3, v4 989; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] 990; GFX90A-NEXT: s_waitcnt vmcnt(0) 991; GFX90A-NEXT: s_setpc_b64 s[30:31] 992; 993; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_5_2: 994; GFX940: ; %bb.0: 995; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 996; GFX940-NEXT: ;;#ASMSTART 997; GFX940-NEXT: ; def v[2:4] 998; GFX940-NEXT: ;;#ASMEND 999; GFX940-NEXT: v_mov_b32_e32 v5, 0 1000; GFX940-NEXT: ;;#ASMSTART 1001; GFX940-NEXT: ; def v[0:2] 1002; GFX940-NEXT: ;;#ASMEND 1003; GFX940-NEXT: v_mov_b32_e32 v3, v4 1004; GFX940-NEXT: v_mov_b32_e32 v0, v2 1005; GFX940-NEXT: v_mov_b32_e32 v1, v2 1006; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 1007; GFX940-NEXT: s_waitcnt vmcnt(0) 1008; GFX940-NEXT: s_setpc_b64 s[30:31] 1009 %vec0 = call <3 x i32> asm "; def $0", "=v"() 1010 %vec1 = call <3 x i32> asm "; def $0", "=v"() 1011 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 5, i32 2> 1012 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 1013 ret void 1014} 1015 1016define void @v_shuffle_v4i32_v3i32__5_5_5_3(ptr addrspace(1) inreg %ptr) { 1017; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_5_3: 1018; GFX900: ; %bb.0: 1019; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1020; GFX900-NEXT: v_mov_b32_e32 v6, 0 1021; GFX900-NEXT: ;;#ASMSTART 1022; GFX900-NEXT: ; def v[3:5] 1023; GFX900-NEXT: ;;#ASMEND 1024; GFX900-NEXT: v_mov_b32_e32 v0, v5 1025; GFX900-NEXT: v_mov_b32_e32 v1, v5 1026; GFX900-NEXT: v_mov_b32_e32 v2, v5 1027; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] 1028; GFX900-NEXT: s_waitcnt vmcnt(0) 1029; GFX900-NEXT: s_setpc_b64 s[30:31] 1030; 1031; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_5_3: 1032; GFX90A: ; %bb.0: 1033; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1034; GFX90A-NEXT: v_mov_b32_e32 v7, 0 1035; GFX90A-NEXT: ;;#ASMSTART 1036; GFX90A-NEXT: ; def v[4:6] 1037; GFX90A-NEXT: ;;#ASMEND 
1038; GFX90A-NEXT: v_mov_b32_e32 v0, v6 1039; GFX90A-NEXT: v_mov_b32_e32 v1, v6 1040; GFX90A-NEXT: v_mov_b32_e32 v2, v6 1041; GFX90A-NEXT: v_mov_b32_e32 v3, v4 1042; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17] 1043; GFX90A-NEXT: s_waitcnt vmcnt(0) 1044; GFX90A-NEXT: s_setpc_b64 s[30:31] 1045; 1046; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_5_3: 1047; GFX940: ; %bb.0: 1048; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1049; GFX940-NEXT: v_mov_b32_e32 v7, 0 1050; GFX940-NEXT: ;;#ASMSTART 1051; GFX940-NEXT: ; def v[4:6] 1052; GFX940-NEXT: ;;#ASMEND 1053; GFX940-NEXT: s_nop 0 1054; GFX940-NEXT: v_mov_b32_e32 v0, v6 1055; GFX940-NEXT: v_mov_b32_e32 v1, v6 1056; GFX940-NEXT: v_mov_b32_e32 v2, v6 1057; GFX940-NEXT: v_mov_b32_e32 v3, v4 1058; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 1059; GFX940-NEXT: s_waitcnt vmcnt(0) 1060; GFX940-NEXT: s_setpc_b64 s[30:31] 1061 %vec0 = call <3 x i32> asm "; def $0", "=v"() 1062 %vec1 = call <3 x i32> asm "; def $0", "=v"() 1063 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 5, i32 3> 1064 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 1065 ret void 1066} 1067 1068define void @v_shuffle_v4i32_v3i32__5_5_5_4(ptr addrspace(1) inreg %ptr) { 1069; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_5_4: 1070; GFX900: ; %bb.0: 1071; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1072; GFX900-NEXT: ;;#ASMSTART 1073; GFX900-NEXT: ; def v[2:4] 1074; GFX900-NEXT: ;;#ASMEND 1075; GFX900-NEXT: v_mov_b32_e32 v5, 0 1076; GFX900-NEXT: v_mov_b32_e32 v0, v4 1077; GFX900-NEXT: v_mov_b32_e32 v1, v4 1078; GFX900-NEXT: v_mov_b32_e32 v2, v4 1079; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] 1080; GFX900-NEXT: s_waitcnt vmcnt(0) 1081; GFX900-NEXT: s_setpc_b64 s[30:31] 1082; 1083; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_5_4: 1084; GFX90A: ; %bb.0: 1085; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1086; GFX90A-NEXT: ;;#ASMSTART 1087; GFX90A-NEXT: ; def v[2:4] 
1088; GFX90A-NEXT: ;;#ASMEND 1089; GFX90A-NEXT: v_mov_b32_e32 v5, 0 1090; GFX90A-NEXT: v_mov_b32_e32 v0, v4 1091; GFX90A-NEXT: v_mov_b32_e32 v1, v4 1092; GFX90A-NEXT: v_mov_b32_e32 v2, v4 1093; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] 1094; GFX90A-NEXT: s_waitcnt vmcnt(0) 1095; GFX90A-NEXT: s_setpc_b64 s[30:31] 1096; 1097; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_5_4: 1098; GFX940: ; %bb.0: 1099; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1100; GFX940-NEXT: ;;#ASMSTART 1101; GFX940-NEXT: ; def v[2:4] 1102; GFX940-NEXT: ;;#ASMEND 1103; GFX940-NEXT: v_mov_b32_e32 v5, 0 1104; GFX940-NEXT: v_mov_b32_e32 v0, v4 1105; GFX940-NEXT: v_mov_b32_e32 v1, v4 1106; GFX940-NEXT: v_mov_b32_e32 v2, v4 1107; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 1108; GFX940-NEXT: s_waitcnt vmcnt(0) 1109; GFX940-NEXT: s_setpc_b64 s[30:31] 1110 %vec0 = call <3 x i32> asm "; def $0", "=v"() 1111 %vec1 = call <3 x i32> asm "; def $0", "=v"() 1112 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 5, i32 4> 1113 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 1114 ret void 1115} 1116 1117define void @v_shuffle_v4i32_v3i32__5_5_5_5(ptr addrspace(1) inreg %ptr) { 1118; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_5_5: 1119; GFX900: ; %bb.0: 1120; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1121; GFX900-NEXT: ;;#ASMSTART 1122; GFX900-NEXT: ; def v[0:2] 1123; GFX900-NEXT: ;;#ASMEND 1124; GFX900-NEXT: v_mov_b32_e32 v4, 0 1125; GFX900-NEXT: v_mov_b32_e32 v0, v2 1126; GFX900-NEXT: v_mov_b32_e32 v1, v2 1127; GFX900-NEXT: v_mov_b32_e32 v3, v2 1128; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] 1129; GFX900-NEXT: s_waitcnt vmcnt(0) 1130; GFX900-NEXT: s_setpc_b64 s[30:31] 1131; 1132; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_5_5: 1133; GFX90A: ; %bb.0: 1134; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1135; GFX90A-NEXT: ;;#ASMSTART 1136; GFX90A-NEXT: ; def v[0:2] 1137; GFX90A-NEXT: ;;#ASMEND 1138; 
GFX90A-NEXT: v_mov_b32_e32 v4, 0 1139; GFX90A-NEXT: v_mov_b32_e32 v0, v2 1140; GFX90A-NEXT: v_mov_b32_e32 v1, v2 1141; GFX90A-NEXT: v_mov_b32_e32 v3, v2 1142; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] 1143; GFX90A-NEXT: s_waitcnt vmcnt(0) 1144; GFX90A-NEXT: s_setpc_b64 s[30:31] 1145; 1146; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_5_5: 1147; GFX940: ; %bb.0: 1148; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1149; GFX940-NEXT: ;;#ASMSTART 1150; GFX940-NEXT: ; def v[0:2] 1151; GFX940-NEXT: ;;#ASMEND 1152; GFX940-NEXT: v_mov_b32_e32 v4, 0 1153; GFX940-NEXT: v_mov_b32_e32 v0, v2 1154; GFX940-NEXT: v_mov_b32_e32 v1, v2 1155; GFX940-NEXT: v_mov_b32_e32 v3, v2 1156; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 1157; GFX940-NEXT: s_waitcnt vmcnt(0) 1158; GFX940-NEXT: s_setpc_b64 s[30:31] 1159 %vec0 = call <3 x i32> asm "; def $0", "=v"() 1160 %vec1 = call <3 x i32> asm "; def $0", "=v"() 1161 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 5, i32 5> 1162 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 1163 ret void 1164} 1165 1166define void @v_shuffle_v4i32_v3i32__u_0_0_0(ptr addrspace(1) inreg %ptr) { 1167; GFX900-LABEL: v_shuffle_v4i32_v3i32__u_0_0_0: 1168; GFX900: ; %bb.0: 1169; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1170; GFX900-NEXT: ;;#ASMSTART 1171; GFX900-NEXT: ; def v[1:3] 1172; GFX900-NEXT: ;;#ASMEND 1173; GFX900-NEXT: v_mov_b32_e32 v0, 0 1174; GFX900-NEXT: v_mov_b32_e32 v2, v1 1175; GFX900-NEXT: v_mov_b32_e32 v3, v1 1176; GFX900-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17] 1177; GFX900-NEXT: s_waitcnt vmcnt(0) 1178; GFX900-NEXT: s_setpc_b64 s[30:31] 1179; 1180; GFX90A-LABEL: v_shuffle_v4i32_v3i32__u_0_0_0: 1181; GFX90A: ; %bb.0: 1182; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1183; GFX90A-NEXT: ;;#ASMSTART 1184; GFX90A-NEXT: ; def v[0:2] 1185; GFX90A-NEXT: ;;#ASMEND 1186; GFX90A-NEXT: v_mov_b32_e32 v4, 0 1187; GFX90A-NEXT: v_mov_b32_e32 v1, v0 1188; 
GFX90A-NEXT: v_mov_b32_e32 v2, v0 1189; GFX90A-NEXT: v_mov_b32_e32 v3, v0 1190; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] 1191; GFX90A-NEXT: s_waitcnt vmcnt(0) 1192; GFX90A-NEXT: s_setpc_b64 s[30:31] 1193; 1194; GFX940-LABEL: v_shuffle_v4i32_v3i32__u_0_0_0: 1195; GFX940: ; %bb.0: 1196; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1197; GFX940-NEXT: ;;#ASMSTART 1198; GFX940-NEXT: ; def v[0:2] 1199; GFX940-NEXT: ;;#ASMEND 1200; GFX940-NEXT: v_mov_b32_e32 v4, 0 1201; GFX940-NEXT: v_mov_b32_e32 v1, v0 1202; GFX940-NEXT: v_mov_b32_e32 v2, v0 1203; GFX940-NEXT: v_mov_b32_e32 v3, v0 1204; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 1205; GFX940-NEXT: s_waitcnt vmcnt(0) 1206; GFX940-NEXT: s_setpc_b64 s[30:31] 1207 %vec0 = call <3 x i32> asm "; def $0", "=v"() 1208 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> <i32 poison, i32 0, i32 0, i32 0> 1209 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 1210 ret void 1211} 1212 1213define void @v_shuffle_v4i32_v3i32__0_0_0_0(ptr addrspace(1) inreg %ptr) { 1214; GFX900-LABEL: v_shuffle_v4i32_v3i32__0_0_0_0: 1215; GFX900: ; %bb.0: 1216; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1217; GFX900-NEXT: ;;#ASMSTART 1218; GFX900-NEXT: ; def v[0:2] 1219; GFX900-NEXT: ;;#ASMEND 1220; GFX900-NEXT: v_mov_b32_e32 v4, 0 1221; GFX900-NEXT: v_mov_b32_e32 v1, v0 1222; GFX900-NEXT: v_mov_b32_e32 v2, v0 1223; GFX900-NEXT: v_mov_b32_e32 v3, v0 1224; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] 1225; GFX900-NEXT: s_waitcnt vmcnt(0) 1226; GFX900-NEXT: s_setpc_b64 s[30:31] 1227; 1228; GFX90A-LABEL: v_shuffle_v4i32_v3i32__0_0_0_0: 1229; GFX90A: ; %bb.0: 1230; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1231; GFX90A-NEXT: ;;#ASMSTART 1232; GFX90A-NEXT: ; def v[0:2] 1233; GFX90A-NEXT: ;;#ASMEND 1234; GFX90A-NEXT: v_mov_b32_e32 v4, 0 1235; GFX90A-NEXT: v_mov_b32_e32 v1, v0 1236; GFX90A-NEXT: v_mov_b32_e32 v2, v0 1237; GFX90A-NEXT: v_mov_b32_e32 v3, v0 1238; 
GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] 1239; GFX90A-NEXT: s_waitcnt vmcnt(0) 1240; GFX90A-NEXT: s_setpc_b64 s[30:31] 1241; 1242; GFX940-LABEL: v_shuffle_v4i32_v3i32__0_0_0_0: 1243; GFX940: ; %bb.0: 1244; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1245; GFX940-NEXT: ;;#ASMSTART 1246; GFX940-NEXT: ; def v[0:2] 1247; GFX940-NEXT: ;;#ASMEND 1248; GFX940-NEXT: v_mov_b32_e32 v4, 0 1249; GFX940-NEXT: v_mov_b32_e32 v1, v0 1250; GFX940-NEXT: v_mov_b32_e32 v2, v0 1251; GFX940-NEXT: v_mov_b32_e32 v3, v0 1252; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 1253; GFX940-NEXT: s_waitcnt vmcnt(0) 1254; GFX940-NEXT: s_setpc_b64 s[30:31] 1255 %vec0 = call <3 x i32> asm "; def $0", "=v"() 1256 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> zeroinitializer 1257 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 1258 ret void 1259} 1260 1261define void @v_shuffle_v4i32_v3i32__1_0_0_0(ptr addrspace(1) inreg %ptr) { 1262; GFX900-LABEL: v_shuffle_v4i32_v3i32__1_0_0_0: 1263; GFX900: ; %bb.0: 1264; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1265; GFX900-NEXT: ;;#ASMSTART 1266; GFX900-NEXT: ; def v[1:3] 1267; GFX900-NEXT: ;;#ASMEND 1268; GFX900-NEXT: v_mov_b32_e32 v4, 0 1269; GFX900-NEXT: v_mov_b32_e32 v0, v2 1270; GFX900-NEXT: v_mov_b32_e32 v2, v1 1271; GFX900-NEXT: v_mov_b32_e32 v3, v1 1272; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] 1273; GFX900-NEXT: s_waitcnt vmcnt(0) 1274; GFX900-NEXT: s_setpc_b64 s[30:31] 1275; 1276; GFX90A-LABEL: v_shuffle_v4i32_v3i32__1_0_0_0: 1277; GFX90A: ; %bb.0: 1278; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1279; GFX90A-NEXT: v_mov_b32_e32 v7, 0 1280; GFX90A-NEXT: ;;#ASMSTART 1281; GFX90A-NEXT: ; def v[4:6] 1282; GFX90A-NEXT: ;;#ASMEND 1283; GFX90A-NEXT: v_mov_b32_e32 v0, v5 1284; GFX90A-NEXT: v_mov_b32_e32 v1, v4 1285; GFX90A-NEXT: v_mov_b32_e32 v2, v4 1286; GFX90A-NEXT: v_mov_b32_e32 v3, v4 1287; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17] 1288; 
GFX90A-NEXT: s_waitcnt vmcnt(0) 1289; GFX90A-NEXT: s_setpc_b64 s[30:31] 1290; 1291; GFX940-LABEL: v_shuffle_v4i32_v3i32__1_0_0_0: 1292; GFX940: ; %bb.0: 1293; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1294; GFX940-NEXT: v_mov_b32_e32 v7, 0 1295; GFX940-NEXT: ;;#ASMSTART 1296; GFX940-NEXT: ; def v[4:6] 1297; GFX940-NEXT: ;;#ASMEND 1298; GFX940-NEXT: s_nop 0 1299; GFX940-NEXT: v_mov_b32_e32 v0, v5 1300; GFX940-NEXT: v_mov_b32_e32 v1, v4 1301; GFX940-NEXT: v_mov_b32_e32 v2, v4 1302; GFX940-NEXT: v_mov_b32_e32 v3, v4 1303; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 1304; GFX940-NEXT: s_waitcnt vmcnt(0) 1305; GFX940-NEXT: s_setpc_b64 s[30:31] 1306 %vec0 = call <3 x i32> asm "; def $0", "=v"() 1307 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> <i32 1, i32 0, i32 0, i32 0> 1308 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 1309 ret void 1310} 1311 1312define void @v_shuffle_v4i32_v3i32__2_0_0_0(ptr addrspace(1) inreg %ptr) { 1313; GFX900-LABEL: v_shuffle_v4i32_v3i32__2_0_0_0: 1314; GFX900: ; %bb.0: 1315; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1316; GFX900-NEXT: ;;#ASMSTART 1317; GFX900-NEXT: ; def v[1:3] 1318; GFX900-NEXT: ;;#ASMEND 1319; GFX900-NEXT: v_mov_b32_e32 v4, 0 1320; GFX900-NEXT: v_mov_b32_e32 v0, v3 1321; GFX900-NEXT: v_mov_b32_e32 v2, v1 1322; GFX900-NEXT: v_mov_b32_e32 v3, v1 1323; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] 1324; GFX900-NEXT: s_waitcnt vmcnt(0) 1325; GFX900-NEXT: s_setpc_b64 s[30:31] 1326; 1327; GFX90A-LABEL: v_shuffle_v4i32_v3i32__2_0_0_0: 1328; GFX90A: ; %bb.0: 1329; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1330; GFX90A-NEXT: v_mov_b32_e32 v7, 0 1331; GFX90A-NEXT: ;;#ASMSTART 1332; GFX90A-NEXT: ; def v[4:6] 1333; GFX90A-NEXT: ;;#ASMEND 1334; GFX90A-NEXT: v_mov_b32_e32 v0, v6 1335; GFX90A-NEXT: v_mov_b32_e32 v1, v4 1336; GFX90A-NEXT: v_mov_b32_e32 v2, v4 1337; GFX90A-NEXT: v_mov_b32_e32 v3, v4 1338; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], 
s[16:17] 1339; GFX90A-NEXT: s_waitcnt vmcnt(0) 1340; GFX90A-NEXT: s_setpc_b64 s[30:31] 1341; 1342; GFX940-LABEL: v_shuffle_v4i32_v3i32__2_0_0_0: 1343; GFX940: ; %bb.0: 1344; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1345; GFX940-NEXT: v_mov_b32_e32 v7, 0 1346; GFX940-NEXT: ;;#ASMSTART 1347; GFX940-NEXT: ; def v[4:6] 1348; GFX940-NEXT: ;;#ASMEND 1349; GFX940-NEXT: s_nop 0 1350; GFX940-NEXT: v_mov_b32_e32 v0, v6 1351; GFX940-NEXT: v_mov_b32_e32 v1, v4 1352; GFX940-NEXT: v_mov_b32_e32 v2, v4 1353; GFX940-NEXT: v_mov_b32_e32 v3, v4 1354; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 1355; GFX940-NEXT: s_waitcnt vmcnt(0) 1356; GFX940-NEXT: s_setpc_b64 s[30:31] 1357 %vec0 = call <3 x i32> asm "; def $0", "=v"() 1358 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> <i32 2, i32 0, i32 0, i32 0> 1359 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 1360 ret void 1361} 1362 1363define void @v_shuffle_v4i32_v3i32__3_0_0_0(ptr addrspace(1) inreg %ptr) { 1364; GFX900-LABEL: v_shuffle_v4i32_v3i32__3_0_0_0: 1365; GFX900: ; %bb.0: 1366; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1367; GFX900-NEXT: ;;#ASMSTART 1368; GFX900-NEXT: ; def v[1:3] 1369; GFX900-NEXT: ;;#ASMEND 1370; GFX900-NEXT: v_mov_b32_e32 v0, 0 1371; GFX900-NEXT: v_mov_b32_e32 v2, v1 1372; GFX900-NEXT: v_mov_b32_e32 v3, v1 1373; GFX900-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17] 1374; GFX900-NEXT: s_waitcnt vmcnt(0) 1375; GFX900-NEXT: s_setpc_b64 s[30:31] 1376; 1377; GFX90A-LABEL: v_shuffle_v4i32_v3i32__3_0_0_0: 1378; GFX90A: ; %bb.0: 1379; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1380; GFX90A-NEXT: ;;#ASMSTART 1381; GFX90A-NEXT: ; def v[0:2] 1382; GFX90A-NEXT: ;;#ASMEND 1383; GFX90A-NEXT: v_mov_b32_e32 v4, 0 1384; GFX90A-NEXT: v_mov_b32_e32 v1, v0 1385; GFX90A-NEXT: v_mov_b32_e32 v2, v0 1386; GFX90A-NEXT: v_mov_b32_e32 v3, v0 1387; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] 1388; GFX90A-NEXT: s_waitcnt vmcnt(0) 1389; GFX90A-NEXT: 
s_setpc_b64 s[30:31] 1390; 1391; GFX940-LABEL: v_shuffle_v4i32_v3i32__3_0_0_0: 1392; GFX940: ; %bb.0: 1393; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1394; GFX940-NEXT: ;;#ASMSTART 1395; GFX940-NEXT: ; def v[0:2] 1396; GFX940-NEXT: ;;#ASMEND 1397; GFX940-NEXT: v_mov_b32_e32 v4, 0 1398; GFX940-NEXT: v_mov_b32_e32 v1, v0 1399; GFX940-NEXT: v_mov_b32_e32 v2, v0 1400; GFX940-NEXT: v_mov_b32_e32 v3, v0 1401; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 1402; GFX940-NEXT: s_waitcnt vmcnt(0) 1403; GFX940-NEXT: s_setpc_b64 s[30:31] 1404 %vec0 = call <3 x i32> asm "; def $0", "=v"() 1405 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> <i32 3, i32 0, i32 0, i32 0> 1406 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 1407 ret void 1408} 1409 1410define void @v_shuffle_v4i32_v3i32__4_0_0_0(ptr addrspace(1) inreg %ptr) { 1411; GFX900-LABEL: v_shuffle_v4i32_v3i32__4_0_0_0: 1412; GFX900: ; %bb.0: 1413; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1414; GFX900-NEXT: ;;#ASMSTART 1415; GFX900-NEXT: ; def v[1:3] 1416; GFX900-NEXT: ;;#ASMEND 1417; GFX900-NEXT: ;;#ASMSTART 1418; GFX900-NEXT: ; def v[2:4] 1419; GFX900-NEXT: ;;#ASMEND 1420; GFX900-NEXT: v_mov_b32_e32 v5, 0 1421; GFX900-NEXT: v_mov_b32_e32 v0, v3 1422; GFX900-NEXT: v_mov_b32_e32 v2, v1 1423; GFX900-NEXT: v_mov_b32_e32 v3, v1 1424; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] 1425; GFX900-NEXT: s_waitcnt vmcnt(0) 1426; GFX900-NEXT: s_setpc_b64 s[30:31] 1427; 1428; GFX90A-LABEL: v_shuffle_v4i32_v3i32__4_0_0_0: 1429; GFX90A: ; %bb.0: 1430; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1431; GFX90A-NEXT: ;;#ASMSTART 1432; GFX90A-NEXT: ; def v[0:2] 1433; GFX90A-NEXT: ;;#ASMEND 1434; GFX90A-NEXT: v_mov_b32_e32 v7, 0 1435; GFX90A-NEXT: ;;#ASMSTART 1436; GFX90A-NEXT: ; def v[4:6] 1437; GFX90A-NEXT: ;;#ASMEND 1438; GFX90A-NEXT: v_mov_b32_e32 v0, v1 1439; GFX90A-NEXT: v_mov_b32_e32 v1, v4 1440; GFX90A-NEXT: v_mov_b32_e32 v2, v4 1441; GFX90A-NEXT: 
v_mov_b32_e32 v3, v4 1442; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17] 1443; GFX90A-NEXT: s_waitcnt vmcnt(0) 1444; GFX90A-NEXT: s_setpc_b64 s[30:31] 1445; 1446; GFX940-LABEL: v_shuffle_v4i32_v3i32__4_0_0_0: 1447; GFX940: ; %bb.0: 1448; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1449; GFX940-NEXT: ;;#ASMSTART 1450; GFX940-NEXT: ; def v[0:2] 1451; GFX940-NEXT: ;;#ASMEND 1452; GFX940-NEXT: v_mov_b32_e32 v7, 0 1453; GFX940-NEXT: ;;#ASMSTART 1454; GFX940-NEXT: ; def v[4:6] 1455; GFX940-NEXT: ;;#ASMEND 1456; GFX940-NEXT: v_mov_b32_e32 v0, v1 1457; GFX940-NEXT: v_mov_b32_e32 v1, v4 1458; GFX940-NEXT: v_mov_b32_e32 v2, v4 1459; GFX940-NEXT: v_mov_b32_e32 v3, v4 1460; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 1461; GFX940-NEXT: s_waitcnt vmcnt(0) 1462; GFX940-NEXT: s_setpc_b64 s[30:31] 1463 %vec0 = call <3 x i32> asm "; def $0", "=v"() 1464 %vec1 = call <3 x i32> asm "; def $0", "=v"() 1465 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 4, i32 0, i32 0, i32 0> 1466 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 1467 ret void 1468} 1469 1470define void @v_shuffle_v4i32_v3i32__5_0_0_0(ptr addrspace(1) inreg %ptr) { 1471; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_0_0_0: 1472; GFX900: ; %bb.0: 1473; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1474; GFX900-NEXT: ;;#ASMSTART 1475; GFX900-NEXT: ; def v[1:3] 1476; GFX900-NEXT: ;;#ASMEND 1477; GFX900-NEXT: ;;#ASMSTART 1478; GFX900-NEXT: ; def v[2:4] 1479; GFX900-NEXT: ;;#ASMEND 1480; GFX900-NEXT: v_mov_b32_e32 v5, 0 1481; GFX900-NEXT: v_mov_b32_e32 v0, v4 1482; GFX900-NEXT: v_mov_b32_e32 v2, v1 1483; GFX900-NEXT: v_mov_b32_e32 v3, v1 1484; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] 1485; GFX900-NEXT: s_waitcnt vmcnt(0) 1486; GFX900-NEXT: s_setpc_b64 s[30:31] 1487; 1488; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_0_0_0: 1489; GFX90A: ; %bb.0: 1490; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1491; GFX90A-NEXT: ;;#ASMSTART 1492; GFX90A-NEXT: 
; def v[0:2] 1493; GFX90A-NEXT: ;;#ASMEND 1494; GFX90A-NEXT: v_mov_b32_e32 v7, 0 1495; GFX90A-NEXT: ;;#ASMSTART 1496; GFX90A-NEXT: ; def v[4:6] 1497; GFX90A-NEXT: ;;#ASMEND 1498; GFX90A-NEXT: v_mov_b32_e32 v0, v2 1499; GFX90A-NEXT: v_mov_b32_e32 v1, v4 1500; GFX90A-NEXT: v_mov_b32_e32 v2, v4 1501; GFX90A-NEXT: v_mov_b32_e32 v3, v4 1502; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17] 1503; GFX90A-NEXT: s_waitcnt vmcnt(0) 1504; GFX90A-NEXT: s_setpc_b64 s[30:31] 1505; 1506; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_0_0_0: 1507; GFX940: ; %bb.0: 1508; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1509; GFX940-NEXT: ;;#ASMSTART 1510; GFX940-NEXT: ; def v[0:2] 1511; GFX940-NEXT: ;;#ASMEND 1512; GFX940-NEXT: v_mov_b32_e32 v7, 0 1513; GFX940-NEXT: ;;#ASMSTART 1514; GFX940-NEXT: ; def v[4:6] 1515; GFX940-NEXT: ;;#ASMEND 1516; GFX940-NEXT: v_mov_b32_e32 v0, v2 1517; GFX940-NEXT: v_mov_b32_e32 v1, v4 1518; GFX940-NEXT: v_mov_b32_e32 v2, v4 1519; GFX940-NEXT: v_mov_b32_e32 v3, v4 1520; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 1521; GFX940-NEXT: s_waitcnt vmcnt(0) 1522; GFX940-NEXT: s_setpc_b64 s[30:31] 1523 %vec0 = call <3 x i32> asm "; def $0", "=v"() 1524 %vec1 = call <3 x i32> asm "; def $0", "=v"() 1525 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 0, i32 0, i32 0> 1526 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 1527 ret void 1528} 1529 1530define void @v_shuffle_v4i32_v3i32__5_u_0_0(ptr addrspace(1) inreg %ptr) { 1531; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_u_0_0: 1532; GFX900: ; %bb.0: 1533; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1534; GFX900-NEXT: ;;#ASMSTART 1535; GFX900-NEXT: ; def v[1:3] 1536; GFX900-NEXT: ;;#ASMEND 1537; GFX900-NEXT: ;;#ASMSTART 1538; GFX900-NEXT: ; def v[2:4] 1539; GFX900-NEXT: ;;#ASMEND 1540; GFX900-NEXT: v_mov_b32_e32 v5, 0 1541; GFX900-NEXT: v_mov_b32_e32 v0, v4 1542; GFX900-NEXT: v_mov_b32_e32 v2, v1 1543; GFX900-NEXT: v_mov_b32_e32 v3, v1 1544; 
GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] 1545; GFX900-NEXT: s_waitcnt vmcnt(0) 1546; GFX900-NEXT: s_setpc_b64 s[30:31] 1547; 1548; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_u_0_0: 1549; GFX90A: ; %bb.0: 1550; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1551; GFX90A-NEXT: ;;#ASMSTART 1552; GFX90A-NEXT: ; def v[0:2] 1553; GFX90A-NEXT: ;;#ASMEND 1554; GFX90A-NEXT: v_mov_b32_e32 v7, 0 1555; GFX90A-NEXT: ;;#ASMSTART 1556; GFX90A-NEXT: ; def v[4:6] 1557; GFX90A-NEXT: ;;#ASMEND 1558; GFX90A-NEXT: v_mov_b32_e32 v0, v2 1559; GFX90A-NEXT: v_mov_b32_e32 v2, v4 1560; GFX90A-NEXT: v_mov_b32_e32 v3, v4 1561; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17] 1562; GFX90A-NEXT: s_waitcnt vmcnt(0) 1563; GFX90A-NEXT: s_setpc_b64 s[30:31] 1564; 1565; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_u_0_0: 1566; GFX940: ; %bb.0: 1567; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1568; GFX940-NEXT: ;;#ASMSTART 1569; GFX940-NEXT: ; def v[0:2] 1570; GFX940-NEXT: ;;#ASMEND 1571; GFX940-NEXT: v_mov_b32_e32 v7, 0 1572; GFX940-NEXT: ;;#ASMSTART 1573; GFX940-NEXT: ; def v[4:6] 1574; GFX940-NEXT: ;;#ASMEND 1575; GFX940-NEXT: v_mov_b32_e32 v0, v2 1576; GFX940-NEXT: v_mov_b32_e32 v2, v4 1577; GFX940-NEXT: v_mov_b32_e32 v3, v4 1578; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 1579; GFX940-NEXT: s_waitcnt vmcnt(0) 1580; GFX940-NEXT: s_setpc_b64 s[30:31] 1581 %vec0 = call <3 x i32> asm "; def $0", "=v"() 1582 %vec1 = call <3 x i32> asm "; def $0", "=v"() 1583 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 poison, i32 0, i32 0> 1584 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 1585 ret void 1586} 1587 1588define void @v_shuffle_v4i32_v3i32__5_1_0_0(ptr addrspace(1) inreg %ptr) { 1589; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_1_0_0: 1590; GFX900: ; %bb.0: 1591; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1592; GFX900-NEXT: ;;#ASMSTART 1593; GFX900-NEXT: ; def v[0:2] 1594; GFX900-NEXT: ;;#ASMEND 1595; GFX900-NEXT: 
v_mov_b32_e32 v6, 0 1596; GFX900-NEXT: ;;#ASMSTART 1597; GFX900-NEXT: ; def v[3:5] 1598; GFX900-NEXT: ;;#ASMEND 1599; GFX900-NEXT: v_mov_b32_e32 v0, v2 1600; GFX900-NEXT: v_mov_b32_e32 v1, v4 1601; GFX900-NEXT: v_mov_b32_e32 v2, v3 1602; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] 1603; GFX900-NEXT: s_waitcnt vmcnt(0) 1604; GFX900-NEXT: s_setpc_b64 s[30:31] 1605; 1606; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_1_0_0: 1607; GFX90A: ; %bb.0: 1608; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1609; GFX90A-NEXT: ;;#ASMSTART 1610; GFX90A-NEXT: ; def v[0:2] 1611; GFX90A-NEXT: ;;#ASMEND 1612; GFX90A-NEXT: v_mov_b32_e32 v7, 0 1613; GFX90A-NEXT: ;;#ASMSTART 1614; GFX90A-NEXT: ; def v[4:6] 1615; GFX90A-NEXT: ;;#ASMEND 1616; GFX90A-NEXT: v_mov_b32_e32 v0, v2 1617; GFX90A-NEXT: v_mov_b32_e32 v1, v5 1618; GFX90A-NEXT: v_mov_b32_e32 v2, v4 1619; GFX90A-NEXT: v_mov_b32_e32 v3, v4 1620; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17] 1621; GFX90A-NEXT: s_waitcnt vmcnt(0) 1622; GFX90A-NEXT: s_setpc_b64 s[30:31] 1623; 1624; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_1_0_0: 1625; GFX940: ; %bb.0: 1626; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1627; GFX940-NEXT: ;;#ASMSTART 1628; GFX940-NEXT: ; def v[0:2] 1629; GFX940-NEXT: ;;#ASMEND 1630; GFX940-NEXT: v_mov_b32_e32 v7, 0 1631; GFX940-NEXT: ;;#ASMSTART 1632; GFX940-NEXT: ; def v[4:6] 1633; GFX940-NEXT: ;;#ASMEND 1634; GFX940-NEXT: v_mov_b32_e32 v0, v2 1635; GFX940-NEXT: v_mov_b32_e32 v1, v5 1636; GFX940-NEXT: v_mov_b32_e32 v2, v4 1637; GFX940-NEXT: v_mov_b32_e32 v3, v4 1638; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 1639; GFX940-NEXT: s_waitcnt vmcnt(0) 1640; GFX940-NEXT: s_setpc_b64 s[30:31] 1641 %vec0 = call <3 x i32> asm "; def $0", "=v"() 1642 %vec1 = call <3 x i32> asm "; def $0", "=v"() 1643 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 1, i32 0, i32 0> 1644 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 1645 ret void 1646} 1647 1648define 
void @v_shuffle_v4i32_v3i32__5_2_0_0(ptr addrspace(1) inreg %ptr) { 1649; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_2_0_0: 1650; GFX900: ; %bb.0: 1651; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1652; GFX900-NEXT: ;;#ASMSTART 1653; GFX900-NEXT: ; def v[0:2] 1654; GFX900-NEXT: ;;#ASMEND 1655; GFX900-NEXT: v_mov_b32_e32 v6, 0 1656; GFX900-NEXT: ;;#ASMSTART 1657; GFX900-NEXT: ; def v[3:5] 1658; GFX900-NEXT: ;;#ASMEND 1659; GFX900-NEXT: v_mov_b32_e32 v0, v2 1660; GFX900-NEXT: v_mov_b32_e32 v1, v5 1661; GFX900-NEXT: v_mov_b32_e32 v2, v3 1662; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] 1663; GFX900-NEXT: s_waitcnt vmcnt(0) 1664; GFX900-NEXT: s_setpc_b64 s[30:31] 1665; 1666; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_2_0_0: 1667; GFX90A: ; %bb.0: 1668; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1669; GFX90A-NEXT: ;;#ASMSTART 1670; GFX90A-NEXT: ; def v[0:2] 1671; GFX90A-NEXT: ;;#ASMEND 1672; GFX90A-NEXT: v_mov_b32_e32 v7, 0 1673; GFX90A-NEXT: ;;#ASMSTART 1674; GFX90A-NEXT: ; def v[4:6] 1675; GFX90A-NEXT: ;;#ASMEND 1676; GFX90A-NEXT: v_mov_b32_e32 v0, v2 1677; GFX90A-NEXT: v_mov_b32_e32 v1, v6 1678; GFX90A-NEXT: v_mov_b32_e32 v2, v4 1679; GFX90A-NEXT: v_mov_b32_e32 v3, v4 1680; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17] 1681; GFX90A-NEXT: s_waitcnt vmcnt(0) 1682; GFX90A-NEXT: s_setpc_b64 s[30:31] 1683; 1684; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_2_0_0: 1685; GFX940: ; %bb.0: 1686; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1687; GFX940-NEXT: ;;#ASMSTART 1688; GFX940-NEXT: ; def v[0:2] 1689; GFX940-NEXT: ;;#ASMEND 1690; GFX940-NEXT: v_mov_b32_e32 v7, 0 1691; GFX940-NEXT: ;;#ASMSTART 1692; GFX940-NEXT: ; def v[4:6] 1693; GFX940-NEXT: ;;#ASMEND 1694; GFX940-NEXT: v_mov_b32_e32 v0, v2 1695; GFX940-NEXT: v_mov_b32_e32 v1, v6 1696; GFX940-NEXT: v_mov_b32_e32 v2, v4 1697; GFX940-NEXT: v_mov_b32_e32 v3, v4 1698; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 1699; GFX940-NEXT: s_waitcnt vmcnt(0) 1700; GFX940-NEXT: 
s_setpc_b64 s[30:31] 1701 %vec0 = call <3 x i32> asm "; def $0", "=v"() 1702 %vec1 = call <3 x i32> asm "; def $0", "=v"() 1703 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 2, i32 0, i32 0> 1704 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 1705 ret void 1706} 1707 1708define void @v_shuffle_v4i32_v3i32__5_3_0_0(ptr addrspace(1) inreg %ptr) { 1709; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_3_0_0: 1710; GFX900: ; %bb.0: 1711; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1712; GFX900-NEXT: ;;#ASMSTART 1713; GFX900-NEXT: ; def v[1:3] 1714; GFX900-NEXT: ;;#ASMEND 1715; GFX900-NEXT: v_mov_b32_e32 v7, 0 1716; GFX900-NEXT: ;;#ASMSTART 1717; GFX900-NEXT: ; def v[4:6] 1718; GFX900-NEXT: ;;#ASMEND 1719; GFX900-NEXT: v_mov_b32_e32 v0, v3 1720; GFX900-NEXT: v_mov_b32_e32 v2, v4 1721; GFX900-NEXT: v_mov_b32_e32 v3, v4 1722; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17] 1723; GFX900-NEXT: s_waitcnt vmcnt(0) 1724; GFX900-NEXT: s_setpc_b64 s[30:31] 1725; 1726; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_3_0_0: 1727; GFX90A: ; %bb.0: 1728; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1729; GFX90A-NEXT: ;;#ASMSTART 1730; GFX90A-NEXT: ; def v[4:6] 1731; GFX90A-NEXT: ;;#ASMEND 1732; GFX90A-NEXT: v_mov_b32_e32 v9, 0 1733; GFX90A-NEXT: ;;#ASMSTART 1734; GFX90A-NEXT: ; def v[6:8] 1735; GFX90A-NEXT: ;;#ASMEND 1736; GFX90A-NEXT: v_mov_b32_e32 v0, v8 1737; GFX90A-NEXT: v_mov_b32_e32 v1, v6 1738; GFX90A-NEXT: v_mov_b32_e32 v2, v4 1739; GFX90A-NEXT: v_mov_b32_e32 v3, v4 1740; GFX90A-NEXT: global_store_dwordx4 v9, v[0:3], s[16:17] 1741; GFX90A-NEXT: s_waitcnt vmcnt(0) 1742; GFX90A-NEXT: s_setpc_b64 s[30:31] 1743; 1744; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_3_0_0: 1745; GFX940: ; %bb.0: 1746; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1747; GFX940-NEXT: ;;#ASMSTART 1748; GFX940-NEXT: ; def v[4:6] 1749; GFX940-NEXT: ;;#ASMEND 1750; GFX940-NEXT: v_mov_b32_e32 v9, 0 1751; GFX940-NEXT: ;;#ASMSTART 1752; GFX940-NEXT: ; def v[6:8] 
1753; GFX940-NEXT: ;;#ASMEND 1754; GFX940-NEXT: v_mov_b32_e32 v2, v4 1755; GFX940-NEXT: v_mov_b32_e32 v0, v8 1756; GFX940-NEXT: v_mov_b32_e32 v1, v6 1757; GFX940-NEXT: v_mov_b32_e32 v3, v4 1758; GFX940-NEXT: global_store_dwordx4 v9, v[0:3], s[0:1] sc0 sc1 1759; GFX940-NEXT: s_waitcnt vmcnt(0) 1760; GFX940-NEXT: s_setpc_b64 s[30:31] 1761 %vec0 = call <3 x i32> asm "; def $0", "=v"() 1762 %vec1 = call <3 x i32> asm "; def $0", "=v"() 1763 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 3, i32 0, i32 0> 1764 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 1765 ret void 1766} 1767 1768define void @v_shuffle_v4i32_v3i32__5_4_0_0(ptr addrspace(1) inreg %ptr) { 1769; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_4_0_0: 1770; GFX900: ; %bb.0: 1771; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1772; GFX900-NEXT: ;;#ASMSTART 1773; GFX900-NEXT: ; def v[0:2] 1774; GFX900-NEXT: ;;#ASMEND 1775; GFX900-NEXT: v_mov_b32_e32 v6, 0 1776; GFX900-NEXT: ;;#ASMSTART 1777; GFX900-NEXT: ; def v[3:5] 1778; GFX900-NEXT: ;;#ASMEND 1779; GFX900-NEXT: v_mov_b32_e32 v0, v2 1780; GFX900-NEXT: v_mov_b32_e32 v2, v3 1781; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] 1782; GFX900-NEXT: s_waitcnt vmcnt(0) 1783; GFX900-NEXT: s_setpc_b64 s[30:31] 1784; 1785; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_4_0_0: 1786; GFX90A: ; %bb.0: 1787; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1788; GFX90A-NEXT: ;;#ASMSTART 1789; GFX90A-NEXT: ; def v[0:2] 1790; GFX90A-NEXT: ;;#ASMEND 1791; GFX90A-NEXT: v_mov_b32_e32 v7, 0 1792; GFX90A-NEXT: ;;#ASMSTART 1793; GFX90A-NEXT: ; def v[4:6] 1794; GFX90A-NEXT: ;;#ASMEND 1795; GFX90A-NEXT: v_mov_b32_e32 v0, v2 1796; GFX90A-NEXT: v_mov_b32_e32 v2, v4 1797; GFX90A-NEXT: v_mov_b32_e32 v3, v4 1798; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17] 1799; GFX90A-NEXT: s_waitcnt vmcnt(0) 1800; GFX90A-NEXT: s_setpc_b64 s[30:31] 1801; 1802; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_4_0_0: 1803; GFX940: ; %bb.0: 1804; GFX940-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1805; GFX940-NEXT: ;;#ASMSTART 1806; GFX940-NEXT: ; def v[0:2] 1807; GFX940-NEXT: ;;#ASMEND 1808; GFX940-NEXT: v_mov_b32_e32 v7, 0 1809; GFX940-NEXT: ;;#ASMSTART 1810; GFX940-NEXT: ; def v[4:6] 1811; GFX940-NEXT: ;;#ASMEND 1812; GFX940-NEXT: v_mov_b32_e32 v0, v2 1813; GFX940-NEXT: v_mov_b32_e32 v2, v4 1814; GFX940-NEXT: v_mov_b32_e32 v3, v4 1815; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 1816; GFX940-NEXT: s_waitcnt vmcnt(0) 1817; GFX940-NEXT: s_setpc_b64 s[30:31] 1818 %vec0 = call <3 x i32> asm "; def $0", "=v"() 1819 %vec1 = call <3 x i32> asm "; def $0", "=v"() 1820 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 4, i32 0, i32 0> 1821 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 1822 ret void 1823} 1824 1825define void @v_shuffle_v4i32_v3i32__5_5_0_0(ptr addrspace(1) inreg %ptr) { 1826; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_0_0: 1827; GFX900: ; %bb.0: 1828; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1829; GFX900-NEXT: ;;#ASMSTART 1830; GFX900-NEXT: ; def v[0:2] 1831; GFX900-NEXT: ;;#ASMEND 1832; GFX900-NEXT: v_mov_b32_e32 v6, 0 1833; GFX900-NEXT: ;;#ASMSTART 1834; GFX900-NEXT: ; def v[3:5] 1835; GFX900-NEXT: ;;#ASMEND 1836; GFX900-NEXT: v_mov_b32_e32 v0, v2 1837; GFX900-NEXT: v_mov_b32_e32 v1, v2 1838; GFX900-NEXT: v_mov_b32_e32 v2, v3 1839; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] 1840; GFX900-NEXT: s_waitcnt vmcnt(0) 1841; GFX900-NEXT: s_setpc_b64 s[30:31] 1842; 1843; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_0_0: 1844; GFX90A: ; %bb.0: 1845; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1846; GFX90A-NEXT: ;;#ASMSTART 1847; GFX90A-NEXT: ; def v[0:2] 1848; GFX90A-NEXT: ;;#ASMEND 1849; GFX90A-NEXT: v_mov_b32_e32 v7, 0 1850; GFX90A-NEXT: ;;#ASMSTART 1851; GFX90A-NEXT: ; def v[4:6] 1852; GFX90A-NEXT: ;;#ASMEND 1853; GFX90A-NEXT: v_mov_b32_e32 v0, v2 1854; GFX90A-NEXT: v_mov_b32_e32 v1, v2 1855; GFX90A-NEXT: v_mov_b32_e32 v2, v4 1856; 
GFX90A-NEXT: v_mov_b32_e32 v3, v4 1857; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17] 1858; GFX90A-NEXT: s_waitcnt vmcnt(0) 1859; GFX90A-NEXT: s_setpc_b64 s[30:31] 1860; 1861; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_0_0: 1862; GFX940: ; %bb.0: 1863; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1864; GFX940-NEXT: ;;#ASMSTART 1865; GFX940-NEXT: ; def v[0:2] 1866; GFX940-NEXT: ;;#ASMEND 1867; GFX940-NEXT: v_mov_b32_e32 v7, 0 1868; GFX940-NEXT: ;;#ASMSTART 1869; GFX940-NEXT: ; def v[4:6] 1870; GFX940-NEXT: ;;#ASMEND 1871; GFX940-NEXT: v_mov_b32_e32 v0, v2 1872; GFX940-NEXT: v_mov_b32_e32 v1, v2 1873; GFX940-NEXT: v_mov_b32_e32 v2, v4 1874; GFX940-NEXT: v_mov_b32_e32 v3, v4 1875; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 1876; GFX940-NEXT: s_waitcnt vmcnt(0) 1877; GFX940-NEXT: s_setpc_b64 s[30:31] 1878 %vec0 = call <3 x i32> asm "; def $0", "=v"() 1879 %vec1 = call <3 x i32> asm "; def $0", "=v"() 1880 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 0, i32 0> 1881 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 1882 ret void 1883} 1884 1885define void @v_shuffle_v4i32_v3i32__5_5_u_0(ptr addrspace(1) inreg %ptr) { 1886; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_u_0: 1887; GFX900: ; %bb.0: 1888; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1889; GFX900-NEXT: ;;#ASMSTART 1890; GFX900-NEXT: ; def v[2:4] 1891; GFX900-NEXT: ;;#ASMEND 1892; GFX900-NEXT: ;;#ASMSTART 1893; GFX900-NEXT: ; def v[3:5] 1894; GFX900-NEXT: ;;#ASMEND 1895; GFX900-NEXT: v_mov_b32_e32 v6, 0 1896; GFX900-NEXT: v_mov_b32_e32 v0, v5 1897; GFX900-NEXT: v_mov_b32_e32 v1, v5 1898; GFX900-NEXT: v_mov_b32_e32 v3, v2 1899; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] 1900; GFX900-NEXT: s_waitcnt vmcnt(0) 1901; GFX900-NEXT: s_setpc_b64 s[30:31] 1902; 1903; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_u_0: 1904; GFX90A: ; %bb.0: 1905; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1906; GFX90A-NEXT: ;;#ASMSTART 1907; 
GFX90A-NEXT: ; def v[2:4] 1908; GFX90A-NEXT: ;;#ASMEND 1909; GFX90A-NEXT: v_mov_b32_e32 v7, 0 1910; GFX90A-NEXT: ;;#ASMSTART 1911; GFX90A-NEXT: ; def v[4:6] 1912; GFX90A-NEXT: ;;#ASMEND 1913; GFX90A-NEXT: v_mov_b32_e32 v0, v6 1914; GFX90A-NEXT: v_mov_b32_e32 v1, v6 1915; GFX90A-NEXT: v_mov_b32_e32 v3, v2 1916; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17] 1917; GFX90A-NEXT: s_waitcnt vmcnt(0) 1918; GFX90A-NEXT: s_setpc_b64 s[30:31] 1919; 1920; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_u_0: 1921; GFX940: ; %bb.0: 1922; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1923; GFX940-NEXT: ;;#ASMSTART 1924; GFX940-NEXT: ; def v[2:4] 1925; GFX940-NEXT: ;;#ASMEND 1926; GFX940-NEXT: v_mov_b32_e32 v7, 0 1927; GFX940-NEXT: ;;#ASMSTART 1928; GFX940-NEXT: ; def v[4:6] 1929; GFX940-NEXT: ;;#ASMEND 1930; GFX940-NEXT: v_mov_b32_e32 v3, v2 1931; GFX940-NEXT: v_mov_b32_e32 v0, v6 1932; GFX940-NEXT: v_mov_b32_e32 v1, v6 1933; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 1934; GFX940-NEXT: s_waitcnt vmcnt(0) 1935; GFX940-NEXT: s_setpc_b64 s[30:31] 1936 %vec0 = call <3 x i32> asm "; def $0", "=v"() 1937 %vec1 = call <3 x i32> asm "; def $0", "=v"() 1938 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 poison, i32 0> 1939 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 1940 ret void 1941} 1942 1943define void @v_shuffle_v4i32_v3i32__5_5_1_0(ptr addrspace(1) inreg %ptr) { 1944; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_1_0: 1945; GFX900: ; %bb.0: 1946; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1947; GFX900-NEXT: ;;#ASMSTART 1948; GFX900-NEXT: ; def v[0:2] 1949; GFX900-NEXT: ;;#ASMEND 1950; GFX900-NEXT: v_mov_b32_e32 v6, 0 1951; GFX900-NEXT: ;;#ASMSTART 1952; GFX900-NEXT: ; def v[3:5] 1953; GFX900-NEXT: ;;#ASMEND 1954; GFX900-NEXT: v_mov_b32_e32 v0, v2 1955; GFX900-NEXT: v_mov_b32_e32 v1, v2 1956; GFX900-NEXT: v_mov_b32_e32 v2, v4 1957; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] 1958; 
GFX900-NEXT: s_waitcnt vmcnt(0) 1959; GFX900-NEXT: s_setpc_b64 s[30:31] 1960; 1961; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_1_0: 1962; GFX90A: ; %bb.0: 1963; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1964; GFX90A-NEXT: ;;#ASMSTART 1965; GFX90A-NEXT: ; def v[0:2] 1966; GFX90A-NEXT: ;;#ASMEND 1967; GFX90A-NEXT: v_mov_b32_e32 v7, 0 1968; GFX90A-NEXT: ;;#ASMSTART 1969; GFX90A-NEXT: ; def v[4:6] 1970; GFX90A-NEXT: ;;#ASMEND 1971; GFX90A-NEXT: v_mov_b32_e32 v0, v2 1972; GFX90A-NEXT: v_mov_b32_e32 v1, v2 1973; GFX90A-NEXT: v_mov_b32_e32 v2, v5 1974; GFX90A-NEXT: v_mov_b32_e32 v3, v4 1975; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17] 1976; GFX90A-NEXT: s_waitcnt vmcnt(0) 1977; GFX90A-NEXT: s_setpc_b64 s[30:31] 1978; 1979; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_1_0: 1980; GFX940: ; %bb.0: 1981; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1982; GFX940-NEXT: ;;#ASMSTART 1983; GFX940-NEXT: ; def v[0:2] 1984; GFX940-NEXT: ;;#ASMEND 1985; GFX940-NEXT: v_mov_b32_e32 v7, 0 1986; GFX940-NEXT: ;;#ASMSTART 1987; GFX940-NEXT: ; def v[4:6] 1988; GFX940-NEXT: ;;#ASMEND 1989; GFX940-NEXT: v_mov_b32_e32 v0, v2 1990; GFX940-NEXT: v_mov_b32_e32 v1, v2 1991; GFX940-NEXT: v_mov_b32_e32 v2, v5 1992; GFX940-NEXT: v_mov_b32_e32 v3, v4 1993; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 1994; GFX940-NEXT: s_waitcnt vmcnt(0) 1995; GFX940-NEXT: s_setpc_b64 s[30:31] 1996 %vec0 = call <3 x i32> asm "; def $0", "=v"() 1997 %vec1 = call <3 x i32> asm "; def $0", "=v"() 1998 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 1, i32 0> 1999 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 2000 ret void 2001} 2002 2003define void @v_shuffle_v4i32_v3i32__5_5_2_0(ptr addrspace(1) inreg %ptr) { 2004; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_2_0: 2005; GFX900: ; %bb.0: 2006; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2007; GFX900-NEXT: ;;#ASMSTART 2008; GFX900-NEXT: ; def v[0:2] 2009; GFX900-NEXT: ;;#ASMEND 2010; 
GFX900-NEXT: v_mov_b32_e32 v6, 0 2011; GFX900-NEXT: ;;#ASMSTART 2012; GFX900-NEXT: ; def v[3:5] 2013; GFX900-NEXT: ;;#ASMEND 2014; GFX900-NEXT: v_mov_b32_e32 v0, v2 2015; GFX900-NEXT: v_mov_b32_e32 v1, v2 2016; GFX900-NEXT: v_mov_b32_e32 v2, v5 2017; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] 2018; GFX900-NEXT: s_waitcnt vmcnt(0) 2019; GFX900-NEXT: s_setpc_b64 s[30:31] 2020; 2021; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_2_0: 2022; GFX90A: ; %bb.0: 2023; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2024; GFX90A-NEXT: ;;#ASMSTART 2025; GFX90A-NEXT: ; def v[0:2] 2026; GFX90A-NEXT: ;;#ASMEND 2027; GFX90A-NEXT: v_mov_b32_e32 v7, 0 2028; GFX90A-NEXT: ;;#ASMSTART 2029; GFX90A-NEXT: ; def v[4:6] 2030; GFX90A-NEXT: ;;#ASMEND 2031; GFX90A-NEXT: v_mov_b32_e32 v0, v2 2032; GFX90A-NEXT: v_mov_b32_e32 v1, v2 2033; GFX90A-NEXT: v_mov_b32_e32 v2, v6 2034; GFX90A-NEXT: v_mov_b32_e32 v3, v4 2035; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17] 2036; GFX90A-NEXT: s_waitcnt vmcnt(0) 2037; GFX90A-NEXT: s_setpc_b64 s[30:31] 2038; 2039; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_2_0: 2040; GFX940: ; %bb.0: 2041; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2042; GFX940-NEXT: ;;#ASMSTART 2043; GFX940-NEXT: ; def v[0:2] 2044; GFX940-NEXT: ;;#ASMEND 2045; GFX940-NEXT: v_mov_b32_e32 v7, 0 2046; GFX940-NEXT: ;;#ASMSTART 2047; GFX940-NEXT: ; def v[4:6] 2048; GFX940-NEXT: ;;#ASMEND 2049; GFX940-NEXT: v_mov_b32_e32 v0, v2 2050; GFX940-NEXT: v_mov_b32_e32 v1, v2 2051; GFX940-NEXT: v_mov_b32_e32 v2, v6 2052; GFX940-NEXT: v_mov_b32_e32 v3, v4 2053; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 2054; GFX940-NEXT: s_waitcnt vmcnt(0) 2055; GFX940-NEXT: s_setpc_b64 s[30:31] 2056 %vec0 = call <3 x i32> asm "; def $0", "=v"() 2057 %vec1 = call <3 x i32> asm "; def $0", "=v"() 2058 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 2, i32 0> 2059 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 2060 ret void 2061} 
2062 2063define void @v_shuffle_v4i32_v3i32__5_5_3_0(ptr addrspace(1) inreg %ptr) { 2064; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_3_0: 2065; GFX900: ; %bb.0: 2066; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2067; GFX900-NEXT: ;;#ASMSTART 2068; GFX900-NEXT: ; def v[3:5] 2069; GFX900-NEXT: ;;#ASMEND 2070; GFX900-NEXT: v_mov_b32_e32 v7, 0 2071; GFX900-NEXT: ;;#ASMSTART 2072; GFX900-NEXT: ; def v[4:6] 2073; GFX900-NEXT: ;;#ASMEND 2074; GFX900-NEXT: v_mov_b32_e32 v0, v6 2075; GFX900-NEXT: v_mov_b32_e32 v1, v6 2076; GFX900-NEXT: v_mov_b32_e32 v2, v4 2077; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17] 2078; GFX900-NEXT: s_waitcnt vmcnt(0) 2079; GFX900-NEXT: s_setpc_b64 s[30:31] 2080; 2081; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_3_0: 2082; GFX90A: ; %bb.0: 2083; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2084; GFX90A-NEXT: ;;#ASMSTART 2085; GFX90A-NEXT: ; def v[4:6] 2086; GFX90A-NEXT: ;;#ASMEND 2087; GFX90A-NEXT: v_mov_b32_e32 v9, 0 2088; GFX90A-NEXT: ;;#ASMSTART 2089; GFX90A-NEXT: ; def v[6:8] 2090; GFX90A-NEXT: ;;#ASMEND 2091; GFX90A-NEXT: v_mov_b32_e32 v0, v8 2092; GFX90A-NEXT: v_mov_b32_e32 v1, v8 2093; GFX90A-NEXT: v_mov_b32_e32 v2, v6 2094; GFX90A-NEXT: v_mov_b32_e32 v3, v4 2095; GFX90A-NEXT: global_store_dwordx4 v9, v[0:3], s[16:17] 2096; GFX90A-NEXT: s_waitcnt vmcnt(0) 2097; GFX90A-NEXT: s_setpc_b64 s[30:31] 2098; 2099; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_3_0: 2100; GFX940: ; %bb.0: 2101; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2102; GFX940-NEXT: ;;#ASMSTART 2103; GFX940-NEXT: ; def v[4:6] 2104; GFX940-NEXT: ;;#ASMEND 2105; GFX940-NEXT: v_mov_b32_e32 v9, 0 2106; GFX940-NEXT: ;;#ASMSTART 2107; GFX940-NEXT: ; def v[6:8] 2108; GFX940-NEXT: ;;#ASMEND 2109; GFX940-NEXT: v_mov_b32_e32 v3, v4 2110; GFX940-NEXT: v_mov_b32_e32 v0, v8 2111; GFX940-NEXT: v_mov_b32_e32 v1, v8 2112; GFX940-NEXT: v_mov_b32_e32 v2, v6 2113; GFX940-NEXT: global_store_dwordx4 v9, v[0:3], s[0:1] sc0 sc1 2114; GFX940-NEXT: s_waitcnt vmcnt(0) 2115; 
GFX940-NEXT: s_setpc_b64 s[30:31] 2116 %vec0 = call <3 x i32> asm "; def $0", "=v"() 2117 %vec1 = call <3 x i32> asm "; def $0", "=v"() 2118 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 3, i32 0> 2119 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 2120 ret void 2121} 2122 2123define void @v_shuffle_v4i32_v3i32__5_5_4_0(ptr addrspace(1) inreg %ptr) { 2124; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_4_0: 2125; GFX900: ; %bb.0: 2126; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2127; GFX900-NEXT: ;;#ASMSTART 2128; GFX900-NEXT: ; def v[1:3] 2129; GFX900-NEXT: ;;#ASMEND 2130; GFX900-NEXT: v_mov_b32_e32 v7, 0 2131; GFX900-NEXT: ;;#ASMSTART 2132; GFX900-NEXT: ; def v[4:6] 2133; GFX900-NEXT: ;;#ASMEND 2134; GFX900-NEXT: v_mov_b32_e32 v0, v3 2135; GFX900-NEXT: v_mov_b32_e32 v1, v3 2136; GFX900-NEXT: v_mov_b32_e32 v3, v4 2137; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17] 2138; GFX900-NEXT: s_waitcnt vmcnt(0) 2139; GFX900-NEXT: s_setpc_b64 s[30:31] 2140; 2141; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_4_0: 2142; GFX90A: ; %bb.0: 2143; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2144; GFX90A-NEXT: ;;#ASMSTART 2145; GFX90A-NEXT: ; def v[4:6] 2146; GFX90A-NEXT: ;;#ASMEND 2147; GFX90A-NEXT: v_mov_b32_e32 v9, 0 2148; GFX90A-NEXT: ;;#ASMSTART 2149; GFX90A-NEXT: ; def v[6:8] 2150; GFX90A-NEXT: ;;#ASMEND 2151; GFX90A-NEXT: v_mov_b32_e32 v0, v8 2152; GFX90A-NEXT: v_mov_b32_e32 v1, v8 2153; GFX90A-NEXT: v_mov_b32_e32 v2, v7 2154; GFX90A-NEXT: v_mov_b32_e32 v3, v4 2155; GFX90A-NEXT: global_store_dwordx4 v9, v[0:3], s[16:17] 2156; GFX90A-NEXT: s_waitcnt vmcnt(0) 2157; GFX90A-NEXT: s_setpc_b64 s[30:31] 2158; 2159; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_4_0: 2160; GFX940: ; %bb.0: 2161; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2162; GFX940-NEXT: ;;#ASMSTART 2163; GFX940-NEXT: ; def v[4:6] 2164; GFX940-NEXT: ;;#ASMEND 2165; GFX940-NEXT: v_mov_b32_e32 v9, 0 2166; GFX940-NEXT: ;;#ASMSTART 2167; GFX940-NEXT: 
; def v[6:8] 2168; GFX940-NEXT: ;;#ASMEND 2169; GFX940-NEXT: v_mov_b32_e32 v3, v4 2170; GFX940-NEXT: v_mov_b32_e32 v0, v8 2171; GFX940-NEXT: v_mov_b32_e32 v1, v8 2172; GFX940-NEXT: v_mov_b32_e32 v2, v7 2173; GFX940-NEXT: global_store_dwordx4 v9, v[0:3], s[0:1] sc0 sc1 2174; GFX940-NEXT: s_waitcnt vmcnt(0) 2175; GFX940-NEXT: s_setpc_b64 s[30:31] 2176 %vec0 = call <3 x i32> asm "; def $0", "=v"() 2177 %vec1 = call <3 x i32> asm "; def $0", "=v"() 2178 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 4, i32 0> 2179 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 2180 ret void 2181} 2182 2183define void @v_shuffle_v4i32_v3i32__u_1_1_1(ptr addrspace(1) inreg %ptr) { 2184; GFX900-LABEL: v_shuffle_v4i32_v3i32__u_1_1_1: 2185; GFX900: ; %bb.0: 2186; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2187; GFX900-NEXT: ;;#ASMSTART 2188; GFX900-NEXT: ; def v[0:2] 2189; GFX900-NEXT: ;;#ASMEND 2190; GFX900-NEXT: v_mov_b32_e32 v4, 0 2191; GFX900-NEXT: v_mov_b32_e32 v2, v1 2192; GFX900-NEXT: v_mov_b32_e32 v3, v1 2193; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] 2194; GFX900-NEXT: s_waitcnt vmcnt(0) 2195; GFX900-NEXT: s_setpc_b64 s[30:31] 2196; 2197; GFX90A-LABEL: v_shuffle_v4i32_v3i32__u_1_1_1: 2198; GFX90A: ; %bb.0: 2199; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2200; GFX90A-NEXT: ;;#ASMSTART 2201; GFX90A-NEXT: ; def v[0:2] 2202; GFX90A-NEXT: ;;#ASMEND 2203; GFX90A-NEXT: v_mov_b32_e32 v4, 0 2204; GFX90A-NEXT: v_mov_b32_e32 v2, v1 2205; GFX90A-NEXT: v_mov_b32_e32 v3, v1 2206; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] 2207; GFX90A-NEXT: s_waitcnt vmcnt(0) 2208; GFX90A-NEXT: s_setpc_b64 s[30:31] 2209; 2210; GFX940-LABEL: v_shuffle_v4i32_v3i32__u_1_1_1: 2211; GFX940: ; %bb.0: 2212; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2213; GFX940-NEXT: ;;#ASMSTART 2214; GFX940-NEXT: ; def v[0:2] 2215; GFX940-NEXT: ;;#ASMEND 2216; GFX940-NEXT: v_mov_b32_e32 v4, 0 2217; GFX940-NEXT: v_mov_b32_e32 v2, v1 
2218; GFX940-NEXT: v_mov_b32_e32 v3, v1 2219; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 2220; GFX940-NEXT: s_waitcnt vmcnt(0) 2221; GFX940-NEXT: s_setpc_b64 s[30:31] 2222 %vec0 = call <3 x i32> asm "; def $0", "=v"() 2223 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> <i32 poison, i32 1, i32 1, i32 1> 2224 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 2225 ret void 2226} 2227 2228define void @v_shuffle_v4i32_v3i32__0_1_1_1(ptr addrspace(1) inreg %ptr) { 2229; GFX900-LABEL: v_shuffle_v4i32_v3i32__0_1_1_1: 2230; GFX900: ; %bb.0: 2231; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2232; GFX900-NEXT: ;;#ASMSTART 2233; GFX900-NEXT: ; def v[0:2] 2234; GFX900-NEXT: ;;#ASMEND 2235; GFX900-NEXT: v_mov_b32_e32 v4, 0 2236; GFX900-NEXT: v_mov_b32_e32 v2, v1 2237; GFX900-NEXT: v_mov_b32_e32 v3, v1 2238; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] 2239; GFX900-NEXT: s_waitcnt vmcnt(0) 2240; GFX900-NEXT: s_setpc_b64 s[30:31] 2241; 2242; GFX90A-LABEL: v_shuffle_v4i32_v3i32__0_1_1_1: 2243; GFX90A: ; %bb.0: 2244; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2245; GFX90A-NEXT: ;;#ASMSTART 2246; GFX90A-NEXT: ; def v[0:2] 2247; GFX90A-NEXT: ;;#ASMEND 2248; GFX90A-NEXT: v_mov_b32_e32 v4, 0 2249; GFX90A-NEXT: v_mov_b32_e32 v2, v1 2250; GFX90A-NEXT: v_mov_b32_e32 v3, v1 2251; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] 2252; GFX90A-NEXT: s_waitcnt vmcnt(0) 2253; GFX90A-NEXT: s_setpc_b64 s[30:31] 2254; 2255; GFX940-LABEL: v_shuffle_v4i32_v3i32__0_1_1_1: 2256; GFX940: ; %bb.0: 2257; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2258; GFX940-NEXT: ;;#ASMSTART 2259; GFX940-NEXT: ; def v[0:2] 2260; GFX940-NEXT: ;;#ASMEND 2261; GFX940-NEXT: v_mov_b32_e32 v4, 0 2262; GFX940-NEXT: v_mov_b32_e32 v2, v1 2263; GFX940-NEXT: v_mov_b32_e32 v3, v1 2264; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 2265; GFX940-NEXT: s_waitcnt vmcnt(0) 2266; GFX940-NEXT: s_setpc_b64 s[30:31] 2267 %vec0 = call <3 x 
i32> asm "; def $0", "=v"() 2268 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 1> 2269 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 2270 ret void 2271} 2272 2273define void @v_shuffle_v4i32_v3i32__1_1_1_1(ptr addrspace(1) inreg %ptr) { 2274; GFX900-LABEL: v_shuffle_v4i32_v3i32__1_1_1_1: 2275; GFX900: ; %bb.0: 2276; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2277; GFX900-NEXT: ;;#ASMSTART 2278; GFX900-NEXT: ; def v[0:2] 2279; GFX900-NEXT: ;;#ASMEND 2280; GFX900-NEXT: v_mov_b32_e32 v4, 0 2281; GFX900-NEXT: v_mov_b32_e32 v0, v1 2282; GFX900-NEXT: v_mov_b32_e32 v2, v1 2283; GFX900-NEXT: v_mov_b32_e32 v3, v1 2284; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] 2285; GFX900-NEXT: s_waitcnt vmcnt(0) 2286; GFX900-NEXT: s_setpc_b64 s[30:31] 2287; 2288; GFX90A-LABEL: v_shuffle_v4i32_v3i32__1_1_1_1: 2289; GFX90A: ; %bb.0: 2290; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2291; GFX90A-NEXT: ;;#ASMSTART 2292; GFX90A-NEXT: ; def v[0:2] 2293; GFX90A-NEXT: ;;#ASMEND 2294; GFX90A-NEXT: v_mov_b32_e32 v4, 0 2295; GFX90A-NEXT: v_mov_b32_e32 v0, v1 2296; GFX90A-NEXT: v_mov_b32_e32 v2, v1 2297; GFX90A-NEXT: v_mov_b32_e32 v3, v1 2298; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] 2299; GFX90A-NEXT: s_waitcnt vmcnt(0) 2300; GFX90A-NEXT: s_setpc_b64 s[30:31] 2301; 2302; GFX940-LABEL: v_shuffle_v4i32_v3i32__1_1_1_1: 2303; GFX940: ; %bb.0: 2304; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2305; GFX940-NEXT: ;;#ASMSTART 2306; GFX940-NEXT: ; def v[0:2] 2307; GFX940-NEXT: ;;#ASMEND 2308; GFX940-NEXT: v_mov_b32_e32 v4, 0 2309; GFX940-NEXT: v_mov_b32_e32 v0, v1 2310; GFX940-NEXT: v_mov_b32_e32 v2, v1 2311; GFX940-NEXT: v_mov_b32_e32 v3, v1 2312; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 2313; GFX940-NEXT: s_waitcnt vmcnt(0) 2314; GFX940-NEXT: s_setpc_b64 s[30:31] 2315 %vec0 = call <3 x i32> asm "; def $0", "=v"() 2316 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 
x i32> <i32 1, i32 1, i32 1, i32 1> 2317 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 2318 ret void 2319} 2320 2321define void @v_shuffle_v4i32_v3i32__2_1_1_1(ptr addrspace(1) inreg %ptr) { 2322; GFX900-LABEL: v_shuffle_v4i32_v3i32__2_1_1_1: 2323; GFX900: ; %bb.0: 2324; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2325; GFX900-NEXT: ;;#ASMSTART 2326; GFX900-NEXT: ; def v[0:2] 2327; GFX900-NEXT: ;;#ASMEND 2328; GFX900-NEXT: v_mov_b32_e32 v4, 0 2329; GFX900-NEXT: v_mov_b32_e32 v0, v2 2330; GFX900-NEXT: v_mov_b32_e32 v2, v1 2331; GFX900-NEXT: v_mov_b32_e32 v3, v1 2332; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] 2333; GFX900-NEXT: s_waitcnt vmcnt(0) 2334; GFX900-NEXT: s_setpc_b64 s[30:31] 2335; 2336; GFX90A-LABEL: v_shuffle_v4i32_v3i32__2_1_1_1: 2337; GFX90A: ; %bb.0: 2338; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2339; GFX90A-NEXT: ;;#ASMSTART 2340; GFX90A-NEXT: ; def v[0:2] 2341; GFX90A-NEXT: ;;#ASMEND 2342; GFX90A-NEXT: v_mov_b32_e32 v4, 0 2343; GFX90A-NEXT: v_mov_b32_e32 v0, v2 2344; GFX90A-NEXT: v_mov_b32_e32 v2, v1 2345; GFX90A-NEXT: v_mov_b32_e32 v3, v1 2346; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] 2347; GFX90A-NEXT: s_waitcnt vmcnt(0) 2348; GFX90A-NEXT: s_setpc_b64 s[30:31] 2349; 2350; GFX940-LABEL: v_shuffle_v4i32_v3i32__2_1_1_1: 2351; GFX940: ; %bb.0: 2352; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2353; GFX940-NEXT: ;;#ASMSTART 2354; GFX940-NEXT: ; def v[0:2] 2355; GFX940-NEXT: ;;#ASMEND 2356; GFX940-NEXT: v_mov_b32_e32 v4, 0 2357; GFX940-NEXT: v_mov_b32_e32 v0, v2 2358; GFX940-NEXT: v_mov_b32_e32 v2, v1 2359; GFX940-NEXT: v_mov_b32_e32 v3, v1 2360; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 2361; GFX940-NEXT: s_waitcnt vmcnt(0) 2362; GFX940-NEXT: s_setpc_b64 s[30:31] 2363 %vec0 = call <3 x i32> asm "; def $0", "=v"() 2364 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> <i32 2, i32 1, i32 1, i32 1> 2365 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 
16 2366 ret void 2367} 2368 2369define void @v_shuffle_v4i32_v3i32__3_1_1_1(ptr addrspace(1) inreg %ptr) { 2370; GFX900-LABEL: v_shuffle_v4i32_v3i32__3_1_1_1: 2371; GFX900: ; %bb.0: 2372; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2373; GFX900-NEXT: ;;#ASMSTART 2374; GFX900-NEXT: ; def v[0:2] 2375; GFX900-NEXT: ;;#ASMEND 2376; GFX900-NEXT: v_mov_b32_e32 v4, 0 2377; GFX900-NEXT: v_mov_b32_e32 v2, v1 2378; GFX900-NEXT: v_mov_b32_e32 v3, v1 2379; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] 2380; GFX900-NEXT: s_waitcnt vmcnt(0) 2381; GFX900-NEXT: s_setpc_b64 s[30:31] 2382; 2383; GFX90A-LABEL: v_shuffle_v4i32_v3i32__3_1_1_1: 2384; GFX90A: ; %bb.0: 2385; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2386; GFX90A-NEXT: ;;#ASMSTART 2387; GFX90A-NEXT: ; def v[0:2] 2388; GFX90A-NEXT: ;;#ASMEND 2389; GFX90A-NEXT: v_mov_b32_e32 v4, 0 2390; GFX90A-NEXT: v_mov_b32_e32 v2, v1 2391; GFX90A-NEXT: v_mov_b32_e32 v3, v1 2392; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] 2393; GFX90A-NEXT: s_waitcnt vmcnt(0) 2394; GFX90A-NEXT: s_setpc_b64 s[30:31] 2395; 2396; GFX940-LABEL: v_shuffle_v4i32_v3i32__3_1_1_1: 2397; GFX940: ; %bb.0: 2398; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2399; GFX940-NEXT: ;;#ASMSTART 2400; GFX940-NEXT: ; def v[0:2] 2401; GFX940-NEXT: ;;#ASMEND 2402; GFX940-NEXT: v_mov_b32_e32 v4, 0 2403; GFX940-NEXT: v_mov_b32_e32 v2, v1 2404; GFX940-NEXT: v_mov_b32_e32 v3, v1 2405; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 2406; GFX940-NEXT: s_waitcnt vmcnt(0) 2407; GFX940-NEXT: s_setpc_b64 s[30:31] 2408 %vec0 = call <3 x i32> asm "; def $0", "=v"() 2409 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> <i32 3, i32 1, i32 1, i32 1> 2410 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 2411 ret void 2412} 2413 2414define void @v_shuffle_v4i32_v3i32__4_1_1_1(ptr addrspace(1) inreg %ptr) { 2415; GFX900-LABEL: v_shuffle_v4i32_v3i32__4_1_1_1: 2416; GFX900: ; %bb.0: 2417; GFX900-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) 2418; GFX900-NEXT: ;;#ASMSTART 2419; GFX900-NEXT: ; def v[0:2] 2420; GFX900-NEXT: ;;#ASMEND 2421; GFX900-NEXT: ;;#ASMSTART 2422; GFX900-NEXT: ; def v[2:4] 2423; GFX900-NEXT: ;;#ASMEND 2424; GFX900-NEXT: v_mov_b32_e32 v5, 0 2425; GFX900-NEXT: v_mov_b32_e32 v0, v3 2426; GFX900-NEXT: v_mov_b32_e32 v2, v1 2427; GFX900-NEXT: v_mov_b32_e32 v3, v1 2428; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] 2429; GFX900-NEXT: s_waitcnt vmcnt(0) 2430; GFX900-NEXT: s_setpc_b64 s[30:31] 2431; 2432; GFX90A-LABEL: v_shuffle_v4i32_v3i32__4_1_1_1: 2433; GFX90A: ; %bb.0: 2434; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2435; GFX90A-NEXT: ;;#ASMSTART 2436; GFX90A-NEXT: ; def v[0:2] 2437; GFX90A-NEXT: ;;#ASMEND 2438; GFX90A-NEXT: ;;#ASMSTART 2439; GFX90A-NEXT: ; def v[2:4] 2440; GFX90A-NEXT: ;;#ASMEND 2441; GFX90A-NEXT: v_mov_b32_e32 v5, 0 2442; GFX90A-NEXT: v_mov_b32_e32 v0, v3 2443; GFX90A-NEXT: v_mov_b32_e32 v2, v1 2444; GFX90A-NEXT: v_mov_b32_e32 v3, v1 2445; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] 2446; GFX90A-NEXT: s_waitcnt vmcnt(0) 2447; GFX90A-NEXT: s_setpc_b64 s[30:31] 2448; 2449; GFX940-LABEL: v_shuffle_v4i32_v3i32__4_1_1_1: 2450; GFX940: ; %bb.0: 2451; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2452; GFX940-NEXT: ;;#ASMSTART 2453; GFX940-NEXT: ; def v[0:2] 2454; GFX940-NEXT: ;;#ASMEND 2455; GFX940-NEXT: v_mov_b32_e32 v5, 0 2456; GFX940-NEXT: ;;#ASMSTART 2457; GFX940-NEXT: ; def v[2:4] 2458; GFX940-NEXT: ;;#ASMEND 2459; GFX940-NEXT: s_nop 0 2460; GFX940-NEXT: v_mov_b32_e32 v0, v3 2461; GFX940-NEXT: v_mov_b32_e32 v2, v1 2462; GFX940-NEXT: v_mov_b32_e32 v3, v1 2463; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 2464; GFX940-NEXT: s_waitcnt vmcnt(0) 2465; GFX940-NEXT: s_setpc_b64 s[30:31] 2466 %vec0 = call <3 x i32> asm "; def $0", "=v"() 2467 %vec1 = call <3 x i32> asm "; def $0", "=v"() 2468 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 4, i32 1, i32 1, i32 1> 2469 
store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 2470 ret void 2471} 2472 2473define void @v_shuffle_v4i32_v3i32__5_1_1_1(ptr addrspace(1) inreg %ptr) { 2474; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_1_1_1: 2475; GFX900: ; %bb.0: 2476; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2477; GFX900-NEXT: ;;#ASMSTART 2478; GFX900-NEXT: ; def v[0:2] 2479; GFX900-NEXT: ;;#ASMEND 2480; GFX900-NEXT: ;;#ASMSTART 2481; GFX900-NEXT: ; def v[2:4] 2482; GFX900-NEXT: ;;#ASMEND 2483; GFX900-NEXT: v_mov_b32_e32 v5, 0 2484; GFX900-NEXT: v_mov_b32_e32 v0, v4 2485; GFX900-NEXT: v_mov_b32_e32 v2, v1 2486; GFX900-NEXT: v_mov_b32_e32 v3, v1 2487; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] 2488; GFX900-NEXT: s_waitcnt vmcnt(0) 2489; GFX900-NEXT: s_setpc_b64 s[30:31] 2490; 2491; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_1_1_1: 2492; GFX90A: ; %bb.0: 2493; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2494; GFX90A-NEXT: ;;#ASMSTART 2495; GFX90A-NEXT: ; def v[0:2] 2496; GFX90A-NEXT: ;;#ASMEND 2497; GFX90A-NEXT: ;;#ASMSTART 2498; GFX90A-NEXT: ; def v[2:4] 2499; GFX90A-NEXT: ;;#ASMEND 2500; GFX90A-NEXT: v_mov_b32_e32 v5, 0 2501; GFX90A-NEXT: v_mov_b32_e32 v0, v4 2502; GFX90A-NEXT: v_mov_b32_e32 v2, v1 2503; GFX90A-NEXT: v_mov_b32_e32 v3, v1 2504; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] 2505; GFX90A-NEXT: s_waitcnt vmcnt(0) 2506; GFX90A-NEXT: s_setpc_b64 s[30:31] 2507; 2508; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_1_1_1: 2509; GFX940: ; %bb.0: 2510; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2511; GFX940-NEXT: ;;#ASMSTART 2512; GFX940-NEXT: ; def v[0:2] 2513; GFX940-NEXT: ;;#ASMEND 2514; GFX940-NEXT: v_mov_b32_e32 v5, 0 2515; GFX940-NEXT: ;;#ASMSTART 2516; GFX940-NEXT: ; def v[2:4] 2517; GFX940-NEXT: ;;#ASMEND 2518; GFX940-NEXT: s_nop 0 2519; GFX940-NEXT: v_mov_b32_e32 v0, v4 2520; GFX940-NEXT: v_mov_b32_e32 v2, v1 2521; GFX940-NEXT: v_mov_b32_e32 v3, v1 2522; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 2523; GFX940-NEXT: 
s_waitcnt vmcnt(0) 2524; GFX940-NEXT: s_setpc_b64 s[30:31] 2525 %vec0 = call <3 x i32> asm "; def $0", "=v"() 2526 %vec1 = call <3 x i32> asm "; def $0", "=v"() 2527 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 1, i32 1, i32 1> 2528 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 2529 ret void 2530} 2531 2532define void @v_shuffle_v4i32_v3i32__5_u_1_1(ptr addrspace(1) inreg %ptr) { 2533; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_u_1_1: 2534; GFX900: ; %bb.0: 2535; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2536; GFX900-NEXT: ;;#ASMSTART 2537; GFX900-NEXT: ; def v[1:3] 2538; GFX900-NEXT: ;;#ASMEND 2539; GFX900-NEXT: ;;#ASMSTART 2540; GFX900-NEXT: ; def v[3:5] 2541; GFX900-NEXT: ;;#ASMEND 2542; GFX900-NEXT: v_mov_b32_e32 v6, 0 2543; GFX900-NEXT: v_mov_b32_e32 v0, v5 2544; GFX900-NEXT: v_mov_b32_e32 v3, v2 2545; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] 2546; GFX900-NEXT: s_waitcnt vmcnt(0) 2547; GFX900-NEXT: s_setpc_b64 s[30:31] 2548; 2549; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_u_1_1: 2550; GFX90A: ; %bb.0: 2551; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2552; GFX90A-NEXT: ;;#ASMSTART 2553; GFX90A-NEXT: ; def v[0:2] 2554; GFX90A-NEXT: ;;#ASMEND 2555; GFX90A-NEXT: ;;#ASMSTART 2556; GFX90A-NEXT: ; def v[2:4] 2557; GFX90A-NEXT: ;;#ASMEND 2558; GFX90A-NEXT: v_mov_b32_e32 v5, 0 2559; GFX90A-NEXT: v_mov_b32_e32 v0, v4 2560; GFX90A-NEXT: v_mov_b32_e32 v2, v1 2561; GFX90A-NEXT: v_mov_b32_e32 v3, v1 2562; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] 2563; GFX90A-NEXT: s_waitcnt vmcnt(0) 2564; GFX90A-NEXT: s_setpc_b64 s[30:31] 2565; 2566; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_u_1_1: 2567; GFX940: ; %bb.0: 2568; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2569; GFX940-NEXT: ;;#ASMSTART 2570; GFX940-NEXT: ; def v[0:2] 2571; GFX940-NEXT: ;;#ASMEND 2572; GFX940-NEXT: v_mov_b32_e32 v5, 0 2573; GFX940-NEXT: ;;#ASMSTART 2574; GFX940-NEXT: ; def v[2:4] 2575; GFX940-NEXT: ;;#ASMEND 2576; 
GFX940-NEXT: s_nop 0 2577; GFX940-NEXT: v_mov_b32_e32 v0, v4 2578; GFX940-NEXT: v_mov_b32_e32 v2, v1 2579; GFX940-NEXT: v_mov_b32_e32 v3, v1 2580; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 2581; GFX940-NEXT: s_waitcnt vmcnt(0) 2582; GFX940-NEXT: s_setpc_b64 s[30:31] 2583 %vec0 = call <3 x i32> asm "; def $0", "=v"() 2584 %vec1 = call <3 x i32> asm "; def $0", "=v"() 2585 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 poison, i32 1, i32 1> 2586 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 2587 ret void 2588} 2589 2590define void @v_shuffle_v4i32_v3i32__5_0_1_1(ptr addrspace(1) inreg %ptr) { 2591; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_0_1_1: 2592; GFX900: ; %bb.0: 2593; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2594; GFX900-NEXT: ;;#ASMSTART 2595; GFX900-NEXT: ; def v[1:3] 2596; GFX900-NEXT: ;;#ASMEND 2597; GFX900-NEXT: ;;#ASMSTART 2598; GFX900-NEXT: ; def v[3:5] 2599; GFX900-NEXT: ;;#ASMEND 2600; GFX900-NEXT: v_mov_b32_e32 v6, 0 2601; GFX900-NEXT: v_mov_b32_e32 v0, v5 2602; GFX900-NEXT: v_mov_b32_e32 v3, v2 2603; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] 2604; GFX900-NEXT: s_waitcnt vmcnt(0) 2605; GFX900-NEXT: s_setpc_b64 s[30:31] 2606; 2607; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_0_1_1: 2608; GFX90A: ; %bb.0: 2609; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2610; GFX90A-NEXT: ;;#ASMSTART 2611; GFX90A-NEXT: ; def v[2:4] 2612; GFX90A-NEXT: ;;#ASMEND 2613; GFX90A-NEXT: v_mov_b32_e32 v7, 0 2614; GFX90A-NEXT: ;;#ASMSTART 2615; GFX90A-NEXT: ; def v[4:6] 2616; GFX90A-NEXT: ;;#ASMEND 2617; GFX90A-NEXT: v_mov_b32_e32 v0, v6 2618; GFX90A-NEXT: v_mov_b32_e32 v1, v2 2619; GFX90A-NEXT: v_mov_b32_e32 v2, v3 2620; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17] 2621; GFX90A-NEXT: s_waitcnt vmcnt(0) 2622; GFX90A-NEXT: s_setpc_b64 s[30:31] 2623; 2624; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_0_1_1: 2625; GFX940: ; %bb.0: 2626; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 
2627; GFX940-NEXT: ;;#ASMSTART 2628; GFX940-NEXT: ; def v[2:4] 2629; GFX940-NEXT: ;;#ASMEND 2630; GFX940-NEXT: v_mov_b32_e32 v7, 0 2631; GFX940-NEXT: ;;#ASMSTART 2632; GFX940-NEXT: ; def v[4:6] 2633; GFX940-NEXT: ;;#ASMEND 2634; GFX940-NEXT: v_mov_b32_e32 v1, v2 2635; GFX940-NEXT: v_mov_b32_e32 v0, v6 2636; GFX940-NEXT: v_mov_b32_e32 v2, v3 2637; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 2638; GFX940-NEXT: s_waitcnt vmcnt(0) 2639; GFX940-NEXT: s_setpc_b64 s[30:31] 2640 %vec0 = call <3 x i32> asm "; def $0", "=v"() 2641 %vec1 = call <3 x i32> asm "; def $0", "=v"() 2642 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 0, i32 1, i32 1> 2643 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 2644 ret void 2645} 2646 2647define void @v_shuffle_v4i32_v3i32__5_2_1_1(ptr addrspace(1) inreg %ptr) { 2648; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_2_1_1: 2649; GFX900: ; %bb.0: 2650; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2651; GFX900-NEXT: ;;#ASMSTART 2652; GFX900-NEXT: ; def v[1:3] 2653; GFX900-NEXT: ;;#ASMEND 2654; GFX900-NEXT: v_mov_b32_e32 v7, 0 2655; GFX900-NEXT: ;;#ASMSTART 2656; GFX900-NEXT: ; def v[4:6] 2657; GFX900-NEXT: ;;#ASMEND 2658; GFX900-NEXT: v_mov_b32_e32 v0, v6 2659; GFX900-NEXT: v_mov_b32_e32 v1, v3 2660; GFX900-NEXT: v_mov_b32_e32 v3, v2 2661; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17] 2662; GFX900-NEXT: s_waitcnt vmcnt(0) 2663; GFX900-NEXT: s_setpc_b64 s[30:31] 2664; 2665; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_2_1_1: 2666; GFX90A: ; %bb.0: 2667; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2668; GFX90A-NEXT: ;;#ASMSTART 2669; GFX90A-NEXT: ; def v[2:4] 2670; GFX90A-NEXT: ;;#ASMEND 2671; GFX90A-NEXT: ;;#ASMSTART 2672; GFX90A-NEXT: ; def v[0:2] 2673; GFX90A-NEXT: ;;#ASMEND 2674; GFX90A-NEXT: v_mov_b32_e32 v5, 0 2675; GFX90A-NEXT: v_mov_b32_e32 v0, v2 2676; GFX90A-NEXT: v_mov_b32_e32 v1, v4 2677; GFX90A-NEXT: v_mov_b32_e32 v2, v3 2678; GFX90A-NEXT: global_store_dwordx4 v5, 
v[0:3], s[16:17] 2679; GFX90A-NEXT: s_waitcnt vmcnt(0) 2680; GFX90A-NEXT: s_setpc_b64 s[30:31] 2681; 2682; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_2_1_1: 2683; GFX940: ; %bb.0: 2684; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2685; GFX940-NEXT: ;;#ASMSTART 2686; GFX940-NEXT: ; def v[2:4] 2687; GFX940-NEXT: ;;#ASMEND 2688; GFX940-NEXT: v_mov_b32_e32 v5, 0 2689; GFX940-NEXT: ;;#ASMSTART 2690; GFX940-NEXT: ; def v[0:2] 2691; GFX940-NEXT: ;;#ASMEND 2692; GFX940-NEXT: s_nop 0 2693; GFX940-NEXT: v_mov_b32_e32 v0, v2 2694; GFX940-NEXT: v_mov_b32_e32 v1, v4 2695; GFX940-NEXT: v_mov_b32_e32 v2, v3 2696; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 2697; GFX940-NEXT: s_waitcnt vmcnt(0) 2698; GFX940-NEXT: s_setpc_b64 s[30:31] 2699 %vec0 = call <3 x i32> asm "; def $0", "=v"() 2700 %vec1 = call <3 x i32> asm "; def $0", "=v"() 2701 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 2, i32 1, i32 1> 2702 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 2703 ret void 2704} 2705 2706define void @v_shuffle_v4i32_v3i32__5_3_1_1(ptr addrspace(1) inreg %ptr) { 2707; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_3_1_1: 2708; GFX900: ; %bb.0: 2709; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2710; GFX900-NEXT: ;;#ASMSTART 2711; GFX900-NEXT: ; def v[3:5] 2712; GFX900-NEXT: ;;#ASMEND 2713; GFX900-NEXT: ;;#ASMSTART 2714; GFX900-NEXT: ; def v[1:3] 2715; GFX900-NEXT: ;;#ASMEND 2716; GFX900-NEXT: v_mov_b32_e32 v6, 0 2717; GFX900-NEXT: v_mov_b32_e32 v0, v3 2718; GFX900-NEXT: v_mov_b32_e32 v2, v4 2719; GFX900-NEXT: v_mov_b32_e32 v3, v4 2720; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] 2721; GFX900-NEXT: s_waitcnt vmcnt(0) 2722; GFX900-NEXT: s_setpc_b64 s[30:31] 2723; 2724; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_3_1_1: 2725; GFX90A: ; %bb.0: 2726; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2727; GFX90A-NEXT: ;;#ASMSTART 2728; GFX90A-NEXT: ; def v[2:4] 2729; GFX90A-NEXT: ;;#ASMEND 2730; GFX90A-NEXT: v_mov_b32_e32 
v7, 0 2731; GFX90A-NEXT: ;;#ASMSTART 2732; GFX90A-NEXT: ; def v[4:6] 2733; GFX90A-NEXT: ;;#ASMEND 2734; GFX90A-NEXT: v_mov_b32_e32 v0, v6 2735; GFX90A-NEXT: v_mov_b32_e32 v1, v4 2736; GFX90A-NEXT: v_mov_b32_e32 v2, v3 2737; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17] 2738; GFX90A-NEXT: s_waitcnt vmcnt(0) 2739; GFX90A-NEXT: s_setpc_b64 s[30:31] 2740; 2741; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_3_1_1: 2742; GFX940: ; %bb.0: 2743; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2744; GFX940-NEXT: ;;#ASMSTART 2745; GFX940-NEXT: ; def v[2:4] 2746; GFX940-NEXT: ;;#ASMEND 2747; GFX940-NEXT: v_mov_b32_e32 v7, 0 2748; GFX940-NEXT: ;;#ASMSTART 2749; GFX940-NEXT: ; def v[4:6] 2750; GFX940-NEXT: ;;#ASMEND 2751; GFX940-NEXT: v_mov_b32_e32 v2, v3 2752; GFX940-NEXT: v_mov_b32_e32 v0, v6 2753; GFX940-NEXT: v_mov_b32_e32 v1, v4 2754; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 2755; GFX940-NEXT: s_waitcnt vmcnt(0) 2756; GFX940-NEXT: s_setpc_b64 s[30:31] 2757 %vec0 = call <3 x i32> asm "; def $0", "=v"() 2758 %vec1 = call <3 x i32> asm "; def $0", "=v"() 2759 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 3, i32 1, i32 1> 2760 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 2761 ret void 2762} 2763 2764define void @v_shuffle_v4i32_v3i32__5_4_1_1(ptr addrspace(1) inreg %ptr) { 2765; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_4_1_1: 2766; GFX900: ; %bb.0: 2767; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2768; GFX900-NEXT: ;;#ASMSTART 2769; GFX900-NEXT: ; def v[2:4] 2770; GFX900-NEXT: ;;#ASMEND 2771; GFX900-NEXT: ;;#ASMSTART 2772; GFX900-NEXT: ; def v[0:2] 2773; GFX900-NEXT: ;;#ASMEND 2774; GFX900-NEXT: v_mov_b32_e32 v5, 0 2775; GFX900-NEXT: v_mov_b32_e32 v0, v2 2776; GFX900-NEXT: v_mov_b32_e32 v2, v3 2777; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] 2778; GFX900-NEXT: s_waitcnt vmcnt(0) 2779; GFX900-NEXT: s_setpc_b64 s[30:31] 2780; 2781; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_4_1_1: 2782; GFX90A: 
; %bb.0: 2783; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2784; GFX90A-NEXT: ;;#ASMSTART 2785; GFX90A-NEXT: ; def v[2:4] 2786; GFX90A-NEXT: ;;#ASMEND 2787; GFX90A-NEXT: ;;#ASMSTART 2788; GFX90A-NEXT: ; def v[0:2] 2789; GFX90A-NEXT: ;;#ASMEND 2790; GFX90A-NEXT: v_mov_b32_e32 v5, 0 2791; GFX90A-NEXT: v_mov_b32_e32 v0, v2 2792; GFX90A-NEXT: v_mov_b32_e32 v2, v3 2793; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] 2794; GFX90A-NEXT: s_waitcnt vmcnt(0) 2795; GFX90A-NEXT: s_setpc_b64 s[30:31] 2796; 2797; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_4_1_1: 2798; GFX940: ; %bb.0: 2799; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2800; GFX940-NEXT: ;;#ASMSTART 2801; GFX940-NEXT: ; def v[2:4] 2802; GFX940-NEXT: ;;#ASMEND 2803; GFX940-NEXT: v_mov_b32_e32 v5, 0 2804; GFX940-NEXT: ;;#ASMSTART 2805; GFX940-NEXT: ; def v[0:2] 2806; GFX940-NEXT: ;;#ASMEND 2807; GFX940-NEXT: s_nop 0 2808; GFX940-NEXT: v_mov_b32_e32 v0, v2 2809; GFX940-NEXT: v_mov_b32_e32 v2, v3 2810; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 2811; GFX940-NEXT: s_waitcnt vmcnt(0) 2812; GFX940-NEXT: s_setpc_b64 s[30:31] 2813 %vec0 = call <3 x i32> asm "; def $0", "=v"() 2814 %vec1 = call <3 x i32> asm "; def $0", "=v"() 2815 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 4, i32 1, i32 1> 2816 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 2817 ret void 2818} 2819 2820define void @v_shuffle_v4i32_v3i32__5_5_1_1(ptr addrspace(1) inreg %ptr) { 2821; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_1_1: 2822; GFX900: ; %bb.0: 2823; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2824; GFX900-NEXT: ;;#ASMSTART 2825; GFX900-NEXT: ; def v[1:3] 2826; GFX900-NEXT: ;;#ASMEND 2827; GFX900-NEXT: ;;#ASMSTART 2828; GFX900-NEXT: ; def v[3:5] 2829; GFX900-NEXT: ;;#ASMEND 2830; GFX900-NEXT: v_mov_b32_e32 v6, 0 2831; GFX900-NEXT: v_mov_b32_e32 v0, v5 2832; GFX900-NEXT: v_mov_b32_e32 v1, v5 2833; GFX900-NEXT: v_mov_b32_e32 v3, v2 2834; GFX900-NEXT: 
global_store_dwordx4 v6, v[0:3], s[16:17] 2835; GFX900-NEXT: s_waitcnt vmcnt(0) 2836; GFX900-NEXT: s_setpc_b64 s[30:31] 2837; 2838; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_1_1: 2839; GFX90A: ; %bb.0: 2840; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2841; GFX90A-NEXT: ;;#ASMSTART 2842; GFX90A-NEXT: ; def v[2:4] 2843; GFX90A-NEXT: ;;#ASMEND 2844; GFX90A-NEXT: ;;#ASMSTART 2845; GFX90A-NEXT: ; def v[0:2] 2846; GFX90A-NEXT: ;;#ASMEND 2847; GFX90A-NEXT: v_mov_b32_e32 v5, 0 2848; GFX90A-NEXT: v_mov_b32_e32 v0, v2 2849; GFX90A-NEXT: v_mov_b32_e32 v1, v2 2850; GFX90A-NEXT: v_mov_b32_e32 v2, v3 2851; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] 2852; GFX90A-NEXT: s_waitcnt vmcnt(0) 2853; GFX90A-NEXT: s_setpc_b64 s[30:31] 2854; 2855; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_1_1: 2856; GFX940: ; %bb.0: 2857; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2858; GFX940-NEXT: ;;#ASMSTART 2859; GFX940-NEXT: ; def v[2:4] 2860; GFX940-NEXT: ;;#ASMEND 2861; GFX940-NEXT: v_mov_b32_e32 v5, 0 2862; GFX940-NEXT: ;;#ASMSTART 2863; GFX940-NEXT: ; def v[0:2] 2864; GFX940-NEXT: ;;#ASMEND 2865; GFX940-NEXT: s_nop 0 2866; GFX940-NEXT: v_mov_b32_e32 v0, v2 2867; GFX940-NEXT: v_mov_b32_e32 v1, v2 2868; GFX940-NEXT: v_mov_b32_e32 v2, v3 2869; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 2870; GFX940-NEXT: s_waitcnt vmcnt(0) 2871; GFX940-NEXT: s_setpc_b64 s[30:31] 2872 %vec0 = call <3 x i32> asm "; def $0", "=v"() 2873 %vec1 = call <3 x i32> asm "; def $0", "=v"() 2874 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 1, i32 1> 2875 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 2876 ret void 2877} 2878 2879define void @v_shuffle_v4i32_v3i32__5_5_u_1(ptr addrspace(1) inreg %ptr) { 2880; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_u_1: 2881; GFX900: ; %bb.0: 2882; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2883; GFX900-NEXT: ;;#ASMSTART 2884; GFX900-NEXT: ; def v[1:3] 2885; GFX900-NEXT: ;;#ASMEND 2886; 
GFX900-NEXT: ;;#ASMSTART 2887; GFX900-NEXT: ; def v[3:5] 2888; GFX900-NEXT: ;;#ASMEND 2889; GFX900-NEXT: v_mov_b32_e32 v6, 0 2890; GFX900-NEXT: v_mov_b32_e32 v0, v5 2891; GFX900-NEXT: v_mov_b32_e32 v1, v5 2892; GFX900-NEXT: v_mov_b32_e32 v3, v2 2893; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] 2894; GFX900-NEXT: s_waitcnt vmcnt(0) 2895; GFX900-NEXT: s_setpc_b64 s[30:31] 2896; 2897; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_u_1: 2898; GFX90A: ; %bb.0: 2899; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2900; GFX90A-NEXT: ;;#ASMSTART 2901; GFX90A-NEXT: ; def v[2:4] 2902; GFX90A-NEXT: ;;#ASMEND 2903; GFX90A-NEXT: ;;#ASMSTART 2904; GFX90A-NEXT: ; def v[0:2] 2905; GFX90A-NEXT: ;;#ASMEND 2906; GFX90A-NEXT: v_mov_b32_e32 v5, 0 2907; GFX90A-NEXT: v_mov_b32_e32 v0, v2 2908; GFX90A-NEXT: v_mov_b32_e32 v1, v2 2909; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] 2910; GFX90A-NEXT: s_waitcnt vmcnt(0) 2911; GFX90A-NEXT: s_setpc_b64 s[30:31] 2912; 2913; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_u_1: 2914; GFX940: ; %bb.0: 2915; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2916; GFX940-NEXT: ;;#ASMSTART 2917; GFX940-NEXT: ; def v[2:4] 2918; GFX940-NEXT: ;;#ASMEND 2919; GFX940-NEXT: v_mov_b32_e32 v5, 0 2920; GFX940-NEXT: ;;#ASMSTART 2921; GFX940-NEXT: ; def v[0:2] 2922; GFX940-NEXT: ;;#ASMEND 2923; GFX940-NEXT: s_nop 0 2924; GFX940-NEXT: v_mov_b32_e32 v0, v2 2925; GFX940-NEXT: v_mov_b32_e32 v1, v2 2926; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 2927; GFX940-NEXT: s_waitcnt vmcnt(0) 2928; GFX940-NEXT: s_setpc_b64 s[30:31] 2929 %vec0 = call <3 x i32> asm "; def $0", "=v"() 2930 %vec1 = call <3 x i32> asm "; def $0", "=v"() 2931 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 poison, i32 1> 2932 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 2933 ret void 2934} 2935 2936define void @v_shuffle_v4i32_v3i32__5_5_0_1(ptr addrspace(1) inreg %ptr) { 2937; GFX900-LABEL: 
v_shuffle_v4i32_v3i32__5_5_0_1: 2938; GFX900: ; %bb.0: 2939; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2940; GFX900-NEXT: ;;#ASMSTART 2941; GFX900-NEXT: ; def v[2:4] 2942; GFX900-NEXT: ;;#ASMEND 2943; GFX900-NEXT: v_mov_b32_e32 v7, 0 2944; GFX900-NEXT: ;;#ASMSTART 2945; GFX900-NEXT: ; def v[4:6] 2946; GFX900-NEXT: ;;#ASMEND 2947; GFX900-NEXT: v_mov_b32_e32 v0, v6 2948; GFX900-NEXT: v_mov_b32_e32 v1, v6 2949; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17] 2950; GFX900-NEXT: s_waitcnt vmcnt(0) 2951; GFX900-NEXT: s_setpc_b64 s[30:31] 2952; 2953; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_0_1: 2954; GFX90A: ; %bb.0: 2955; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2956; GFX90A-NEXT: ;;#ASMSTART 2957; GFX90A-NEXT: ; def v[2:4] 2958; GFX90A-NEXT: ;;#ASMEND 2959; GFX90A-NEXT: v_mov_b32_e32 v7, 0 2960; GFX90A-NEXT: ;;#ASMSTART 2961; GFX90A-NEXT: ; def v[4:6] 2962; GFX90A-NEXT: ;;#ASMEND 2963; GFX90A-NEXT: v_mov_b32_e32 v0, v6 2964; GFX90A-NEXT: v_mov_b32_e32 v1, v6 2965; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17] 2966; GFX90A-NEXT: s_waitcnt vmcnt(0) 2967; GFX90A-NEXT: s_setpc_b64 s[30:31] 2968; 2969; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_0_1: 2970; GFX940: ; %bb.0: 2971; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2972; GFX940-NEXT: ;;#ASMSTART 2973; GFX940-NEXT: ; def v[2:4] 2974; GFX940-NEXT: ;;#ASMEND 2975; GFX940-NEXT: v_mov_b32_e32 v7, 0 2976; GFX940-NEXT: ;;#ASMSTART 2977; GFX940-NEXT: ; def v[4:6] 2978; GFX940-NEXT: ;;#ASMEND 2979; GFX940-NEXT: s_nop 0 2980; GFX940-NEXT: v_mov_b32_e32 v0, v6 2981; GFX940-NEXT: v_mov_b32_e32 v1, v6 2982; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 2983; GFX940-NEXT: s_waitcnt vmcnt(0) 2984; GFX940-NEXT: s_setpc_b64 s[30:31] 2985 %vec0 = call <3 x i32> asm "; def $0", "=v"() 2986 %vec1 = call <3 x i32> asm "; def $0", "=v"() 2987 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 0, i32 1> 2988 store <4 x i32> %shuf, ptr addrspace(1) 
%ptr, align 16 2989 ret void 2990} 2991 2992define void @v_shuffle_v4i32_v3i32__5_5_2_1(ptr addrspace(1) inreg %ptr) { 2993; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_2_1: 2994; GFX900: ; %bb.0: 2995; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2996; GFX900-NEXT: ;;#ASMSTART 2997; GFX900-NEXT: ; def v[2:4] 2998; GFX900-NEXT: ;;#ASMEND 2999; GFX900-NEXT: ;;#ASMSTART 3000; GFX900-NEXT: ; def v[0:2] 3001; GFX900-NEXT: ;;#ASMEND 3002; GFX900-NEXT: v_mov_b32_e32 v5, 0 3003; GFX900-NEXT: v_mov_b32_e32 v0, v2 3004; GFX900-NEXT: v_mov_b32_e32 v1, v2 3005; GFX900-NEXT: v_mov_b32_e32 v2, v4 3006; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] 3007; GFX900-NEXT: s_waitcnt vmcnt(0) 3008; GFX900-NEXT: s_setpc_b64 s[30:31] 3009; 3010; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_2_1: 3011; GFX90A: ; %bb.0: 3012; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3013; GFX90A-NEXT: ;;#ASMSTART 3014; GFX90A-NEXT: ; def v[2:4] 3015; GFX90A-NEXT: ;;#ASMEND 3016; GFX90A-NEXT: ;;#ASMSTART 3017; GFX90A-NEXT: ; def v[0:2] 3018; GFX90A-NEXT: ;;#ASMEND 3019; GFX90A-NEXT: v_mov_b32_e32 v5, 0 3020; GFX90A-NEXT: v_mov_b32_e32 v0, v2 3021; GFX90A-NEXT: v_mov_b32_e32 v1, v2 3022; GFX90A-NEXT: v_mov_b32_e32 v2, v4 3023; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] 3024; GFX90A-NEXT: s_waitcnt vmcnt(0) 3025; GFX90A-NEXT: s_setpc_b64 s[30:31] 3026; 3027; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_2_1: 3028; GFX940: ; %bb.0: 3029; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3030; GFX940-NEXT: ;;#ASMSTART 3031; GFX940-NEXT: ; def v[2:4] 3032; GFX940-NEXT: ;;#ASMEND 3033; GFX940-NEXT: v_mov_b32_e32 v5, 0 3034; GFX940-NEXT: ;;#ASMSTART 3035; GFX940-NEXT: ; def v[0:2] 3036; GFX940-NEXT: ;;#ASMEND 3037; GFX940-NEXT: s_nop 0 3038; GFX940-NEXT: v_mov_b32_e32 v0, v2 3039; GFX940-NEXT: v_mov_b32_e32 v1, v2 3040; GFX940-NEXT: v_mov_b32_e32 v2, v4 3041; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 3042; GFX940-NEXT: s_waitcnt vmcnt(0) 3043; GFX940-NEXT: 
s_setpc_b64 s[30:31] 3044 %vec0 = call <3 x i32> asm "; def $0", "=v"() 3045 %vec1 = call <3 x i32> asm "; def $0", "=v"() 3046 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 2, i32 1> 3047 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 3048 ret void 3049} 3050 3051define void @v_shuffle_v4i32_v3i32__5_5_3_1(ptr addrspace(1) inreg %ptr) { 3052; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_3_1: 3053; GFX900: ; %bb.0: 3054; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3055; GFX900-NEXT: ;;#ASMSTART 3056; GFX900-NEXT: ; def v[2:4] 3057; GFX900-NEXT: ;;#ASMEND 3058; GFX900-NEXT: v_mov_b32_e32 v7, 0 3059; GFX900-NEXT: ;;#ASMSTART 3060; GFX900-NEXT: ; def v[4:6] 3061; GFX900-NEXT: ;;#ASMEND 3062; GFX900-NEXT: v_mov_b32_e32 v0, v6 3063; GFX900-NEXT: v_mov_b32_e32 v1, v6 3064; GFX900-NEXT: v_mov_b32_e32 v2, v4 3065; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17] 3066; GFX900-NEXT: s_waitcnt vmcnt(0) 3067; GFX900-NEXT: s_setpc_b64 s[30:31] 3068; 3069; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_3_1: 3070; GFX90A: ; %bb.0: 3071; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3072; GFX90A-NEXT: ;;#ASMSTART 3073; GFX90A-NEXT: ; def v[2:4] 3074; GFX90A-NEXT: ;;#ASMEND 3075; GFX90A-NEXT: v_mov_b32_e32 v7, 0 3076; GFX90A-NEXT: ;;#ASMSTART 3077; GFX90A-NEXT: ; def v[4:6] 3078; GFX90A-NEXT: ;;#ASMEND 3079; GFX90A-NEXT: v_mov_b32_e32 v0, v6 3080; GFX90A-NEXT: v_mov_b32_e32 v1, v6 3081; GFX90A-NEXT: v_mov_b32_e32 v2, v4 3082; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17] 3083; GFX90A-NEXT: s_waitcnt vmcnt(0) 3084; GFX90A-NEXT: s_setpc_b64 s[30:31] 3085; 3086; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_3_1: 3087; GFX940: ; %bb.0: 3088; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3089; GFX940-NEXT: ;;#ASMSTART 3090; GFX940-NEXT: ; def v[2:4] 3091; GFX940-NEXT: ;;#ASMEND 3092; GFX940-NEXT: v_mov_b32_e32 v7, 0 3093; GFX940-NEXT: ;;#ASMSTART 3094; GFX940-NEXT: ; def v[4:6] 3095; GFX940-NEXT: ;;#ASMEND 3096; 
GFX940-NEXT: s_nop 0 3097; GFX940-NEXT: v_mov_b32_e32 v0, v6 3098; GFX940-NEXT: v_mov_b32_e32 v1, v6 3099; GFX940-NEXT: v_mov_b32_e32 v2, v4 3100; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 3101; GFX940-NEXT: s_waitcnt vmcnt(0) 3102; GFX940-NEXT: s_setpc_b64 s[30:31] 3103 %vec0 = call <3 x i32> asm "; def $0", "=v"() 3104 %vec1 = call <3 x i32> asm "; def $0", "=v"() 3105 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 3, i32 1> 3106 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 3107 ret void 3108} 3109 3110define void @v_shuffle_v4i32_v3i32__5_5_4_1(ptr addrspace(1) inreg %ptr) { 3111; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_4_1: 3112; GFX900: ; %bb.0: 3113; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3114; GFX900-NEXT: ;;#ASMSTART 3115; GFX900-NEXT: ; def v[3:5] 3116; GFX900-NEXT: ;;#ASMEND 3117; GFX900-NEXT: ;;#ASMSTART 3118; GFX900-NEXT: ; def v[1:3] 3119; GFX900-NEXT: ;;#ASMEND 3120; GFX900-NEXT: v_mov_b32_e32 v6, 0 3121; GFX900-NEXT: v_mov_b32_e32 v0, v3 3122; GFX900-NEXT: v_mov_b32_e32 v1, v3 3123; GFX900-NEXT: v_mov_b32_e32 v3, v4 3124; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] 3125; GFX900-NEXT: s_waitcnt vmcnt(0) 3126; GFX900-NEXT: s_setpc_b64 s[30:31] 3127; 3128; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_4_1: 3129; GFX90A: ; %bb.0: 3130; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3131; GFX90A-NEXT: ;;#ASMSTART 3132; GFX90A-NEXT: ; def v[2:4] 3133; GFX90A-NEXT: ;;#ASMEND 3134; GFX90A-NEXT: v_mov_b32_e32 v7, 0 3135; GFX90A-NEXT: ;;#ASMSTART 3136; GFX90A-NEXT: ; def v[4:6] 3137; GFX90A-NEXT: ;;#ASMEND 3138; GFX90A-NEXT: v_mov_b32_e32 v0, v6 3139; GFX90A-NEXT: v_mov_b32_e32 v1, v6 3140; GFX90A-NEXT: v_mov_b32_e32 v2, v5 3141; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17] 3142; GFX90A-NEXT: s_waitcnt vmcnt(0) 3143; GFX90A-NEXT: s_setpc_b64 s[30:31] 3144; 3145; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_4_1: 3146; GFX940: ; %bb.0: 3147; GFX940-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) 3148; GFX940-NEXT: ;;#ASMSTART 3149; GFX940-NEXT: ; def v[2:4] 3150; GFX940-NEXT: ;;#ASMEND 3151; GFX940-NEXT: v_mov_b32_e32 v7, 0 3152; GFX940-NEXT: ;;#ASMSTART 3153; GFX940-NEXT: ; def v[4:6] 3154; GFX940-NEXT: ;;#ASMEND 3155; GFX940-NEXT: s_nop 0 3156; GFX940-NEXT: v_mov_b32_e32 v0, v6 3157; GFX940-NEXT: v_mov_b32_e32 v1, v6 3158; GFX940-NEXT: v_mov_b32_e32 v2, v5 3159; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 3160; GFX940-NEXT: s_waitcnt vmcnt(0) 3161; GFX940-NEXT: s_setpc_b64 s[30:31] 3162 %vec0 = call <3 x i32> asm "; def $0", "=v"() 3163 %vec1 = call <3 x i32> asm "; def $0", "=v"() 3164 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 4, i32 1> 3165 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 3166 ret void 3167} 3168 3169define void @v_shuffle_v4i32_v3i32__u_2_2_2(ptr addrspace(1) inreg %ptr) { 3170; GFX900-LABEL: v_shuffle_v4i32_v3i32__u_2_2_2: 3171; GFX900: ; %bb.0: 3172; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3173; GFX900-NEXT: ;;#ASMSTART 3174; GFX900-NEXT: ; def v[0:2] 3175; GFX900-NEXT: ;;#ASMEND 3176; GFX900-NEXT: v_mov_b32_e32 v4, 0 3177; GFX900-NEXT: v_mov_b32_e32 v1, v2 3178; GFX900-NEXT: v_mov_b32_e32 v3, v2 3179; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] 3180; GFX900-NEXT: s_waitcnt vmcnt(0) 3181; GFX900-NEXT: s_setpc_b64 s[30:31] 3182; 3183; GFX90A-LABEL: v_shuffle_v4i32_v3i32__u_2_2_2: 3184; GFX90A: ; %bb.0: 3185; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3186; GFX90A-NEXT: ;;#ASMSTART 3187; GFX90A-NEXT: ; def v[0:2] 3188; GFX90A-NEXT: ;;#ASMEND 3189; GFX90A-NEXT: v_mov_b32_e32 v4, 0 3190; GFX90A-NEXT: v_mov_b32_e32 v1, v2 3191; GFX90A-NEXT: v_mov_b32_e32 v3, v2 3192; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] 3193; GFX90A-NEXT: s_waitcnt vmcnt(0) 3194; GFX90A-NEXT: s_setpc_b64 s[30:31] 3195; 3196; GFX940-LABEL: v_shuffle_v4i32_v3i32__u_2_2_2: 3197; GFX940: ; %bb.0: 3198; GFX940-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) 3199; GFX940-NEXT: ;;#ASMSTART 3200; GFX940-NEXT: ; def v[0:2] 3201; GFX940-NEXT: ;;#ASMEND 3202; GFX940-NEXT: v_mov_b32_e32 v4, 0 3203; GFX940-NEXT: v_mov_b32_e32 v1, v2 3204; GFX940-NEXT: v_mov_b32_e32 v3, v2 3205; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 3206; GFX940-NEXT: s_waitcnt vmcnt(0) 3207; GFX940-NEXT: s_setpc_b64 s[30:31] 3208 %vec0 = call <3 x i32> asm "; def $0", "=v"() 3209 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> <i32 poison, i32 2, i32 2, i32 2> 3210 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 3211 ret void 3212} 3213 3214define void @v_shuffle_v4i32_v3i32__0_2_2_2(ptr addrspace(1) inreg %ptr) { 3215; GFX900-LABEL: v_shuffle_v4i32_v3i32__0_2_2_2: 3216; GFX900: ; %bb.0: 3217; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3218; GFX900-NEXT: ;;#ASMSTART 3219; GFX900-NEXT: ; def v[0:2] 3220; GFX900-NEXT: ;;#ASMEND 3221; GFX900-NEXT: v_mov_b32_e32 v4, 0 3222; GFX900-NEXT: v_mov_b32_e32 v1, v2 3223; GFX900-NEXT: v_mov_b32_e32 v3, v2 3224; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] 3225; GFX900-NEXT: s_waitcnt vmcnt(0) 3226; GFX900-NEXT: s_setpc_b64 s[30:31] 3227; 3228; GFX90A-LABEL: v_shuffle_v4i32_v3i32__0_2_2_2: 3229; GFX90A: ; %bb.0: 3230; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3231; GFX90A-NEXT: ;;#ASMSTART 3232; GFX90A-NEXT: ; def v[0:2] 3233; GFX90A-NEXT: ;;#ASMEND 3234; GFX90A-NEXT: v_mov_b32_e32 v4, 0 3235; GFX90A-NEXT: v_mov_b32_e32 v1, v2 3236; GFX90A-NEXT: v_mov_b32_e32 v3, v2 3237; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] 3238; GFX90A-NEXT: s_waitcnt vmcnt(0) 3239; GFX90A-NEXT: s_setpc_b64 s[30:31] 3240; 3241; GFX940-LABEL: v_shuffle_v4i32_v3i32__0_2_2_2: 3242; GFX940: ; %bb.0: 3243; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3244; GFX940-NEXT: ;;#ASMSTART 3245; GFX940-NEXT: ; def v[0:2] 3246; GFX940-NEXT: ;;#ASMEND 3247; GFX940-NEXT: v_mov_b32_e32 v4, 0 3248; GFX940-NEXT: v_mov_b32_e32 v1, v2 3249; 
GFX940-NEXT: v_mov_b32_e32 v3, v2 3250; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 3251; GFX940-NEXT: s_waitcnt vmcnt(0) 3252; GFX940-NEXT: s_setpc_b64 s[30:31] 3253 %vec0 = call <3 x i32> asm "; def $0", "=v"() 3254 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> <i32 0, i32 2, i32 2, i32 2> 3255 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 3256 ret void 3257} 3258 3259define void @v_shuffle_v4i32_v3i32__1_2_2_2(ptr addrspace(1) inreg %ptr) { 3260; GFX900-LABEL: v_shuffle_v4i32_v3i32__1_2_2_2: 3261; GFX900: ; %bb.0: 3262; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3263; GFX900-NEXT: ;;#ASMSTART 3264; GFX900-NEXT: ; def v[0:2] 3265; GFX900-NEXT: ;;#ASMEND 3266; GFX900-NEXT: v_mov_b32_e32 v4, 0 3267; GFX900-NEXT: v_mov_b32_e32 v0, v1 3268; GFX900-NEXT: v_mov_b32_e32 v1, v2 3269; GFX900-NEXT: v_mov_b32_e32 v3, v2 3270; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] 3271; GFX900-NEXT: s_waitcnt vmcnt(0) 3272; GFX900-NEXT: s_setpc_b64 s[30:31] 3273; 3274; GFX90A-LABEL: v_shuffle_v4i32_v3i32__1_2_2_2: 3275; GFX90A: ; %bb.0: 3276; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3277; GFX90A-NEXT: ;;#ASMSTART 3278; GFX90A-NEXT: ; def v[0:2] 3279; GFX90A-NEXT: ;;#ASMEND 3280; GFX90A-NEXT: v_mov_b32_e32 v4, 0 3281; GFX90A-NEXT: v_mov_b32_e32 v0, v1 3282; GFX90A-NEXT: v_mov_b32_e32 v1, v2 3283; GFX90A-NEXT: v_mov_b32_e32 v3, v2 3284; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] 3285; GFX90A-NEXT: s_waitcnt vmcnt(0) 3286; GFX90A-NEXT: s_setpc_b64 s[30:31] 3287; 3288; GFX940-LABEL: v_shuffle_v4i32_v3i32__1_2_2_2: 3289; GFX940: ; %bb.0: 3290; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3291; GFX940-NEXT: ;;#ASMSTART 3292; GFX940-NEXT: ; def v[0:2] 3293; GFX940-NEXT: ;;#ASMEND 3294; GFX940-NEXT: v_mov_b32_e32 v4, 0 3295; GFX940-NEXT: v_mov_b32_e32 v0, v1 3296; GFX940-NEXT: v_mov_b32_e32 v1, v2 3297; GFX940-NEXT: v_mov_b32_e32 v3, v2 3298; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] 
sc0 sc1 3299; GFX940-NEXT: s_waitcnt vmcnt(0) 3300; GFX940-NEXT: s_setpc_b64 s[30:31] 3301 %vec0 = call <3 x i32> asm "; def $0", "=v"() 3302 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> <i32 1, i32 2, i32 2, i32 2> 3303 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 3304 ret void 3305} 3306 3307define void @v_shuffle_v4i32_v3i32__2_2_2_2(ptr addrspace(1) inreg %ptr) { 3308; GFX900-LABEL: v_shuffle_v4i32_v3i32__2_2_2_2: 3309; GFX900: ; %bb.0: 3310; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3311; GFX900-NEXT: ;;#ASMSTART 3312; GFX900-NEXT: ; def v[0:2] 3313; GFX900-NEXT: ;;#ASMEND 3314; GFX900-NEXT: v_mov_b32_e32 v4, 0 3315; GFX900-NEXT: v_mov_b32_e32 v0, v2 3316; GFX900-NEXT: v_mov_b32_e32 v1, v2 3317; GFX900-NEXT: v_mov_b32_e32 v3, v2 3318; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] 3319; GFX900-NEXT: s_waitcnt vmcnt(0) 3320; GFX900-NEXT: s_setpc_b64 s[30:31] 3321; 3322; GFX90A-LABEL: v_shuffle_v4i32_v3i32__2_2_2_2: 3323; GFX90A: ; %bb.0: 3324; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3325; GFX90A-NEXT: ;;#ASMSTART 3326; GFX90A-NEXT: ; def v[0:2] 3327; GFX90A-NEXT: ;;#ASMEND 3328; GFX90A-NEXT: v_mov_b32_e32 v4, 0 3329; GFX90A-NEXT: v_mov_b32_e32 v0, v2 3330; GFX90A-NEXT: v_mov_b32_e32 v1, v2 3331; GFX90A-NEXT: v_mov_b32_e32 v3, v2 3332; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] 3333; GFX90A-NEXT: s_waitcnt vmcnt(0) 3334; GFX90A-NEXT: s_setpc_b64 s[30:31] 3335; 3336; GFX940-LABEL: v_shuffle_v4i32_v3i32__2_2_2_2: 3337; GFX940: ; %bb.0: 3338; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3339; GFX940-NEXT: ;;#ASMSTART 3340; GFX940-NEXT: ; def v[0:2] 3341; GFX940-NEXT: ;;#ASMEND 3342; GFX940-NEXT: v_mov_b32_e32 v4, 0 3343; GFX940-NEXT: v_mov_b32_e32 v0, v2 3344; GFX940-NEXT: v_mov_b32_e32 v1, v2 3345; GFX940-NEXT: v_mov_b32_e32 v3, v2 3346; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 3347; GFX940-NEXT: s_waitcnt vmcnt(0) 3348; GFX940-NEXT: s_setpc_b64 s[30:31] 3349 
%vec0 = call <3 x i32> asm "; def $0", "=v"() 3350 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2> 3351 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 3352 ret void 3353} 3354 3355define void @v_shuffle_v4i32_v3i32__3_2_2_2(ptr addrspace(1) inreg %ptr) { 3356; GFX900-LABEL: v_shuffle_v4i32_v3i32__3_2_2_2: 3357; GFX900: ; %bb.0: 3358; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3359; GFX900-NEXT: ;;#ASMSTART 3360; GFX900-NEXT: ; def v[0:2] 3361; GFX900-NEXT: ;;#ASMEND 3362; GFX900-NEXT: v_mov_b32_e32 v4, 0 3363; GFX900-NEXT: v_mov_b32_e32 v1, v2 3364; GFX900-NEXT: v_mov_b32_e32 v3, v2 3365; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] 3366; GFX900-NEXT: s_waitcnt vmcnt(0) 3367; GFX900-NEXT: s_setpc_b64 s[30:31] 3368; 3369; GFX90A-LABEL: v_shuffle_v4i32_v3i32__3_2_2_2: 3370; GFX90A: ; %bb.0: 3371; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3372; GFX90A-NEXT: ;;#ASMSTART 3373; GFX90A-NEXT: ; def v[0:2] 3374; GFX90A-NEXT: ;;#ASMEND 3375; GFX90A-NEXT: v_mov_b32_e32 v4, 0 3376; GFX90A-NEXT: v_mov_b32_e32 v1, v2 3377; GFX90A-NEXT: v_mov_b32_e32 v3, v2 3378; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] 3379; GFX90A-NEXT: s_waitcnt vmcnt(0) 3380; GFX90A-NEXT: s_setpc_b64 s[30:31] 3381; 3382; GFX940-LABEL: v_shuffle_v4i32_v3i32__3_2_2_2: 3383; GFX940: ; %bb.0: 3384; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3385; GFX940-NEXT: ;;#ASMSTART 3386; GFX940-NEXT: ; def v[0:2] 3387; GFX940-NEXT: ;;#ASMEND 3388; GFX940-NEXT: v_mov_b32_e32 v4, 0 3389; GFX940-NEXT: v_mov_b32_e32 v1, v2 3390; GFX940-NEXT: v_mov_b32_e32 v3, v2 3391; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 3392; GFX940-NEXT: s_waitcnt vmcnt(0) 3393; GFX940-NEXT: s_setpc_b64 s[30:31] 3394 %vec0 = call <3 x i32> asm "; def $0", "=v"() 3395 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> <i32 3, i32 2, i32 2, i32 2> 3396 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 3397 
ret void 3398} 3399 3400define void @v_shuffle_v4i32_v3i32__4_2_2_2(ptr addrspace(1) inreg %ptr) { 3401; GFX900-LABEL: v_shuffle_v4i32_v3i32__4_2_2_2: 3402; GFX900: ; %bb.0: 3403; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3404; GFX900-NEXT: ;;#ASMSTART 3405; GFX900-NEXT: ; def v[0:2] 3406; GFX900-NEXT: ;;#ASMEND 3407; GFX900-NEXT: ;;#ASMSTART 3408; GFX900-NEXT: ; def v[3:5] 3409; GFX900-NEXT: ;;#ASMEND 3410; GFX900-NEXT: v_mov_b32_e32 v6, 0 3411; GFX900-NEXT: v_mov_b32_e32 v0, v4 3412; GFX900-NEXT: v_mov_b32_e32 v1, v2 3413; GFX900-NEXT: v_mov_b32_e32 v3, v2 3414; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] 3415; GFX900-NEXT: s_waitcnt vmcnt(0) 3416; GFX900-NEXT: s_setpc_b64 s[30:31] 3417; 3418; GFX90A-LABEL: v_shuffle_v4i32_v3i32__4_2_2_2: 3419; GFX90A: ; %bb.0: 3420; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3421; GFX90A-NEXT: ;;#ASMSTART 3422; GFX90A-NEXT: ; def v[0:2] 3423; GFX90A-NEXT: ;;#ASMEND 3424; GFX90A-NEXT: v_mov_b32_e32 v7, 0 3425; GFX90A-NEXT: ;;#ASMSTART 3426; GFX90A-NEXT: ; def v[4:6] 3427; GFX90A-NEXT: ;;#ASMEND 3428; GFX90A-NEXT: v_mov_b32_e32 v0, v5 3429; GFX90A-NEXT: v_mov_b32_e32 v1, v2 3430; GFX90A-NEXT: v_mov_b32_e32 v3, v2 3431; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17] 3432; GFX90A-NEXT: s_waitcnt vmcnt(0) 3433; GFX90A-NEXT: s_setpc_b64 s[30:31] 3434; 3435; GFX940-LABEL: v_shuffle_v4i32_v3i32__4_2_2_2: 3436; GFX940: ; %bb.0: 3437; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3438; GFX940-NEXT: ;;#ASMSTART 3439; GFX940-NEXT: ; def v[0:2] 3440; GFX940-NEXT: ;;#ASMEND 3441; GFX940-NEXT: v_mov_b32_e32 v7, 0 3442; GFX940-NEXT: ;;#ASMSTART 3443; GFX940-NEXT: ; def v[4:6] 3444; GFX940-NEXT: ;;#ASMEND 3445; GFX940-NEXT: v_mov_b32_e32 v1, v2 3446; GFX940-NEXT: v_mov_b32_e32 v0, v5 3447; GFX940-NEXT: v_mov_b32_e32 v3, v2 3448; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 3449; GFX940-NEXT: s_waitcnt vmcnt(0) 3450; GFX940-NEXT: s_setpc_b64 s[30:31] 3451 %vec0 = call <3 x i32> asm "; 
def $0", "=v"() 3452 %vec1 = call <3 x i32> asm "; def $0", "=v"() 3453 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 4, i32 2, i32 2, i32 2> 3454 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 3455 ret void 3456} 3457 3458define void @v_shuffle_v4i32_v3i32__5_2_2_2(ptr addrspace(1) inreg %ptr) { 3459; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_2_2_2: 3460; GFX900: ; %bb.0: 3461; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3462; GFX900-NEXT: ;;#ASMSTART 3463; GFX900-NEXT: ; def v[0:2] 3464; GFX900-NEXT: ;;#ASMEND 3465; GFX900-NEXT: ;;#ASMSTART 3466; GFX900-NEXT: ; def v[3:5] 3467; GFX900-NEXT: ;;#ASMEND 3468; GFX900-NEXT: v_mov_b32_e32 v6, 0 3469; GFX900-NEXT: v_mov_b32_e32 v0, v5 3470; GFX900-NEXT: v_mov_b32_e32 v1, v2 3471; GFX900-NEXT: v_mov_b32_e32 v3, v2 3472; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] 3473; GFX900-NEXT: s_waitcnt vmcnt(0) 3474; GFX900-NEXT: s_setpc_b64 s[30:31] 3475; 3476; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_2_2_2: 3477; GFX90A: ; %bb.0: 3478; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3479; GFX90A-NEXT: ;;#ASMSTART 3480; GFX90A-NEXT: ; def v[0:2] 3481; GFX90A-NEXT: ;;#ASMEND 3482; GFX90A-NEXT: v_mov_b32_e32 v7, 0 3483; GFX90A-NEXT: ;;#ASMSTART 3484; GFX90A-NEXT: ; def v[4:6] 3485; GFX90A-NEXT: ;;#ASMEND 3486; GFX90A-NEXT: v_mov_b32_e32 v0, v6 3487; GFX90A-NEXT: v_mov_b32_e32 v1, v2 3488; GFX90A-NEXT: v_mov_b32_e32 v3, v2 3489; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17] 3490; GFX90A-NEXT: s_waitcnt vmcnt(0) 3491; GFX90A-NEXT: s_setpc_b64 s[30:31] 3492; 3493; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_2_2_2: 3494; GFX940: ; %bb.0: 3495; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3496; GFX940-NEXT: ;;#ASMSTART 3497; GFX940-NEXT: ; def v[0:2] 3498; GFX940-NEXT: ;;#ASMEND 3499; GFX940-NEXT: v_mov_b32_e32 v7, 0 3500; GFX940-NEXT: ;;#ASMSTART 3501; GFX940-NEXT: ; def v[4:6] 3502; GFX940-NEXT: ;;#ASMEND 3503; GFX940-NEXT: v_mov_b32_e32 v1, v2 3504; GFX940-NEXT: 
v_mov_b32_e32 v0, v6 3505; GFX940-NEXT: v_mov_b32_e32 v3, v2 3506; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 3507; GFX940-NEXT: s_waitcnt vmcnt(0) 3508; GFX940-NEXT: s_setpc_b64 s[30:31] 3509 %vec0 = call <3 x i32> asm "; def $0", "=v"() 3510 %vec1 = call <3 x i32> asm "; def $0", "=v"() 3511 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 2, i32 2, i32 2> 3512 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 3513 ret void 3514} 3515 3516define void @v_shuffle_v4i32_v3i32__5_u_2_2(ptr addrspace(1) inreg %ptr) { 3517; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_u_2_2: 3518; GFX900: ; %bb.0: 3519; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3520; GFX900-NEXT: ;;#ASMSTART 3521; GFX900-NEXT: ; def v[0:2] 3522; GFX900-NEXT: ;;#ASMEND 3523; GFX900-NEXT: ;;#ASMSTART 3524; GFX900-NEXT: ; def v[3:5] 3525; GFX900-NEXT: ;;#ASMEND 3526; GFX900-NEXT: v_mov_b32_e32 v6, 0 3527; GFX900-NEXT: v_mov_b32_e32 v0, v5 3528; GFX900-NEXT: v_mov_b32_e32 v3, v2 3529; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] 3530; GFX900-NEXT: s_waitcnt vmcnt(0) 3531; GFX900-NEXT: s_setpc_b64 s[30:31] 3532; 3533; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_u_2_2: 3534; GFX90A: ; %bb.0: 3535; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3536; GFX90A-NEXT: ;;#ASMSTART 3537; GFX90A-NEXT: ; def v[0:2] 3538; GFX90A-NEXT: ;;#ASMEND 3539; GFX90A-NEXT: v_mov_b32_e32 v7, 0 3540; GFX90A-NEXT: ;;#ASMSTART 3541; GFX90A-NEXT: ; def v[4:6] 3542; GFX90A-NEXT: ;;#ASMEND 3543; GFX90A-NEXT: v_mov_b32_e32 v0, v6 3544; GFX90A-NEXT: v_mov_b32_e32 v3, v2 3545; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17] 3546; GFX90A-NEXT: s_waitcnt vmcnt(0) 3547; GFX90A-NEXT: s_setpc_b64 s[30:31] 3548; 3549; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_u_2_2: 3550; GFX940: ; %bb.0: 3551; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3552; GFX940-NEXT: ;;#ASMSTART 3553; GFX940-NEXT: ; def v[0:2] 3554; GFX940-NEXT: ;;#ASMEND 3555; GFX940-NEXT: v_mov_b32_e32 v7, 
0 3556; GFX940-NEXT: ;;#ASMSTART 3557; GFX940-NEXT: ; def v[4:6] 3558; GFX940-NEXT: ;;#ASMEND 3559; GFX940-NEXT: v_mov_b32_e32 v3, v2 3560; GFX940-NEXT: v_mov_b32_e32 v0, v6 3561; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 3562; GFX940-NEXT: s_waitcnt vmcnt(0) 3563; GFX940-NEXT: s_setpc_b64 s[30:31] 3564 %vec0 = call <3 x i32> asm "; def $0", "=v"() 3565 %vec1 = call <3 x i32> asm "; def $0", "=v"() 3566 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 poison, i32 2, i32 2> 3567 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 3568 ret void 3569} 3570 3571define void @v_shuffle_v4i32_v3i32__5_0_2_2(ptr addrspace(1) inreg %ptr) { 3572; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_0_2_2: 3573; GFX900: ; %bb.0: 3574; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3575; GFX900-NEXT: ;;#ASMSTART 3576; GFX900-NEXT: ; def v[1:3] 3577; GFX900-NEXT: ;;#ASMEND 3578; GFX900-NEXT: v_mov_b32_e32 v7, 0 3579; GFX900-NEXT: ;;#ASMSTART 3580; GFX900-NEXT: ; def v[4:6] 3581; GFX900-NEXT: ;;#ASMEND 3582; GFX900-NEXT: v_mov_b32_e32 v0, v6 3583; GFX900-NEXT: v_mov_b32_e32 v2, v3 3584; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17] 3585; GFX900-NEXT: s_waitcnt vmcnt(0) 3586; GFX900-NEXT: s_setpc_b64 s[30:31] 3587; 3588; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_0_2_2: 3589; GFX90A: ; %bb.0: 3590; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3591; GFX90A-NEXT: ;;#ASMSTART 3592; GFX90A-NEXT: ; def v[2:4] 3593; GFX90A-NEXT: ;;#ASMEND 3594; GFX90A-NEXT: v_mov_b32_e32 v5, 0 3595; GFX90A-NEXT: ;;#ASMSTART 3596; GFX90A-NEXT: ; def v[6:8] 3597; GFX90A-NEXT: ;;#ASMEND 3598; GFX90A-NEXT: v_mov_b32_e32 v0, v8 3599; GFX90A-NEXT: v_mov_b32_e32 v1, v2 3600; GFX90A-NEXT: v_mov_b32_e32 v2, v4 3601; GFX90A-NEXT: v_mov_b32_e32 v3, v4 3602; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] 3603; GFX90A-NEXT: s_waitcnt vmcnt(0) 3604; GFX90A-NEXT: s_setpc_b64 s[30:31] 3605; 3606; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_0_2_2: 3607; GFX940: 
; %bb.0: 3608; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3609; GFX940-NEXT: ;;#ASMSTART 3610; GFX940-NEXT: ; def v[2:4] 3611; GFX940-NEXT: ;;#ASMEND 3612; GFX940-NEXT: v_mov_b32_e32 v5, 0 3613; GFX940-NEXT: ;;#ASMSTART 3614; GFX940-NEXT: ; def v[6:8] 3615; GFX940-NEXT: ;;#ASMEND 3616; GFX940-NEXT: v_mov_b32_e32 v1, v2 3617; GFX940-NEXT: v_mov_b32_e32 v0, v8 3618; GFX940-NEXT: v_mov_b32_e32 v2, v4 3619; GFX940-NEXT: v_mov_b32_e32 v3, v4 3620; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 3621; GFX940-NEXT: s_waitcnt vmcnt(0) 3622; GFX940-NEXT: s_setpc_b64 s[30:31] 3623 %vec0 = call <3 x i32> asm "; def $0", "=v"() 3624 %vec1 = call <3 x i32> asm "; def $0", "=v"() 3625 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 0, i32 2, i32 2> 3626 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 3627 ret void 3628} 3629 3630define void @v_shuffle_v4i32_v3i32__5_1_2_2(ptr addrspace(1) inreg %ptr) { 3631; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_1_2_2: 3632; GFX900: ; %bb.0: 3633; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3634; GFX900-NEXT: ;;#ASMSTART 3635; GFX900-NEXT: ; def v[0:2] 3636; GFX900-NEXT: ;;#ASMEND 3637; GFX900-NEXT: ;;#ASMSTART 3638; GFX900-NEXT: ; def v[3:5] 3639; GFX900-NEXT: ;;#ASMEND 3640; GFX900-NEXT: v_mov_b32_e32 v6, 0 3641; GFX900-NEXT: v_mov_b32_e32 v0, v5 3642; GFX900-NEXT: v_mov_b32_e32 v3, v2 3643; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] 3644; GFX900-NEXT: s_waitcnt vmcnt(0) 3645; GFX900-NEXT: s_setpc_b64 s[30:31] 3646; 3647; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_1_2_2: 3648; GFX90A: ; %bb.0: 3649; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3650; GFX90A-NEXT: ;;#ASMSTART 3651; GFX90A-NEXT: ; def v[0:2] 3652; GFX90A-NEXT: ;;#ASMEND 3653; GFX90A-NEXT: v_mov_b32_e32 v7, 0 3654; GFX90A-NEXT: ;;#ASMSTART 3655; GFX90A-NEXT: ; def v[4:6] 3656; GFX90A-NEXT: ;;#ASMEND 3657; GFX90A-NEXT: v_mov_b32_e32 v0, v6 3658; GFX90A-NEXT: v_mov_b32_e32 v3, v2 3659; GFX90A-NEXT: 
global_store_dwordx4 v7, v[0:3], s[16:17] 3660; GFX90A-NEXT: s_waitcnt vmcnt(0) 3661; GFX90A-NEXT: s_setpc_b64 s[30:31] 3662; 3663; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_1_2_2: 3664; GFX940: ; %bb.0: 3665; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3666; GFX940-NEXT: ;;#ASMSTART 3667; GFX940-NEXT: ; def v[0:2] 3668; GFX940-NEXT: ;;#ASMEND 3669; GFX940-NEXT: v_mov_b32_e32 v7, 0 3670; GFX940-NEXT: ;;#ASMSTART 3671; GFX940-NEXT: ; def v[4:6] 3672; GFX940-NEXT: ;;#ASMEND 3673; GFX940-NEXT: v_mov_b32_e32 v3, v2 3674; GFX940-NEXT: v_mov_b32_e32 v0, v6 3675; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 3676; GFX940-NEXT: s_waitcnt vmcnt(0) 3677; GFX940-NEXT: s_setpc_b64 s[30:31] 3678 %vec0 = call <3 x i32> asm "; def $0", "=v"() 3679 %vec1 = call <3 x i32> asm "; def $0", "=v"() 3680 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 1, i32 2, i32 2> 3681 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 3682 ret void 3683} 3684 3685define void @v_shuffle_v4i32_v3i32__5_3_2_2(ptr addrspace(1) inreg %ptr) { 3686; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_3_2_2: 3687; GFX900: ; %bb.0: 3688; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3689; GFX900-NEXT: ;;#ASMSTART 3690; GFX900-NEXT: ; def v[2:4] 3691; GFX900-NEXT: ;;#ASMEND 3692; GFX900-NEXT: ;;#ASMSTART 3693; GFX900-NEXT: ; def v[1:3] 3694; GFX900-NEXT: ;;#ASMEND 3695; GFX900-NEXT: v_mov_b32_e32 v5, 0 3696; GFX900-NEXT: v_mov_b32_e32 v0, v3 3697; GFX900-NEXT: v_mov_b32_e32 v2, v4 3698; GFX900-NEXT: v_mov_b32_e32 v3, v4 3699; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] 3700; GFX900-NEXT: s_waitcnt vmcnt(0) 3701; GFX900-NEXT: s_setpc_b64 s[30:31] 3702; 3703; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_3_2_2: 3704; GFX90A: ; %bb.0: 3705; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3706; GFX90A-NEXT: ;;#ASMSTART 3707; GFX90A-NEXT: ; def v[0:2] 3708; GFX90A-NEXT: ;;#ASMEND 3709; GFX90A-NEXT: v_mov_b32_e32 v7, 0 3710; GFX90A-NEXT: ;;#ASMSTART 3711; 
GFX90A-NEXT: ; def v[4:6] 3712; GFX90A-NEXT: ;;#ASMEND 3713; GFX90A-NEXT: v_mov_b32_e32 v0, v6 3714; GFX90A-NEXT: v_mov_b32_e32 v1, v4 3715; GFX90A-NEXT: v_mov_b32_e32 v3, v2 3716; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17] 3717; GFX90A-NEXT: s_waitcnt vmcnt(0) 3718; GFX90A-NEXT: s_setpc_b64 s[30:31] 3719; 3720; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_3_2_2: 3721; GFX940: ; %bb.0: 3722; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3723; GFX940-NEXT: ;;#ASMSTART 3724; GFX940-NEXT: ; def v[0:2] 3725; GFX940-NEXT: ;;#ASMEND 3726; GFX940-NEXT: v_mov_b32_e32 v7, 0 3727; GFX940-NEXT: ;;#ASMSTART 3728; GFX940-NEXT: ; def v[4:6] 3729; GFX940-NEXT: ;;#ASMEND 3730; GFX940-NEXT: v_mov_b32_e32 v3, v2 3731; GFX940-NEXT: v_mov_b32_e32 v0, v6 3732; GFX940-NEXT: v_mov_b32_e32 v1, v4 3733; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 3734; GFX940-NEXT: s_waitcnt vmcnt(0) 3735; GFX940-NEXT: s_setpc_b64 s[30:31] 3736 %vec0 = call <3 x i32> asm "; def $0", "=v"() 3737 %vec1 = call <3 x i32> asm "; def $0", "=v"() 3738 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 3, i32 2, i32 2> 3739 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 3740 ret void 3741} 3742 3743define void @v_shuffle_v4i32_v3i32__5_4_2_2(ptr addrspace(1) inreg %ptr) { 3744; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_4_2_2: 3745; GFX900: ; %bb.0: 3746; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3747; GFX900-NEXT: ;;#ASMSTART 3748; GFX900-NEXT: ; def v[1:3] 3749; GFX900-NEXT: ;;#ASMEND 3750; GFX900-NEXT: ;;#ASMSTART 3751; GFX900-NEXT: ; def v[0:2] 3752; GFX900-NEXT: ;;#ASMEND 3753; GFX900-NEXT: v_mov_b32_e32 v4, 0 3754; GFX900-NEXT: v_mov_b32_e32 v0, v2 3755; GFX900-NEXT: v_mov_b32_e32 v2, v3 3756; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] 3757; GFX900-NEXT: s_waitcnt vmcnt(0) 3758; GFX900-NEXT: s_setpc_b64 s[30:31] 3759; 3760; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_4_2_2: 3761; GFX90A: ; %bb.0: 3762; GFX90A-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) 3763; GFX90A-NEXT: ;;#ASMSTART 3764; GFX90A-NEXT: ; def v[2:4] 3765; GFX90A-NEXT: ;;#ASMEND 3766; GFX90A-NEXT: ;;#ASMSTART 3767; GFX90A-NEXT: ; def v[0:2] 3768; GFX90A-NEXT: ;;#ASMEND 3769; GFX90A-NEXT: v_mov_b32_e32 v5, 0 3770; GFX90A-NEXT: v_mov_b32_e32 v0, v2 3771; GFX90A-NEXT: v_mov_b32_e32 v2, v4 3772; GFX90A-NEXT: v_mov_b32_e32 v3, v4 3773; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] 3774; GFX90A-NEXT: s_waitcnt vmcnt(0) 3775; GFX90A-NEXT: s_setpc_b64 s[30:31] 3776; 3777; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_4_2_2: 3778; GFX940: ; %bb.0: 3779; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3780; GFX940-NEXT: ;;#ASMSTART 3781; GFX940-NEXT: ; def v[2:4] 3782; GFX940-NEXT: ;;#ASMEND 3783; GFX940-NEXT: v_mov_b32_e32 v5, 0 3784; GFX940-NEXT: ;;#ASMSTART 3785; GFX940-NEXT: ; def v[0:2] 3786; GFX940-NEXT: ;;#ASMEND 3787; GFX940-NEXT: v_mov_b32_e32 v3, v4 3788; GFX940-NEXT: v_mov_b32_e32 v0, v2 3789; GFX940-NEXT: v_mov_b32_e32 v2, v4 3790; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 3791; GFX940-NEXT: s_waitcnt vmcnt(0) 3792; GFX940-NEXT: s_setpc_b64 s[30:31] 3793 %vec0 = call <3 x i32> asm "; def $0", "=v"() 3794 %vec1 = call <3 x i32> asm "; def $0", "=v"() 3795 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 4, i32 2, i32 2> 3796 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 3797 ret void 3798} 3799 3800define void @v_shuffle_v4i32_v3i32__5_5_2_2(ptr addrspace(1) inreg %ptr) { 3801; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_2_2: 3802; GFX900: ; %bb.0: 3803; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3804; GFX900-NEXT: ;;#ASMSTART 3805; GFX900-NEXT: ; def v[0:2] 3806; GFX900-NEXT: ;;#ASMEND 3807; GFX900-NEXT: ;;#ASMSTART 3808; GFX900-NEXT: ; def v[3:5] 3809; GFX900-NEXT: ;;#ASMEND 3810; GFX900-NEXT: v_mov_b32_e32 v6, 0 3811; GFX900-NEXT: v_mov_b32_e32 v0, v5 3812; GFX900-NEXT: v_mov_b32_e32 v1, v5 3813; GFX900-NEXT: v_mov_b32_e32 v3, v2 3814; 
GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] 3815; GFX900-NEXT: s_waitcnt vmcnt(0) 3816; GFX900-NEXT: s_setpc_b64 s[30:31] 3817; 3818; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_2_2: 3819; GFX90A: ; %bb.0: 3820; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3821; GFX90A-NEXT: ;;#ASMSTART 3822; GFX90A-NEXT: ; def v[0:2] 3823; GFX90A-NEXT: ;;#ASMEND 3824; GFX90A-NEXT: v_mov_b32_e32 v7, 0 3825; GFX90A-NEXT: ;;#ASMSTART 3826; GFX90A-NEXT: ; def v[4:6] 3827; GFX90A-NEXT: ;;#ASMEND 3828; GFX90A-NEXT: v_mov_b32_e32 v0, v6 3829; GFX90A-NEXT: v_mov_b32_e32 v1, v6 3830; GFX90A-NEXT: v_mov_b32_e32 v3, v2 3831; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17] 3832; GFX90A-NEXT: s_waitcnt vmcnt(0) 3833; GFX90A-NEXT: s_setpc_b64 s[30:31] 3834; 3835; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_2_2: 3836; GFX940: ; %bb.0: 3837; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3838; GFX940-NEXT: ;;#ASMSTART 3839; GFX940-NEXT: ; def v[0:2] 3840; GFX940-NEXT: ;;#ASMEND 3841; GFX940-NEXT: v_mov_b32_e32 v7, 0 3842; GFX940-NEXT: ;;#ASMSTART 3843; GFX940-NEXT: ; def v[4:6] 3844; GFX940-NEXT: ;;#ASMEND 3845; GFX940-NEXT: v_mov_b32_e32 v3, v2 3846; GFX940-NEXT: v_mov_b32_e32 v0, v6 3847; GFX940-NEXT: v_mov_b32_e32 v1, v6 3848; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 3849; GFX940-NEXT: s_waitcnt vmcnt(0) 3850; GFX940-NEXT: s_setpc_b64 s[30:31] 3851 %vec0 = call <3 x i32> asm "; def $0", "=v"() 3852 %vec1 = call <3 x i32> asm "; def $0", "=v"() 3853 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 2, i32 2> 3854 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 3855 ret void 3856} 3857 3858define void @v_shuffle_v4i32_v3i32__5_5_u_2(ptr addrspace(1) inreg %ptr) { 3859; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_u_2: 3860; GFX900: ; %bb.0: 3861; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3862; GFX900-NEXT: ;;#ASMSTART 3863; GFX900-NEXT: ; def v[1:3] 3864; GFX900-NEXT: ;;#ASMEND 3865; GFX900-NEXT: 
;;#ASMSTART 3866; GFX900-NEXT: ; def v[0:2] 3867; GFX900-NEXT: ;;#ASMEND 3868; GFX900-NEXT: v_mov_b32_e32 v4, 0 3869; GFX900-NEXT: v_mov_b32_e32 v0, v2 3870; GFX900-NEXT: v_mov_b32_e32 v1, v2 3871; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] 3872; GFX900-NEXT: s_waitcnt vmcnt(0) 3873; GFX900-NEXT: s_setpc_b64 s[30:31] 3874; 3875; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_u_2: 3876; GFX90A: ; %bb.0: 3877; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3878; GFX90A-NEXT: ;;#ASMSTART 3879; GFX90A-NEXT: ; def v[0:2] 3880; GFX90A-NEXT: ;;#ASMEND 3881; GFX90A-NEXT: v_mov_b32_e32 v7, 0 3882; GFX90A-NEXT: ;;#ASMSTART 3883; GFX90A-NEXT: ; def v[4:6] 3884; GFX90A-NEXT: ;;#ASMEND 3885; GFX90A-NEXT: v_mov_b32_e32 v0, v6 3886; GFX90A-NEXT: v_mov_b32_e32 v1, v6 3887; GFX90A-NEXT: v_mov_b32_e32 v3, v2 3888; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17] 3889; GFX90A-NEXT: s_waitcnt vmcnt(0) 3890; GFX90A-NEXT: s_setpc_b64 s[30:31] 3891; 3892; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_u_2: 3893; GFX940: ; %bb.0: 3894; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3895; GFX940-NEXT: ;;#ASMSTART 3896; GFX940-NEXT: ; def v[0:2] 3897; GFX940-NEXT: ;;#ASMEND 3898; GFX940-NEXT: v_mov_b32_e32 v7, 0 3899; GFX940-NEXT: ;;#ASMSTART 3900; GFX940-NEXT: ; def v[4:6] 3901; GFX940-NEXT: ;;#ASMEND 3902; GFX940-NEXT: v_mov_b32_e32 v3, v2 3903; GFX940-NEXT: v_mov_b32_e32 v0, v6 3904; GFX940-NEXT: v_mov_b32_e32 v1, v6 3905; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 3906; GFX940-NEXT: s_waitcnt vmcnt(0) 3907; GFX940-NEXT: s_setpc_b64 s[30:31] 3908 %vec0 = call <3 x i32> asm "; def $0", "=v"() 3909 %vec1 = call <3 x i32> asm "; def $0", "=v"() 3910 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 poison, i32 2> 3911 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 3912 ret void 3913} 3914 3915define void @v_shuffle_v4i32_v3i32__5_5_0_2(ptr addrspace(1) inreg %ptr) { 3916; GFX900-LABEL: 
v_shuffle_v4i32_v3i32__5_5_0_2: 3917; GFX900: ; %bb.0: 3918; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3919; GFX900-NEXT: ;;#ASMSTART 3920; GFX900-NEXT: ; def v[2:4] 3921; GFX900-NEXT: ;;#ASMEND 3922; GFX900-NEXT: v_mov_b32_e32 v8, 0 3923; GFX900-NEXT: ;;#ASMSTART 3924; GFX900-NEXT: ; def v[5:7] 3925; GFX900-NEXT: ;;#ASMEND 3926; GFX900-NEXT: v_mov_b32_e32 v0, v7 3927; GFX900-NEXT: v_mov_b32_e32 v1, v7 3928; GFX900-NEXT: v_mov_b32_e32 v3, v4 3929; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] 3930; GFX900-NEXT: s_waitcnt vmcnt(0) 3931; GFX900-NEXT: s_setpc_b64 s[30:31] 3932; 3933; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_0_2: 3934; GFX90A: ; %bb.0: 3935; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3936; GFX90A-NEXT: ;;#ASMSTART 3937; GFX90A-NEXT: ; def v[2:4] 3938; GFX90A-NEXT: ;;#ASMEND 3939; GFX90A-NEXT: v_mov_b32_e32 v5, 0 3940; GFX90A-NEXT: ;;#ASMSTART 3941; GFX90A-NEXT: ; def v[6:8] 3942; GFX90A-NEXT: ;;#ASMEND 3943; GFX90A-NEXT: v_mov_b32_e32 v0, v8 3944; GFX90A-NEXT: v_mov_b32_e32 v1, v8 3945; GFX90A-NEXT: v_mov_b32_e32 v3, v4 3946; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] 3947; GFX90A-NEXT: s_waitcnt vmcnt(0) 3948; GFX90A-NEXT: s_setpc_b64 s[30:31] 3949; 3950; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_0_2: 3951; GFX940: ; %bb.0: 3952; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3953; GFX940-NEXT: ;;#ASMSTART 3954; GFX940-NEXT: ; def v[2:4] 3955; GFX940-NEXT: ;;#ASMEND 3956; GFX940-NEXT: v_mov_b32_e32 v5, 0 3957; GFX940-NEXT: ;;#ASMSTART 3958; GFX940-NEXT: ; def v[6:8] 3959; GFX940-NEXT: ;;#ASMEND 3960; GFX940-NEXT: v_mov_b32_e32 v3, v4 3961; GFX940-NEXT: v_mov_b32_e32 v0, v8 3962; GFX940-NEXT: v_mov_b32_e32 v1, v8 3963; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 3964; GFX940-NEXT: s_waitcnt vmcnt(0) 3965; GFX940-NEXT: s_setpc_b64 s[30:31] 3966 %vec0 = call <3 x i32> asm "; def $0", "=v"() 3967 %vec1 = call <3 x i32> asm "; def $0", "=v"() 3968 %shuf = shufflevector <3 x i32> %vec0, <3 x 
i32> %vec1, <4 x i32> <i32 5, i32 5, i32 0, i32 2> 3969 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 3970 ret void 3971} 3972 3973define void @v_shuffle_v4i32_v3i32__5_5_1_2(ptr addrspace(1) inreg %ptr) { 3974; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_1_2: 3975; GFX900: ; %bb.0: 3976; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3977; GFX900-NEXT: ;;#ASMSTART 3978; GFX900-NEXT: ; def v[1:3] 3979; GFX900-NEXT: ;;#ASMEND 3980; GFX900-NEXT: v_mov_b32_e32 v7, 0 3981; GFX900-NEXT: ;;#ASMSTART 3982; GFX900-NEXT: ; def v[4:6] 3983; GFX900-NEXT: ;;#ASMEND 3984; GFX900-NEXT: v_mov_b32_e32 v0, v6 3985; GFX900-NEXT: v_mov_b32_e32 v1, v6 3986; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17] 3987; GFX900-NEXT: s_waitcnt vmcnt(0) 3988; GFX900-NEXT: s_setpc_b64 s[30:31] 3989; 3990; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_1_2: 3991; GFX90A: ; %bb.0: 3992; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3993; GFX90A-NEXT: ;;#ASMSTART 3994; GFX90A-NEXT: ; def v[2:4] 3995; GFX90A-NEXT: ;;#ASMEND 3996; GFX90A-NEXT: ;;#ASMSTART 3997; GFX90A-NEXT: ; def v[0:2] 3998; GFX90A-NEXT: ;;#ASMEND 3999; GFX90A-NEXT: v_mov_b32_e32 v5, 0 4000; GFX90A-NEXT: v_mov_b32_e32 v0, v2 4001; GFX90A-NEXT: v_mov_b32_e32 v1, v2 4002; GFX90A-NEXT: v_mov_b32_e32 v2, v3 4003; GFX90A-NEXT: v_mov_b32_e32 v3, v4 4004; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] 4005; GFX90A-NEXT: s_waitcnt vmcnt(0) 4006; GFX90A-NEXT: s_setpc_b64 s[30:31] 4007; 4008; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_1_2: 4009; GFX940: ; %bb.0: 4010; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4011; GFX940-NEXT: ;;#ASMSTART 4012; GFX940-NEXT: ; def v[2:4] 4013; GFX940-NEXT: ;;#ASMEND 4014; GFX940-NEXT: v_mov_b32_e32 v5, 0 4015; GFX940-NEXT: ;;#ASMSTART 4016; GFX940-NEXT: ; def v[0:2] 4017; GFX940-NEXT: ;;#ASMEND 4018; GFX940-NEXT: s_nop 0 4019; GFX940-NEXT: v_mov_b32_e32 v0, v2 4020; GFX940-NEXT: v_mov_b32_e32 v1, v2 4021; GFX940-NEXT: v_mov_b32_e32 v2, v3 4022; GFX940-NEXT: v_mov_b32_e32 v3, 
v4 4023; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 4024; GFX940-NEXT: s_waitcnt vmcnt(0) 4025; GFX940-NEXT: s_setpc_b64 s[30:31] 4026 %vec0 = call <3 x i32> asm "; def $0", "=v"() 4027 %vec1 = call <3 x i32> asm "; def $0", "=v"() 4028 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 1, i32 2> 4029 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 4030 ret void 4031} 4032 4033define void @v_shuffle_v4i32_v3i32__5_5_3_2(ptr addrspace(1) inreg %ptr) { 4034; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_3_2: 4035; GFX900: ; %bb.0: 4036; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4037; GFX900-NEXT: ;;#ASMSTART 4038; GFX900-NEXT: ; def v[1:3] 4039; GFX900-NEXT: ;;#ASMEND 4040; GFX900-NEXT: v_mov_b32_e32 v7, 0 4041; GFX900-NEXT: ;;#ASMSTART 4042; GFX900-NEXT: ; def v[4:6] 4043; GFX900-NEXT: ;;#ASMEND 4044; GFX900-NEXT: v_mov_b32_e32 v0, v6 4045; GFX900-NEXT: v_mov_b32_e32 v1, v6 4046; GFX900-NEXT: v_mov_b32_e32 v2, v4 4047; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17] 4048; GFX900-NEXT: s_waitcnt vmcnt(0) 4049; GFX900-NEXT: s_setpc_b64 s[30:31] 4050; 4051; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_3_2: 4052; GFX90A: ; %bb.0: 4053; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4054; GFX90A-NEXT: ;;#ASMSTART 4055; GFX90A-NEXT: ; def v[2:4] 4056; GFX90A-NEXT: ;;#ASMEND 4057; GFX90A-NEXT: v_mov_b32_e32 v5, 0 4058; GFX90A-NEXT: ;;#ASMSTART 4059; GFX90A-NEXT: ; def v[6:8] 4060; GFX90A-NEXT: ;;#ASMEND 4061; GFX90A-NEXT: v_mov_b32_e32 v0, v8 4062; GFX90A-NEXT: v_mov_b32_e32 v1, v8 4063; GFX90A-NEXT: v_mov_b32_e32 v2, v6 4064; GFX90A-NEXT: v_mov_b32_e32 v3, v4 4065; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] 4066; GFX90A-NEXT: s_waitcnt vmcnt(0) 4067; GFX90A-NEXT: s_setpc_b64 s[30:31] 4068; 4069; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_3_2: 4070; GFX940: ; %bb.0: 4071; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4072; GFX940-NEXT: ;;#ASMSTART 4073; GFX940-NEXT: ; def v[2:4] 
4074; GFX940-NEXT: ;;#ASMEND 4075; GFX940-NEXT: v_mov_b32_e32 v5, 0 4076; GFX940-NEXT: ;;#ASMSTART 4077; GFX940-NEXT: ; def v[6:8] 4078; GFX940-NEXT: ;;#ASMEND 4079; GFX940-NEXT: v_mov_b32_e32 v3, v4 4080; GFX940-NEXT: v_mov_b32_e32 v0, v8 4081; GFX940-NEXT: v_mov_b32_e32 v1, v8 4082; GFX940-NEXT: v_mov_b32_e32 v2, v6 4083; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 4084; GFX940-NEXT: s_waitcnt vmcnt(0) 4085; GFX940-NEXT: s_setpc_b64 s[30:31] 4086 %vec0 = call <3 x i32> asm "; def $0", "=v"() 4087 %vec1 = call <3 x i32> asm "; def $0", "=v"() 4088 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 3, i32 2> 4089 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 4090 ret void 4091} 4092 4093define void @v_shuffle_v4i32_v3i32__5_5_4_2(ptr addrspace(1) inreg %ptr) { 4094; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_4_2: 4095; GFX900: ; %bb.0: 4096; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4097; GFX900-NEXT: ;;#ASMSTART 4098; GFX900-NEXT: ; def v[2:4] 4099; GFX900-NEXT: ;;#ASMEND 4100; GFX900-NEXT: ;;#ASMSTART 4101; GFX900-NEXT: ; def v[1:3] 4102; GFX900-NEXT: ;;#ASMEND 4103; GFX900-NEXT: v_mov_b32_e32 v5, 0 4104; GFX900-NEXT: v_mov_b32_e32 v0, v3 4105; GFX900-NEXT: v_mov_b32_e32 v1, v3 4106; GFX900-NEXT: v_mov_b32_e32 v3, v4 4107; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] 4108; GFX900-NEXT: s_waitcnt vmcnt(0) 4109; GFX900-NEXT: s_setpc_b64 s[30:31] 4110; 4111; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_4_2: 4112; GFX90A: ; %bb.0: 4113; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4114; GFX90A-NEXT: ;;#ASMSTART 4115; GFX90A-NEXT: ; def v[2:4] 4116; GFX90A-NEXT: ;;#ASMEND 4117; GFX90A-NEXT: v_mov_b32_e32 v5, 0 4118; GFX90A-NEXT: ;;#ASMSTART 4119; GFX90A-NEXT: ; def v[6:8] 4120; GFX90A-NEXT: ;;#ASMEND 4121; GFX90A-NEXT: v_mov_b32_e32 v0, v8 4122; GFX90A-NEXT: v_mov_b32_e32 v1, v8 4123; GFX90A-NEXT: v_mov_b32_e32 v2, v7 4124; GFX90A-NEXT: v_mov_b32_e32 v3, v4 4125; GFX90A-NEXT: 
global_store_dwordx4 v5, v[0:3], s[16:17] 4126; GFX90A-NEXT: s_waitcnt vmcnt(0) 4127; GFX90A-NEXT: s_setpc_b64 s[30:31] 4128; 4129; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_4_2: 4130; GFX940: ; %bb.0: 4131; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4132; GFX940-NEXT: ;;#ASMSTART 4133; GFX940-NEXT: ; def v[2:4] 4134; GFX940-NEXT: ;;#ASMEND 4135; GFX940-NEXT: v_mov_b32_e32 v5, 0 4136; GFX940-NEXT: ;;#ASMSTART 4137; GFX940-NEXT: ; def v[6:8] 4138; GFX940-NEXT: ;;#ASMEND 4139; GFX940-NEXT: v_mov_b32_e32 v3, v4 4140; GFX940-NEXT: v_mov_b32_e32 v0, v8 4141; GFX940-NEXT: v_mov_b32_e32 v1, v8 4142; GFX940-NEXT: v_mov_b32_e32 v2, v7 4143; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 4144; GFX940-NEXT: s_waitcnt vmcnt(0) 4145; GFX940-NEXT: s_setpc_b64 s[30:31] 4146 %vec0 = call <3 x i32> asm "; def $0", "=v"() 4147 %vec1 = call <3 x i32> asm "; def $0", "=v"() 4148 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 4, i32 2> 4149 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 4150 ret void 4151} 4152 4153define void @v_shuffle_v4i32_v3i32__u_3_3_3(ptr addrspace(1) inreg %ptr) { 4154; GFX9-LABEL: v_shuffle_v4i32_v3i32__u_3_3_3: 4155; GFX9: ; %bb.0: 4156; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4157; GFX9-NEXT: s_setpc_b64 s[30:31] 4158 %vec0 = call <3 x i32> asm "; def $0", "=v"() 4159 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> <i32 poison, i32 3, i32 3, i32 3> 4160 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 4161 ret void 4162} 4163 4164define void @v_shuffle_v4i32_v3i32__0_3_3_3(ptr addrspace(1) inreg %ptr) { 4165; GFX900-LABEL: v_shuffle_v4i32_v3i32__0_3_3_3: 4166; GFX900: ; %bb.0: 4167; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4168; GFX900-NEXT: v_mov_b32_e32 v3, 0 4169; GFX900-NEXT: ;;#ASMSTART 4170; GFX900-NEXT: ; def v[0:2] 4171; GFX900-NEXT: ;;#ASMEND 4172; GFX900-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17] 4173; GFX900-NEXT: s_waitcnt 
vmcnt(0) 4174; GFX900-NEXT: s_setpc_b64 s[30:31] 4175; 4176; GFX90A-LABEL: v_shuffle_v4i32_v3i32__0_3_3_3: 4177; GFX90A: ; %bb.0: 4178; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4179; GFX90A-NEXT: v_mov_b32_e32 v3, 0 4180; GFX90A-NEXT: ;;#ASMSTART 4181; GFX90A-NEXT: ; def v[0:2] 4182; GFX90A-NEXT: ;;#ASMEND 4183; GFX90A-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17] 4184; GFX90A-NEXT: s_waitcnt vmcnt(0) 4185; GFX90A-NEXT: s_setpc_b64 s[30:31] 4186; 4187; GFX940-LABEL: v_shuffle_v4i32_v3i32__0_3_3_3: 4188; GFX940: ; %bb.0: 4189; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4190; GFX940-NEXT: v_mov_b32_e32 v3, 0 4191; GFX940-NEXT: ;;#ASMSTART 4192; GFX940-NEXT: ; def v[0:2] 4193; GFX940-NEXT: ;;#ASMEND 4194; GFX940-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1] sc0 sc1 4195; GFX940-NEXT: s_waitcnt vmcnt(0) 4196; GFX940-NEXT: s_setpc_b64 s[30:31] 4197 %vec0 = call <3 x i32> asm "; def $0", "=v"() 4198 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> <i32 0, i32 3, i32 3, i32 3> 4199 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 4200 ret void 4201} 4202 4203define void @v_shuffle_v4i32_v3i32__1_3_3_3(ptr addrspace(1) inreg %ptr) { 4204; GFX900-LABEL: v_shuffle_v4i32_v3i32__1_3_3_3: 4205; GFX900: ; %bb.0: 4206; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4207; GFX900-NEXT: ;;#ASMSTART 4208; GFX900-NEXT: ; def v[0:2] 4209; GFX900-NEXT: ;;#ASMEND 4210; GFX900-NEXT: v_mov_b32_e32 v3, 0 4211; GFX900-NEXT: v_mov_b32_e32 v0, v1 4212; GFX900-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17] 4213; GFX900-NEXT: s_waitcnt vmcnt(0) 4214; GFX900-NEXT: s_setpc_b64 s[30:31] 4215; 4216; GFX90A-LABEL: v_shuffle_v4i32_v3i32__1_3_3_3: 4217; GFX90A: ; %bb.0: 4218; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4219; GFX90A-NEXT: ;;#ASMSTART 4220; GFX90A-NEXT: ; def v[0:2] 4221; GFX90A-NEXT: ;;#ASMEND 4222; GFX90A-NEXT: v_mov_b32_e32 v3, 0 4223; GFX90A-NEXT: v_mov_b32_e32 v0, v1 4224; GFX90A-NEXT: global_store_dwordx4 v3, v[0:3], 
s[16:17] 4225; GFX90A-NEXT: s_waitcnt vmcnt(0) 4226; GFX90A-NEXT: s_setpc_b64 s[30:31] 4227; 4228; GFX940-LABEL: v_shuffle_v4i32_v3i32__1_3_3_3: 4229; GFX940: ; %bb.0: 4230; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4231; GFX940-NEXT: ;;#ASMSTART 4232; GFX940-NEXT: ; def v[0:2] 4233; GFX940-NEXT: ;;#ASMEND 4234; GFX940-NEXT: v_mov_b32_e32 v3, 0 4235; GFX940-NEXT: v_mov_b32_e32 v0, v1 4236; GFX940-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1] sc0 sc1 4237; GFX940-NEXT: s_waitcnt vmcnt(0) 4238; GFX940-NEXT: s_setpc_b64 s[30:31] 4239 %vec0 = call <3 x i32> asm "; def $0", "=v"() 4240 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> <i32 1, i32 3, i32 3, i32 3> 4241 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 4242 ret void 4243} 4244 4245define void @v_shuffle_v4i32_v3i32__2_3_3_3(ptr addrspace(1) inreg %ptr) { 4246; GFX900-LABEL: v_shuffle_v4i32_v3i32__2_3_3_3: 4247; GFX900: ; %bb.0: 4248; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4249; GFX900-NEXT: ;;#ASMSTART 4250; GFX900-NEXT: ; def v[0:2] 4251; GFX900-NEXT: ;;#ASMEND 4252; GFX900-NEXT: v_mov_b32_e32 v3, 0 4253; GFX900-NEXT: v_mov_b32_e32 v0, v2 4254; GFX900-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17] 4255; GFX900-NEXT: s_waitcnt vmcnt(0) 4256; GFX900-NEXT: s_setpc_b64 s[30:31] 4257; 4258; GFX90A-LABEL: v_shuffle_v4i32_v3i32__2_3_3_3: 4259; GFX90A: ; %bb.0: 4260; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4261; GFX90A-NEXT: ;;#ASMSTART 4262; GFX90A-NEXT: ; def v[0:2] 4263; GFX90A-NEXT: ;;#ASMEND 4264; GFX90A-NEXT: v_mov_b32_e32 v3, 0 4265; GFX90A-NEXT: v_mov_b32_e32 v0, v2 4266; GFX90A-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17] 4267; GFX90A-NEXT: s_waitcnt vmcnt(0) 4268; GFX90A-NEXT: s_setpc_b64 s[30:31] 4269; 4270; GFX940-LABEL: v_shuffle_v4i32_v3i32__2_3_3_3: 4271; GFX940: ; %bb.0: 4272; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4273; GFX940-NEXT: ;;#ASMSTART 4274; GFX940-NEXT: ; def v[0:2] 4275; GFX940-NEXT: ;;#ASMEND 4276; 
GFX940-NEXT: v_mov_b32_e32 v3, 0 4277; GFX940-NEXT: v_mov_b32_e32 v0, v2 4278; GFX940-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1] sc0 sc1 4279; GFX940-NEXT: s_waitcnt vmcnt(0) 4280; GFX940-NEXT: s_setpc_b64 s[30:31] 4281 %vec0 = call <3 x i32> asm "; def $0", "=v"() 4282 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> <i32 2, i32 3, i32 3, i32 3> 4283 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 4284 ret void 4285} 4286 4287define void @v_shuffle_v4i32_v3i32__3_3_3_3(ptr addrspace(1) inreg %ptr) { 4288; GFX9-LABEL: v_shuffle_v4i32_v3i32__3_3_3_3: 4289; GFX9: ; %bb.0: 4290; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4291; GFX9-NEXT: s_setpc_b64 s[30:31] 4292 %vec0 = call <3 x i32> asm "; def $0", "=v"() 4293 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 4294 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 4295 ret void 4296} 4297 4298define void @v_shuffle_v4i32_v3i32__4_3_3_3(ptr addrspace(1) inreg %ptr) { 4299; GFX900-LABEL: v_shuffle_v4i32_v3i32__4_3_3_3: 4300; GFX900: ; %bb.0: 4301; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4302; GFX900-NEXT: ;;#ASMSTART 4303; GFX900-NEXT: ; def v[1:3] 4304; GFX900-NEXT: ;;#ASMEND 4305; GFX900-NEXT: v_mov_b32_e32 v4, 0 4306; GFX900-NEXT: v_mov_b32_e32 v0, v2 4307; GFX900-NEXT: v_mov_b32_e32 v2, v1 4308; GFX900-NEXT: v_mov_b32_e32 v3, v1 4309; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] 4310; GFX900-NEXT: s_waitcnt vmcnt(0) 4311; GFX900-NEXT: s_setpc_b64 s[30:31] 4312; 4313; GFX90A-LABEL: v_shuffle_v4i32_v3i32__4_3_3_3: 4314; GFX90A: ; %bb.0: 4315; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4316; GFX90A-NEXT: v_mov_b32_e32 v7, 0 4317; GFX90A-NEXT: ;;#ASMSTART 4318; GFX90A-NEXT: ; def v[4:6] 4319; GFX90A-NEXT: ;;#ASMEND 4320; GFX90A-NEXT: v_mov_b32_e32 v0, v5 4321; GFX90A-NEXT: v_mov_b32_e32 v1, v4 4322; GFX90A-NEXT: v_mov_b32_e32 v2, v4 4323; GFX90A-NEXT: v_mov_b32_e32 v3, v4 4324; GFX90A-NEXT: 
global_store_dwordx4 v7, v[0:3], s[16:17] 4325; GFX90A-NEXT: s_waitcnt vmcnt(0) 4326; GFX90A-NEXT: s_setpc_b64 s[30:31] 4327; 4328; GFX940-LABEL: v_shuffle_v4i32_v3i32__4_3_3_3: 4329; GFX940: ; %bb.0: 4330; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4331; GFX940-NEXT: v_mov_b32_e32 v7, 0 4332; GFX940-NEXT: ;;#ASMSTART 4333; GFX940-NEXT: ; def v[4:6] 4334; GFX940-NEXT: ;;#ASMEND 4335; GFX940-NEXT: s_nop 0 4336; GFX940-NEXT: v_mov_b32_e32 v0, v5 4337; GFX940-NEXT: v_mov_b32_e32 v1, v4 4338; GFX940-NEXT: v_mov_b32_e32 v2, v4 4339; GFX940-NEXT: v_mov_b32_e32 v3, v4 4340; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 4341; GFX940-NEXT: s_waitcnt vmcnt(0) 4342; GFX940-NEXT: s_setpc_b64 s[30:31] 4343 %vec0 = call <3 x i32> asm "; def $0", "=v"() 4344 %vec1 = call <3 x i32> asm "; def $0", "=v"() 4345 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 4, i32 3, i32 3, i32 3> 4346 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 4347 ret void 4348} 4349 4350define void @v_shuffle_v4i32_v3i32__5_3_3_3(ptr addrspace(1) inreg %ptr) { 4351; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_3_3_3: 4352; GFX900: ; %bb.0: 4353; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4354; GFX900-NEXT: ;;#ASMSTART 4355; GFX900-NEXT: ; def v[1:3] 4356; GFX900-NEXT: ;;#ASMEND 4357; GFX900-NEXT: v_mov_b32_e32 v4, 0 4358; GFX900-NEXT: v_mov_b32_e32 v0, v3 4359; GFX900-NEXT: v_mov_b32_e32 v2, v1 4360; GFX900-NEXT: v_mov_b32_e32 v3, v1 4361; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] 4362; GFX900-NEXT: s_waitcnt vmcnt(0) 4363; GFX900-NEXT: s_setpc_b64 s[30:31] 4364; 4365; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_3_3_3: 4366; GFX90A: ; %bb.0: 4367; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4368; GFX90A-NEXT: v_mov_b32_e32 v7, 0 4369; GFX90A-NEXT: ;;#ASMSTART 4370; GFX90A-NEXT: ; def v[4:6] 4371; GFX90A-NEXT: ;;#ASMEND 4372; GFX90A-NEXT: v_mov_b32_e32 v0, v6 4373; GFX90A-NEXT: v_mov_b32_e32 v1, v4 4374; GFX90A-NEXT: v_mov_b32_e32 
v2, v4 4375; GFX90A-NEXT: v_mov_b32_e32 v3, v4 4376; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17] 4377; GFX90A-NEXT: s_waitcnt vmcnt(0) 4378; GFX90A-NEXT: s_setpc_b64 s[30:31] 4379; 4380; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_3_3_3: 4381; GFX940: ; %bb.0: 4382; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4383; GFX940-NEXT: v_mov_b32_e32 v7, 0 4384; GFX940-NEXT: ;;#ASMSTART 4385; GFX940-NEXT: ; def v[4:6] 4386; GFX940-NEXT: ;;#ASMEND 4387; GFX940-NEXT: s_nop 0 4388; GFX940-NEXT: v_mov_b32_e32 v0, v6 4389; GFX940-NEXT: v_mov_b32_e32 v1, v4 4390; GFX940-NEXT: v_mov_b32_e32 v2, v4 4391; GFX940-NEXT: v_mov_b32_e32 v3, v4 4392; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 4393; GFX940-NEXT: s_waitcnt vmcnt(0) 4394; GFX940-NEXT: s_setpc_b64 s[30:31] 4395 %vec0 = call <3 x i32> asm "; def $0", "=v"() 4396 %vec1 = call <3 x i32> asm "; def $0", "=v"() 4397 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 3, i32 3, i32 3> 4398 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 4399 ret void 4400} 4401 4402define void @v_shuffle_v4i32_v3i32__5_u_3_3(ptr addrspace(1) inreg %ptr) { 4403; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_u_3_3: 4404; GFX900: ; %bb.0: 4405; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4406; GFX900-NEXT: ;;#ASMSTART 4407; GFX900-NEXT: ; def v[1:3] 4408; GFX900-NEXT: ;;#ASMEND 4409; GFX900-NEXT: v_mov_b32_e32 v4, 0 4410; GFX900-NEXT: v_mov_b32_e32 v0, v3 4411; GFX900-NEXT: v_mov_b32_e32 v2, v1 4412; GFX900-NEXT: v_mov_b32_e32 v3, v1 4413; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] 4414; GFX900-NEXT: s_waitcnt vmcnt(0) 4415; GFX900-NEXT: s_setpc_b64 s[30:31] 4416; 4417; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_u_3_3: 4418; GFX90A: ; %bb.0: 4419; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4420; GFX90A-NEXT: v_mov_b32_e32 v1, 0 4421; GFX90A-NEXT: ;;#ASMSTART 4422; GFX90A-NEXT: ; def v[4:6] 4423; GFX90A-NEXT: ;;#ASMEND 4424; GFX90A-NEXT: v_mov_b32_e32 v0, v6 4425; 
GFX90A-NEXT: v_mov_b32_e32 v2, v4 4426; GFX90A-NEXT: v_mov_b32_e32 v3, v4 4427; GFX90A-NEXT: global_store_dwordx4 v1, v[0:3], s[16:17] 4428; GFX90A-NEXT: s_waitcnt vmcnt(0) 4429; GFX90A-NEXT: s_setpc_b64 s[30:31] 4430; 4431; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_u_3_3: 4432; GFX940: ; %bb.0: 4433; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4434; GFX940-NEXT: v_mov_b32_e32 v1, 0 4435; GFX940-NEXT: ;;#ASMSTART 4436; GFX940-NEXT: ; def v[4:6] 4437; GFX940-NEXT: ;;#ASMEND 4438; GFX940-NEXT: s_nop 0 4439; GFX940-NEXT: v_mov_b32_e32 v0, v6 4440; GFX940-NEXT: v_mov_b32_e32 v2, v4 4441; GFX940-NEXT: v_mov_b32_e32 v3, v4 4442; GFX940-NEXT: global_store_dwordx4 v1, v[0:3], s[0:1] sc0 sc1 4443; GFX940-NEXT: s_waitcnt vmcnt(0) 4444; GFX940-NEXT: s_setpc_b64 s[30:31] 4445 %vec0 = call <3 x i32> asm "; def $0", "=v"() 4446 %vec1 = call <3 x i32> asm "; def $0", "=v"() 4447 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 poison, i32 3, i32 3> 4448 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 4449 ret void 4450} 4451 4452define void @v_shuffle_v4i32_v3i32__5_0_3_3(ptr addrspace(1) inreg %ptr) { 4453; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_0_3_3: 4454; GFX900: ; %bb.0: 4455; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4456; GFX900-NEXT: ;;#ASMSTART 4457; GFX900-NEXT: ; def v[1:3] 4458; GFX900-NEXT: ;;#ASMEND 4459; GFX900-NEXT: v_mov_b32_e32 v6, 0 4460; GFX900-NEXT: ;;#ASMSTART 4461; GFX900-NEXT: ; def v[3:5] 4462; GFX900-NEXT: ;;#ASMEND 4463; GFX900-NEXT: v_mov_b32_e32 v0, v5 4464; GFX900-NEXT: v_mov_b32_e32 v2, v3 4465; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] 4466; GFX900-NEXT: s_waitcnt vmcnt(0) 4467; GFX900-NEXT: s_setpc_b64 s[30:31] 4468; 4469; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_0_3_3: 4470; GFX90A: ; %bb.0: 4471; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4472; GFX90A-NEXT: ;;#ASMSTART 4473; GFX90A-NEXT: ; def v[2:4] 4474; GFX90A-NEXT: ;;#ASMEND 4475; GFX90A-NEXT: v_mov_b32_e32 v7, 0 4476; 
GFX90A-NEXT: ;;#ASMSTART 4477; GFX90A-NEXT: ; def v[4:6] 4478; GFX90A-NEXT: ;;#ASMEND 4479; GFX90A-NEXT: v_mov_b32_e32 v0, v6 4480; GFX90A-NEXT: v_mov_b32_e32 v1, v2 4481; GFX90A-NEXT: v_mov_b32_e32 v2, v4 4482; GFX90A-NEXT: v_mov_b32_e32 v3, v4 4483; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17] 4484; GFX90A-NEXT: s_waitcnt vmcnt(0) 4485; GFX90A-NEXT: s_setpc_b64 s[30:31] 4486; 4487; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_0_3_3: 4488; GFX940: ; %bb.0: 4489; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4490; GFX940-NEXT: ;;#ASMSTART 4491; GFX940-NEXT: ; def v[2:4] 4492; GFX940-NEXT: ;;#ASMEND 4493; GFX940-NEXT: v_mov_b32_e32 v7, 0 4494; GFX940-NEXT: ;;#ASMSTART 4495; GFX940-NEXT: ; def v[4:6] 4496; GFX940-NEXT: ;;#ASMEND 4497; GFX940-NEXT: v_mov_b32_e32 v1, v2 4498; GFX940-NEXT: v_mov_b32_e32 v0, v6 4499; GFX940-NEXT: v_mov_b32_e32 v2, v4 4500; GFX940-NEXT: v_mov_b32_e32 v3, v4 4501; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 4502; GFX940-NEXT: s_waitcnt vmcnt(0) 4503; GFX940-NEXT: s_setpc_b64 s[30:31] 4504 %vec0 = call <3 x i32> asm "; def $0", "=v"() 4505 %vec1 = call <3 x i32> asm "; def $0", "=v"() 4506 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 0, i32 3, i32 3> 4507 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 4508 ret void 4509} 4510 4511define void @v_shuffle_v4i32_v3i32__5_1_3_3(ptr addrspace(1) inreg %ptr) { 4512; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_1_3_3: 4513; GFX900: ; %bb.0: 4514; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4515; GFX900-NEXT: ;;#ASMSTART 4516; GFX900-NEXT: ; def v[0:2] 4517; GFX900-NEXT: ;;#ASMEND 4518; GFX900-NEXT: v_mov_b32_e32 v6, 0 4519; GFX900-NEXT: ;;#ASMSTART 4520; GFX900-NEXT: ; def v[3:5] 4521; GFX900-NEXT: ;;#ASMEND 4522; GFX900-NEXT: v_mov_b32_e32 v0, v5 4523; GFX900-NEXT: v_mov_b32_e32 v2, v3 4524; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] 4525; GFX900-NEXT: s_waitcnt vmcnt(0) 4526; GFX900-NEXT: s_setpc_b64 s[30:31] 
4527; 4528; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_1_3_3: 4529; GFX90A: ; %bb.0: 4530; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4531; GFX90A-NEXT: ;;#ASMSTART 4532; GFX90A-NEXT: ; def v[0:2] 4533; GFX90A-NEXT: ;;#ASMEND 4534; GFX90A-NEXT: v_mov_b32_e32 v7, 0 4535; GFX90A-NEXT: ;;#ASMSTART 4536; GFX90A-NEXT: ; def v[4:6] 4537; GFX90A-NEXT: ;;#ASMEND 4538; GFX90A-NEXT: v_mov_b32_e32 v0, v6 4539; GFX90A-NEXT: v_mov_b32_e32 v2, v4 4540; GFX90A-NEXT: v_mov_b32_e32 v3, v4 4541; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17] 4542; GFX90A-NEXT: s_waitcnt vmcnt(0) 4543; GFX90A-NEXT: s_setpc_b64 s[30:31] 4544; 4545; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_1_3_3: 4546; GFX940: ; %bb.0: 4547; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4548; GFX940-NEXT: ;;#ASMSTART 4549; GFX940-NEXT: ; def v[0:2] 4550; GFX940-NEXT: ;;#ASMEND 4551; GFX940-NEXT: v_mov_b32_e32 v7, 0 4552; GFX940-NEXT: ;;#ASMSTART 4553; GFX940-NEXT: ; def v[4:6] 4554; GFX940-NEXT: ;;#ASMEND 4555; GFX940-NEXT: s_nop 0 4556; GFX940-NEXT: v_mov_b32_e32 v0, v6 4557; GFX940-NEXT: v_mov_b32_e32 v2, v4 4558; GFX940-NEXT: v_mov_b32_e32 v3, v4 4559; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 4560; GFX940-NEXT: s_waitcnt vmcnt(0) 4561; GFX940-NEXT: s_setpc_b64 s[30:31] 4562 %vec0 = call <3 x i32> asm "; def $0", "=v"() 4563 %vec1 = call <3 x i32> asm "; def $0", "=v"() 4564 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 1, i32 3, i32 3> 4565 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 4566 ret void 4567} 4568 4569define void @v_shuffle_v4i32_v3i32__5_2_3_3(ptr addrspace(1) inreg %ptr) { 4570; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_2_3_3: 4571; GFX900: ; %bb.0: 4572; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4573; GFX900-NEXT: ;;#ASMSTART 4574; GFX900-NEXT: ; def v[0:2] 4575; GFX900-NEXT: ;;#ASMEND 4576; GFX900-NEXT: v_mov_b32_e32 v6, 0 4577; GFX900-NEXT: ;;#ASMSTART 4578; GFX900-NEXT: ; def v[3:5] 4579; GFX900-NEXT: ;;#ASMEND 
4580; GFX900-NEXT: v_mov_b32_e32 v0, v5 4581; GFX900-NEXT: v_mov_b32_e32 v1, v2 4582; GFX900-NEXT: v_mov_b32_e32 v2, v3 4583; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] 4584; GFX900-NEXT: s_waitcnt vmcnt(0) 4585; GFX900-NEXT: s_setpc_b64 s[30:31] 4586; 4587; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_2_3_3: 4588; GFX90A: ; %bb.0: 4589; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4590; GFX90A-NEXT: ;;#ASMSTART 4591; GFX90A-NEXT: ; def v[0:2] 4592; GFX90A-NEXT: ;;#ASMEND 4593; GFX90A-NEXT: v_mov_b32_e32 v7, 0 4594; GFX90A-NEXT: ;;#ASMSTART 4595; GFX90A-NEXT: ; def v[4:6] 4596; GFX90A-NEXT: ;;#ASMEND 4597; GFX90A-NEXT: v_mov_b32_e32 v0, v6 4598; GFX90A-NEXT: v_mov_b32_e32 v1, v2 4599; GFX90A-NEXT: v_mov_b32_e32 v2, v4 4600; GFX90A-NEXT: v_mov_b32_e32 v3, v4 4601; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17] 4602; GFX90A-NEXT: s_waitcnt vmcnt(0) 4603; GFX90A-NEXT: s_setpc_b64 s[30:31] 4604; 4605; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_2_3_3: 4606; GFX940: ; %bb.0: 4607; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4608; GFX940-NEXT: ;;#ASMSTART 4609; GFX940-NEXT: ; def v[0:2] 4610; GFX940-NEXT: ;;#ASMEND 4611; GFX940-NEXT: v_mov_b32_e32 v7, 0 4612; GFX940-NEXT: ;;#ASMSTART 4613; GFX940-NEXT: ; def v[4:6] 4614; GFX940-NEXT: ;;#ASMEND 4615; GFX940-NEXT: v_mov_b32_e32 v1, v2 4616; GFX940-NEXT: v_mov_b32_e32 v0, v6 4617; GFX940-NEXT: v_mov_b32_e32 v2, v4 4618; GFX940-NEXT: v_mov_b32_e32 v3, v4 4619; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 4620; GFX940-NEXT: s_waitcnt vmcnt(0) 4621; GFX940-NEXT: s_setpc_b64 s[30:31] 4622 %vec0 = call <3 x i32> asm "; def $0", "=v"() 4623 %vec1 = call <3 x i32> asm "; def $0", "=v"() 4624 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 2, i32 3, i32 3> 4625 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 4626 ret void 4627} 4628 4629define void @v_shuffle_v4i32_v3i32__5_4_3_3(ptr addrspace(1) inreg %ptr) { 4630; GFX900-LABEL: 
v_shuffle_v4i32_v3i32__5_4_3_3: 4631; GFX900: ; %bb.0: 4632; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4633; GFX900-NEXT: v_mov_b32_e32 v6, 0 4634; GFX900-NEXT: ;;#ASMSTART 4635; GFX900-NEXT: ; def v[3:5] 4636; GFX900-NEXT: ;;#ASMEND 4637; GFX900-NEXT: v_mov_b32_e32 v0, v5 4638; GFX900-NEXT: v_mov_b32_e32 v1, v4 4639; GFX900-NEXT: v_mov_b32_e32 v2, v3 4640; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] 4641; GFX900-NEXT: s_waitcnt vmcnt(0) 4642; GFX900-NEXT: s_setpc_b64 s[30:31] 4643; 4644; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_4_3_3: 4645; GFX90A: ; %bb.0: 4646; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4647; GFX90A-NEXT: v_mov_b32_e32 v7, 0 4648; GFX90A-NEXT: ;;#ASMSTART 4649; GFX90A-NEXT: ; def v[4:6] 4650; GFX90A-NEXT: ;;#ASMEND 4651; GFX90A-NEXT: v_mov_b32_e32 v0, v6 4652; GFX90A-NEXT: v_mov_b32_e32 v1, v5 4653; GFX90A-NEXT: v_mov_b32_e32 v2, v4 4654; GFX90A-NEXT: v_mov_b32_e32 v3, v4 4655; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17] 4656; GFX90A-NEXT: s_waitcnt vmcnt(0) 4657; GFX90A-NEXT: s_setpc_b64 s[30:31] 4658; 4659; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_4_3_3: 4660; GFX940: ; %bb.0: 4661; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4662; GFX940-NEXT: v_mov_b32_e32 v7, 0 4663; GFX940-NEXT: ;;#ASMSTART 4664; GFX940-NEXT: ; def v[4:6] 4665; GFX940-NEXT: ;;#ASMEND 4666; GFX940-NEXT: s_nop 0 4667; GFX940-NEXT: v_mov_b32_e32 v0, v6 4668; GFX940-NEXT: v_mov_b32_e32 v1, v5 4669; GFX940-NEXT: v_mov_b32_e32 v2, v4 4670; GFX940-NEXT: v_mov_b32_e32 v3, v4 4671; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 4672; GFX940-NEXT: s_waitcnt vmcnt(0) 4673; GFX940-NEXT: s_setpc_b64 s[30:31] 4674 %vec0 = call <3 x i32> asm "; def $0", "=v"() 4675 %vec1 = call <3 x i32> asm "; def $0", "=v"() 4676 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 4, i32 3, i32 3> 4677 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 4678 ret void 4679} 4680 4681define void 
@v_shuffle_v4i32_v3i32__5_5_3_3(ptr addrspace(1) inreg %ptr) { 4682; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_3_3: 4683; GFX900: ; %bb.0: 4684; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4685; GFX900-NEXT: v_mov_b32_e32 v6, 0 4686; GFX900-NEXT: ;;#ASMSTART 4687; GFX900-NEXT: ; def v[3:5] 4688; GFX900-NEXT: ;;#ASMEND 4689; GFX900-NEXT: v_mov_b32_e32 v0, v5 4690; GFX900-NEXT: v_mov_b32_e32 v1, v5 4691; GFX900-NEXT: v_mov_b32_e32 v2, v3 4692; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] 4693; GFX900-NEXT: s_waitcnt vmcnt(0) 4694; GFX900-NEXT: s_setpc_b64 s[30:31] 4695; 4696; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_3_3: 4697; GFX90A: ; %bb.0: 4698; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4699; GFX90A-NEXT: v_mov_b32_e32 v7, 0 4700; GFX90A-NEXT: ;;#ASMSTART 4701; GFX90A-NEXT: ; def v[4:6] 4702; GFX90A-NEXT: ;;#ASMEND 4703; GFX90A-NEXT: v_mov_b32_e32 v0, v6 4704; GFX90A-NEXT: v_mov_b32_e32 v1, v6 4705; GFX90A-NEXT: v_mov_b32_e32 v2, v4 4706; GFX90A-NEXT: v_mov_b32_e32 v3, v4 4707; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17] 4708; GFX90A-NEXT: s_waitcnt vmcnt(0) 4709; GFX90A-NEXT: s_setpc_b64 s[30:31] 4710; 4711; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_3_3: 4712; GFX940: ; %bb.0: 4713; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4714; GFX940-NEXT: v_mov_b32_e32 v7, 0 4715; GFX940-NEXT: ;;#ASMSTART 4716; GFX940-NEXT: ; def v[4:6] 4717; GFX940-NEXT: ;;#ASMEND 4718; GFX940-NEXT: s_nop 0 4719; GFX940-NEXT: v_mov_b32_e32 v0, v6 4720; GFX940-NEXT: v_mov_b32_e32 v1, v6 4721; GFX940-NEXT: v_mov_b32_e32 v2, v4 4722; GFX940-NEXT: v_mov_b32_e32 v3, v4 4723; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 4724; GFX940-NEXT: s_waitcnt vmcnt(0) 4725; GFX940-NEXT: s_setpc_b64 s[30:31] 4726 %vec0 = call <3 x i32> asm "; def $0", "=v"() 4727 %vec1 = call <3 x i32> asm "; def $0", "=v"() 4728 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 3, i32 3> 4729 store <4 x i32> %shuf, ptr 
addrspace(1) %ptr, align 16 4730 ret void 4731} 4732 4733define void @v_shuffle_v4i32_v3i32__5_5_u_3(ptr addrspace(1) inreg %ptr) { 4734; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_u_3: 4735; GFX900: ; %bb.0: 4736; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4737; GFX900-NEXT: ;;#ASMSTART 4738; GFX900-NEXT: ; def v[2:4] 4739; GFX900-NEXT: ;;#ASMEND 4740; GFX900-NEXT: v_mov_b32_e32 v5, 0 4741; GFX900-NEXT: v_mov_b32_e32 v0, v4 4742; GFX900-NEXT: v_mov_b32_e32 v1, v4 4743; GFX900-NEXT: v_mov_b32_e32 v3, v2 4744; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] 4745; GFX900-NEXT: s_waitcnt vmcnt(0) 4746; GFX900-NEXT: s_setpc_b64 s[30:31] 4747; 4748; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_u_3: 4749; GFX90A: ; %bb.0: 4750; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4751; GFX90A-NEXT: ;;#ASMSTART 4752; GFX90A-NEXT: ; def v[2:4] 4753; GFX90A-NEXT: ;;#ASMEND 4754; GFX90A-NEXT: v_mov_b32_e32 v5, 0 4755; GFX90A-NEXT: v_mov_b32_e32 v0, v4 4756; GFX90A-NEXT: v_mov_b32_e32 v1, v4 4757; GFX90A-NEXT: v_mov_b32_e32 v3, v2 4758; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] 4759; GFX90A-NEXT: s_waitcnt vmcnt(0) 4760; GFX90A-NEXT: s_setpc_b64 s[30:31] 4761; 4762; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_u_3: 4763; GFX940: ; %bb.0: 4764; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4765; GFX940-NEXT: ;;#ASMSTART 4766; GFX940-NEXT: ; def v[2:4] 4767; GFX940-NEXT: ;;#ASMEND 4768; GFX940-NEXT: v_mov_b32_e32 v5, 0 4769; GFX940-NEXT: v_mov_b32_e32 v0, v4 4770; GFX940-NEXT: v_mov_b32_e32 v1, v4 4771; GFX940-NEXT: v_mov_b32_e32 v3, v2 4772; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 4773; GFX940-NEXT: s_waitcnt vmcnt(0) 4774; GFX940-NEXT: s_setpc_b64 s[30:31] 4775 %vec0 = call <3 x i32> asm "; def $0", "=v"() 4776 %vec1 = call <3 x i32> asm "; def $0", "=v"() 4777 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 poison, i32 3> 4778 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 4779 ret 
void 4780} 4781 4782define void @v_shuffle_v4i32_v3i32__5_5_0_3(ptr addrspace(1) inreg %ptr) { 4783; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_0_3: 4784; GFX900: ; %bb.0: 4785; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4786; GFX900-NEXT: ;;#ASMSTART 4787; GFX900-NEXT: ; def v[2:4] 4788; GFX900-NEXT: ;;#ASMEND 4789; GFX900-NEXT: v_mov_b32_e32 v6, 0 4790; GFX900-NEXT: ;;#ASMSTART 4791; GFX900-NEXT: ; def v[3:5] 4792; GFX900-NEXT: ;;#ASMEND 4793; GFX900-NEXT: v_mov_b32_e32 v0, v5 4794; GFX900-NEXT: v_mov_b32_e32 v1, v5 4795; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] 4796; GFX900-NEXT: s_waitcnt vmcnt(0) 4797; GFX900-NEXT: s_setpc_b64 s[30:31] 4798; 4799; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_0_3: 4800; GFX90A: ; %bb.0: 4801; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4802; GFX90A-NEXT: ;;#ASMSTART 4803; GFX90A-NEXT: ; def v[2:4] 4804; GFX90A-NEXT: ;;#ASMEND 4805; GFX90A-NEXT: v_mov_b32_e32 v7, 0 4806; GFX90A-NEXT: ;;#ASMSTART 4807; GFX90A-NEXT: ; def v[4:6] 4808; GFX90A-NEXT: ;;#ASMEND 4809; GFX90A-NEXT: v_mov_b32_e32 v0, v6 4810; GFX90A-NEXT: v_mov_b32_e32 v1, v6 4811; GFX90A-NEXT: v_mov_b32_e32 v3, v4 4812; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17] 4813; GFX90A-NEXT: s_waitcnt vmcnt(0) 4814; GFX90A-NEXT: s_setpc_b64 s[30:31] 4815; 4816; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_0_3: 4817; GFX940: ; %bb.0: 4818; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4819; GFX940-NEXT: ;;#ASMSTART 4820; GFX940-NEXT: ; def v[2:4] 4821; GFX940-NEXT: ;;#ASMEND 4822; GFX940-NEXT: v_mov_b32_e32 v7, 0 4823; GFX940-NEXT: ;;#ASMSTART 4824; GFX940-NEXT: ; def v[4:6] 4825; GFX940-NEXT: ;;#ASMEND 4826; GFX940-NEXT: s_nop 0 4827; GFX940-NEXT: v_mov_b32_e32 v0, v6 4828; GFX940-NEXT: v_mov_b32_e32 v1, v6 4829; GFX940-NEXT: v_mov_b32_e32 v3, v4 4830; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 4831; GFX940-NEXT: s_waitcnt vmcnt(0) 4832; GFX940-NEXT: s_setpc_b64 s[30:31] 4833 %vec0 = call <3 x i32> asm "; def $0", "=v"() 
4834 %vec1 = call <3 x i32> asm "; def $0", "=v"() 4835 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 0, i32 3> 4836 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 4837 ret void 4838} 4839 4840define void @v_shuffle_v4i32_v3i32__5_5_1_3(ptr addrspace(1) inreg %ptr) { 4841; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_1_3: 4842; GFX900: ; %bb.0: 4843; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4844; GFX900-NEXT: ;;#ASMSTART 4845; GFX900-NEXT: ; def v[1:3] 4846; GFX900-NEXT: ;;#ASMEND 4847; GFX900-NEXT: v_mov_b32_e32 v6, 0 4848; GFX900-NEXT: ;;#ASMSTART 4849; GFX900-NEXT: ; def v[3:5] 4850; GFX900-NEXT: ;;#ASMEND 4851; GFX900-NEXT: v_mov_b32_e32 v0, v5 4852; GFX900-NEXT: v_mov_b32_e32 v1, v5 4853; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] 4854; GFX900-NEXT: s_waitcnt vmcnt(0) 4855; GFX900-NEXT: s_setpc_b64 s[30:31] 4856; 4857; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_1_3: 4858; GFX90A: ; %bb.0: 4859; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4860; GFX90A-NEXT: ;;#ASMSTART 4861; GFX90A-NEXT: ; def v[2:4] 4862; GFX90A-NEXT: ;;#ASMEND 4863; GFX90A-NEXT: v_mov_b32_e32 v7, 0 4864; GFX90A-NEXT: ;;#ASMSTART 4865; GFX90A-NEXT: ; def v[4:6] 4866; GFX90A-NEXT: ;;#ASMEND 4867; GFX90A-NEXT: v_mov_b32_e32 v0, v6 4868; GFX90A-NEXT: v_mov_b32_e32 v1, v6 4869; GFX90A-NEXT: v_mov_b32_e32 v2, v3 4870; GFX90A-NEXT: v_mov_b32_e32 v3, v4 4871; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17] 4872; GFX90A-NEXT: s_waitcnt vmcnt(0) 4873; GFX90A-NEXT: s_setpc_b64 s[30:31] 4874; 4875; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_1_3: 4876; GFX940: ; %bb.0: 4877; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4878; GFX940-NEXT: ;;#ASMSTART 4879; GFX940-NEXT: ; def v[2:4] 4880; GFX940-NEXT: ;;#ASMEND 4881; GFX940-NEXT: v_mov_b32_e32 v7, 0 4882; GFX940-NEXT: ;;#ASMSTART 4883; GFX940-NEXT: ; def v[4:6] 4884; GFX940-NEXT: ;;#ASMEND 4885; GFX940-NEXT: v_mov_b32_e32 v2, v3 4886; GFX940-NEXT: v_mov_b32_e32 v0, v6 
4887; GFX940-NEXT: v_mov_b32_e32 v1, v6 4888; GFX940-NEXT: v_mov_b32_e32 v3, v4 4889; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 4890; GFX940-NEXT: s_waitcnt vmcnt(0) 4891; GFX940-NEXT: s_setpc_b64 s[30:31] 4892 %vec0 = call <3 x i32> asm "; def $0", "=v"() 4893 %vec1 = call <3 x i32> asm "; def $0", "=v"() 4894 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 1, i32 3> 4895 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 4896 ret void 4897} 4898 4899define void @v_shuffle_v4i32_v3i32__5_5_2_3(ptr addrspace(1) inreg %ptr) { 4900; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_2_3: 4901; GFX900: ; %bb.0: 4902; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4903; GFX900-NEXT: ;;#ASMSTART 4904; GFX900-NEXT: ; def v[0:2] 4905; GFX900-NEXT: ;;#ASMEND 4906; GFX900-NEXT: v_mov_b32_e32 v6, 0 4907; GFX900-NEXT: ;;#ASMSTART 4908; GFX900-NEXT: ; def v[3:5] 4909; GFX900-NEXT: ;;#ASMEND 4910; GFX900-NEXT: v_mov_b32_e32 v0, v5 4911; GFX900-NEXT: v_mov_b32_e32 v1, v5 4912; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] 4913; GFX900-NEXT: s_waitcnt vmcnt(0) 4914; GFX900-NEXT: s_setpc_b64 s[30:31] 4915; 4916; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_2_3: 4917; GFX90A: ; %bb.0: 4918; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4919; GFX90A-NEXT: ;;#ASMSTART 4920; GFX90A-NEXT: ; def v[0:2] 4921; GFX90A-NEXT: ;;#ASMEND 4922; GFX90A-NEXT: v_mov_b32_e32 v7, 0 4923; GFX90A-NEXT: ;;#ASMSTART 4924; GFX90A-NEXT: ; def v[4:6] 4925; GFX90A-NEXT: ;;#ASMEND 4926; GFX90A-NEXT: v_mov_b32_e32 v0, v6 4927; GFX90A-NEXT: v_mov_b32_e32 v1, v6 4928; GFX90A-NEXT: v_mov_b32_e32 v3, v4 4929; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17] 4930; GFX90A-NEXT: s_waitcnt vmcnt(0) 4931; GFX90A-NEXT: s_setpc_b64 s[30:31] 4932; 4933; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_2_3: 4934; GFX940: ; %bb.0: 4935; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4936; GFX940-NEXT: ;;#ASMSTART 4937; GFX940-NEXT: ; def v[0:2] 4938; 
GFX940-NEXT: ;;#ASMEND 4939; GFX940-NEXT: v_mov_b32_e32 v7, 0 4940; GFX940-NEXT: ;;#ASMSTART 4941; GFX940-NEXT: ; def v[4:6] 4942; GFX940-NEXT: ;;#ASMEND 4943; GFX940-NEXT: s_nop 0 4944; GFX940-NEXT: v_mov_b32_e32 v0, v6 4945; GFX940-NEXT: v_mov_b32_e32 v1, v6 4946; GFX940-NEXT: v_mov_b32_e32 v3, v4 4947; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 4948; GFX940-NEXT: s_waitcnt vmcnt(0) 4949; GFX940-NEXT: s_setpc_b64 s[30:31] 4950 %vec0 = call <3 x i32> asm "; def $0", "=v"() 4951 %vec1 = call <3 x i32> asm "; def $0", "=v"() 4952 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 2, i32 3> 4953 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 4954 ret void 4955} 4956 4957define void @v_shuffle_v4i32_v3i32__5_5_4_3(ptr addrspace(1) inreg %ptr) { 4958; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_4_3: 4959; GFX900: ; %bb.0: 4960; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4961; GFX900-NEXT: v_mov_b32_e32 v6, 0 4962; GFX900-NEXT: ;;#ASMSTART 4963; GFX900-NEXT: ; def v[3:5] 4964; GFX900-NEXT: ;;#ASMEND 4965; GFX900-NEXT: v_mov_b32_e32 v0, v5 4966; GFX900-NEXT: v_mov_b32_e32 v1, v5 4967; GFX900-NEXT: v_mov_b32_e32 v2, v4 4968; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] 4969; GFX900-NEXT: s_waitcnt vmcnt(0) 4970; GFX900-NEXT: s_setpc_b64 s[30:31] 4971; 4972; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_4_3: 4973; GFX90A: ; %bb.0: 4974; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4975; GFX90A-NEXT: v_mov_b32_e32 v7, 0 4976; GFX90A-NEXT: ;;#ASMSTART 4977; GFX90A-NEXT: ; def v[4:6] 4978; GFX90A-NEXT: ;;#ASMEND 4979; GFX90A-NEXT: v_mov_b32_e32 v0, v6 4980; GFX90A-NEXT: v_mov_b32_e32 v1, v6 4981; GFX90A-NEXT: v_mov_b32_e32 v2, v5 4982; GFX90A-NEXT: v_mov_b32_e32 v3, v4 4983; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17] 4984; GFX90A-NEXT: s_waitcnt vmcnt(0) 4985; GFX90A-NEXT: s_setpc_b64 s[30:31] 4986; 4987; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_4_3: 4988; GFX940: ; %bb.0: 4989; 
GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4990; GFX940-NEXT: v_mov_b32_e32 v7, 0 4991; GFX940-NEXT: ;;#ASMSTART 4992; GFX940-NEXT: ; def v[4:6] 4993; GFX940-NEXT: ;;#ASMEND 4994; GFX940-NEXT: s_nop 0 4995; GFX940-NEXT: v_mov_b32_e32 v0, v6 4996; GFX940-NEXT: v_mov_b32_e32 v1, v6 4997; GFX940-NEXT: v_mov_b32_e32 v2, v5 4998; GFX940-NEXT: v_mov_b32_e32 v3, v4 4999; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 5000; GFX940-NEXT: s_waitcnt vmcnt(0) 5001; GFX940-NEXT: s_setpc_b64 s[30:31] 5002 %vec0 = call <3 x i32> asm "; def $0", "=v"() 5003 %vec1 = call <3 x i32> asm "; def $0", "=v"() 5004 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 4, i32 3> 5005 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 5006 ret void 5007} 5008 5009define void @v_shuffle_v4i32_v3i32__u_4_4_4(ptr addrspace(1) inreg %ptr) { 5010; GFX900-LABEL: v_shuffle_v4i32_v3i32__u_4_4_4: 5011; GFX900: ; %bb.0: 5012; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5013; GFX900-NEXT: ;;#ASMSTART 5014; GFX900-NEXT: ; def v[0:2] 5015; GFX900-NEXT: ;;#ASMEND 5016; GFX900-NEXT: v_mov_b32_e32 v4, 0 5017; GFX900-NEXT: v_mov_b32_e32 v2, v1 5018; GFX900-NEXT: v_mov_b32_e32 v3, v1 5019; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] 5020; GFX900-NEXT: s_waitcnt vmcnt(0) 5021; GFX900-NEXT: s_setpc_b64 s[30:31] 5022; 5023; GFX90A-LABEL: v_shuffle_v4i32_v3i32__u_4_4_4: 5024; GFX90A: ; %bb.0: 5025; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5026; GFX90A-NEXT: ;;#ASMSTART 5027; GFX90A-NEXT: ; def v[0:2] 5028; GFX90A-NEXT: ;;#ASMEND 5029; GFX90A-NEXT: v_mov_b32_e32 v4, 0 5030; GFX90A-NEXT: v_mov_b32_e32 v2, v1 5031; GFX90A-NEXT: v_mov_b32_e32 v3, v1 5032; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] 5033; GFX90A-NEXT: s_waitcnt vmcnt(0) 5034; GFX90A-NEXT: s_setpc_b64 s[30:31] 5035; 5036; GFX940-LABEL: v_shuffle_v4i32_v3i32__u_4_4_4: 5037; GFX940: ; %bb.0: 5038; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 
5039; GFX940-NEXT: ;;#ASMSTART 5040; GFX940-NEXT: ; def v[0:2] 5041; GFX940-NEXT: ;;#ASMEND 5042; GFX940-NEXT: v_mov_b32_e32 v4, 0 5043; GFX940-NEXT: v_mov_b32_e32 v2, v1 5044; GFX940-NEXT: v_mov_b32_e32 v3, v1 5045; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 5046; GFX940-NEXT: s_waitcnt vmcnt(0) 5047; GFX940-NEXT: s_setpc_b64 s[30:31] 5048 %vec0 = call <3 x i32> asm "; def $0", "=v"() 5049 %vec1 = call <3 x i32> asm "; def $0", "=v"() 5050 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 poison, i32 4, i32 4, i32 4> 5051 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 5052 ret void 5053} 5054 5055define void @v_shuffle_v4i32_v3i32__0_4_4_4(ptr addrspace(1) inreg %ptr) { 5056; GFX900-LABEL: v_shuffle_v4i32_v3i32__0_4_4_4: 5057; GFX900: ; %bb.0: 5058; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5059; GFX900-NEXT: ;;#ASMSTART 5060; GFX900-NEXT: ; def v[0:2] 5061; GFX900-NEXT: ;;#ASMEND 5062; GFX900-NEXT: ;;#ASMSTART 5063; GFX900-NEXT: ; def v[1:3] 5064; GFX900-NEXT: ;;#ASMEND 5065; GFX900-NEXT: v_mov_b32_e32 v4, 0 5066; GFX900-NEXT: v_mov_b32_e32 v1, v2 5067; GFX900-NEXT: v_mov_b32_e32 v3, v2 5068; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] 5069; GFX900-NEXT: s_waitcnt vmcnt(0) 5070; GFX900-NEXT: s_setpc_b64 s[30:31] 5071; 5072; GFX90A-LABEL: v_shuffle_v4i32_v3i32__0_4_4_4: 5073; GFX90A: ; %bb.0: 5074; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5075; GFX90A-NEXT: ;;#ASMSTART 5076; GFX90A-NEXT: ; def v[0:2] 5077; GFX90A-NEXT: ;;#ASMEND 5078; GFX90A-NEXT: ;;#ASMSTART 5079; GFX90A-NEXT: ; def v[2:4] 5080; GFX90A-NEXT: ;;#ASMEND 5081; GFX90A-NEXT: v_mov_b32_e32 v5, 0 5082; GFX90A-NEXT: v_mov_b32_e32 v1, v3 5083; GFX90A-NEXT: v_mov_b32_e32 v2, v3 5084; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] 5085; GFX90A-NEXT: s_waitcnt vmcnt(0) 5086; GFX90A-NEXT: s_setpc_b64 s[30:31] 5087; 5088; GFX940-LABEL: v_shuffle_v4i32_v3i32__0_4_4_4: 5089; GFX940: ; %bb.0: 5090; GFX940-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) 5091; GFX940-NEXT: ;;#ASMSTART 5092; GFX940-NEXT: ; def v[0:2] 5093; GFX940-NEXT: ;;#ASMEND 5094; GFX940-NEXT: v_mov_b32_e32 v5, 0 5095; GFX940-NEXT: ;;#ASMSTART 5096; GFX940-NEXT: ; def v[2:4] 5097; GFX940-NEXT: ;;#ASMEND 5098; GFX940-NEXT: s_nop 0 5099; GFX940-NEXT: v_mov_b32_e32 v1, v3 5100; GFX940-NEXT: v_mov_b32_e32 v2, v3 5101; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 5102; GFX940-NEXT: s_waitcnt vmcnt(0) 5103; GFX940-NEXT: s_setpc_b64 s[30:31] 5104 %vec0 = call <3 x i32> asm "; def $0", "=v"() 5105 %vec1 = call <3 x i32> asm "; def $0", "=v"() 5106 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 0, i32 4, i32 4, i32 4> 5107 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 5108 ret void 5109} 5110 5111define void @v_shuffle_v4i32_v3i32__1_4_4_4(ptr addrspace(1) inreg %ptr) { 5112; GFX900-LABEL: v_shuffle_v4i32_v3i32__1_4_4_4: 5113; GFX900: ; %bb.0: 5114; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5115; GFX900-NEXT: ;;#ASMSTART 5116; GFX900-NEXT: ; def v[2:4] 5117; GFX900-NEXT: ;;#ASMEND 5118; GFX900-NEXT: ;;#ASMSTART 5119; GFX900-NEXT: ; def v[0:2] 5120; GFX900-NEXT: ;;#ASMEND 5121; GFX900-NEXT: v_mov_b32_e32 v5, 0 5122; GFX900-NEXT: v_mov_b32_e32 v0, v3 5123; GFX900-NEXT: v_mov_b32_e32 v2, v1 5124; GFX900-NEXT: v_mov_b32_e32 v3, v1 5125; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] 5126; GFX900-NEXT: s_waitcnt vmcnt(0) 5127; GFX900-NEXT: s_setpc_b64 s[30:31] 5128; 5129; GFX90A-LABEL: v_shuffle_v4i32_v3i32__1_4_4_4: 5130; GFX90A: ; %bb.0: 5131; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5132; GFX90A-NEXT: ;;#ASMSTART 5133; GFX90A-NEXT: ; def v[2:4] 5134; GFX90A-NEXT: ;;#ASMEND 5135; GFX90A-NEXT: ;;#ASMSTART 5136; GFX90A-NEXT: ; def v[0:2] 5137; GFX90A-NEXT: ;;#ASMEND 5138; GFX90A-NEXT: v_mov_b32_e32 v5, 0 5139; GFX90A-NEXT: v_mov_b32_e32 v0, v3 5140; GFX90A-NEXT: v_mov_b32_e32 v2, v1 5141; GFX90A-NEXT: v_mov_b32_e32 v3, v1 5142; GFX90A-NEXT: 
global_store_dwordx4 v5, v[0:3], s[16:17] 5143; GFX90A-NEXT: s_waitcnt vmcnt(0) 5144; GFX90A-NEXT: s_setpc_b64 s[30:31] 5145; 5146; GFX940-LABEL: v_shuffle_v4i32_v3i32__1_4_4_4: 5147; GFX940: ; %bb.0: 5148; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5149; GFX940-NEXT: ;;#ASMSTART 5150; GFX940-NEXT: ; def v[2:4] 5151; GFX940-NEXT: ;;#ASMEND 5152; GFX940-NEXT: v_mov_b32_e32 v5, 0 5153; GFX940-NEXT: ;;#ASMSTART 5154; GFX940-NEXT: ; def v[0:2] 5155; GFX940-NEXT: ;;#ASMEND 5156; GFX940-NEXT: s_nop 0 5157; GFX940-NEXT: v_mov_b32_e32 v0, v3 5158; GFX940-NEXT: v_mov_b32_e32 v2, v1 5159; GFX940-NEXT: v_mov_b32_e32 v3, v1 5160; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 5161; GFX940-NEXT: s_waitcnt vmcnt(0) 5162; GFX940-NEXT: s_setpc_b64 s[30:31] 5163 %vec0 = call <3 x i32> asm "; def $0", "=v"() 5164 %vec1 = call <3 x i32> asm "; def $0", "=v"() 5165 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 1, i32 4, i32 4, i32 4> 5166 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 5167 ret void 5168} 5169 5170define void @v_shuffle_v4i32_v3i32__2_4_4_4(ptr addrspace(1) inreg %ptr) { 5171; GFX900-LABEL: v_shuffle_v4i32_v3i32__2_4_4_4: 5172; GFX900: ; %bb.0: 5173; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5174; GFX900-NEXT: ;;#ASMSTART 5175; GFX900-NEXT: ; def v[1:3] 5176; GFX900-NEXT: ;;#ASMEND 5177; GFX900-NEXT: ;;#ASMSTART 5178; GFX900-NEXT: ; def v[0:2] 5179; GFX900-NEXT: ;;#ASMEND 5180; GFX900-NEXT: v_mov_b32_e32 v4, 0 5181; GFX900-NEXT: v_mov_b32_e32 v0, v3 5182; GFX900-NEXT: v_mov_b32_e32 v2, v1 5183; GFX900-NEXT: v_mov_b32_e32 v3, v1 5184; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] 5185; GFX900-NEXT: s_waitcnt vmcnt(0) 5186; GFX900-NEXT: s_setpc_b64 s[30:31] 5187; 5188; GFX90A-LABEL: v_shuffle_v4i32_v3i32__2_4_4_4: 5189; GFX90A: ; %bb.0: 5190; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5191; GFX90A-NEXT: ;;#ASMSTART 5192; GFX90A-NEXT: ; def v[2:4] 5193; GFX90A-NEXT: ;;#ASMEND 5194; 
GFX90A-NEXT: ;;#ASMSTART 5195; GFX90A-NEXT: ; def v[0:2] 5196; GFX90A-NEXT: ;;#ASMEND 5197; GFX90A-NEXT: v_mov_b32_e32 v5, 0 5198; GFX90A-NEXT: v_mov_b32_e32 v0, v4 5199; GFX90A-NEXT: v_mov_b32_e32 v2, v1 5200; GFX90A-NEXT: v_mov_b32_e32 v3, v1 5201; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] 5202; GFX90A-NEXT: s_waitcnt vmcnt(0) 5203; GFX90A-NEXT: s_setpc_b64 s[30:31] 5204; 5205; GFX940-LABEL: v_shuffle_v4i32_v3i32__2_4_4_4: 5206; GFX940: ; %bb.0: 5207; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5208; GFX940-NEXT: ;;#ASMSTART 5209; GFX940-NEXT: ; def v[2:4] 5210; GFX940-NEXT: ;;#ASMEND 5211; GFX940-NEXT: v_mov_b32_e32 v5, 0 5212; GFX940-NEXT: ;;#ASMSTART 5213; GFX940-NEXT: ; def v[0:2] 5214; GFX940-NEXT: ;;#ASMEND 5215; GFX940-NEXT: s_nop 0 5216; GFX940-NEXT: v_mov_b32_e32 v0, v4 5217; GFX940-NEXT: v_mov_b32_e32 v2, v1 5218; GFX940-NEXT: v_mov_b32_e32 v3, v1 5219; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 5220; GFX940-NEXT: s_waitcnt vmcnt(0) 5221; GFX940-NEXT: s_setpc_b64 s[30:31] 5222 %vec0 = call <3 x i32> asm "; def $0", "=v"() 5223 %vec1 = call <3 x i32> asm "; def $0", "=v"() 5224 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 2, i32 4, i32 4, i32 4> 5225 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 5226 ret void 5227} 5228 5229define void @v_shuffle_v4i32_v3i32__3_4_4_4(ptr addrspace(1) inreg %ptr) { 5230; GFX900-LABEL: v_shuffle_v4i32_v3i32__3_4_4_4: 5231; GFX900: ; %bb.0: 5232; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5233; GFX900-NEXT: ;;#ASMSTART 5234; GFX900-NEXT: ; def v[0:2] 5235; GFX900-NEXT: ;;#ASMEND 5236; GFX900-NEXT: v_mov_b32_e32 v4, 0 5237; GFX900-NEXT: v_mov_b32_e32 v2, v1 5238; GFX900-NEXT: v_mov_b32_e32 v3, v1 5239; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] 5240; GFX900-NEXT: s_waitcnt vmcnt(0) 5241; GFX900-NEXT: s_setpc_b64 s[30:31] 5242; 5243; GFX90A-LABEL: v_shuffle_v4i32_v3i32__3_4_4_4: 5244; GFX90A: ; %bb.0: 5245; GFX90A-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) 5246; GFX90A-NEXT: ;;#ASMSTART 5247; GFX90A-NEXT: ; def v[0:2] 5248; GFX90A-NEXT: ;;#ASMEND 5249; GFX90A-NEXT: v_mov_b32_e32 v4, 0 5250; GFX90A-NEXT: v_mov_b32_e32 v2, v1 5251; GFX90A-NEXT: v_mov_b32_e32 v3, v1 5252; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] 5253; GFX90A-NEXT: s_waitcnt vmcnt(0) 5254; GFX90A-NEXT: s_setpc_b64 s[30:31] 5255; 5256; GFX940-LABEL: v_shuffle_v4i32_v3i32__3_4_4_4: 5257; GFX940: ; %bb.0: 5258; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5259; GFX940-NEXT: ;;#ASMSTART 5260; GFX940-NEXT: ; def v[0:2] 5261; GFX940-NEXT: ;;#ASMEND 5262; GFX940-NEXT: v_mov_b32_e32 v4, 0 5263; GFX940-NEXT: v_mov_b32_e32 v2, v1 5264; GFX940-NEXT: v_mov_b32_e32 v3, v1 5265; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 5266; GFX940-NEXT: s_waitcnt vmcnt(0) 5267; GFX940-NEXT: s_setpc_b64 s[30:31] 5268 %vec0 = call <3 x i32> asm "; def $0", "=v"() 5269 %vec1 = call <3 x i32> asm "; def $0", "=v"() 5270 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 3, i32 4, i32 4, i32 4> 5271 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 5272 ret void 5273} 5274 5275define void @v_shuffle_v4i32_v3i32__4_4_4_4(ptr addrspace(1) inreg %ptr) { 5276; GFX900-LABEL: v_shuffle_v4i32_v3i32__4_4_4_4: 5277; GFX900: ; %bb.0: 5278; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5279; GFX900-NEXT: ;;#ASMSTART 5280; GFX900-NEXT: ; def v[0:2] 5281; GFX900-NEXT: ;;#ASMEND 5282; GFX900-NEXT: v_mov_b32_e32 v4, 0 5283; GFX900-NEXT: v_mov_b32_e32 v0, v1 5284; GFX900-NEXT: v_mov_b32_e32 v2, v1 5285; GFX900-NEXT: v_mov_b32_e32 v3, v1 5286; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] 5287; GFX900-NEXT: s_waitcnt vmcnt(0) 5288; GFX900-NEXT: s_setpc_b64 s[30:31] 5289; 5290; GFX90A-LABEL: v_shuffle_v4i32_v3i32__4_4_4_4: 5291; GFX90A: ; %bb.0: 5292; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5293; GFX90A-NEXT: ;;#ASMSTART 5294; GFX90A-NEXT: ; def v[0:2] 5295; GFX90A-NEXT: ;;#ASMEND 
5296; GFX90A-NEXT: v_mov_b32_e32 v4, 0 5297; GFX90A-NEXT: v_mov_b32_e32 v0, v1 5298; GFX90A-NEXT: v_mov_b32_e32 v2, v1 5299; GFX90A-NEXT: v_mov_b32_e32 v3, v1 5300; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] 5301; GFX90A-NEXT: s_waitcnt vmcnt(0) 5302; GFX90A-NEXT: s_setpc_b64 s[30:31] 5303; 5304; GFX940-LABEL: v_shuffle_v4i32_v3i32__4_4_4_4: 5305; GFX940: ; %bb.0: 5306; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5307; GFX940-NEXT: ;;#ASMSTART 5308; GFX940-NEXT: ; def v[0:2] 5309; GFX940-NEXT: ;;#ASMEND 5310; GFX940-NEXT: v_mov_b32_e32 v4, 0 5311; GFX940-NEXT: v_mov_b32_e32 v0, v1 5312; GFX940-NEXT: v_mov_b32_e32 v2, v1 5313; GFX940-NEXT: v_mov_b32_e32 v3, v1 5314; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 5315; GFX940-NEXT: s_waitcnt vmcnt(0) 5316; GFX940-NEXT: s_setpc_b64 s[30:31] 5317 %vec0 = call <3 x i32> asm "; def $0", "=v"() 5318 %vec1 = call <3 x i32> asm "; def $0", "=v"() 5319 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 4, i32 4, i32 4, i32 4> 5320 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 5321 ret void 5322} 5323 5324define void @v_shuffle_v4i32_v3i32__5_4_4_4(ptr addrspace(1) inreg %ptr) { 5325; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_4_4_4: 5326; GFX900: ; %bb.0: 5327; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5328; GFX900-NEXT: ;;#ASMSTART 5329; GFX900-NEXT: ; def v[0:2] 5330; GFX900-NEXT: ;;#ASMEND 5331; GFX900-NEXT: v_mov_b32_e32 v4, 0 5332; GFX900-NEXT: v_mov_b32_e32 v0, v2 5333; GFX900-NEXT: v_mov_b32_e32 v2, v1 5334; GFX900-NEXT: v_mov_b32_e32 v3, v1 5335; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] 5336; GFX900-NEXT: s_waitcnt vmcnt(0) 5337; GFX900-NEXT: s_setpc_b64 s[30:31] 5338; 5339; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_4_4_4: 5340; GFX90A: ; %bb.0: 5341; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5342; GFX90A-NEXT: ;;#ASMSTART 5343; GFX90A-NEXT: ; def v[0:2] 5344; GFX90A-NEXT: ;;#ASMEND 5345; GFX90A-NEXT: v_mov_b32_e32 v4, 0 
5346; GFX90A-NEXT: v_mov_b32_e32 v0, v2 5347; GFX90A-NEXT: v_mov_b32_e32 v2, v1 5348; GFX90A-NEXT: v_mov_b32_e32 v3, v1 5349; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] 5350; GFX90A-NEXT: s_waitcnt vmcnt(0) 5351; GFX90A-NEXT: s_setpc_b64 s[30:31] 5352; 5353; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_4_4_4: 5354; GFX940: ; %bb.0: 5355; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5356; GFX940-NEXT: ;;#ASMSTART 5357; GFX940-NEXT: ; def v[0:2] 5358; GFX940-NEXT: ;;#ASMEND 5359; GFX940-NEXT: v_mov_b32_e32 v4, 0 5360; GFX940-NEXT: v_mov_b32_e32 v0, v2 5361; GFX940-NEXT: v_mov_b32_e32 v2, v1 5362; GFX940-NEXT: v_mov_b32_e32 v3, v1 5363; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 5364; GFX940-NEXT: s_waitcnt vmcnt(0) 5365; GFX940-NEXT: s_setpc_b64 s[30:31] 5366 %vec0 = call <3 x i32> asm "; def $0", "=v"() 5367 %vec1 = call <3 x i32> asm "; def $0", "=v"() 5368 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 4, i32 4, i32 4> 5369 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 5370 ret void 5371} 5372 5373define void @v_shuffle_v4i32_v3i32__5_u_4_4(ptr addrspace(1) inreg %ptr) { 5374; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_u_4_4: 5375; GFX900: ; %bb.0: 5376; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5377; GFX900-NEXT: ;;#ASMSTART 5378; GFX900-NEXT: ; def v[1:3] 5379; GFX900-NEXT: ;;#ASMEND 5380; GFX900-NEXT: v_mov_b32_e32 v4, 0 5381; GFX900-NEXT: v_mov_b32_e32 v0, v3 5382; GFX900-NEXT: v_mov_b32_e32 v3, v2 5383; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] 5384; GFX900-NEXT: s_waitcnt vmcnt(0) 5385; GFX900-NEXT: s_setpc_b64 s[30:31] 5386; 5387; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_u_4_4: 5388; GFX90A: ; %bb.0: 5389; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5390; GFX90A-NEXT: ;;#ASMSTART 5391; GFX90A-NEXT: ; def v[0:2] 5392; GFX90A-NEXT: ;;#ASMEND 5393; GFX90A-NEXT: v_mov_b32_e32 v4, 0 5394; GFX90A-NEXT: v_mov_b32_e32 v0, v2 5395; GFX90A-NEXT: v_mov_b32_e32 v2, v1 
5396; GFX90A-NEXT: v_mov_b32_e32 v3, v1 5397; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] 5398; GFX90A-NEXT: s_waitcnt vmcnt(0) 5399; GFX90A-NEXT: s_setpc_b64 s[30:31] 5400; 5401; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_u_4_4: 5402; GFX940: ; %bb.0: 5403; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5404; GFX940-NEXT: ;;#ASMSTART 5405; GFX940-NEXT: ; def v[0:2] 5406; GFX940-NEXT: ;;#ASMEND 5407; GFX940-NEXT: v_mov_b32_e32 v4, 0 5408; GFX940-NEXT: v_mov_b32_e32 v0, v2 5409; GFX940-NEXT: v_mov_b32_e32 v2, v1 5410; GFX940-NEXT: v_mov_b32_e32 v3, v1 5411; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 5412; GFX940-NEXT: s_waitcnt vmcnt(0) 5413; GFX940-NEXT: s_setpc_b64 s[30:31] 5414 %vec0 = call <3 x i32> asm "; def $0", "=v"() 5415 %vec1 = call <3 x i32> asm "; def $0", "=v"() 5416 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 poison, i32 4, i32 4> 5417 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 5418 ret void 5419} 5420 5421define void @v_shuffle_v4i32_v3i32__5_0_4_4(ptr addrspace(1) inreg %ptr) { 5422; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_0_4_4: 5423; GFX900: ; %bb.0: 5424; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5425; GFX900-NEXT: ;;#ASMSTART 5426; GFX900-NEXT: ; def v[1:3] 5427; GFX900-NEXT: ;;#ASMEND 5428; GFX900-NEXT: ;;#ASMSTART 5429; GFX900-NEXT: ; def v[2:4] 5430; GFX900-NEXT: ;;#ASMEND 5431; GFX900-NEXT: v_mov_b32_e32 v5, 0 5432; GFX900-NEXT: v_mov_b32_e32 v0, v4 5433; GFX900-NEXT: v_mov_b32_e32 v2, v3 5434; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] 5435; GFX900-NEXT: s_waitcnt vmcnt(0) 5436; GFX900-NEXT: s_setpc_b64 s[30:31] 5437; 5438; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_0_4_4: 5439; GFX90A: ; %bb.0: 5440; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5441; GFX90A-NEXT: ;;#ASMSTART 5442; GFX90A-NEXT: ; def v[2:4] 5443; GFX90A-NEXT: ;;#ASMEND 5444; GFX90A-NEXT: v_mov_b32_e32 v7, 0 5445; GFX90A-NEXT: ;;#ASMSTART 5446; GFX90A-NEXT: ; def v[4:6] 
5447; GFX90A-NEXT: ;;#ASMEND 5448; GFX90A-NEXT: v_mov_b32_e32 v0, v6 5449; GFX90A-NEXT: v_mov_b32_e32 v1, v2 5450; GFX90A-NEXT: v_mov_b32_e32 v2, v5 5451; GFX90A-NEXT: v_mov_b32_e32 v3, v5 5452; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17] 5453; GFX90A-NEXT: s_waitcnt vmcnt(0) 5454; GFX90A-NEXT: s_setpc_b64 s[30:31] 5455; 5456; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_0_4_4: 5457; GFX940: ; %bb.0: 5458; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5459; GFX940-NEXT: ;;#ASMSTART 5460; GFX940-NEXT: ; def v[2:4] 5461; GFX940-NEXT: ;;#ASMEND 5462; GFX940-NEXT: v_mov_b32_e32 v7, 0 5463; GFX940-NEXT: ;;#ASMSTART 5464; GFX940-NEXT: ; def v[4:6] 5465; GFX940-NEXT: ;;#ASMEND 5466; GFX940-NEXT: v_mov_b32_e32 v1, v2 5467; GFX940-NEXT: v_mov_b32_e32 v0, v6 5468; GFX940-NEXT: v_mov_b32_e32 v2, v5 5469; GFX940-NEXT: v_mov_b32_e32 v3, v5 5470; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 5471; GFX940-NEXT: s_waitcnt vmcnt(0) 5472; GFX940-NEXT: s_setpc_b64 s[30:31] 5473 %vec0 = call <3 x i32> asm "; def $0", "=v"() 5474 %vec1 = call <3 x i32> asm "; def $0", "=v"() 5475 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 0, i32 4, i32 4> 5476 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 5477 ret void 5478} 5479 5480define void @v_shuffle_v4i32_v3i32__5_1_4_4(ptr addrspace(1) inreg %ptr) { 5481; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_1_4_4: 5482; GFX900: ; %bb.0: 5483; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5484; GFX900-NEXT: ;;#ASMSTART 5485; GFX900-NEXT: ; def v[0:2] 5486; GFX900-NEXT: ;;#ASMEND 5487; GFX900-NEXT: ;;#ASMSTART 5488; GFX900-NEXT: ; def v[2:4] 5489; GFX900-NEXT: ;;#ASMEND 5490; GFX900-NEXT: v_mov_b32_e32 v5, 0 5491; GFX900-NEXT: v_mov_b32_e32 v0, v4 5492; GFX900-NEXT: v_mov_b32_e32 v2, v3 5493; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] 5494; GFX900-NEXT: s_waitcnt vmcnt(0) 5495; GFX900-NEXT: s_setpc_b64 s[30:31] 5496; 5497; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_1_4_4: 
5498; GFX90A: ; %bb.0: 5499; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5500; GFX90A-NEXT: ;;#ASMSTART 5501; GFX90A-NEXT: ; def v[0:2] 5502; GFX90A-NEXT: ;;#ASMEND 5503; GFX90A-NEXT: ;;#ASMSTART 5504; GFX90A-NEXT: ; def v[2:4] 5505; GFX90A-NEXT: ;;#ASMEND 5506; GFX90A-NEXT: v_mov_b32_e32 v5, 0 5507; GFX90A-NEXT: v_mov_b32_e32 v0, v4 5508; GFX90A-NEXT: v_mov_b32_e32 v2, v3 5509; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] 5510; GFX90A-NEXT: s_waitcnt vmcnt(0) 5511; GFX90A-NEXT: s_setpc_b64 s[30:31] 5512; 5513; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_1_4_4: 5514; GFX940: ; %bb.0: 5515; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5516; GFX940-NEXT: ;;#ASMSTART 5517; GFX940-NEXT: ; def v[0:2] 5518; GFX940-NEXT: ;;#ASMEND 5519; GFX940-NEXT: v_mov_b32_e32 v5, 0 5520; GFX940-NEXT: ;;#ASMSTART 5521; GFX940-NEXT: ; def v[2:4] 5522; GFX940-NEXT: ;;#ASMEND 5523; GFX940-NEXT: s_nop 0 5524; GFX940-NEXT: v_mov_b32_e32 v0, v4 5525; GFX940-NEXT: v_mov_b32_e32 v2, v3 5526; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 5527; GFX940-NEXT: s_waitcnt vmcnt(0) 5528; GFX940-NEXT: s_setpc_b64 s[30:31] 5529 %vec0 = call <3 x i32> asm "; def $0", "=v"() 5530 %vec1 = call <3 x i32> asm "; def $0", "=v"() 5531 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 1, i32 4, i32 4> 5532 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 5533 ret void 5534} 5535 5536define void @v_shuffle_v4i32_v3i32__5_2_4_4(ptr addrspace(1) inreg %ptr) { 5537; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_2_4_4: 5538; GFX900: ; %bb.0: 5539; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5540; GFX900-NEXT: ;;#ASMSTART 5541; GFX900-NEXT: ; def v[2:4] 5542; GFX900-NEXT: ;;#ASMEND 5543; GFX900-NEXT: ;;#ASMSTART 5544; GFX900-NEXT: ; def v[1:3] 5545; GFX900-NEXT: ;;#ASMEND 5546; GFX900-NEXT: v_mov_b32_e32 v5, 0 5547; GFX900-NEXT: v_mov_b32_e32 v0, v3 5548; GFX900-NEXT: v_mov_b32_e32 v1, v4 5549; GFX900-NEXT: v_mov_b32_e32 v3, v2 5550; 
GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] 5551; GFX900-NEXT: s_waitcnt vmcnt(0) 5552; GFX900-NEXT: s_setpc_b64 s[30:31] 5553; 5554; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_2_4_4: 5555; GFX90A: ; %bb.0: 5556; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5557; GFX90A-NEXT: ;;#ASMSTART 5558; GFX90A-NEXT: ; def v[0:2] 5559; GFX90A-NEXT: ;;#ASMEND 5560; GFX90A-NEXT: v_mov_b32_e32 v7, 0 5561; GFX90A-NEXT: ;;#ASMSTART 5562; GFX90A-NEXT: ; def v[4:6] 5563; GFX90A-NEXT: ;;#ASMEND 5564; GFX90A-NEXT: v_mov_b32_e32 v0, v6 5565; GFX90A-NEXT: v_mov_b32_e32 v1, v2 5566; GFX90A-NEXT: v_mov_b32_e32 v2, v5 5567; GFX90A-NEXT: v_mov_b32_e32 v3, v5 5568; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17] 5569; GFX90A-NEXT: s_waitcnt vmcnt(0) 5570; GFX90A-NEXT: s_setpc_b64 s[30:31] 5571; 5572; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_2_4_4: 5573; GFX940: ; %bb.0: 5574; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5575; GFX940-NEXT: ;;#ASMSTART 5576; GFX940-NEXT: ; def v[0:2] 5577; GFX940-NEXT: ;;#ASMEND 5578; GFX940-NEXT: v_mov_b32_e32 v7, 0 5579; GFX940-NEXT: ;;#ASMSTART 5580; GFX940-NEXT: ; def v[4:6] 5581; GFX940-NEXT: ;;#ASMEND 5582; GFX940-NEXT: v_mov_b32_e32 v1, v2 5583; GFX940-NEXT: v_mov_b32_e32 v0, v6 5584; GFX940-NEXT: v_mov_b32_e32 v2, v5 5585; GFX940-NEXT: v_mov_b32_e32 v3, v5 5586; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 5587; GFX940-NEXT: s_waitcnt vmcnt(0) 5588; GFX940-NEXT: s_setpc_b64 s[30:31] 5589 %vec0 = call <3 x i32> asm "; def $0", "=v"() 5590 %vec1 = call <3 x i32> asm "; def $0", "=v"() 5591 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 2, i32 4, i32 4> 5592 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 5593 ret void 5594} 5595 5596define void @v_shuffle_v4i32_v3i32__5_3_4_4(ptr addrspace(1) inreg %ptr) { 5597; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_3_4_4: 5598; GFX900: ; %bb.0: 5599; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5600; GFX900-NEXT: ;;#ASMSTART 5601; 
GFX900-NEXT: ; def v[1:3] 5602; GFX900-NEXT: ;;#ASMEND 5603; GFX900-NEXT: v_mov_b32_e32 v4, 0 5604; GFX900-NEXT: v_mov_b32_e32 v0, v3 5605; GFX900-NEXT: v_mov_b32_e32 v3, v2 5606; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] 5607; GFX900-NEXT: s_waitcnt vmcnt(0) 5608; GFX900-NEXT: s_setpc_b64 s[30:31] 5609; 5610; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_3_4_4: 5611; GFX90A: ; %bb.0: 5612; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5613; GFX90A-NEXT: ;;#ASMSTART 5614; GFX90A-NEXT: ; def v[2:4] 5615; GFX90A-NEXT: ;;#ASMEND 5616; GFX90A-NEXT: v_mov_b32_e32 v5, 0 5617; GFX90A-NEXT: v_mov_b32_e32 v0, v4 5618; GFX90A-NEXT: v_mov_b32_e32 v1, v2 5619; GFX90A-NEXT: v_mov_b32_e32 v2, v3 5620; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] 5621; GFX90A-NEXT: s_waitcnt vmcnt(0) 5622; GFX90A-NEXT: s_setpc_b64 s[30:31] 5623; 5624; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_3_4_4: 5625; GFX940: ; %bb.0: 5626; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5627; GFX940-NEXT: ;;#ASMSTART 5628; GFX940-NEXT: ; def v[2:4] 5629; GFX940-NEXT: ;;#ASMEND 5630; GFX940-NEXT: v_mov_b32_e32 v5, 0 5631; GFX940-NEXT: v_mov_b32_e32 v0, v4 5632; GFX940-NEXT: v_mov_b32_e32 v1, v2 5633; GFX940-NEXT: v_mov_b32_e32 v2, v3 5634; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 5635; GFX940-NEXT: s_waitcnt vmcnt(0) 5636; GFX940-NEXT: s_setpc_b64 s[30:31] 5637 %vec0 = call <3 x i32> asm "; def $0", "=v"() 5638 %vec1 = call <3 x i32> asm "; def $0", "=v"() 5639 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 3, i32 4, i32 4> 5640 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 5641 ret void 5642} 5643 5644define void @v_shuffle_v4i32_v3i32__5_5_4_4(ptr addrspace(1) inreg %ptr) { 5645; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_4_4: 5646; GFX900: ; %bb.0: 5647; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5648; GFX900-NEXT: ;;#ASMSTART 5649; GFX900-NEXT: ; def v[1:3] 5650; GFX900-NEXT: ;;#ASMEND 5651; GFX900-NEXT: 
v_mov_b32_e32 v4, 0 5652; GFX900-NEXT: v_mov_b32_e32 v0, v3 5653; GFX900-NEXT: v_mov_b32_e32 v1, v3 5654; GFX900-NEXT: v_mov_b32_e32 v3, v2 5655; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] 5656; GFX900-NEXT: s_waitcnt vmcnt(0) 5657; GFX900-NEXT: s_setpc_b64 s[30:31] 5658; 5659; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_4_4: 5660; GFX90A: ; %bb.0: 5661; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5662; GFX90A-NEXT: ;;#ASMSTART 5663; GFX90A-NEXT: ; def v[2:4] 5664; GFX90A-NEXT: ;;#ASMEND 5665; GFX90A-NEXT: v_mov_b32_e32 v5, 0 5666; GFX90A-NEXT: v_mov_b32_e32 v0, v4 5667; GFX90A-NEXT: v_mov_b32_e32 v1, v4 5668; GFX90A-NEXT: v_mov_b32_e32 v2, v3 5669; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] 5670; GFX90A-NEXT: s_waitcnt vmcnt(0) 5671; GFX90A-NEXT: s_setpc_b64 s[30:31] 5672; 5673; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_4_4: 5674; GFX940: ; %bb.0: 5675; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5676; GFX940-NEXT: ;;#ASMSTART 5677; GFX940-NEXT: ; def v[2:4] 5678; GFX940-NEXT: ;;#ASMEND 5679; GFX940-NEXT: v_mov_b32_e32 v5, 0 5680; GFX940-NEXT: v_mov_b32_e32 v0, v4 5681; GFX940-NEXT: v_mov_b32_e32 v1, v4 5682; GFX940-NEXT: v_mov_b32_e32 v2, v3 5683; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 5684; GFX940-NEXT: s_waitcnt vmcnt(0) 5685; GFX940-NEXT: s_setpc_b64 s[30:31] 5686 %vec0 = call <3 x i32> asm "; def $0", "=v"() 5687 %vec1 = call <3 x i32> asm "; def $0", "=v"() 5688 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 4, i32 4> 5689 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 5690 ret void 5691} 5692 5693define void @v_shuffle_v4i32_v3i32__5_5_u_4(ptr addrspace(1) inreg %ptr) { 5694; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_u_4: 5695; GFX900: ; %bb.0: 5696; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5697; GFX900-NEXT: ;;#ASMSTART 5698; GFX900-NEXT: ; def v[1:3] 5699; GFX900-NEXT: ;;#ASMEND 5700; GFX900-NEXT: v_mov_b32_e32 v4, 0 5701; GFX900-NEXT: 
v_mov_b32_e32 v0, v3 5702; GFX900-NEXT: v_mov_b32_e32 v1, v3 5703; GFX900-NEXT: v_mov_b32_e32 v3, v2 5704; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] 5705; GFX900-NEXT: s_waitcnt vmcnt(0) 5706; GFX900-NEXT: s_setpc_b64 s[30:31] 5707; 5708; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_u_4: 5709; GFX90A: ; %bb.0: 5710; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5711; GFX90A-NEXT: v_mov_b32_e32 v5, 0 5712; GFX90A-NEXT: ;;#ASMSTART 5713; GFX90A-NEXT: ; def v[2:4] 5714; GFX90A-NEXT: ;;#ASMEND 5715; GFX90A-NEXT: v_mov_b32_e32 v0, v4 5716; GFX90A-NEXT: v_mov_b32_e32 v1, v4 5717; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] 5718; GFX90A-NEXT: s_waitcnt vmcnt(0) 5719; GFX90A-NEXT: s_setpc_b64 s[30:31] 5720; 5721; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_u_4: 5722; GFX940: ; %bb.0: 5723; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5724; GFX940-NEXT: v_mov_b32_e32 v5, 0 5725; GFX940-NEXT: ;;#ASMSTART 5726; GFX940-NEXT: ; def v[2:4] 5727; GFX940-NEXT: ;;#ASMEND 5728; GFX940-NEXT: s_nop 0 5729; GFX940-NEXT: v_mov_b32_e32 v0, v4 5730; GFX940-NEXT: v_mov_b32_e32 v1, v4 5731; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 5732; GFX940-NEXT: s_waitcnt vmcnt(0) 5733; GFX940-NEXT: s_setpc_b64 s[30:31] 5734 %vec0 = call <3 x i32> asm "; def $0", "=v"() 5735 %vec1 = call <3 x i32> asm "; def $0", "=v"() 5736 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 poison, i32 4> 5737 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 5738 ret void 5739} 5740 5741define void @v_shuffle_v4i32_v3i32__5_5_0_4(ptr addrspace(1) inreg %ptr) { 5742; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_0_4: 5743; GFX900: ; %bb.0: 5744; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5745; GFX900-NEXT: ;;#ASMSTART 5746; GFX900-NEXT: ; def v[2:4] 5747; GFX900-NEXT: ;;#ASMEND 5748; GFX900-NEXT: ;;#ASMSTART 5749; GFX900-NEXT: ; def v[3:5] 5750; GFX900-NEXT: ;;#ASMEND 5751; GFX900-NEXT: v_mov_b32_e32 v6, 0 5752; GFX900-NEXT: 
v_mov_b32_e32 v0, v5 5753; GFX900-NEXT: v_mov_b32_e32 v1, v5 5754; GFX900-NEXT: v_mov_b32_e32 v3, v4 5755; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] 5756; GFX900-NEXT: s_waitcnt vmcnt(0) 5757; GFX900-NEXT: s_setpc_b64 s[30:31] 5758; 5759; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_0_4: 5760; GFX90A: ; %bb.0: 5761; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5762; GFX90A-NEXT: ;;#ASMSTART 5763; GFX90A-NEXT: ; def v[2:4] 5764; GFX90A-NEXT: ;;#ASMEND 5765; GFX90A-NEXT: v_mov_b32_e32 v7, 0 5766; GFX90A-NEXT: ;;#ASMSTART 5767; GFX90A-NEXT: ; def v[4:6] 5768; GFX90A-NEXT: ;;#ASMEND 5769; GFX90A-NEXT: v_mov_b32_e32 v0, v6 5770; GFX90A-NEXT: v_mov_b32_e32 v1, v6 5771; GFX90A-NEXT: v_mov_b32_e32 v3, v5 5772; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17] 5773; GFX90A-NEXT: s_waitcnt vmcnt(0) 5774; GFX90A-NEXT: s_setpc_b64 s[30:31] 5775; 5776; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_0_4: 5777; GFX940: ; %bb.0: 5778; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5779; GFX940-NEXT: ;;#ASMSTART 5780; GFX940-NEXT: ; def v[2:4] 5781; GFX940-NEXT: ;;#ASMEND 5782; GFX940-NEXT: v_mov_b32_e32 v7, 0 5783; GFX940-NEXT: ;;#ASMSTART 5784; GFX940-NEXT: ; def v[4:6] 5785; GFX940-NEXT: ;;#ASMEND 5786; GFX940-NEXT: s_nop 0 5787; GFX940-NEXT: v_mov_b32_e32 v0, v6 5788; GFX940-NEXT: v_mov_b32_e32 v1, v6 5789; GFX940-NEXT: v_mov_b32_e32 v3, v5 5790; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 5791; GFX940-NEXT: s_waitcnt vmcnt(0) 5792; GFX940-NEXT: s_setpc_b64 s[30:31] 5793 %vec0 = call <3 x i32> asm "; def $0", "=v"() 5794 %vec1 = call <3 x i32> asm "; def $0", "=v"() 5795 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 0, i32 4> 5796 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 5797 ret void 5798} 5799 5800define void @v_shuffle_v4i32_v3i32__5_5_1_4(ptr addrspace(1) inreg %ptr) { 5801; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_1_4: 5802; GFX900: ; %bb.0: 5803; GFX900-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) 5804; GFX900-NEXT: ;;#ASMSTART 5805; GFX900-NEXT: ; def v[1:3] 5806; GFX900-NEXT: ;;#ASMEND 5807; GFX900-NEXT: ;;#ASMSTART 5808; GFX900-NEXT: ; def v[3:5] 5809; GFX900-NEXT: ;;#ASMEND 5810; GFX900-NEXT: v_mov_b32_e32 v6, 0 5811; GFX900-NEXT: v_mov_b32_e32 v0, v5 5812; GFX900-NEXT: v_mov_b32_e32 v1, v5 5813; GFX900-NEXT: v_mov_b32_e32 v3, v4 5814; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] 5815; GFX900-NEXT: s_waitcnt vmcnt(0) 5816; GFX900-NEXT: s_setpc_b64 s[30:31] 5817; 5818; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_1_4: 5819; GFX90A: ; %bb.0: 5820; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5821; GFX90A-NEXT: ;;#ASMSTART 5822; GFX90A-NEXT: ; def v[2:4] 5823; GFX90A-NEXT: ;;#ASMEND 5824; GFX90A-NEXT: v_mov_b32_e32 v7, 0 5825; GFX90A-NEXT: ;;#ASMSTART 5826; GFX90A-NEXT: ; def v[4:6] 5827; GFX90A-NEXT: ;;#ASMEND 5828; GFX90A-NEXT: v_mov_b32_e32 v0, v6 5829; GFX90A-NEXT: v_mov_b32_e32 v1, v6 5830; GFX90A-NEXT: v_mov_b32_e32 v2, v3 5831; GFX90A-NEXT: v_mov_b32_e32 v3, v5 5832; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17] 5833; GFX90A-NEXT: s_waitcnt vmcnt(0) 5834; GFX90A-NEXT: s_setpc_b64 s[30:31] 5835; 5836; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_1_4: 5837; GFX940: ; %bb.0: 5838; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5839; GFX940-NEXT: ;;#ASMSTART 5840; GFX940-NEXT: ; def v[2:4] 5841; GFX940-NEXT: ;;#ASMEND 5842; GFX940-NEXT: v_mov_b32_e32 v7, 0 5843; GFX940-NEXT: ;;#ASMSTART 5844; GFX940-NEXT: ; def v[4:6] 5845; GFX940-NEXT: ;;#ASMEND 5846; GFX940-NEXT: v_mov_b32_e32 v2, v3 5847; GFX940-NEXT: v_mov_b32_e32 v0, v6 5848; GFX940-NEXT: v_mov_b32_e32 v1, v6 5849; GFX940-NEXT: v_mov_b32_e32 v3, v5 5850; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 5851; GFX940-NEXT: s_waitcnt vmcnt(0) 5852; GFX940-NEXT: s_setpc_b64 s[30:31] 5853 %vec0 = call <3 x i32> asm "; def $0", "=v"() 5854 %vec1 = call <3 x i32> asm "; def $0", "=v"() 5855 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 
x i32> <i32 5, i32 5, i32 1, i32 4> 5856 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 5857 ret void 5858} 5859 5860define void @v_shuffle_v4i32_v3i32__5_5_2_4(ptr addrspace(1) inreg %ptr) { 5861; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_2_4: 5862; GFX900: ; %bb.0: 5863; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5864; GFX900-NEXT: ;;#ASMSTART 5865; GFX900-NEXT: ; def v[0:2] 5866; GFX900-NEXT: ;;#ASMEND 5867; GFX900-NEXT: ;;#ASMSTART 5868; GFX900-NEXT: ; def v[3:5] 5869; GFX900-NEXT: ;;#ASMEND 5870; GFX900-NEXT: v_mov_b32_e32 v6, 0 5871; GFX900-NEXT: v_mov_b32_e32 v0, v5 5872; GFX900-NEXT: v_mov_b32_e32 v1, v5 5873; GFX900-NEXT: v_mov_b32_e32 v3, v4 5874; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] 5875; GFX900-NEXT: s_waitcnt vmcnt(0) 5876; GFX900-NEXT: s_setpc_b64 s[30:31] 5877; 5878; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_2_4: 5879; GFX90A: ; %bb.0: 5880; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5881; GFX90A-NEXT: ;;#ASMSTART 5882; GFX90A-NEXT: ; def v[0:2] 5883; GFX90A-NEXT: ;;#ASMEND 5884; GFX90A-NEXT: v_mov_b32_e32 v7, 0 5885; GFX90A-NEXT: ;;#ASMSTART 5886; GFX90A-NEXT: ; def v[4:6] 5887; GFX90A-NEXT: ;;#ASMEND 5888; GFX90A-NEXT: v_mov_b32_e32 v0, v6 5889; GFX90A-NEXT: v_mov_b32_e32 v1, v6 5890; GFX90A-NEXT: v_mov_b32_e32 v3, v5 5891; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17] 5892; GFX90A-NEXT: s_waitcnt vmcnt(0) 5893; GFX90A-NEXT: s_setpc_b64 s[30:31] 5894; 5895; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_2_4: 5896; GFX940: ; %bb.0: 5897; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5898; GFX940-NEXT: ;;#ASMSTART 5899; GFX940-NEXT: ; def v[0:2] 5900; GFX940-NEXT: ;;#ASMEND 5901; GFX940-NEXT: v_mov_b32_e32 v7, 0 5902; GFX940-NEXT: ;;#ASMSTART 5903; GFX940-NEXT: ; def v[4:6] 5904; GFX940-NEXT: ;;#ASMEND 5905; GFX940-NEXT: s_nop 0 5906; GFX940-NEXT: v_mov_b32_e32 v0, v6 5907; GFX940-NEXT: v_mov_b32_e32 v1, v6 5908; GFX940-NEXT: v_mov_b32_e32 v3, v5 5909; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], 
s[0:1] sc0 sc1 5910; GFX940-NEXT: s_waitcnt vmcnt(0) 5911; GFX940-NEXT: s_setpc_b64 s[30:31] 5912 %vec0 = call <3 x i32> asm "; def $0", "=v"() 5913 %vec1 = call <3 x i32> asm "; def $0", "=v"() 5914 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 2, i32 4> 5915 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 5916 ret void 5917} 5918 5919define void @v_shuffle_v4i32_v3i32__5_5_3_4(ptr addrspace(1) inreg %ptr) { 5920; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_3_4: 5921; GFX900: ; %bb.0: 5922; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5923; GFX900-NEXT: v_mov_b32_e32 v5, 0 5924; GFX900-NEXT: ;;#ASMSTART 5925; GFX900-NEXT: ; def v[2:4] 5926; GFX900-NEXT: ;;#ASMEND 5927; GFX900-NEXT: v_mov_b32_e32 v0, v4 5928; GFX900-NEXT: v_mov_b32_e32 v1, v4 5929; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] 5930; GFX900-NEXT: s_waitcnt vmcnt(0) 5931; GFX900-NEXT: s_setpc_b64 s[30:31] 5932; 5933; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_3_4: 5934; GFX90A: ; %bb.0: 5935; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5936; GFX90A-NEXT: v_mov_b32_e32 v5, 0 5937; GFX90A-NEXT: ;;#ASMSTART 5938; GFX90A-NEXT: ; def v[2:4] 5939; GFX90A-NEXT: ;;#ASMEND 5940; GFX90A-NEXT: v_mov_b32_e32 v0, v4 5941; GFX90A-NEXT: v_mov_b32_e32 v1, v4 5942; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] 5943; GFX90A-NEXT: s_waitcnt vmcnt(0) 5944; GFX90A-NEXT: s_setpc_b64 s[30:31] 5945; 5946; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_3_4: 5947; GFX940: ; %bb.0: 5948; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5949; GFX940-NEXT: v_mov_b32_e32 v5, 0 5950; GFX940-NEXT: ;;#ASMSTART 5951; GFX940-NEXT: ; def v[2:4] 5952; GFX940-NEXT: ;;#ASMEND 5953; GFX940-NEXT: s_nop 0 5954; GFX940-NEXT: v_mov_b32_e32 v0, v4 5955; GFX940-NEXT: v_mov_b32_e32 v1, v4 5956; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 5957; GFX940-NEXT: s_waitcnt vmcnt(0) 5958; GFX940-NEXT: s_setpc_b64 s[30:31] 5959 %vec0 = call <3 x i32> asm "; def $0", 
"=v"() 5960 %vec1 = call <3 x i32> asm "; def $0", "=v"() 5961 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 3, i32 4> 5962 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 5963 ret void 5964} 5965 5966define void @v_shuffle_v4i32_v3i32__u_5_5_5(ptr addrspace(1) inreg %ptr) { 5967; GFX900-LABEL: v_shuffle_v4i32_v3i32__u_5_5_5: 5968; GFX900: ; %bb.0: 5969; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5970; GFX900-NEXT: ;;#ASMSTART 5971; GFX900-NEXT: ; def v[0:2] 5972; GFX900-NEXT: ;;#ASMEND 5973; GFX900-NEXT: v_mov_b32_e32 v4, 0 5974; GFX900-NEXT: v_mov_b32_e32 v1, v2 5975; GFX900-NEXT: v_mov_b32_e32 v3, v2 5976; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] 5977; GFX900-NEXT: s_waitcnt vmcnt(0) 5978; GFX900-NEXT: s_setpc_b64 s[30:31] 5979; 5980; GFX90A-LABEL: v_shuffle_v4i32_v3i32__u_5_5_5: 5981; GFX90A: ; %bb.0: 5982; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5983; GFX90A-NEXT: ;;#ASMSTART 5984; GFX90A-NEXT: ; def v[0:2] 5985; GFX90A-NEXT: ;;#ASMEND 5986; GFX90A-NEXT: v_mov_b32_e32 v4, 0 5987; GFX90A-NEXT: v_mov_b32_e32 v1, v2 5988; GFX90A-NEXT: v_mov_b32_e32 v3, v2 5989; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] 5990; GFX90A-NEXT: s_waitcnt vmcnt(0) 5991; GFX90A-NEXT: s_setpc_b64 s[30:31] 5992; 5993; GFX940-LABEL: v_shuffle_v4i32_v3i32__u_5_5_5: 5994; GFX940: ; %bb.0: 5995; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5996; GFX940-NEXT: ;;#ASMSTART 5997; GFX940-NEXT: ; def v[0:2] 5998; GFX940-NEXT: ;;#ASMEND 5999; GFX940-NEXT: v_mov_b32_e32 v4, 0 6000; GFX940-NEXT: v_mov_b32_e32 v1, v2 6001; GFX940-NEXT: v_mov_b32_e32 v3, v2 6002; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 6003; GFX940-NEXT: s_waitcnt vmcnt(0) 6004; GFX940-NEXT: s_setpc_b64 s[30:31] 6005 %vec0 = call <3 x i32> asm "; def $0", "=v"() 6006 %vec1 = call <3 x i32> asm "; def $0", "=v"() 6007 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 poison, i32 5, i32 5, i32 5> 
6008 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 6009 ret void 6010} 6011 6012define void @v_shuffle_v4i32_v3i32__0_5_5_5(ptr addrspace(1) inreg %ptr) { 6013; GFX900-LABEL: v_shuffle_v4i32_v3i32__0_5_5_5: 6014; GFX900: ; %bb.0: 6015; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6016; GFX900-NEXT: ;;#ASMSTART 6017; GFX900-NEXT: ; def v[0:2] 6018; GFX900-NEXT: ;;#ASMEND 6019; GFX900-NEXT: ;;#ASMSTART 6020; GFX900-NEXT: ; def v[1:3] 6021; GFX900-NEXT: ;;#ASMEND 6022; GFX900-NEXT: v_mov_b32_e32 v4, 0 6023; GFX900-NEXT: v_mov_b32_e32 v1, v3 6024; GFX900-NEXT: v_mov_b32_e32 v2, v3 6025; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] 6026; GFX900-NEXT: s_waitcnt vmcnt(0) 6027; GFX900-NEXT: s_setpc_b64 s[30:31] 6028; 6029; GFX90A-LABEL: v_shuffle_v4i32_v3i32__0_5_5_5: 6030; GFX90A: ; %bb.0: 6031; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6032; GFX90A-NEXT: ;;#ASMSTART 6033; GFX90A-NEXT: ; def v[0:2] 6034; GFX90A-NEXT: ;;#ASMEND 6035; GFX90A-NEXT: ;;#ASMSTART 6036; GFX90A-NEXT: ; def v[2:4] 6037; GFX90A-NEXT: ;;#ASMEND 6038; GFX90A-NEXT: v_mov_b32_e32 v5, 0 6039; GFX90A-NEXT: v_mov_b32_e32 v1, v4 6040; GFX90A-NEXT: v_mov_b32_e32 v2, v4 6041; GFX90A-NEXT: v_mov_b32_e32 v3, v4 6042; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] 6043; GFX90A-NEXT: s_waitcnt vmcnt(0) 6044; GFX90A-NEXT: s_setpc_b64 s[30:31] 6045; 6046; GFX940-LABEL: v_shuffle_v4i32_v3i32__0_5_5_5: 6047; GFX940: ; %bb.0: 6048; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6049; GFX940-NEXT: ;;#ASMSTART 6050; GFX940-NEXT: ; def v[0:2] 6051; GFX940-NEXT: ;;#ASMEND 6052; GFX940-NEXT: v_mov_b32_e32 v5, 0 6053; GFX940-NEXT: ;;#ASMSTART 6054; GFX940-NEXT: ; def v[2:4] 6055; GFX940-NEXT: ;;#ASMEND 6056; GFX940-NEXT: s_nop 0 6057; GFX940-NEXT: v_mov_b32_e32 v1, v4 6058; GFX940-NEXT: v_mov_b32_e32 v2, v4 6059; GFX940-NEXT: v_mov_b32_e32 v3, v4 6060; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 6061; GFX940-NEXT: s_waitcnt vmcnt(0) 6062; GFX940-NEXT: 
s_setpc_b64 s[30:31] 6063 %vec0 = call <3 x i32> asm "; def $0", "=v"() 6064 %vec1 = call <3 x i32> asm "; def $0", "=v"() 6065 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 0, i32 5, i32 5, i32 5> 6066 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 6067 ret void 6068} 6069 6070define void @v_shuffle_v4i32_v3i32__1_5_5_5(ptr addrspace(1) inreg %ptr) { 6071; GFX900-LABEL: v_shuffle_v4i32_v3i32__1_5_5_5: 6072; GFX900: ; %bb.0: 6073; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6074; GFX900-NEXT: ;;#ASMSTART 6075; GFX900-NEXT: ; def v[2:4] 6076; GFX900-NEXT: ;;#ASMEND 6077; GFX900-NEXT: ;;#ASMSTART 6078; GFX900-NEXT: ; def v[0:2] 6079; GFX900-NEXT: ;;#ASMEND 6080; GFX900-NEXT: v_mov_b32_e32 v5, 0 6081; GFX900-NEXT: v_mov_b32_e32 v0, v3 6082; GFX900-NEXT: v_mov_b32_e32 v1, v2 6083; GFX900-NEXT: v_mov_b32_e32 v3, v2 6084; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] 6085; GFX900-NEXT: s_waitcnt vmcnt(0) 6086; GFX900-NEXT: s_setpc_b64 s[30:31] 6087; 6088; GFX90A-LABEL: v_shuffle_v4i32_v3i32__1_5_5_5: 6089; GFX90A: ; %bb.0: 6090; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6091; GFX90A-NEXT: ;;#ASMSTART 6092; GFX90A-NEXT: ; def v[2:4] 6093; GFX90A-NEXT: ;;#ASMEND 6094; GFX90A-NEXT: ;;#ASMSTART 6095; GFX90A-NEXT: ; def v[0:2] 6096; GFX90A-NEXT: ;;#ASMEND 6097; GFX90A-NEXT: v_mov_b32_e32 v5, 0 6098; GFX90A-NEXT: v_mov_b32_e32 v0, v3 6099; GFX90A-NEXT: v_mov_b32_e32 v1, v2 6100; GFX90A-NEXT: v_mov_b32_e32 v3, v2 6101; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] 6102; GFX90A-NEXT: s_waitcnt vmcnt(0) 6103; GFX90A-NEXT: s_setpc_b64 s[30:31] 6104; 6105; GFX940-LABEL: v_shuffle_v4i32_v3i32__1_5_5_5: 6106; GFX940: ; %bb.0: 6107; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6108; GFX940-NEXT: ;;#ASMSTART 6109; GFX940-NEXT: ; def v[2:4] 6110; GFX940-NEXT: ;;#ASMEND 6111; GFX940-NEXT: v_mov_b32_e32 v5, 0 6112; GFX940-NEXT: ;;#ASMSTART 6113; GFX940-NEXT: ; def v[0:2] 6114; GFX940-NEXT: ;;#ASMEND 6115; 
GFX940-NEXT: s_nop 0 6116; GFX940-NEXT: v_mov_b32_e32 v0, v3 6117; GFX940-NEXT: v_mov_b32_e32 v1, v2 6118; GFX940-NEXT: v_mov_b32_e32 v3, v2 6119; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 6120; GFX940-NEXT: s_waitcnt vmcnt(0) 6121; GFX940-NEXT: s_setpc_b64 s[30:31] 6122 %vec0 = call <3 x i32> asm "; def $0", "=v"() 6123 %vec1 = call <3 x i32> asm "; def $0", "=v"() 6124 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 1, i32 5, i32 5, i32 5> 6125 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 6126 ret void 6127} 6128 6129define void @v_shuffle_v4i32_v3i32__2_5_5_5(ptr addrspace(1) inreg %ptr) { 6130; GFX900-LABEL: v_shuffle_v4i32_v3i32__2_5_5_5: 6131; GFX900: ; %bb.0: 6132; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6133; GFX900-NEXT: ;;#ASMSTART 6134; GFX900-NEXT: ; def v[1:3] 6135; GFX900-NEXT: ;;#ASMEND 6136; GFX900-NEXT: ;;#ASMSTART 6137; GFX900-NEXT: ; def v[0:2] 6138; GFX900-NEXT: ;;#ASMEND 6139; GFX900-NEXT: v_mov_b32_e32 v4, 0 6140; GFX900-NEXT: v_mov_b32_e32 v0, v3 6141; GFX900-NEXT: v_mov_b32_e32 v1, v2 6142; GFX900-NEXT: v_mov_b32_e32 v3, v2 6143; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] 6144; GFX900-NEXT: s_waitcnt vmcnt(0) 6145; GFX900-NEXT: s_setpc_b64 s[30:31] 6146; 6147; GFX90A-LABEL: v_shuffle_v4i32_v3i32__2_5_5_5: 6148; GFX90A: ; %bb.0: 6149; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6150; GFX90A-NEXT: ;;#ASMSTART 6151; GFX90A-NEXT: ; def v[2:4] 6152; GFX90A-NEXT: ;;#ASMEND 6153; GFX90A-NEXT: ;;#ASMSTART 6154; GFX90A-NEXT: ; def v[0:2] 6155; GFX90A-NEXT: ;;#ASMEND 6156; GFX90A-NEXT: v_mov_b32_e32 v5, 0 6157; GFX90A-NEXT: v_mov_b32_e32 v0, v4 6158; GFX90A-NEXT: v_mov_b32_e32 v1, v2 6159; GFX90A-NEXT: v_mov_b32_e32 v3, v2 6160; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] 6161; GFX90A-NEXT: s_waitcnt vmcnt(0) 6162; GFX90A-NEXT: s_setpc_b64 s[30:31] 6163; 6164; GFX940-LABEL: v_shuffle_v4i32_v3i32__2_5_5_5: 6165; GFX940: ; %bb.0: 6166; GFX940-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) 6167; GFX940-NEXT: ;;#ASMSTART 6168; GFX940-NEXT: ; def v[2:4] 6169; GFX940-NEXT: ;;#ASMEND 6170; GFX940-NEXT: v_mov_b32_e32 v5, 0 6171; GFX940-NEXT: ;;#ASMSTART 6172; GFX940-NEXT: ; def v[0:2] 6173; GFX940-NEXT: ;;#ASMEND 6174; GFX940-NEXT: s_nop 0 6175; GFX940-NEXT: v_mov_b32_e32 v0, v4 6176; GFX940-NEXT: v_mov_b32_e32 v1, v2 6177; GFX940-NEXT: v_mov_b32_e32 v3, v2 6178; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 6179; GFX940-NEXT: s_waitcnt vmcnt(0) 6180; GFX940-NEXT: s_setpc_b64 s[30:31] 6181 %vec0 = call <3 x i32> asm "; def $0", "=v"() 6182 %vec1 = call <3 x i32> asm "; def $0", "=v"() 6183 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 2, i32 5, i32 5, i32 5> 6184 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 6185 ret void 6186} 6187 6188define void @v_shuffle_v4i32_v3i32__3_5_5_5(ptr addrspace(1) inreg %ptr) { 6189; GFX900-LABEL: v_shuffle_v4i32_v3i32__3_5_5_5: 6190; GFX900: ; %bb.0: 6191; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6192; GFX900-NEXT: ;;#ASMSTART 6193; GFX900-NEXT: ; def v[0:2] 6194; GFX900-NEXT: ;;#ASMEND 6195; GFX900-NEXT: v_mov_b32_e32 v4, 0 6196; GFX900-NEXT: v_mov_b32_e32 v1, v2 6197; GFX900-NEXT: v_mov_b32_e32 v3, v2 6198; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] 6199; GFX900-NEXT: s_waitcnt vmcnt(0) 6200; GFX900-NEXT: s_setpc_b64 s[30:31] 6201; 6202; GFX90A-LABEL: v_shuffle_v4i32_v3i32__3_5_5_5: 6203; GFX90A: ; %bb.0: 6204; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6205; GFX90A-NEXT: ;;#ASMSTART 6206; GFX90A-NEXT: ; def v[0:2] 6207; GFX90A-NEXT: ;;#ASMEND 6208; GFX90A-NEXT: v_mov_b32_e32 v4, 0 6209; GFX90A-NEXT: v_mov_b32_e32 v1, v2 6210; GFX90A-NEXT: v_mov_b32_e32 v3, v2 6211; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] 6212; GFX90A-NEXT: s_waitcnt vmcnt(0) 6213; GFX90A-NEXT: s_setpc_b64 s[30:31] 6214; 6215; GFX940-LABEL: v_shuffle_v4i32_v3i32__3_5_5_5: 6216; GFX940: ; %bb.0: 6217; GFX940-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) 6218; GFX940-NEXT: ;;#ASMSTART 6219; GFX940-NEXT: ; def v[0:2] 6220; GFX940-NEXT: ;;#ASMEND 6221; GFX940-NEXT: v_mov_b32_e32 v4, 0 6222; GFX940-NEXT: v_mov_b32_e32 v1, v2 6223; GFX940-NEXT: v_mov_b32_e32 v3, v2 6224; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 6225; GFX940-NEXT: s_waitcnt vmcnt(0) 6226; GFX940-NEXT: s_setpc_b64 s[30:31] 6227 %vec0 = call <3 x i32> asm "; def $0", "=v"() 6228 %vec1 = call <3 x i32> asm "; def $0", "=v"() 6229 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 3, i32 5, i32 5, i32 5> 6230 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 6231 ret void 6232} 6233 6234define void @v_shuffle_v4i32_v3i32__4_5_5_5(ptr addrspace(1) inreg %ptr) { 6235; GFX900-LABEL: v_shuffle_v4i32_v3i32__4_5_5_5: 6236; GFX900: ; %bb.0: 6237; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6238; GFX900-NEXT: ;;#ASMSTART 6239; GFX900-NEXT: ; def v[0:2] 6240; GFX900-NEXT: ;;#ASMEND 6241; GFX900-NEXT: v_mov_b32_e32 v4, 0 6242; GFX900-NEXT: v_mov_b32_e32 v0, v1 6243; GFX900-NEXT: v_mov_b32_e32 v1, v2 6244; GFX900-NEXT: v_mov_b32_e32 v3, v2 6245; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] 6246; GFX900-NEXT: s_waitcnt vmcnt(0) 6247; GFX900-NEXT: s_setpc_b64 s[30:31] 6248; 6249; GFX90A-LABEL: v_shuffle_v4i32_v3i32__4_5_5_5: 6250; GFX90A: ; %bb.0: 6251; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6252; GFX90A-NEXT: ;;#ASMSTART 6253; GFX90A-NEXT: ; def v[0:2] 6254; GFX90A-NEXT: ;;#ASMEND 6255; GFX90A-NEXT: v_mov_b32_e32 v4, 0 6256; GFX90A-NEXT: v_mov_b32_e32 v0, v1 6257; GFX90A-NEXT: v_mov_b32_e32 v1, v2 6258; GFX90A-NEXT: v_mov_b32_e32 v3, v2 6259; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] 6260; GFX90A-NEXT: s_waitcnt vmcnt(0) 6261; GFX90A-NEXT: s_setpc_b64 s[30:31] 6262; 6263; GFX940-LABEL: v_shuffle_v4i32_v3i32__4_5_5_5: 6264; GFX940: ; %bb.0: 6265; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6266; GFX940-NEXT: ;;#ASMSTART 6267; GFX940-NEXT: ; 
def v[0:2] 6268; GFX940-NEXT: ;;#ASMEND 6269; GFX940-NEXT: v_mov_b32_e32 v4, 0 6270; GFX940-NEXT: v_mov_b32_e32 v0, v1 6271; GFX940-NEXT: v_mov_b32_e32 v1, v2 6272; GFX940-NEXT: v_mov_b32_e32 v3, v2 6273; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 6274; GFX940-NEXT: s_waitcnt vmcnt(0) 6275; GFX940-NEXT: s_setpc_b64 s[30:31] 6276 %vec0 = call <3 x i32> asm "; def $0", "=v"() 6277 %vec1 = call <3 x i32> asm "; def $0", "=v"() 6278 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 4, i32 5, i32 5, i32 5> 6279 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 6280 ret void 6281} 6282 6283define void @v_shuffle_v4i32_v3i32__5_u_5_5(ptr addrspace(1) inreg %ptr) { 6284; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_u_5_5: 6285; GFX900: ; %bb.0: 6286; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6287; GFX900-NEXT: ;;#ASMSTART 6288; GFX900-NEXT: ; def v[0:2] 6289; GFX900-NEXT: ;;#ASMEND 6290; GFX900-NEXT: v_mov_b32_e32 v4, 0 6291; GFX900-NEXT: v_mov_b32_e32 v0, v2 6292; GFX900-NEXT: v_mov_b32_e32 v3, v2 6293; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] 6294; GFX900-NEXT: s_waitcnt vmcnt(0) 6295; GFX900-NEXT: s_setpc_b64 s[30:31] 6296; 6297; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_u_5_5: 6298; GFX90A: ; %bb.0: 6299; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6300; GFX90A-NEXT: ;;#ASMSTART 6301; GFX90A-NEXT: ; def v[0:2] 6302; GFX90A-NEXT: ;;#ASMEND 6303; GFX90A-NEXT: v_mov_b32_e32 v4, 0 6304; GFX90A-NEXT: v_mov_b32_e32 v0, v2 6305; GFX90A-NEXT: v_mov_b32_e32 v3, v2 6306; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] 6307; GFX90A-NEXT: s_waitcnt vmcnt(0) 6308; GFX90A-NEXT: s_setpc_b64 s[30:31] 6309; 6310; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_u_5_5: 6311; GFX940: ; %bb.0: 6312; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6313; GFX940-NEXT: ;;#ASMSTART 6314; GFX940-NEXT: ; def v[0:2] 6315; GFX940-NEXT: ;;#ASMEND 6316; GFX940-NEXT: v_mov_b32_e32 v4, 0 6317; GFX940-NEXT: v_mov_b32_e32 v0, v2 
6318; GFX940-NEXT: v_mov_b32_e32 v3, v2 6319; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 6320; GFX940-NEXT: s_waitcnt vmcnt(0) 6321; GFX940-NEXT: s_setpc_b64 s[30:31] 6322 %vec0 = call <3 x i32> asm "; def $0", "=v"() 6323 %vec1 = call <3 x i32> asm "; def $0", "=v"() 6324 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 poison, i32 5, i32 5> 6325 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 6326 ret void 6327} 6328 6329define void @v_shuffle_v4i32_v3i32__5_0_5_5(ptr addrspace(1) inreg %ptr) { 6330; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_0_5_5: 6331; GFX900: ; %bb.0: 6332; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6333; GFX900-NEXT: ;;#ASMSTART 6334; GFX900-NEXT: ; def v[1:3] 6335; GFX900-NEXT: ;;#ASMEND 6336; GFX900-NEXT: ;;#ASMSTART 6337; GFX900-NEXT: ; def v[2:4] 6338; GFX900-NEXT: ;;#ASMEND 6339; GFX900-NEXT: v_mov_b32_e32 v5, 0 6340; GFX900-NEXT: v_mov_b32_e32 v0, v4 6341; GFX900-NEXT: v_mov_b32_e32 v2, v4 6342; GFX900-NEXT: v_mov_b32_e32 v3, v4 6343; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] 6344; GFX900-NEXT: s_waitcnt vmcnt(0) 6345; GFX900-NEXT: s_setpc_b64 s[30:31] 6346; 6347; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_0_5_5: 6348; GFX90A: ; %bb.0: 6349; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6350; GFX90A-NEXT: ;;#ASMSTART 6351; GFX90A-NEXT: ; def v[0:2] 6352; GFX90A-NEXT: ;;#ASMEND 6353; GFX90A-NEXT: v_mov_b32_e32 v7, 0 6354; GFX90A-NEXT: ;;#ASMSTART 6355; GFX90A-NEXT: ; def v[4:6] 6356; GFX90A-NEXT: ;;#ASMEND 6357; GFX90A-NEXT: v_mov_b32_e32 v0, v2 6358; GFX90A-NEXT: v_mov_b32_e32 v1, v4 6359; GFX90A-NEXT: v_mov_b32_e32 v3, v2 6360; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17] 6361; GFX90A-NEXT: s_waitcnt vmcnt(0) 6362; GFX90A-NEXT: s_setpc_b64 s[30:31] 6363; 6364; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_0_5_5: 6365; GFX940: ; %bb.0: 6366; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6367; GFX940-NEXT: ;;#ASMSTART 6368; GFX940-NEXT: ; def v[0:2] 
6369; GFX940-NEXT: ;;#ASMEND 6370; GFX940-NEXT: v_mov_b32_e32 v7, 0 6371; GFX940-NEXT: ;;#ASMSTART 6372; GFX940-NEXT: ; def v[4:6] 6373; GFX940-NEXT: ;;#ASMEND 6374; GFX940-NEXT: v_mov_b32_e32 v0, v2 6375; GFX940-NEXT: v_mov_b32_e32 v1, v4 6376; GFX940-NEXT: v_mov_b32_e32 v3, v2 6377; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 6378; GFX940-NEXT: s_waitcnt vmcnt(0) 6379; GFX940-NEXT: s_setpc_b64 s[30:31] 6380 %vec0 = call <3 x i32> asm "; def $0", "=v"() 6381 %vec1 = call <3 x i32> asm "; def $0", "=v"() 6382 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 0, i32 5, i32 5> 6383 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 6384 ret void 6385} 6386 6387define void @v_shuffle_v4i32_v3i32__5_1_5_5(ptr addrspace(1) inreg %ptr) { 6388; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_1_5_5: 6389; GFX900: ; %bb.0: 6390; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6391; GFX900-NEXT: ;;#ASMSTART 6392; GFX900-NEXT: ; def v[0:2] 6393; GFX900-NEXT: ;;#ASMEND 6394; GFX900-NEXT: ;;#ASMSTART 6395; GFX900-NEXT: ; def v[2:4] 6396; GFX900-NEXT: ;;#ASMEND 6397; GFX900-NEXT: v_mov_b32_e32 v5, 0 6398; GFX900-NEXT: v_mov_b32_e32 v0, v4 6399; GFX900-NEXT: v_mov_b32_e32 v2, v4 6400; GFX900-NEXT: v_mov_b32_e32 v3, v4 6401; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] 6402; GFX900-NEXT: s_waitcnt vmcnt(0) 6403; GFX900-NEXT: s_setpc_b64 s[30:31] 6404; 6405; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_1_5_5: 6406; GFX90A: ; %bb.0: 6407; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6408; GFX90A-NEXT: ;;#ASMSTART 6409; GFX90A-NEXT: ; def v[0:2] 6410; GFX90A-NEXT: ;;#ASMEND 6411; GFX90A-NEXT: ;;#ASMSTART 6412; GFX90A-NEXT: ; def v[2:4] 6413; GFX90A-NEXT: ;;#ASMEND 6414; GFX90A-NEXT: v_mov_b32_e32 v5, 0 6415; GFX90A-NEXT: v_mov_b32_e32 v0, v4 6416; GFX90A-NEXT: v_mov_b32_e32 v2, v4 6417; GFX90A-NEXT: v_mov_b32_e32 v3, v4 6418; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] 6419; GFX90A-NEXT: s_waitcnt vmcnt(0) 6420; 
GFX90A-NEXT: s_setpc_b64 s[30:31] 6421; 6422; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_1_5_5: 6423; GFX940: ; %bb.0: 6424; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6425; GFX940-NEXT: ;;#ASMSTART 6426; GFX940-NEXT: ; def v[0:2] 6427; GFX940-NEXT: ;;#ASMEND 6428; GFX940-NEXT: v_mov_b32_e32 v5, 0 6429; GFX940-NEXT: ;;#ASMSTART 6430; GFX940-NEXT: ; def v[2:4] 6431; GFX940-NEXT: ;;#ASMEND 6432; GFX940-NEXT: s_nop 0 6433; GFX940-NEXT: v_mov_b32_e32 v0, v4 6434; GFX940-NEXT: v_mov_b32_e32 v2, v4 6435; GFX940-NEXT: v_mov_b32_e32 v3, v4 6436; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 6437; GFX940-NEXT: s_waitcnt vmcnt(0) 6438; GFX940-NEXT: s_setpc_b64 s[30:31] 6439 %vec0 = call <3 x i32> asm "; def $0", "=v"() 6440 %vec1 = call <3 x i32> asm "; def $0", "=v"() 6441 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 1, i32 5, i32 5> 6442 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 6443 ret void 6444} 6445 6446define void @v_shuffle_v4i32_v3i32__5_2_5_5(ptr addrspace(1) inreg %ptr) { 6447; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_2_5_5: 6448; GFX900: ; %bb.0: 6449; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6450; GFX900-NEXT: ;;#ASMSTART 6451; GFX900-NEXT: ; def v[1:3] 6452; GFX900-NEXT: ;;#ASMEND 6453; GFX900-NEXT: ;;#ASMSTART 6454; GFX900-NEXT: ; def v[0:2] 6455; GFX900-NEXT: ;;#ASMEND 6456; GFX900-NEXT: v_mov_b32_e32 v4, 0 6457; GFX900-NEXT: v_mov_b32_e32 v0, v2 6458; GFX900-NEXT: v_mov_b32_e32 v1, v3 6459; GFX900-NEXT: v_mov_b32_e32 v3, v2 6460; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] 6461; GFX900-NEXT: s_waitcnt vmcnt(0) 6462; GFX900-NEXT: s_setpc_b64 s[30:31] 6463; 6464; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_2_5_5: 6465; GFX90A: ; %bb.0: 6466; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6467; GFX90A-NEXT: ;;#ASMSTART 6468; GFX90A-NEXT: ; def v[2:4] 6469; GFX90A-NEXT: ;;#ASMEND 6470; GFX90A-NEXT: ;;#ASMSTART 6471; GFX90A-NEXT: ; def v[0:2] 6472; GFX90A-NEXT: ;;#ASMEND 
6473; GFX90A-NEXT: v_mov_b32_e32 v5, 0 6474; GFX90A-NEXT: v_mov_b32_e32 v0, v2 6475; GFX90A-NEXT: v_mov_b32_e32 v1, v4 6476; GFX90A-NEXT: v_mov_b32_e32 v3, v2 6477; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] 6478; GFX90A-NEXT: s_waitcnt vmcnt(0) 6479; GFX90A-NEXT: s_setpc_b64 s[30:31] 6480; 6481; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_2_5_5: 6482; GFX940: ; %bb.0: 6483; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6484; GFX940-NEXT: ;;#ASMSTART 6485; GFX940-NEXT: ; def v[2:4] 6486; GFX940-NEXT: ;;#ASMEND 6487; GFX940-NEXT: v_mov_b32_e32 v5, 0 6488; GFX940-NEXT: ;;#ASMSTART 6489; GFX940-NEXT: ; def v[0:2] 6490; GFX940-NEXT: ;;#ASMEND 6491; GFX940-NEXT: s_nop 0 6492; GFX940-NEXT: v_mov_b32_e32 v0, v2 6493; GFX940-NEXT: v_mov_b32_e32 v1, v4 6494; GFX940-NEXT: v_mov_b32_e32 v3, v2 6495; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 6496; GFX940-NEXT: s_waitcnt vmcnt(0) 6497; GFX940-NEXT: s_setpc_b64 s[30:31] 6498 %vec0 = call <3 x i32> asm "; def $0", "=v"() 6499 %vec1 = call <3 x i32> asm "; def $0", "=v"() 6500 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 2, i32 5, i32 5> 6501 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 6502 ret void 6503} 6504 6505define void @v_shuffle_v4i32_v3i32__5_3_5_5(ptr addrspace(1) inreg %ptr) { 6506; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_3_5_5: 6507; GFX900: ; %bb.0: 6508; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6509; GFX900-NEXT: ;;#ASMSTART 6510; GFX900-NEXT: ; def v[1:3] 6511; GFX900-NEXT: ;;#ASMEND 6512; GFX900-NEXT: v_mov_b32_e32 v4, 0 6513; GFX900-NEXT: v_mov_b32_e32 v0, v3 6514; GFX900-NEXT: v_mov_b32_e32 v2, v3 6515; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] 6516; GFX900-NEXT: s_waitcnt vmcnt(0) 6517; GFX900-NEXT: s_setpc_b64 s[30:31] 6518; 6519; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_3_5_5: 6520; GFX90A: ; %bb.0: 6521; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6522; GFX90A-NEXT: ;;#ASMSTART 6523; GFX90A-NEXT: ; def 
v[2:4] 6524; GFX90A-NEXT: ;;#ASMEND 6525; GFX90A-NEXT: v_mov_b32_e32 v5, 0 6526; GFX90A-NEXT: v_mov_b32_e32 v0, v4 6527; GFX90A-NEXT: v_mov_b32_e32 v1, v2 6528; GFX90A-NEXT: v_mov_b32_e32 v2, v4 6529; GFX90A-NEXT: v_mov_b32_e32 v3, v4 6530; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] 6531; GFX90A-NEXT: s_waitcnt vmcnt(0) 6532; GFX90A-NEXT: s_setpc_b64 s[30:31] 6533; 6534; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_3_5_5: 6535; GFX940: ; %bb.0: 6536; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6537; GFX940-NEXT: ;;#ASMSTART 6538; GFX940-NEXT: ; def v[2:4] 6539; GFX940-NEXT: ;;#ASMEND 6540; GFX940-NEXT: v_mov_b32_e32 v5, 0 6541; GFX940-NEXT: v_mov_b32_e32 v0, v4 6542; GFX940-NEXT: v_mov_b32_e32 v1, v2 6543; GFX940-NEXT: v_mov_b32_e32 v2, v4 6544; GFX940-NEXT: v_mov_b32_e32 v3, v4 6545; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 6546; GFX940-NEXT: s_waitcnt vmcnt(0) 6547; GFX940-NEXT: s_setpc_b64 s[30:31] 6548 %vec0 = call <3 x i32> asm "; def $0", "=v"() 6549 %vec1 = call <3 x i32> asm "; def $0", "=v"() 6550 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 3, i32 5, i32 5> 6551 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 6552 ret void 6553} 6554 6555define void @v_shuffle_v4i32_v3i32__5_4_5_5(ptr addrspace(1) inreg %ptr) { 6556; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_4_5_5: 6557; GFX900: ; %bb.0: 6558; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6559; GFX900-NEXT: ;;#ASMSTART 6560; GFX900-NEXT: ; def v[0:2] 6561; GFX900-NEXT: ;;#ASMEND 6562; GFX900-NEXT: v_mov_b32_e32 v4, 0 6563; GFX900-NEXT: v_mov_b32_e32 v0, v2 6564; GFX900-NEXT: v_mov_b32_e32 v3, v2 6565; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] 6566; GFX900-NEXT: s_waitcnt vmcnt(0) 6567; GFX900-NEXT: s_setpc_b64 s[30:31] 6568; 6569; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_4_5_5: 6570; GFX90A: ; %bb.0: 6571; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6572; GFX90A-NEXT: ;;#ASMSTART 6573; GFX90A-NEXT: ; def 
v[0:2] 6574; GFX90A-NEXT: ;;#ASMEND 6575; GFX90A-NEXT: v_mov_b32_e32 v4, 0 6576; GFX90A-NEXT: v_mov_b32_e32 v0, v2 6577; GFX90A-NEXT: v_mov_b32_e32 v3, v2 6578; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] 6579; GFX90A-NEXT: s_waitcnt vmcnt(0) 6580; GFX90A-NEXT: s_setpc_b64 s[30:31] 6581; 6582; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_4_5_5: 6583; GFX940: ; %bb.0: 6584; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6585; GFX940-NEXT: ;;#ASMSTART 6586; GFX940-NEXT: ; def v[0:2] 6587; GFX940-NEXT: ;;#ASMEND 6588; GFX940-NEXT: v_mov_b32_e32 v4, 0 6589; GFX940-NEXT: v_mov_b32_e32 v0, v2 6590; GFX940-NEXT: v_mov_b32_e32 v3, v2 6591; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 6592; GFX940-NEXT: s_waitcnt vmcnt(0) 6593; GFX940-NEXT: s_setpc_b64 s[30:31] 6594 %vec0 = call <3 x i32> asm "; def $0", "=v"() 6595 %vec1 = call <3 x i32> asm "; def $0", "=v"() 6596 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 4, i32 5, i32 5> 6597 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 6598 ret void 6599} 6600 6601define void @v_shuffle_v4i32_v3i32__5_5_u_5(ptr addrspace(1) inreg %ptr) { 6602; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_u_5: 6603; GFX900: ; %bb.0: 6604; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6605; GFX900-NEXT: ;;#ASMSTART 6606; GFX900-NEXT: ; def v[1:3] 6607; GFX900-NEXT: ;;#ASMEND 6608; GFX900-NEXT: v_mov_b32_e32 v4, 0 6609; GFX900-NEXT: v_mov_b32_e32 v0, v3 6610; GFX900-NEXT: v_mov_b32_e32 v1, v3 6611; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] 6612; GFX900-NEXT: s_waitcnt vmcnt(0) 6613; GFX900-NEXT: s_setpc_b64 s[30:31] 6614; 6615; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_u_5: 6616; GFX90A: ; %bb.0: 6617; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6618; GFX90A-NEXT: ;;#ASMSTART 6619; GFX90A-NEXT: ; def v[0:2] 6620; GFX90A-NEXT: ;;#ASMEND 6621; GFX90A-NEXT: v_mov_b32_e32 v4, 0 6622; GFX90A-NEXT: v_mov_b32_e32 v0, v2 6623; GFX90A-NEXT: v_mov_b32_e32 v1, v2 6624; 
GFX90A-NEXT: v_mov_b32_e32 v3, v2 6625; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] 6626; GFX90A-NEXT: s_waitcnt vmcnt(0) 6627; GFX90A-NEXT: s_setpc_b64 s[30:31] 6628; 6629; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_u_5: 6630; GFX940: ; %bb.0: 6631; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6632; GFX940-NEXT: ;;#ASMSTART 6633; GFX940-NEXT: ; def v[0:2] 6634; GFX940-NEXT: ;;#ASMEND 6635; GFX940-NEXT: v_mov_b32_e32 v4, 0 6636; GFX940-NEXT: v_mov_b32_e32 v0, v2 6637; GFX940-NEXT: v_mov_b32_e32 v1, v2 6638; GFX940-NEXT: v_mov_b32_e32 v3, v2 6639; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 6640; GFX940-NEXT: s_waitcnt vmcnt(0) 6641; GFX940-NEXT: s_setpc_b64 s[30:31] 6642 %vec0 = call <3 x i32> asm "; def $0", "=v"() 6643 %vec1 = call <3 x i32> asm "; def $0", "=v"() 6644 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 poison, i32 5> 6645 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 6646 ret void 6647} 6648 6649define void @v_shuffle_v4i32_v3i32__5_5_0_5(ptr addrspace(1) inreg %ptr) { 6650; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_0_5: 6651; GFX900: ; %bb.0: 6652; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6653; GFX900-NEXT: ;;#ASMSTART 6654; GFX900-NEXT: ; def v[1:3] 6655; GFX900-NEXT: ;;#ASMEND 6656; GFX900-NEXT: v_mov_b32_e32 v7, 0 6657; GFX900-NEXT: ;;#ASMSTART 6658; GFX900-NEXT: ; def v[4:6] 6659; GFX900-NEXT: ;;#ASMEND 6660; GFX900-NEXT: v_mov_b32_e32 v0, v3 6661; GFX900-NEXT: v_mov_b32_e32 v1, v3 6662; GFX900-NEXT: v_mov_b32_e32 v2, v4 6663; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17] 6664; GFX900-NEXT: s_waitcnt vmcnt(0) 6665; GFX900-NEXT: s_setpc_b64 s[30:31] 6666; 6667; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_0_5: 6668; GFX90A: ; %bb.0: 6669; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6670; GFX90A-NEXT: ;;#ASMSTART 6671; GFX90A-NEXT: ; def v[2:4] 6672; GFX90A-NEXT: ;;#ASMEND 6673; GFX90A-NEXT: v_mov_b32_e32 v7, 0 6674; GFX90A-NEXT: ;;#ASMSTART 
6675; GFX90A-NEXT: ; def v[4:6] 6676; GFX90A-NEXT: ;;#ASMEND 6677; GFX90A-NEXT: v_mov_b32_e32 v0, v6 6678; GFX90A-NEXT: v_mov_b32_e32 v1, v6 6679; GFX90A-NEXT: v_mov_b32_e32 v3, v6 6680; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17] 6681; GFX90A-NEXT: s_waitcnt vmcnt(0) 6682; GFX90A-NEXT: s_setpc_b64 s[30:31] 6683; 6684; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_0_5: 6685; GFX940: ; %bb.0: 6686; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6687; GFX940-NEXT: ;;#ASMSTART 6688; GFX940-NEXT: ; def v[2:4] 6689; GFX940-NEXT: ;;#ASMEND 6690; GFX940-NEXT: v_mov_b32_e32 v7, 0 6691; GFX940-NEXT: ;;#ASMSTART 6692; GFX940-NEXT: ; def v[4:6] 6693; GFX940-NEXT: ;;#ASMEND 6694; GFX940-NEXT: s_nop 0 6695; GFX940-NEXT: v_mov_b32_e32 v0, v6 6696; GFX940-NEXT: v_mov_b32_e32 v1, v6 6697; GFX940-NEXT: v_mov_b32_e32 v3, v6 6698; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 6699; GFX940-NEXT: s_waitcnt vmcnt(0) 6700; GFX940-NEXT: s_setpc_b64 s[30:31] 6701 %vec0 = call <3 x i32> asm "; def $0", "=v"() 6702 %vec1 = call <3 x i32> asm "; def $0", "=v"() 6703 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 0, i32 5> 6704 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 6705 ret void 6706} 6707 6708define void @v_shuffle_v4i32_v3i32__5_5_1_5(ptr addrspace(1) inreg %ptr) { 6709; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_1_5: 6710; GFX900: ; %bb.0: 6711; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6712; GFX900-NEXT: ;;#ASMSTART 6713; GFX900-NEXT: ; def v[1:3] 6714; GFX900-NEXT: ;;#ASMEND 6715; GFX900-NEXT: ;;#ASMSTART 6716; GFX900-NEXT: ; def v[3:5] 6717; GFX900-NEXT: ;;#ASMEND 6718; GFX900-NEXT: v_mov_b32_e32 v6, 0 6719; GFX900-NEXT: v_mov_b32_e32 v0, v5 6720; GFX900-NEXT: v_mov_b32_e32 v1, v5 6721; GFX900-NEXT: v_mov_b32_e32 v3, v5 6722; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] 6723; GFX900-NEXT: s_waitcnt vmcnt(0) 6724; GFX900-NEXT: s_setpc_b64 s[30:31] 6725; 6726; GFX90A-LABEL: 
v_shuffle_v4i32_v3i32__5_5_1_5: 6727; GFX90A: ; %bb.0: 6728; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6729; GFX90A-NEXT: ;;#ASMSTART 6730; GFX90A-NEXT: ; def v[2:4] 6731; GFX90A-NEXT: ;;#ASMEND 6732; GFX90A-NEXT: v_mov_b32_e32 v7, 0 6733; GFX90A-NEXT: ;;#ASMSTART 6734; GFX90A-NEXT: ; def v[4:6] 6735; GFX90A-NEXT: ;;#ASMEND 6736; GFX90A-NEXT: v_mov_b32_e32 v0, v6 6737; GFX90A-NEXT: v_mov_b32_e32 v1, v6 6738; GFX90A-NEXT: v_mov_b32_e32 v2, v3 6739; GFX90A-NEXT: v_mov_b32_e32 v3, v6 6740; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17] 6741; GFX90A-NEXT: s_waitcnt vmcnt(0) 6742; GFX90A-NEXT: s_setpc_b64 s[30:31] 6743; 6744; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_1_5: 6745; GFX940: ; %bb.0: 6746; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6747; GFX940-NEXT: ;;#ASMSTART 6748; GFX940-NEXT: ; def v[2:4] 6749; GFX940-NEXT: ;;#ASMEND 6750; GFX940-NEXT: v_mov_b32_e32 v7, 0 6751; GFX940-NEXT: ;;#ASMSTART 6752; GFX940-NEXT: ; def v[4:6] 6753; GFX940-NEXT: ;;#ASMEND 6754; GFX940-NEXT: v_mov_b32_e32 v2, v3 6755; GFX940-NEXT: v_mov_b32_e32 v0, v6 6756; GFX940-NEXT: v_mov_b32_e32 v1, v6 6757; GFX940-NEXT: v_mov_b32_e32 v3, v6 6758; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 6759; GFX940-NEXT: s_waitcnt vmcnt(0) 6760; GFX940-NEXT: s_setpc_b64 s[30:31] 6761 %vec0 = call <3 x i32> asm "; def $0", "=v"() 6762 %vec1 = call <3 x i32> asm "; def $0", "=v"() 6763 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 1, i32 5> 6764 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 6765 ret void 6766} 6767 6768define void @v_shuffle_v4i32_v3i32__5_5_2_5(ptr addrspace(1) inreg %ptr) { 6769; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_2_5: 6770; GFX900: ; %bb.0: 6771; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6772; GFX900-NEXT: ;;#ASMSTART 6773; GFX900-NEXT: ; def v[0:2] 6774; GFX900-NEXT: ;;#ASMEND 6775; GFX900-NEXT: ;;#ASMSTART 6776; GFX900-NEXT: ; def v[3:5] 6777; GFX900-NEXT: ;;#ASMEND 6778; 
GFX900-NEXT: v_mov_b32_e32 v6, 0 6779; GFX900-NEXT: v_mov_b32_e32 v0, v5 6780; GFX900-NEXT: v_mov_b32_e32 v1, v5 6781; GFX900-NEXT: v_mov_b32_e32 v3, v5 6782; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] 6783; GFX900-NEXT: s_waitcnt vmcnt(0) 6784; GFX900-NEXT: s_setpc_b64 s[30:31] 6785; 6786; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_2_5: 6787; GFX90A: ; %bb.0: 6788; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6789; GFX90A-NEXT: ;;#ASMSTART 6790; GFX90A-NEXT: ; def v[0:2] 6791; GFX90A-NEXT: ;;#ASMEND 6792; GFX90A-NEXT: v_mov_b32_e32 v7, 0 6793; GFX90A-NEXT: ;;#ASMSTART 6794; GFX90A-NEXT: ; def v[4:6] 6795; GFX90A-NEXT: ;;#ASMEND 6796; GFX90A-NEXT: v_mov_b32_e32 v0, v6 6797; GFX90A-NEXT: v_mov_b32_e32 v1, v6 6798; GFX90A-NEXT: v_mov_b32_e32 v3, v6 6799; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17] 6800; GFX90A-NEXT: s_waitcnt vmcnt(0) 6801; GFX90A-NEXT: s_setpc_b64 s[30:31] 6802; 6803; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_2_5: 6804; GFX940: ; %bb.0: 6805; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6806; GFX940-NEXT: ;;#ASMSTART 6807; GFX940-NEXT: ; def v[0:2] 6808; GFX940-NEXT: ;;#ASMEND 6809; GFX940-NEXT: v_mov_b32_e32 v7, 0 6810; GFX940-NEXT: ;;#ASMSTART 6811; GFX940-NEXT: ; def v[4:6] 6812; GFX940-NEXT: ;;#ASMEND 6813; GFX940-NEXT: s_nop 0 6814; GFX940-NEXT: v_mov_b32_e32 v0, v6 6815; GFX940-NEXT: v_mov_b32_e32 v1, v6 6816; GFX940-NEXT: v_mov_b32_e32 v3, v6 6817; GFX940-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1 6818; GFX940-NEXT: s_waitcnt vmcnt(0) 6819; GFX940-NEXT: s_setpc_b64 s[30:31] 6820 %vec0 = call <3 x i32> asm "; def $0", "=v"() 6821 %vec1 = call <3 x i32> asm "; def $0", "=v"() 6822 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 2, i32 5> 6823 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 6824 ret void 6825} 6826 6827define void @v_shuffle_v4i32_v3i32__5_5_3_5(ptr addrspace(1) inreg %ptr) { 6828; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_3_5: 6829; 
GFX900: ; %bb.0: 6830; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6831; GFX900-NEXT: ;;#ASMSTART 6832; GFX900-NEXT: ; def v[2:4] 6833; GFX900-NEXT: ;;#ASMEND 6834; GFX900-NEXT: v_mov_b32_e32 v5, 0 6835; GFX900-NEXT: v_mov_b32_e32 v0, v4 6836; GFX900-NEXT: v_mov_b32_e32 v1, v4 6837; GFX900-NEXT: v_mov_b32_e32 v3, v4 6838; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] 6839; GFX900-NEXT: s_waitcnt vmcnt(0) 6840; GFX900-NEXT: s_setpc_b64 s[30:31] 6841; 6842; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_3_5: 6843; GFX90A: ; %bb.0: 6844; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6845; GFX90A-NEXT: ;;#ASMSTART 6846; GFX90A-NEXT: ; def v[2:4] 6847; GFX90A-NEXT: ;;#ASMEND 6848; GFX90A-NEXT: v_mov_b32_e32 v5, 0 6849; GFX90A-NEXT: v_mov_b32_e32 v0, v4 6850; GFX90A-NEXT: v_mov_b32_e32 v1, v4 6851; GFX90A-NEXT: v_mov_b32_e32 v3, v4 6852; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] 6853; GFX90A-NEXT: s_waitcnt vmcnt(0) 6854; GFX90A-NEXT: s_setpc_b64 s[30:31] 6855; 6856; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_3_5: 6857; GFX940: ; %bb.0: 6858; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6859; GFX940-NEXT: ;;#ASMSTART 6860; GFX940-NEXT: ; def v[2:4] 6861; GFX940-NEXT: ;;#ASMEND 6862; GFX940-NEXT: v_mov_b32_e32 v5, 0 6863; GFX940-NEXT: v_mov_b32_e32 v0, v4 6864; GFX940-NEXT: v_mov_b32_e32 v1, v4 6865; GFX940-NEXT: v_mov_b32_e32 v3, v4 6866; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 6867; GFX940-NEXT: s_waitcnt vmcnt(0) 6868; GFX940-NEXT: s_setpc_b64 s[30:31] 6869 %vec0 = call <3 x i32> asm "; def $0", "=v"() 6870 %vec1 = call <3 x i32> asm "; def $0", "=v"() 6871 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 3, i32 5> 6872 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 6873 ret void 6874} 6875 6876define void @v_shuffle_v4i32_v3i32__5_5_4_5(ptr addrspace(1) inreg %ptr) { 6877; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_4_5: 6878; GFX900: ; %bb.0: 6879; GFX900-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6880; GFX900-NEXT: ;;#ASMSTART 6881; GFX900-NEXT: ; def v[1:3] 6882; GFX900-NEXT: ;;#ASMEND 6883; GFX900-NEXT: v_mov_b32_e32 v4, 0 6884; GFX900-NEXT: v_mov_b32_e32 v0, v3 6885; GFX900-NEXT: v_mov_b32_e32 v1, v3 6886; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] 6887; GFX900-NEXT: s_waitcnt vmcnt(0) 6888; GFX900-NEXT: s_setpc_b64 s[30:31] 6889; 6890; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_4_5: 6891; GFX90A: ; %bb.0: 6892; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6893; GFX90A-NEXT: ;;#ASMSTART 6894; GFX90A-NEXT: ; def v[2:4] 6895; GFX90A-NEXT: ;;#ASMEND 6896; GFX90A-NEXT: v_mov_b32_e32 v5, 0 6897; GFX90A-NEXT: v_mov_b32_e32 v0, v4 6898; GFX90A-NEXT: v_mov_b32_e32 v1, v4 6899; GFX90A-NEXT: v_mov_b32_e32 v2, v3 6900; GFX90A-NEXT: v_mov_b32_e32 v3, v4 6901; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17] 6902; GFX90A-NEXT: s_waitcnt vmcnt(0) 6903; GFX90A-NEXT: s_setpc_b64 s[30:31] 6904; 6905; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_4_5: 6906; GFX940: ; %bb.0: 6907; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6908; GFX940-NEXT: ;;#ASMSTART 6909; GFX940-NEXT: ; def v[2:4] 6910; GFX940-NEXT: ;;#ASMEND 6911; GFX940-NEXT: v_mov_b32_e32 v5, 0 6912; GFX940-NEXT: v_mov_b32_e32 v0, v4 6913; GFX940-NEXT: v_mov_b32_e32 v1, v4 6914; GFX940-NEXT: v_mov_b32_e32 v2, v3 6915; GFX940-NEXT: v_mov_b32_e32 v3, v4 6916; GFX940-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1 6917; GFX940-NEXT: s_waitcnt vmcnt(0) 6918; GFX940-NEXT: s_setpc_b64 s[30:31] 6919 %vec0 = call <3 x i32> asm "; def $0", "=v"() 6920 %vec1 = call <3 x i32> asm "; def $0", "=v"() 6921 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 4, i32 5> 6922 store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16 6923 ret void 6924} 6925 6926define void @s_shuffle_v4i32_v3i32__u_u_u_u() { 6927; GFX9-LABEL: s_shuffle_v4i32_v3i32__u_u_u_u: 6928; GFX9: ; %bb.0: 6929; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) 6930; GFX9-NEXT: ;;#ASMSTART 6931; GFX9-NEXT: ; use s[8:11] 6932; GFX9-NEXT: ;;#ASMEND 6933; GFX9-NEXT: s_setpc_b64 s[30:31] 6934 %vec0 = call <3 x i32> asm "; def $0", "=s"() 6935 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> poison 6936 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 6937 ret void 6938} 6939 6940define void @s_shuffle_v4i32_v3i32__0_u_u_u() { 6941; GFX900-LABEL: s_shuffle_v4i32_v3i32__0_u_u_u: 6942; GFX900: ; %bb.0: 6943; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6944; GFX900-NEXT: ;;#ASMSTART 6945; GFX900-NEXT: ; def s[8:10] 6946; GFX900-NEXT: ;;#ASMEND 6947; GFX900-NEXT: ;;#ASMSTART 6948; GFX900-NEXT: ; use s[8:11] 6949; GFX900-NEXT: ;;#ASMEND 6950; GFX900-NEXT: s_setpc_b64 s[30:31] 6951; 6952; GFX90A-LABEL: s_shuffle_v4i32_v3i32__0_u_u_u: 6953; GFX90A: ; %bb.0: 6954; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6955; GFX90A-NEXT: ;;#ASMSTART 6956; GFX90A-NEXT: ; def s[8:10] 6957; GFX90A-NEXT: ;;#ASMEND 6958; GFX90A-NEXT: ;;#ASMSTART 6959; GFX90A-NEXT: ; use s[8:11] 6960; GFX90A-NEXT: ;;#ASMEND 6961; GFX90A-NEXT: s_setpc_b64 s[30:31] 6962; 6963; GFX940-LABEL: s_shuffle_v4i32_v3i32__0_u_u_u: 6964; GFX940: ; %bb.0: 6965; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6966; GFX940-NEXT: ;;#ASMSTART 6967; GFX940-NEXT: ; def s[8:10] 6968; GFX940-NEXT: ;;#ASMEND 6969; GFX940-NEXT: s_nop 0 6970; GFX940-NEXT: ;;#ASMSTART 6971; GFX940-NEXT: ; use s[8:11] 6972; GFX940-NEXT: ;;#ASMEND 6973; GFX940-NEXT: s_setpc_b64 s[30:31] 6974 %vec0 = call <3 x i32> asm "; def $0", "=s"() 6975 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison> 6976 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 6977 ret void 6978} 6979 6980define void @s_shuffle_v4i32_v3i32__1_u_u_u() { 6981; GFX900-LABEL: s_shuffle_v4i32_v3i32__1_u_u_u: 6982; GFX900: ; %bb.0: 6983; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6984; 
GFX900-NEXT: ;;#ASMSTART 6985; GFX900-NEXT: ; def s[4:6] 6986; GFX900-NEXT: ;;#ASMEND 6987; GFX900-NEXT: s_mov_b32 s8, s5 6988; GFX900-NEXT: ;;#ASMSTART 6989; GFX900-NEXT: ; use s[8:11] 6990; GFX900-NEXT: ;;#ASMEND 6991; GFX900-NEXT: s_setpc_b64 s[30:31] 6992; 6993; GFX90A-LABEL: s_shuffle_v4i32_v3i32__1_u_u_u: 6994; GFX90A: ; %bb.0: 6995; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6996; GFX90A-NEXT: ;;#ASMSTART 6997; GFX90A-NEXT: ; def s[4:6] 6998; GFX90A-NEXT: ;;#ASMEND 6999; GFX90A-NEXT: s_mov_b32 s8, s5 7000; GFX90A-NEXT: ;;#ASMSTART 7001; GFX90A-NEXT: ; use s[8:11] 7002; GFX90A-NEXT: ;;#ASMEND 7003; GFX90A-NEXT: s_setpc_b64 s[30:31] 7004; 7005; GFX940-LABEL: s_shuffle_v4i32_v3i32__1_u_u_u: 7006; GFX940: ; %bb.0: 7007; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7008; GFX940-NEXT: ;;#ASMSTART 7009; GFX940-NEXT: ; def s[0:2] 7010; GFX940-NEXT: ;;#ASMEND 7011; GFX940-NEXT: s_mov_b32 s8, s1 7012; GFX940-NEXT: ;;#ASMSTART 7013; GFX940-NEXT: ; use s[8:11] 7014; GFX940-NEXT: ;;#ASMEND 7015; GFX940-NEXT: s_setpc_b64 s[30:31] 7016 %vec0 = call <3 x i32> asm "; def $0", "=s"() 7017 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison> 7018 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 7019 ret void 7020} 7021 7022define void @s_shuffle_v4i32_v3i32__2_u_u_u() { 7023; GFX900-LABEL: s_shuffle_v4i32_v3i32__2_u_u_u: 7024; GFX900: ; %bb.0: 7025; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7026; GFX900-NEXT: ;;#ASMSTART 7027; GFX900-NEXT: ; def s[4:6] 7028; GFX900-NEXT: ;;#ASMEND 7029; GFX900-NEXT: s_mov_b32 s8, s6 7030; GFX900-NEXT: ;;#ASMSTART 7031; GFX900-NEXT: ; use s[8:11] 7032; GFX900-NEXT: ;;#ASMEND 7033; GFX900-NEXT: s_setpc_b64 s[30:31] 7034; 7035; GFX90A-LABEL: s_shuffle_v4i32_v3i32__2_u_u_u: 7036; GFX90A: ; %bb.0: 7037; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7038; GFX90A-NEXT: ;;#ASMSTART 7039; GFX90A-NEXT: ; def s[4:6] 7040; GFX90A-NEXT: ;;#ASMEND 
7041; GFX90A-NEXT: s_mov_b32 s8, s6 7042; GFX90A-NEXT: ;;#ASMSTART 7043; GFX90A-NEXT: ; use s[8:11] 7044; GFX90A-NEXT: ;;#ASMEND 7045; GFX90A-NEXT: s_setpc_b64 s[30:31] 7046; 7047; GFX940-LABEL: s_shuffle_v4i32_v3i32__2_u_u_u: 7048; GFX940: ; %bb.0: 7049; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7050; GFX940-NEXT: ;;#ASMSTART 7051; GFX940-NEXT: ; def s[0:2] 7052; GFX940-NEXT: ;;#ASMEND 7053; GFX940-NEXT: s_mov_b32 s8, s2 7054; GFX940-NEXT: ;;#ASMSTART 7055; GFX940-NEXT: ; use s[8:11] 7056; GFX940-NEXT: ;;#ASMEND 7057; GFX940-NEXT: s_setpc_b64 s[30:31] 7058 %vec0 = call <3 x i32> asm "; def $0", "=s"() 7059 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> <i32 2, i32 poison, i32 poison, i32 poison> 7060 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 7061 ret void 7062} 7063 7064define void @s_shuffle_v4i32_v3i32__3_u_u_u() { 7065; GFX9-LABEL: s_shuffle_v4i32_v3i32__3_u_u_u: 7066; GFX9: ; %bb.0: 7067; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7068; GFX9-NEXT: ;;#ASMSTART 7069; GFX9-NEXT: ; use s[8:11] 7070; GFX9-NEXT: ;;#ASMEND 7071; GFX9-NEXT: s_setpc_b64 s[30:31] 7072 %vec0 = call <3 x i32> asm "; def $0", "=s"() 7073 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> <i32 3, i32 poison, i32 poison, i32 poison> 7074 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 7075 ret void 7076} 7077 7078define void @s_shuffle_v4i32_v3i32__4_u_u_u() { 7079; GFX900-LABEL: s_shuffle_v4i32_v3i32__4_u_u_u: 7080; GFX900: ; %bb.0: 7081; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7082; GFX900-NEXT: ;;#ASMSTART 7083; GFX900-NEXT: ; def s[4:6] 7084; GFX900-NEXT: ;;#ASMEND 7085; GFX900-NEXT: s_mov_b32 s8, s5 7086; GFX900-NEXT: ;;#ASMSTART 7087; GFX900-NEXT: ; use s[8:11] 7088; GFX900-NEXT: ;;#ASMEND 7089; GFX900-NEXT: s_setpc_b64 s[30:31] 7090; 7091; GFX90A-LABEL: s_shuffle_v4i32_v3i32__4_u_u_u: 7092; GFX90A: ; %bb.0: 7093; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7094; 
GFX90A-NEXT: ;;#ASMSTART 7095; GFX90A-NEXT: ; def s[4:6] 7096; GFX90A-NEXT: ;;#ASMEND 7097; GFX90A-NEXT: s_mov_b32 s8, s5 7098; GFX90A-NEXT: ;;#ASMSTART 7099; GFX90A-NEXT: ; use s[8:11] 7100; GFX90A-NEXT: ;;#ASMEND 7101; GFX90A-NEXT: s_setpc_b64 s[30:31] 7102; 7103; GFX940-LABEL: s_shuffle_v4i32_v3i32__4_u_u_u: 7104; GFX940: ; %bb.0: 7105; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7106; GFX940-NEXT: ;;#ASMSTART 7107; GFX940-NEXT: ; def s[0:2] 7108; GFX940-NEXT: ;;#ASMEND 7109; GFX940-NEXT: s_mov_b32 s8, s1 7110; GFX940-NEXT: ;;#ASMSTART 7111; GFX940-NEXT: ; use s[8:11] 7112; GFX940-NEXT: ;;#ASMEND 7113; GFX940-NEXT: s_setpc_b64 s[30:31] 7114 %vec0 = call <3 x i32> asm "; def $0", "=s"() 7115 %vec1 = call <3 x i32> asm "; def $0", "=s"() 7116 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 4, i32 poison, i32 poison, i32 poison> 7117 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 7118 ret void 7119} 7120 7121define void @s_shuffle_v4i32_v3i32__5_u_u_u() { 7122; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_u_u_u: 7123; GFX900: ; %bb.0: 7124; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7125; GFX900-NEXT: ;;#ASMSTART 7126; GFX900-NEXT: ; def s[4:6] 7127; GFX900-NEXT: ;;#ASMEND 7128; GFX900-NEXT: s_mov_b32 s8, s6 7129; GFX900-NEXT: ;;#ASMSTART 7130; GFX900-NEXT: ; use s[8:11] 7131; GFX900-NEXT: ;;#ASMEND 7132; GFX900-NEXT: s_setpc_b64 s[30:31] 7133; 7134; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_u_u_u: 7135; GFX90A: ; %bb.0: 7136; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7137; GFX90A-NEXT: ;;#ASMSTART 7138; GFX90A-NEXT: ; def s[4:6] 7139; GFX90A-NEXT: ;;#ASMEND 7140; GFX90A-NEXT: s_mov_b32 s8, s6 7141; GFX90A-NEXT: ;;#ASMSTART 7142; GFX90A-NEXT: ; use s[8:11] 7143; GFX90A-NEXT: ;;#ASMEND 7144; GFX90A-NEXT: s_setpc_b64 s[30:31] 7145; 7146; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_u_u_u: 7147; GFX940: ; %bb.0: 7148; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7149; GFX940-NEXT: ;;#ASMSTART 7150; 
GFX940-NEXT: ; def s[0:2] 7151; GFX940-NEXT: ;;#ASMEND 7152; GFX940-NEXT: s_mov_b32 s8, s2 7153; GFX940-NEXT: ;;#ASMSTART 7154; GFX940-NEXT: ; use s[8:11] 7155; GFX940-NEXT: ;;#ASMEND 7156; GFX940-NEXT: s_setpc_b64 s[30:31] 7157 %vec0 = call <3 x i32> asm "; def $0", "=s"() 7158 %vec1 = call <3 x i32> asm "; def $0", "=s"() 7159 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 poison, i32 poison, i32 poison> 7160 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 7161 ret void 7162} 7163 7164define void @s_shuffle_v4i32_v3i32__5_0_u_u() { 7165; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_0_u_u: 7166; GFX900: ; %bb.0: 7167; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7168; GFX900-NEXT: ;;#ASMSTART 7169; GFX900-NEXT: ; def s[8:10] 7170; GFX900-NEXT: ;;#ASMEND 7171; GFX900-NEXT: ;;#ASMSTART 7172; GFX900-NEXT: ; def s[4:6] 7173; GFX900-NEXT: ;;#ASMEND 7174; GFX900-NEXT: s_mov_b32 s8, s10 7175; GFX900-NEXT: s_mov_b32 s9, s4 7176; GFX900-NEXT: ;;#ASMSTART 7177; GFX900-NEXT: ; use s[8:11] 7178; GFX900-NEXT: ;;#ASMEND 7179; GFX900-NEXT: s_setpc_b64 s[30:31] 7180; 7181; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_0_u_u: 7182; GFX90A: ; %bb.0: 7183; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7184; GFX90A-NEXT: ;;#ASMSTART 7185; GFX90A-NEXT: ; def s[8:10] 7186; GFX90A-NEXT: ;;#ASMEND 7187; GFX90A-NEXT: ;;#ASMSTART 7188; GFX90A-NEXT: ; def s[4:6] 7189; GFX90A-NEXT: ;;#ASMEND 7190; GFX90A-NEXT: s_mov_b32 s8, s10 7191; GFX90A-NEXT: s_mov_b32 s9, s4 7192; GFX90A-NEXT: ;;#ASMSTART 7193; GFX90A-NEXT: ; use s[8:11] 7194; GFX90A-NEXT: ;;#ASMEND 7195; GFX90A-NEXT: s_setpc_b64 s[30:31] 7196; 7197; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_0_u_u: 7198; GFX940: ; %bb.0: 7199; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7200; GFX940-NEXT: ;;#ASMSTART 7201; GFX940-NEXT: ; def s[0:2] 7202; GFX940-NEXT: ;;#ASMEND 7203; GFX940-NEXT: ;;#ASMSTART 7204; GFX940-NEXT: ; def s[4:6] 7205; GFX940-NEXT: ;;#ASMEND 7206; GFX940-NEXT: s_mov_b32 
s8, s6 7207; GFX940-NEXT: s_mov_b32 s9, s0 7208; GFX940-NEXT: ;;#ASMSTART 7209; GFX940-NEXT: ; use s[8:11] 7210; GFX940-NEXT: ;;#ASMEND 7211; GFX940-NEXT: s_setpc_b64 s[30:31] 7212 %vec0 = call <3 x i32> asm "; def $0", "=s"() 7213 %vec1 = call <3 x i32> asm "; def $0", "=s"() 7214 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 0, i32 poison, i32 poison> 7215 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 7216 ret void 7217} 7218 7219define void @s_shuffle_v4i32_v3i32__5_1_u_u() { 7220; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_1_u_u: 7221; GFX900: ; %bb.0: 7222; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7223; GFX900-NEXT: ;;#ASMSTART 7224; GFX900-NEXT: ; def s[8:10] 7225; GFX900-NEXT: ;;#ASMEND 7226; GFX900-NEXT: ;;#ASMSTART 7227; GFX900-NEXT: ; def s[4:6] 7228; GFX900-NEXT: ;;#ASMEND 7229; GFX900-NEXT: s_mov_b32 s8, s6 7230; GFX900-NEXT: ;;#ASMSTART 7231; GFX900-NEXT: ; use s[8:11] 7232; GFX900-NEXT: ;;#ASMEND 7233; GFX900-NEXT: s_setpc_b64 s[30:31] 7234; 7235; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_1_u_u: 7236; GFX90A: ; %bb.0: 7237; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7238; GFX90A-NEXT: ;;#ASMSTART 7239; GFX90A-NEXT: ; def s[8:10] 7240; GFX90A-NEXT: ;;#ASMEND 7241; GFX90A-NEXT: ;;#ASMSTART 7242; GFX90A-NEXT: ; def s[4:6] 7243; GFX90A-NEXT: ;;#ASMEND 7244; GFX90A-NEXT: s_mov_b32 s8, s6 7245; GFX90A-NEXT: ;;#ASMSTART 7246; GFX90A-NEXT: ; use s[8:11] 7247; GFX90A-NEXT: ;;#ASMEND 7248; GFX90A-NEXT: s_setpc_b64 s[30:31] 7249; 7250; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_1_u_u: 7251; GFX940: ; %bb.0: 7252; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7253; GFX940-NEXT: ;;#ASMSTART 7254; GFX940-NEXT: ; def s[8:10] 7255; GFX940-NEXT: ;;#ASMEND 7256; GFX940-NEXT: ;;#ASMSTART 7257; GFX940-NEXT: ; def s[0:2] 7258; GFX940-NEXT: ;;#ASMEND 7259; GFX940-NEXT: s_mov_b32 s8, s2 7260; GFX940-NEXT: ;;#ASMSTART 7261; GFX940-NEXT: ; use s[8:11] 7262; GFX940-NEXT: ;;#ASMEND 7263; GFX940-NEXT: 
s_setpc_b64 s[30:31] 7264 %vec0 = call <3 x i32> asm "; def $0", "=s"() 7265 %vec1 = call <3 x i32> asm "; def $0", "=s"() 7266 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 1, i32 poison, i32 poison> 7267 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 7268 ret void 7269} 7270 7271define void @s_shuffle_v4i32_v3i32__5_2_u_u() { 7272; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_2_u_u: 7273; GFX900: ; %bb.0: 7274; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7275; GFX900-NEXT: ;;#ASMSTART 7276; GFX900-NEXT: ; def s[8:10] 7277; GFX900-NEXT: ;;#ASMEND 7278; GFX900-NEXT: ;;#ASMSTART 7279; GFX900-NEXT: ; def s[4:6] 7280; GFX900-NEXT: ;;#ASMEND 7281; GFX900-NEXT: s_mov_b32 s8, s10 7282; GFX900-NEXT: s_mov_b32 s9, s6 7283; GFX900-NEXT: ;;#ASMSTART 7284; GFX900-NEXT: ; use s[8:11] 7285; GFX900-NEXT: ;;#ASMEND 7286; GFX900-NEXT: s_setpc_b64 s[30:31] 7287; 7288; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_2_u_u: 7289; GFX90A: ; %bb.0: 7290; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7291; GFX90A-NEXT: ;;#ASMSTART 7292; GFX90A-NEXT: ; def s[8:10] 7293; GFX90A-NEXT: ;;#ASMEND 7294; GFX90A-NEXT: ;;#ASMSTART 7295; GFX90A-NEXT: ; def s[4:6] 7296; GFX90A-NEXT: ;;#ASMEND 7297; GFX90A-NEXT: s_mov_b32 s8, s10 7298; GFX90A-NEXT: s_mov_b32 s9, s6 7299; GFX90A-NEXT: ;;#ASMSTART 7300; GFX90A-NEXT: ; use s[8:11] 7301; GFX90A-NEXT: ;;#ASMEND 7302; GFX90A-NEXT: s_setpc_b64 s[30:31] 7303; 7304; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_2_u_u: 7305; GFX940: ; %bb.0: 7306; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7307; GFX940-NEXT: ;;#ASMSTART 7308; GFX940-NEXT: ; def s[0:2] 7309; GFX940-NEXT: ;;#ASMEND 7310; GFX940-NEXT: ;;#ASMSTART 7311; GFX940-NEXT: ; def s[4:6] 7312; GFX940-NEXT: ;;#ASMEND 7313; GFX940-NEXT: s_mov_b32 s8, s6 7314; GFX940-NEXT: s_mov_b32 s9, s2 7315; GFX940-NEXT: ;;#ASMSTART 7316; GFX940-NEXT: ; use s[8:11] 7317; GFX940-NEXT: ;;#ASMEND 7318; GFX940-NEXT: s_setpc_b64 s[30:31] 7319 %vec0 = call <3 x i32> asm 
"; def $0", "=s"() 7320 %vec1 = call <3 x i32> asm "; def $0", "=s"() 7321 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 2, i32 poison, i32 poison> 7322 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 7323 ret void 7324} 7325 7326define void @s_shuffle_v4i32_v3i32__5_3_u_u() { 7327; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_3_u_u: 7328; GFX900: ; %bb.0: 7329; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7330; GFX900-NEXT: ;;#ASMSTART 7331; GFX900-NEXT: ; def s[4:6] 7332; GFX900-NEXT: ;;#ASMEND 7333; GFX900-NEXT: s_mov_b32 s8, s6 7334; GFX900-NEXT: s_mov_b32 s9, s4 7335; GFX900-NEXT: ;;#ASMSTART 7336; GFX900-NEXT: ; use s[8:11] 7337; GFX900-NEXT: ;;#ASMEND 7338; GFX900-NEXT: s_setpc_b64 s[30:31] 7339; 7340; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_3_u_u: 7341; GFX90A: ; %bb.0: 7342; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7343; GFX90A-NEXT: ;;#ASMSTART 7344; GFX90A-NEXT: ; def s[4:6] 7345; GFX90A-NEXT: ;;#ASMEND 7346; GFX90A-NEXT: s_mov_b32 s8, s6 7347; GFX90A-NEXT: s_mov_b32 s9, s4 7348; GFX90A-NEXT: ;;#ASMSTART 7349; GFX90A-NEXT: ; use s[8:11] 7350; GFX90A-NEXT: ;;#ASMEND 7351; GFX90A-NEXT: s_setpc_b64 s[30:31] 7352; 7353; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_3_u_u: 7354; GFX940: ; %bb.0: 7355; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7356; GFX940-NEXT: ;;#ASMSTART 7357; GFX940-NEXT: ; def s[0:2] 7358; GFX940-NEXT: ;;#ASMEND 7359; GFX940-NEXT: s_mov_b32 s8, s2 7360; GFX940-NEXT: s_mov_b32 s9, s0 7361; GFX940-NEXT: ;;#ASMSTART 7362; GFX940-NEXT: ; use s[8:11] 7363; GFX940-NEXT: ;;#ASMEND 7364; GFX940-NEXT: s_setpc_b64 s[30:31] 7365 %vec0 = call <3 x i32> asm "; def $0", "=s"() 7366 %vec1 = call <3 x i32> asm "; def $0", "=s"() 7367 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 3, i32 poison, i32 poison> 7368 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 7369 ret void 7370} 7371 7372define void @s_shuffle_v4i32_v3i32__5_4_u_u() { 7373; 
GFX9-LABEL: s_shuffle_v4i32_v3i32__5_4_u_u: 7374; GFX9: ; %bb.0: 7375; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7376; GFX9-NEXT: ;;#ASMSTART 7377; GFX9-NEXT: ; def s[8:10] 7378; GFX9-NEXT: ;;#ASMEND 7379; GFX9-NEXT: s_mov_b32 s8, s10 7380; GFX9-NEXT: ;;#ASMSTART 7381; GFX9-NEXT: ; use s[8:11] 7382; GFX9-NEXT: ;;#ASMEND 7383; GFX9-NEXT: s_setpc_b64 s[30:31] 7384 %vec0 = call <3 x i32> asm "; def $0", "=s"() 7385 %vec1 = call <3 x i32> asm "; def $0", "=s"() 7386 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 4, i32 poison, i32 poison> 7387 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 7388 ret void 7389} 7390 7391define void @s_shuffle_v4i32_v3i32__5_5_u_u() { 7392; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_5_u_u: 7393; GFX900: ; %bb.0: 7394; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7395; GFX900-NEXT: ;;#ASMSTART 7396; GFX900-NEXT: ; def s[4:6] 7397; GFX900-NEXT: ;;#ASMEND 7398; GFX900-NEXT: s_mov_b32 s8, s6 7399; GFX900-NEXT: s_mov_b32 s9, s6 7400; GFX900-NEXT: ;;#ASMSTART 7401; GFX900-NEXT: ; use s[8:11] 7402; GFX900-NEXT: ;;#ASMEND 7403; GFX900-NEXT: s_setpc_b64 s[30:31] 7404; 7405; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_5_u_u: 7406; GFX90A: ; %bb.0: 7407; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7408; GFX90A-NEXT: ;;#ASMSTART 7409; GFX90A-NEXT: ; def s[4:6] 7410; GFX90A-NEXT: ;;#ASMEND 7411; GFX90A-NEXT: s_mov_b32 s8, s6 7412; GFX90A-NEXT: s_mov_b32 s9, s6 7413; GFX90A-NEXT: ;;#ASMSTART 7414; GFX90A-NEXT: ; use s[8:11] 7415; GFX90A-NEXT: ;;#ASMEND 7416; GFX90A-NEXT: s_setpc_b64 s[30:31] 7417; 7418; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_u_u: 7419; GFX940: ; %bb.0: 7420; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7421; GFX940-NEXT: ;;#ASMSTART 7422; GFX940-NEXT: ; def s[0:2] 7423; GFX940-NEXT: ;;#ASMEND 7424; GFX940-NEXT: s_mov_b32 s8, s2 7425; GFX940-NEXT: s_mov_b32 s9, s2 7426; GFX940-NEXT: ;;#ASMSTART 7427; GFX940-NEXT: ; use s[8:11] 7428; GFX940-NEXT: ;;#ASMEND 7429; 
GFX940-NEXT: s_setpc_b64 s[30:31] 7430 %vec0 = call <3 x i32> asm "; def $0", "=s"() 7431 %vec1 = call <3 x i32> asm "; def $0", "=s"() 7432 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 poison, i32 poison> 7433 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 7434 ret void 7435} 7436 7437define void @s_shuffle_v4i32_v3i32__5_5_0_u() { 7438; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_5_0_u: 7439; GFX900: ; %bb.0: 7440; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7441; GFX900-NEXT: ;;#ASMSTART 7442; GFX900-NEXT: ; def s[8:10] 7443; GFX900-NEXT: ;;#ASMEND 7444; GFX900-NEXT: ;;#ASMSTART 7445; GFX900-NEXT: ; def s[4:6] 7446; GFX900-NEXT: ;;#ASMEND 7447; GFX900-NEXT: s_mov_b32 s8, s10 7448; GFX900-NEXT: s_mov_b32 s9, s10 7449; GFX900-NEXT: s_mov_b32 s10, s4 7450; GFX900-NEXT: ;;#ASMSTART 7451; GFX900-NEXT: ; use s[8:11] 7452; GFX900-NEXT: ;;#ASMEND 7453; GFX900-NEXT: s_setpc_b64 s[30:31] 7454; 7455; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_5_0_u: 7456; GFX90A: ; %bb.0: 7457; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7458; GFX90A-NEXT: ;;#ASMSTART 7459; GFX90A-NEXT: ; def s[8:10] 7460; GFX90A-NEXT: ;;#ASMEND 7461; GFX90A-NEXT: ;;#ASMSTART 7462; GFX90A-NEXT: ; def s[4:6] 7463; GFX90A-NEXT: ;;#ASMEND 7464; GFX90A-NEXT: s_mov_b32 s8, s10 7465; GFX90A-NEXT: s_mov_b32 s9, s10 7466; GFX90A-NEXT: s_mov_b32 s10, s4 7467; GFX90A-NEXT: ;;#ASMSTART 7468; GFX90A-NEXT: ; use s[8:11] 7469; GFX90A-NEXT: ;;#ASMEND 7470; GFX90A-NEXT: s_setpc_b64 s[30:31] 7471; 7472; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_0_u: 7473; GFX940: ; %bb.0: 7474; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7475; GFX940-NEXT: ;;#ASMSTART 7476; GFX940-NEXT: ; def s[0:2] 7477; GFX940-NEXT: ;;#ASMEND 7478; GFX940-NEXT: ;;#ASMSTART 7479; GFX940-NEXT: ; def s[4:6] 7480; GFX940-NEXT: ;;#ASMEND 7481; GFX940-NEXT: s_mov_b32 s8, s6 7482; GFX940-NEXT: s_mov_b32 s9, s6 7483; GFX940-NEXT: s_mov_b32 s10, s0 7484; GFX940-NEXT: ;;#ASMSTART 7485; 
GFX940-NEXT: ; use s[8:11] 7486; GFX940-NEXT: ;;#ASMEND 7487; GFX940-NEXT: s_setpc_b64 s[30:31] 7488 %vec0 = call <3 x i32> asm "; def $0", "=s"() 7489 %vec1 = call <3 x i32> asm "; def $0", "=s"() 7490 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 0, i32 poison> 7491 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 7492 ret void 7493} 7494 7495define void @s_shuffle_v4i32_v3i32__5_5_1_u() { 7496; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_5_1_u: 7497; GFX900: ; %bb.0: 7498; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7499; GFX900-NEXT: ;;#ASMSTART 7500; GFX900-NEXT: ; def s[8:10] 7501; GFX900-NEXT: ;;#ASMEND 7502; GFX900-NEXT: ;;#ASMSTART 7503; GFX900-NEXT: ; def s[4:6] 7504; GFX900-NEXT: ;;#ASMEND 7505; GFX900-NEXT: s_mov_b32 s8, s10 7506; GFX900-NEXT: s_mov_b32 s9, s10 7507; GFX900-NEXT: s_mov_b32 s10, s5 7508; GFX900-NEXT: ;;#ASMSTART 7509; GFX900-NEXT: ; use s[8:11] 7510; GFX900-NEXT: ;;#ASMEND 7511; GFX900-NEXT: s_setpc_b64 s[30:31] 7512; 7513; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_5_1_u: 7514; GFX90A: ; %bb.0: 7515; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7516; GFX90A-NEXT: ;;#ASMSTART 7517; GFX90A-NEXT: ; def s[8:10] 7518; GFX90A-NEXT: ;;#ASMEND 7519; GFX90A-NEXT: ;;#ASMSTART 7520; GFX90A-NEXT: ; def s[4:6] 7521; GFX90A-NEXT: ;;#ASMEND 7522; GFX90A-NEXT: s_mov_b32 s8, s10 7523; GFX90A-NEXT: s_mov_b32 s9, s10 7524; GFX90A-NEXT: s_mov_b32 s10, s5 7525; GFX90A-NEXT: ;;#ASMSTART 7526; GFX90A-NEXT: ; use s[8:11] 7527; GFX90A-NEXT: ;;#ASMEND 7528; GFX90A-NEXT: s_setpc_b64 s[30:31] 7529; 7530; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_1_u: 7531; GFX940: ; %bb.0: 7532; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7533; GFX940-NEXT: ;;#ASMSTART 7534; GFX940-NEXT: ; def s[0:2] 7535; GFX940-NEXT: ;;#ASMEND 7536; GFX940-NEXT: ;;#ASMSTART 7537; GFX940-NEXT: ; def s[4:6] 7538; GFX940-NEXT: ;;#ASMEND 7539; GFX940-NEXT: s_mov_b32 s8, s6 7540; GFX940-NEXT: s_mov_b32 s9, s6 7541; GFX940-NEXT: 
s_mov_b32 s10, s1 7542; GFX940-NEXT: ;;#ASMSTART 7543; GFX940-NEXT: ; use s[8:11] 7544; GFX940-NEXT: ;;#ASMEND 7545; GFX940-NEXT: s_setpc_b64 s[30:31] 7546 %vec0 = call <3 x i32> asm "; def $0", "=s"() 7547 %vec1 = call <3 x i32> asm "; def $0", "=s"() 7548 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 1, i32 poison> 7549 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 7550 ret void 7551} 7552 7553define void @s_shuffle_v4i32_v3i32__5_5_2_u() { 7554; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_5_2_u: 7555; GFX900: ; %bb.0: 7556; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7557; GFX900-NEXT: ;;#ASMSTART 7558; GFX900-NEXT: ; def s[8:10] 7559; GFX900-NEXT: ;;#ASMEND 7560; GFX900-NEXT: ;;#ASMSTART 7561; GFX900-NEXT: ; def s[4:6] 7562; GFX900-NEXT: ;;#ASMEND 7563; GFX900-NEXT: s_mov_b32 s8, s6 7564; GFX900-NEXT: s_mov_b32 s9, s6 7565; GFX900-NEXT: ;;#ASMSTART 7566; GFX900-NEXT: ; use s[8:11] 7567; GFX900-NEXT: ;;#ASMEND 7568; GFX900-NEXT: s_setpc_b64 s[30:31] 7569; 7570; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_5_2_u: 7571; GFX90A: ; %bb.0: 7572; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7573; GFX90A-NEXT: ;;#ASMSTART 7574; GFX90A-NEXT: ; def s[8:10] 7575; GFX90A-NEXT: ;;#ASMEND 7576; GFX90A-NEXT: ;;#ASMSTART 7577; GFX90A-NEXT: ; def s[4:6] 7578; GFX90A-NEXT: ;;#ASMEND 7579; GFX90A-NEXT: s_mov_b32 s8, s6 7580; GFX90A-NEXT: s_mov_b32 s9, s6 7581; GFX90A-NEXT: ;;#ASMSTART 7582; GFX90A-NEXT: ; use s[8:11] 7583; GFX90A-NEXT: ;;#ASMEND 7584; GFX90A-NEXT: s_setpc_b64 s[30:31] 7585; 7586; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_2_u: 7587; GFX940: ; %bb.0: 7588; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7589; GFX940-NEXT: ;;#ASMSTART 7590; GFX940-NEXT: ; def s[8:10] 7591; GFX940-NEXT: ;;#ASMEND 7592; GFX940-NEXT: ;;#ASMSTART 7593; GFX940-NEXT: ; def s[0:2] 7594; GFX940-NEXT: ;;#ASMEND 7595; GFX940-NEXT: s_mov_b32 s8, s2 7596; GFX940-NEXT: s_mov_b32 s9, s2 7597; GFX940-NEXT: ;;#ASMSTART 7598; 
GFX940-NEXT: ; use s[8:11] 7599; GFX940-NEXT: ;;#ASMEND 7600; GFX940-NEXT: s_setpc_b64 s[30:31] 7601 %vec0 = call <3 x i32> asm "; def $0", "=s"() 7602 %vec1 = call <3 x i32> asm "; def $0", "=s"() 7603 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 2, i32 poison> 7604 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 7605 ret void 7606} 7607 7608define void @s_shuffle_v4i32_v3i32__5_5_3_u() { 7609; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_5_3_u: 7610; GFX900: ; %bb.0: 7611; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7612; GFX900-NEXT: ;;#ASMSTART 7613; GFX900-NEXT: ; def s[4:6] 7614; GFX900-NEXT: ;;#ASMEND 7615; GFX900-NEXT: s_mov_b32 s8, s6 7616; GFX900-NEXT: s_mov_b32 s9, s6 7617; GFX900-NEXT: s_mov_b32 s10, s4 7618; GFX900-NEXT: ;;#ASMSTART 7619; GFX900-NEXT: ; use s[8:11] 7620; GFX900-NEXT: ;;#ASMEND 7621; GFX900-NEXT: s_setpc_b64 s[30:31] 7622; 7623; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_5_3_u: 7624; GFX90A: ; %bb.0: 7625; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7626; GFX90A-NEXT: ;;#ASMSTART 7627; GFX90A-NEXT: ; def s[4:6] 7628; GFX90A-NEXT: ;;#ASMEND 7629; GFX90A-NEXT: s_mov_b32 s8, s6 7630; GFX90A-NEXT: s_mov_b32 s9, s6 7631; GFX90A-NEXT: s_mov_b32 s10, s4 7632; GFX90A-NEXT: ;;#ASMSTART 7633; GFX90A-NEXT: ; use s[8:11] 7634; GFX90A-NEXT: ;;#ASMEND 7635; GFX90A-NEXT: s_setpc_b64 s[30:31] 7636; 7637; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_3_u: 7638; GFX940: ; %bb.0: 7639; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7640; GFX940-NEXT: ;;#ASMSTART 7641; GFX940-NEXT: ; def s[0:2] 7642; GFX940-NEXT: ;;#ASMEND 7643; GFX940-NEXT: s_mov_b32 s8, s2 7644; GFX940-NEXT: s_mov_b32 s9, s2 7645; GFX940-NEXT: s_mov_b32 s10, s0 7646; GFX940-NEXT: ;;#ASMSTART 7647; GFX940-NEXT: ; use s[8:11] 7648; GFX940-NEXT: ;;#ASMEND 7649; GFX940-NEXT: s_setpc_b64 s[30:31] 7650 %vec0 = call <3 x i32> asm "; def $0", "=s"() 7651 %vec1 = call <3 x i32> asm "; def $0", "=s"() 7652 %shuf = shufflevector <3 
x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 3, i32 poison> 7653 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 7654 ret void 7655} 7656 7657define void @s_shuffle_v4i32_v3i32__5_5_4_u() { 7658; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_5_4_u: 7659; GFX900: ; %bb.0: 7660; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7661; GFX900-NEXT: ;;#ASMSTART 7662; GFX900-NEXT: ; def s[4:6] 7663; GFX900-NEXT: ;;#ASMEND 7664; GFX900-NEXT: s_mov_b32 s8, s6 7665; GFX900-NEXT: s_mov_b32 s9, s6 7666; GFX900-NEXT: s_mov_b32 s10, s5 7667; GFX900-NEXT: ;;#ASMSTART 7668; GFX900-NEXT: ; use s[8:11] 7669; GFX900-NEXT: ;;#ASMEND 7670; GFX900-NEXT: s_setpc_b64 s[30:31] 7671; 7672; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_5_4_u: 7673; GFX90A: ; %bb.0: 7674; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7675; GFX90A-NEXT: ;;#ASMSTART 7676; GFX90A-NEXT: ; def s[4:6] 7677; GFX90A-NEXT: ;;#ASMEND 7678; GFX90A-NEXT: s_mov_b32 s8, s6 7679; GFX90A-NEXT: s_mov_b32 s9, s6 7680; GFX90A-NEXT: s_mov_b32 s10, s5 7681; GFX90A-NEXT: ;;#ASMSTART 7682; GFX90A-NEXT: ; use s[8:11] 7683; GFX90A-NEXT: ;;#ASMEND 7684; GFX90A-NEXT: s_setpc_b64 s[30:31] 7685; 7686; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_4_u: 7687; GFX940: ; %bb.0: 7688; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7689; GFX940-NEXT: ;;#ASMSTART 7690; GFX940-NEXT: ; def s[0:2] 7691; GFX940-NEXT: ;;#ASMEND 7692; GFX940-NEXT: s_mov_b32 s8, s2 7693; GFX940-NEXT: s_mov_b32 s9, s2 7694; GFX940-NEXT: s_mov_b32 s10, s1 7695; GFX940-NEXT: ;;#ASMSTART 7696; GFX940-NEXT: ; use s[8:11] 7697; GFX940-NEXT: ;;#ASMEND 7698; GFX940-NEXT: s_setpc_b64 s[30:31] 7699 %vec0 = call <3 x i32> asm "; def $0", "=s"() 7700 %vec1 = call <3 x i32> asm "; def $0", "=s"() 7701 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 4, i32 poison> 7702 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 7703 ret void 7704} 7705 7706define void @s_shuffle_v4i32_v3i32__5_5_5_u() { 7707; 
GFX9-LABEL: s_shuffle_v4i32_v3i32__5_5_5_u: 7708; GFX9: ; %bb.0: 7709; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7710; GFX9-NEXT: ;;#ASMSTART 7711; GFX9-NEXT: ; def s[8:10] 7712; GFX9-NEXT: ;;#ASMEND 7713; GFX9-NEXT: s_mov_b32 s8, s10 7714; GFX9-NEXT: s_mov_b32 s9, s10 7715; GFX9-NEXT: ;;#ASMSTART 7716; GFX9-NEXT: ; use s[8:11] 7717; GFX9-NEXT: ;;#ASMEND 7718; GFX9-NEXT: s_setpc_b64 s[30:31] 7719 %vec0 = call <3 x i32> asm "; def $0", "=s"() 7720 %vec1 = call <3 x i32> asm "; def $0", "=s"() 7721 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 5, i32 poison> 7722 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 7723 ret void 7724} 7725 7726define void @s_shuffle_v4i32_v3i32__5_5_5_0() { 7727; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_5_5_0: 7728; GFX900: ; %bb.0: 7729; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7730; GFX900-NEXT: ;;#ASMSTART 7731; GFX900-NEXT: ; def s[8:10] 7732; GFX900-NEXT: ;;#ASMEND 7733; GFX900-NEXT: ;;#ASMSTART 7734; GFX900-NEXT: ; def s[4:6] 7735; GFX900-NEXT: ;;#ASMEND 7736; GFX900-NEXT: s_mov_b32 s8, s10 7737; GFX900-NEXT: s_mov_b32 s9, s10 7738; GFX900-NEXT: s_mov_b32 s11, s4 7739; GFX900-NEXT: ;;#ASMSTART 7740; GFX900-NEXT: ; use s[8:11] 7741; GFX900-NEXT: ;;#ASMEND 7742; GFX900-NEXT: s_setpc_b64 s[30:31] 7743; 7744; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_5_5_0: 7745; GFX90A: ; %bb.0: 7746; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7747; GFX90A-NEXT: ;;#ASMSTART 7748; GFX90A-NEXT: ; def s[8:10] 7749; GFX90A-NEXT: ;;#ASMEND 7750; GFX90A-NEXT: ;;#ASMSTART 7751; GFX90A-NEXT: ; def s[4:6] 7752; GFX90A-NEXT: ;;#ASMEND 7753; GFX90A-NEXT: s_mov_b32 s8, s10 7754; GFX90A-NEXT: s_mov_b32 s9, s10 7755; GFX90A-NEXT: s_mov_b32 s11, s4 7756; GFX90A-NEXT: ;;#ASMSTART 7757; GFX90A-NEXT: ; use s[8:11] 7758; GFX90A-NEXT: ;;#ASMEND 7759; GFX90A-NEXT: s_setpc_b64 s[30:31] 7760; 7761; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_5_0: 7762; GFX940: ; %bb.0: 7763; GFX940-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7764; GFX940-NEXT: ;;#ASMSTART 7765; GFX940-NEXT: ; def s[8:10] 7766; GFX940-NEXT: ;;#ASMEND 7767; GFX940-NEXT: ;;#ASMSTART 7768; GFX940-NEXT: ; def s[0:2] 7769; GFX940-NEXT: ;;#ASMEND 7770; GFX940-NEXT: s_mov_b32 s8, s10 7771; GFX940-NEXT: s_mov_b32 s9, s10 7772; GFX940-NEXT: s_mov_b32 s11, s0 7773; GFX940-NEXT: ;;#ASMSTART 7774; GFX940-NEXT: ; use s[8:11] 7775; GFX940-NEXT: ;;#ASMEND 7776; GFX940-NEXT: s_setpc_b64 s[30:31] 7777 %vec0 = call <3 x i32> asm "; def $0", "=s"() 7778 %vec1 = call <3 x i32> asm "; def $0", "=s"() 7779 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 5, i32 0> 7780 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 7781 ret void 7782} 7783 7784define void @s_shuffle_v4i32_v3i32__5_5_5_1() { 7785; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_5_5_1: 7786; GFX900: ; %bb.0: 7787; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7788; GFX900-NEXT: ;;#ASMSTART 7789; GFX900-NEXT: ; def s[8:10] 7790; GFX900-NEXT: ;;#ASMEND 7791; GFX900-NEXT: ;;#ASMSTART 7792; GFX900-NEXT: ; def s[4:6] 7793; GFX900-NEXT: ;;#ASMEND 7794; GFX900-NEXT: s_mov_b32 s8, s10 7795; GFX900-NEXT: s_mov_b32 s9, s10 7796; GFX900-NEXT: s_mov_b32 s11, s5 7797; GFX900-NEXT: ;;#ASMSTART 7798; GFX900-NEXT: ; use s[8:11] 7799; GFX900-NEXT: ;;#ASMEND 7800; GFX900-NEXT: s_setpc_b64 s[30:31] 7801; 7802; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_5_5_1: 7803; GFX90A: ; %bb.0: 7804; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7805; GFX90A-NEXT: ;;#ASMSTART 7806; GFX90A-NEXT: ; def s[8:10] 7807; GFX90A-NEXT: ;;#ASMEND 7808; GFX90A-NEXT: ;;#ASMSTART 7809; GFX90A-NEXT: ; def s[4:6] 7810; GFX90A-NEXT: ;;#ASMEND 7811; GFX90A-NEXT: s_mov_b32 s8, s10 7812; GFX90A-NEXT: s_mov_b32 s9, s10 7813; GFX90A-NEXT: s_mov_b32 s11, s5 7814; GFX90A-NEXT: ;;#ASMSTART 7815; GFX90A-NEXT: ; use s[8:11] 7816; GFX90A-NEXT: ;;#ASMEND 7817; GFX90A-NEXT: s_setpc_b64 s[30:31] 7818; 7819; GFX940-LABEL: 
s_shuffle_v4i32_v3i32__5_5_5_1: 7820; GFX940: ; %bb.0: 7821; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7822; GFX940-NEXT: ;;#ASMSTART 7823; GFX940-NEXT: ; def s[8:10] 7824; GFX940-NEXT: ;;#ASMEND 7825; GFX940-NEXT: ;;#ASMSTART 7826; GFX940-NEXT: ; def s[0:2] 7827; GFX940-NEXT: ;;#ASMEND 7828; GFX940-NEXT: s_mov_b32 s8, s10 7829; GFX940-NEXT: s_mov_b32 s9, s10 7830; GFX940-NEXT: s_mov_b32 s11, s1 7831; GFX940-NEXT: ;;#ASMSTART 7832; GFX940-NEXT: ; use s[8:11] 7833; GFX940-NEXT: ;;#ASMEND 7834; GFX940-NEXT: s_setpc_b64 s[30:31] 7835 %vec0 = call <3 x i32> asm "; def $0", "=s"() 7836 %vec1 = call <3 x i32> asm "; def $0", "=s"() 7837 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 5, i32 1> 7838 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 7839 ret void 7840} 7841 7842define void @s_shuffle_v4i32_v3i32__5_5_5_2() { 7843; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_5_5_2: 7844; GFX900: ; %bb.0: 7845; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7846; GFX900-NEXT: ;;#ASMSTART 7847; GFX900-NEXT: ; def s[8:10] 7848; GFX900-NEXT: ;;#ASMEND 7849; GFX900-NEXT: ;;#ASMSTART 7850; GFX900-NEXT: ; def s[4:6] 7851; GFX900-NEXT: ;;#ASMEND 7852; GFX900-NEXT: s_mov_b32 s8, s10 7853; GFX900-NEXT: s_mov_b32 s9, s10 7854; GFX900-NEXT: s_mov_b32 s11, s6 7855; GFX900-NEXT: ;;#ASMSTART 7856; GFX900-NEXT: ; use s[8:11] 7857; GFX900-NEXT: ;;#ASMEND 7858; GFX900-NEXT: s_setpc_b64 s[30:31] 7859; 7860; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_5_5_2: 7861; GFX90A: ; %bb.0: 7862; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7863; GFX90A-NEXT: ;;#ASMSTART 7864; GFX90A-NEXT: ; def s[8:10] 7865; GFX90A-NEXT: ;;#ASMEND 7866; GFX90A-NEXT: ;;#ASMSTART 7867; GFX90A-NEXT: ; def s[4:6] 7868; GFX90A-NEXT: ;;#ASMEND 7869; GFX90A-NEXT: s_mov_b32 s8, s10 7870; GFX90A-NEXT: s_mov_b32 s9, s10 7871; GFX90A-NEXT: s_mov_b32 s11, s6 7872; GFX90A-NEXT: ;;#ASMSTART 7873; GFX90A-NEXT: ; use s[8:11] 7874; GFX90A-NEXT: ;;#ASMEND 7875; 
GFX90A-NEXT: s_setpc_b64 s[30:31] 7876; 7877; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_5_2: 7878; GFX940: ; %bb.0: 7879; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7880; GFX940-NEXT: ;;#ASMSTART 7881; GFX940-NEXT: ; def s[8:10] 7882; GFX940-NEXT: ;;#ASMEND 7883; GFX940-NEXT: ;;#ASMSTART 7884; GFX940-NEXT: ; def s[0:2] 7885; GFX940-NEXT: ;;#ASMEND 7886; GFX940-NEXT: s_mov_b32 s8, s10 7887; GFX940-NEXT: s_mov_b32 s9, s10 7888; GFX940-NEXT: s_mov_b32 s11, s2 7889; GFX940-NEXT: ;;#ASMSTART 7890; GFX940-NEXT: ; use s[8:11] 7891; GFX940-NEXT: ;;#ASMEND 7892; GFX940-NEXT: s_setpc_b64 s[30:31] 7893 %vec0 = call <3 x i32> asm "; def $0", "=s"() 7894 %vec1 = call <3 x i32> asm "; def $0", "=s"() 7895 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 5, i32 2> 7896 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 7897 ret void 7898} 7899 7900define void @s_shuffle_v4i32_v3i32__5_5_5_3() { 7901; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_5_5_3: 7902; GFX900: ; %bb.0: 7903; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7904; GFX900-NEXT: ;;#ASMSTART 7905; GFX900-NEXT: ; def s[4:6] 7906; GFX900-NEXT: ;;#ASMEND 7907; GFX900-NEXT: s_mov_b32 s8, s6 7908; GFX900-NEXT: s_mov_b32 s9, s6 7909; GFX900-NEXT: s_mov_b32 s10, s6 7910; GFX900-NEXT: s_mov_b32 s11, s4 7911; GFX900-NEXT: ;;#ASMSTART 7912; GFX900-NEXT: ; use s[8:11] 7913; GFX900-NEXT: ;;#ASMEND 7914; GFX900-NEXT: s_setpc_b64 s[30:31] 7915; 7916; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_5_5_3: 7917; GFX90A: ; %bb.0: 7918; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7919; GFX90A-NEXT: ;;#ASMSTART 7920; GFX90A-NEXT: ; def s[4:6] 7921; GFX90A-NEXT: ;;#ASMEND 7922; GFX90A-NEXT: s_mov_b32 s8, s6 7923; GFX90A-NEXT: s_mov_b32 s9, s6 7924; GFX90A-NEXT: s_mov_b32 s10, s6 7925; GFX90A-NEXT: s_mov_b32 s11, s4 7926; GFX90A-NEXT: ;;#ASMSTART 7927; GFX90A-NEXT: ; use s[8:11] 7928; GFX90A-NEXT: ;;#ASMEND 7929; GFX90A-NEXT: s_setpc_b64 s[30:31] 7930; 7931; GFX940-LABEL: 
s_shuffle_v4i32_v3i32__5_5_5_3: 7932; GFX940: ; %bb.0: 7933; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7934; GFX940-NEXT: ;;#ASMSTART 7935; GFX940-NEXT: ; def s[0:2] 7936; GFX940-NEXT: ;;#ASMEND 7937; GFX940-NEXT: s_mov_b32 s8, s2 7938; GFX940-NEXT: s_mov_b32 s9, s2 7939; GFX940-NEXT: s_mov_b32 s10, s2 7940; GFX940-NEXT: s_mov_b32 s11, s0 7941; GFX940-NEXT: ;;#ASMSTART 7942; GFX940-NEXT: ; use s[8:11] 7943; GFX940-NEXT: ;;#ASMEND 7944; GFX940-NEXT: s_setpc_b64 s[30:31] 7945 %vec0 = call <3 x i32> asm "; def $0", "=s"() 7946 %vec1 = call <3 x i32> asm "; def $0", "=s"() 7947 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 5, i32 3> 7948 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 7949 ret void 7950} 7951 7952define void @s_shuffle_v4i32_v3i32__5_5_5_4() { 7953; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_5_5_4: 7954; GFX900: ; %bb.0: 7955; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7956; GFX900-NEXT: ;;#ASMSTART 7957; GFX900-NEXT: ; def s[4:6] 7958; GFX900-NEXT: ;;#ASMEND 7959; GFX900-NEXT: s_mov_b32 s8, s6 7960; GFX900-NEXT: s_mov_b32 s9, s6 7961; GFX900-NEXT: s_mov_b32 s10, s6 7962; GFX900-NEXT: s_mov_b32 s11, s5 7963; GFX900-NEXT: ;;#ASMSTART 7964; GFX900-NEXT: ; use s[8:11] 7965; GFX900-NEXT: ;;#ASMEND 7966; GFX900-NEXT: s_setpc_b64 s[30:31] 7967; 7968; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_5_5_4: 7969; GFX90A: ; %bb.0: 7970; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7971; GFX90A-NEXT: ;;#ASMSTART 7972; GFX90A-NEXT: ; def s[4:6] 7973; GFX90A-NEXT: ;;#ASMEND 7974; GFX90A-NEXT: s_mov_b32 s8, s6 7975; GFX90A-NEXT: s_mov_b32 s9, s6 7976; GFX90A-NEXT: s_mov_b32 s10, s6 7977; GFX90A-NEXT: s_mov_b32 s11, s5 7978; GFX90A-NEXT: ;;#ASMSTART 7979; GFX90A-NEXT: ; use s[8:11] 7980; GFX90A-NEXT: ;;#ASMEND 7981; GFX90A-NEXT: s_setpc_b64 s[30:31] 7982; 7983; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_5_4: 7984; GFX940: ; %bb.0: 7985; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7986; 
GFX940-NEXT: ;;#ASMSTART 7987; GFX940-NEXT: ; def s[0:2] 7988; GFX940-NEXT: ;;#ASMEND 7989; GFX940-NEXT: s_mov_b32 s8, s2 7990; GFX940-NEXT: s_mov_b32 s9, s2 7991; GFX940-NEXT: s_mov_b32 s10, s2 7992; GFX940-NEXT: s_mov_b32 s11, s1 7993; GFX940-NEXT: ;;#ASMSTART 7994; GFX940-NEXT: ; use s[8:11] 7995; GFX940-NEXT: ;;#ASMEND 7996; GFX940-NEXT: s_setpc_b64 s[30:31] 7997 %vec0 = call <3 x i32> asm "; def $0", "=s"() 7998 %vec1 = call <3 x i32> asm "; def $0", "=s"() 7999 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 5, i32 4> 8000 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 8001 ret void 8002} 8003 8004define void @s_shuffle_v4i32_v3i32__5_5_5_5() { 8005; GFX9-LABEL: s_shuffle_v4i32_v3i32__5_5_5_5: 8006; GFX9: ; %bb.0: 8007; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8008; GFX9-NEXT: ;;#ASMSTART 8009; GFX9-NEXT: ; def s[8:10] 8010; GFX9-NEXT: ;;#ASMEND 8011; GFX9-NEXT: s_mov_b32 s8, s10 8012; GFX9-NEXT: s_mov_b32 s9, s10 8013; GFX9-NEXT: s_mov_b32 s11, s10 8014; GFX9-NEXT: ;;#ASMSTART 8015; GFX9-NEXT: ; use s[8:11] 8016; GFX9-NEXT: ;;#ASMEND 8017; GFX9-NEXT: s_setpc_b64 s[30:31] 8018 %vec0 = call <3 x i32> asm "; def $0", "=s"() 8019 %vec1 = call <3 x i32> asm "; def $0", "=s"() 8020 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 5, i32 5> 8021 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 8022 ret void 8023} 8024 8025define void @s_shuffle_v4i32_v3i32__u_0_0_0() { 8026; GFX900-LABEL: s_shuffle_v4i32_v3i32__u_0_0_0: 8027; GFX900: ; %bb.0: 8028; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8029; GFX900-NEXT: ;;#ASMSTART 8030; GFX900-NEXT: ; def s[4:6] 8031; GFX900-NEXT: ;;#ASMEND 8032; GFX900-NEXT: s_mov_b32 s9, s4 8033; GFX900-NEXT: s_mov_b32 s10, s4 8034; GFX900-NEXT: s_mov_b32 s11, s4 8035; GFX900-NEXT: ;;#ASMSTART 8036; GFX900-NEXT: ; use s[8:11] 8037; GFX900-NEXT: ;;#ASMEND 8038; GFX900-NEXT: s_setpc_b64 s[30:31] 8039; 8040; 
GFX90A-LABEL: s_shuffle_v4i32_v3i32__u_0_0_0: 8041; GFX90A: ; %bb.0: 8042; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8043; GFX90A-NEXT: ;;#ASMSTART 8044; GFX90A-NEXT: ; def s[4:6] 8045; GFX90A-NEXT: ;;#ASMEND 8046; GFX90A-NEXT: s_mov_b32 s9, s4 8047; GFX90A-NEXT: s_mov_b32 s10, s4 8048; GFX90A-NEXT: s_mov_b32 s11, s4 8049; GFX90A-NEXT: ;;#ASMSTART 8050; GFX90A-NEXT: ; use s[8:11] 8051; GFX90A-NEXT: ;;#ASMEND 8052; GFX90A-NEXT: s_setpc_b64 s[30:31] 8053; 8054; GFX940-LABEL: s_shuffle_v4i32_v3i32__u_0_0_0: 8055; GFX940: ; %bb.0: 8056; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8057; GFX940-NEXT: ;;#ASMSTART 8058; GFX940-NEXT: ; def s[0:2] 8059; GFX940-NEXT: ;;#ASMEND 8060; GFX940-NEXT: s_mov_b32 s9, s0 8061; GFX940-NEXT: s_mov_b32 s10, s0 8062; GFX940-NEXT: s_mov_b32 s11, s0 8063; GFX940-NEXT: ;;#ASMSTART 8064; GFX940-NEXT: ; use s[8:11] 8065; GFX940-NEXT: ;;#ASMEND 8066; GFX940-NEXT: s_setpc_b64 s[30:31] 8067 %vec0 = call <3 x i32> asm "; def $0", "=s"() 8068 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> <i32 poison, i32 0, i32 0, i32 0> 8069 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 8070 ret void 8071} 8072 8073define void @s_shuffle_v4i32_v3i32__0_0_0_0() { 8074; GFX9-LABEL: s_shuffle_v4i32_v3i32__0_0_0_0: 8075; GFX9: ; %bb.0: 8076; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8077; GFX9-NEXT: ;;#ASMSTART 8078; GFX9-NEXT: ; def s[8:10] 8079; GFX9-NEXT: ;;#ASMEND 8080; GFX9-NEXT: s_mov_b32 s9, s8 8081; GFX9-NEXT: s_mov_b32 s10, s8 8082; GFX9-NEXT: s_mov_b32 s11, s8 8083; GFX9-NEXT: ;;#ASMSTART 8084; GFX9-NEXT: ; use s[8:11] 8085; GFX9-NEXT: ;;#ASMEND 8086; GFX9-NEXT: s_setpc_b64 s[30:31] 8087 %vec0 = call <3 x i32> asm "; def $0", "=s"() 8088 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> zeroinitializer 8089 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 8090 ret void 8091} 8092 8093define void @s_shuffle_v4i32_v3i32__1_0_0_0() { 8094; GFX900-LABEL: 
s_shuffle_v4i32_v3i32__1_0_0_0: 8095; GFX900: ; %bb.0: 8096; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8097; GFX900-NEXT: ;;#ASMSTART 8098; GFX900-NEXT: ; def s[4:6] 8099; GFX900-NEXT: ;;#ASMEND 8100; GFX900-NEXT: s_mov_b32 s8, s5 8101; GFX900-NEXT: s_mov_b32 s9, s4 8102; GFX900-NEXT: s_mov_b32 s10, s4 8103; GFX900-NEXT: s_mov_b32 s11, s4 8104; GFX900-NEXT: ;;#ASMSTART 8105; GFX900-NEXT: ; use s[8:11] 8106; GFX900-NEXT: ;;#ASMEND 8107; GFX900-NEXT: s_setpc_b64 s[30:31] 8108; 8109; GFX90A-LABEL: s_shuffle_v4i32_v3i32__1_0_0_0: 8110; GFX90A: ; %bb.0: 8111; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8112; GFX90A-NEXT: ;;#ASMSTART 8113; GFX90A-NEXT: ; def s[4:6] 8114; GFX90A-NEXT: ;;#ASMEND 8115; GFX90A-NEXT: s_mov_b32 s8, s5 8116; GFX90A-NEXT: s_mov_b32 s9, s4 8117; GFX90A-NEXT: s_mov_b32 s10, s4 8118; GFX90A-NEXT: s_mov_b32 s11, s4 8119; GFX90A-NEXT: ;;#ASMSTART 8120; GFX90A-NEXT: ; use s[8:11] 8121; GFX90A-NEXT: ;;#ASMEND 8122; GFX90A-NEXT: s_setpc_b64 s[30:31] 8123; 8124; GFX940-LABEL: s_shuffle_v4i32_v3i32__1_0_0_0: 8125; GFX940: ; %bb.0: 8126; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8127; GFX940-NEXT: ;;#ASMSTART 8128; GFX940-NEXT: ; def s[0:2] 8129; GFX940-NEXT: ;;#ASMEND 8130; GFX940-NEXT: s_mov_b32 s8, s1 8131; GFX940-NEXT: s_mov_b32 s9, s0 8132; GFX940-NEXT: s_mov_b32 s10, s0 8133; GFX940-NEXT: s_mov_b32 s11, s0 8134; GFX940-NEXT: ;;#ASMSTART 8135; GFX940-NEXT: ; use s[8:11] 8136; GFX940-NEXT: ;;#ASMEND 8137; GFX940-NEXT: s_setpc_b64 s[30:31] 8138 %vec0 = call <3 x i32> asm "; def $0", "=s"() 8139 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> <i32 1, i32 0, i32 0, i32 0> 8140 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 8141 ret void 8142} 8143 8144define void @s_shuffle_v4i32_v3i32__2_0_0_0() { 8145; GFX900-LABEL: s_shuffle_v4i32_v3i32__2_0_0_0: 8146; GFX900: ; %bb.0: 8147; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8148; GFX900-NEXT: ;;#ASMSTART 8149; GFX900-NEXT: ; def 
s[4:6] 8150; GFX900-NEXT: ;;#ASMEND 8151; GFX900-NEXT: s_mov_b32 s8, s6 8152; GFX900-NEXT: s_mov_b32 s9, s4 8153; GFX900-NEXT: s_mov_b32 s10, s4 8154; GFX900-NEXT: s_mov_b32 s11, s4 8155; GFX900-NEXT: ;;#ASMSTART 8156; GFX900-NEXT: ; use s[8:11] 8157; GFX900-NEXT: ;;#ASMEND 8158; GFX900-NEXT: s_setpc_b64 s[30:31] 8159; 8160; GFX90A-LABEL: s_shuffle_v4i32_v3i32__2_0_0_0: 8161; GFX90A: ; %bb.0: 8162; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8163; GFX90A-NEXT: ;;#ASMSTART 8164; GFX90A-NEXT: ; def s[4:6] 8165; GFX90A-NEXT: ;;#ASMEND 8166; GFX90A-NEXT: s_mov_b32 s8, s6 8167; GFX90A-NEXT: s_mov_b32 s9, s4 8168; GFX90A-NEXT: s_mov_b32 s10, s4 8169; GFX90A-NEXT: s_mov_b32 s11, s4 8170; GFX90A-NEXT: ;;#ASMSTART 8171; GFX90A-NEXT: ; use s[8:11] 8172; GFX90A-NEXT: ;;#ASMEND 8173; GFX90A-NEXT: s_setpc_b64 s[30:31] 8174; 8175; GFX940-LABEL: s_shuffle_v4i32_v3i32__2_0_0_0: 8176; GFX940: ; %bb.0: 8177; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8178; GFX940-NEXT: ;;#ASMSTART 8179; GFX940-NEXT: ; def s[0:2] 8180; GFX940-NEXT: ;;#ASMEND 8181; GFX940-NEXT: s_mov_b32 s8, s2 8182; GFX940-NEXT: s_mov_b32 s9, s0 8183; GFX940-NEXT: s_mov_b32 s10, s0 8184; GFX940-NEXT: s_mov_b32 s11, s0 8185; GFX940-NEXT: ;;#ASMSTART 8186; GFX940-NEXT: ; use s[8:11] 8187; GFX940-NEXT: ;;#ASMEND 8188; GFX940-NEXT: s_setpc_b64 s[30:31] 8189 %vec0 = call <3 x i32> asm "; def $0", "=s"() 8190 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> <i32 2, i32 0, i32 0, i32 0> 8191 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 8192 ret void 8193} 8194 8195define void @s_shuffle_v4i32_v3i32__3_0_0_0() { 8196; GFX900-LABEL: s_shuffle_v4i32_v3i32__3_0_0_0: 8197; GFX900: ; %bb.0: 8198; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8199; GFX900-NEXT: ;;#ASMSTART 8200; GFX900-NEXT: ; def s[4:6] 8201; GFX900-NEXT: ;;#ASMEND 8202; GFX900-NEXT: s_mov_b32 s9, s4 8203; GFX900-NEXT: s_mov_b32 s10, s4 8204; GFX900-NEXT: s_mov_b32 s11, s4 8205; GFX900-NEXT: 
;;#ASMSTART 8206; GFX900-NEXT: ; use s[8:11] 8207; GFX900-NEXT: ;;#ASMEND 8208; GFX900-NEXT: s_setpc_b64 s[30:31] 8209; 8210; GFX90A-LABEL: s_shuffle_v4i32_v3i32__3_0_0_0: 8211; GFX90A: ; %bb.0: 8212; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8213; GFX90A-NEXT: ;;#ASMSTART 8214; GFX90A-NEXT: ; def s[4:6] 8215; GFX90A-NEXT: ;;#ASMEND 8216; GFX90A-NEXT: s_mov_b32 s9, s4 8217; GFX90A-NEXT: s_mov_b32 s10, s4 8218; GFX90A-NEXT: s_mov_b32 s11, s4 8219; GFX90A-NEXT: ;;#ASMSTART 8220; GFX90A-NEXT: ; use s[8:11] 8221; GFX90A-NEXT: ;;#ASMEND 8222; GFX90A-NEXT: s_setpc_b64 s[30:31] 8223; 8224; GFX940-LABEL: s_shuffle_v4i32_v3i32__3_0_0_0: 8225; GFX940: ; %bb.0: 8226; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8227; GFX940-NEXT: ;;#ASMSTART 8228; GFX940-NEXT: ; def s[0:2] 8229; GFX940-NEXT: ;;#ASMEND 8230; GFX940-NEXT: s_mov_b32 s9, s0 8231; GFX940-NEXT: s_mov_b32 s10, s0 8232; GFX940-NEXT: s_mov_b32 s11, s0 8233; GFX940-NEXT: ;;#ASMSTART 8234; GFX940-NEXT: ; use s[8:11] 8235; GFX940-NEXT: ;;#ASMEND 8236; GFX940-NEXT: s_setpc_b64 s[30:31] 8237 %vec0 = call <3 x i32> asm "; def $0", "=s"() 8238 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> <i32 3, i32 0, i32 0, i32 0> 8239 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 8240 ret void 8241} 8242 8243define void @s_shuffle_v4i32_v3i32__4_0_0_0() { 8244; GFX900-LABEL: s_shuffle_v4i32_v3i32__4_0_0_0: 8245; GFX900: ; %bb.0: 8246; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8247; GFX900-NEXT: ;;#ASMSTART 8248; GFX900-NEXT: ; def s[8:10] 8249; GFX900-NEXT: ;;#ASMEND 8250; GFX900-NEXT: ;;#ASMSTART 8251; GFX900-NEXT: ; def s[4:6] 8252; GFX900-NEXT: ;;#ASMEND 8253; GFX900-NEXT: s_mov_b32 s8, s9 8254; GFX900-NEXT: s_mov_b32 s9, s4 8255; GFX900-NEXT: s_mov_b32 s10, s4 8256; GFX900-NEXT: s_mov_b32 s11, s4 8257; GFX900-NEXT: ;;#ASMSTART 8258; GFX900-NEXT: ; use s[8:11] 8259; GFX900-NEXT: ;;#ASMEND 8260; GFX900-NEXT: s_setpc_b64 s[30:31] 8261; 8262; GFX90A-LABEL: 
s_shuffle_v4i32_v3i32__4_0_0_0: 8263; GFX90A: ; %bb.0: 8264; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8265; GFX90A-NEXT: ;;#ASMSTART 8266; GFX90A-NEXT: ; def s[8:10] 8267; GFX90A-NEXT: ;;#ASMEND 8268; GFX90A-NEXT: ;;#ASMSTART 8269; GFX90A-NEXT: ; def s[4:6] 8270; GFX90A-NEXT: ;;#ASMEND 8271; GFX90A-NEXT: s_mov_b32 s8, s9 8272; GFX90A-NEXT: s_mov_b32 s9, s4 8273; GFX90A-NEXT: s_mov_b32 s10, s4 8274; GFX90A-NEXT: s_mov_b32 s11, s4 8275; GFX90A-NEXT: ;;#ASMSTART 8276; GFX90A-NEXT: ; use s[8:11] 8277; GFX90A-NEXT: ;;#ASMEND 8278; GFX90A-NEXT: s_setpc_b64 s[30:31] 8279; 8280; GFX940-LABEL: s_shuffle_v4i32_v3i32__4_0_0_0: 8281; GFX940: ; %bb.0: 8282; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8283; GFX940-NEXT: ;;#ASMSTART 8284; GFX940-NEXT: ; def s[0:2] 8285; GFX940-NEXT: ;;#ASMEND 8286; GFX940-NEXT: ;;#ASMSTART 8287; GFX940-NEXT: ; def s[4:6] 8288; GFX940-NEXT: ;;#ASMEND 8289; GFX940-NEXT: s_mov_b32 s8, s5 8290; GFX940-NEXT: s_mov_b32 s9, s0 8291; GFX940-NEXT: s_mov_b32 s10, s0 8292; GFX940-NEXT: s_mov_b32 s11, s0 8293; GFX940-NEXT: ;;#ASMSTART 8294; GFX940-NEXT: ; use s[8:11] 8295; GFX940-NEXT: ;;#ASMEND 8296; GFX940-NEXT: s_setpc_b64 s[30:31] 8297 %vec0 = call <3 x i32> asm "; def $0", "=s"() 8298 %vec1 = call <3 x i32> asm "; def $0", "=s"() 8299 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 4, i32 0, i32 0, i32 0> 8300 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 8301 ret void 8302} 8303 8304define void @s_shuffle_v4i32_v3i32__5_0_0_0() { 8305; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_0_0_0: 8306; GFX900: ; %bb.0: 8307; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8308; GFX900-NEXT: ;;#ASMSTART 8309; GFX900-NEXT: ; def s[8:10] 8310; GFX900-NEXT: ;;#ASMEND 8311; GFX900-NEXT: ;;#ASMSTART 8312; GFX900-NEXT: ; def s[4:6] 8313; GFX900-NEXT: ;;#ASMEND 8314; GFX900-NEXT: s_mov_b32 s8, s10 8315; GFX900-NEXT: s_mov_b32 s9, s4 8316; GFX900-NEXT: s_mov_b32 s10, s4 8317; GFX900-NEXT: s_mov_b32 s11, s4 
8318; GFX900-NEXT: ;;#ASMSTART 8319; GFX900-NEXT: ; use s[8:11] 8320; GFX900-NEXT: ;;#ASMEND 8321; GFX900-NEXT: s_setpc_b64 s[30:31] 8322; 8323; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_0_0_0: 8324; GFX90A: ; %bb.0: 8325; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8326; GFX90A-NEXT: ;;#ASMSTART 8327; GFX90A-NEXT: ; def s[8:10] 8328; GFX90A-NEXT: ;;#ASMEND 8329; GFX90A-NEXT: ;;#ASMSTART 8330; GFX90A-NEXT: ; def s[4:6] 8331; GFX90A-NEXT: ;;#ASMEND 8332; GFX90A-NEXT: s_mov_b32 s8, s10 8333; GFX90A-NEXT: s_mov_b32 s9, s4 8334; GFX90A-NEXT: s_mov_b32 s10, s4 8335; GFX90A-NEXT: s_mov_b32 s11, s4 8336; GFX90A-NEXT: ;;#ASMSTART 8337; GFX90A-NEXT: ; use s[8:11] 8338; GFX90A-NEXT: ;;#ASMEND 8339; GFX90A-NEXT: s_setpc_b64 s[30:31] 8340; 8341; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_0_0_0: 8342; GFX940: ; %bb.0: 8343; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8344; GFX940-NEXT: ;;#ASMSTART 8345; GFX940-NEXT: ; def s[0:2] 8346; GFX940-NEXT: ;;#ASMEND 8347; GFX940-NEXT: ;;#ASMSTART 8348; GFX940-NEXT: ; def s[4:6] 8349; GFX940-NEXT: ;;#ASMEND 8350; GFX940-NEXT: s_mov_b32 s8, s6 8351; GFX940-NEXT: s_mov_b32 s9, s0 8352; GFX940-NEXT: s_mov_b32 s10, s0 8353; GFX940-NEXT: s_mov_b32 s11, s0 8354; GFX940-NEXT: ;;#ASMSTART 8355; GFX940-NEXT: ; use s[8:11] 8356; GFX940-NEXT: ;;#ASMEND 8357; GFX940-NEXT: s_setpc_b64 s[30:31] 8358 %vec0 = call <3 x i32> asm "; def $0", "=s"() 8359 %vec1 = call <3 x i32> asm "; def $0", "=s"() 8360 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 0, i32 0, i32 0> 8361 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 8362 ret void 8363} 8364 8365define void @s_shuffle_v4i32_v3i32__5_u_0_0() { 8366; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_u_0_0: 8367; GFX900: ; %bb.0: 8368; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8369; GFX900-NEXT: ;;#ASMSTART 8370; GFX900-NEXT: ; def s[8:10] 8371; GFX900-NEXT: ;;#ASMEND 8372; GFX900-NEXT: ;;#ASMSTART 8373; GFX900-NEXT: ; def s[4:6] 8374; GFX900-NEXT: 
;;#ASMEND 8375; GFX900-NEXT: s_mov_b32 s8, s10 8376; GFX900-NEXT: s_mov_b32 s10, s4 8377; GFX900-NEXT: s_mov_b32 s11, s4 8378; GFX900-NEXT: ;;#ASMSTART 8379; GFX900-NEXT: ; use s[8:11] 8380; GFX900-NEXT: ;;#ASMEND 8381; GFX900-NEXT: s_setpc_b64 s[30:31] 8382; 8383; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_u_0_0: 8384; GFX90A: ; %bb.0: 8385; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8386; GFX90A-NEXT: ;;#ASMSTART 8387; GFX90A-NEXT: ; def s[8:10] 8388; GFX90A-NEXT: ;;#ASMEND 8389; GFX90A-NEXT: ;;#ASMSTART 8390; GFX90A-NEXT: ; def s[4:6] 8391; GFX90A-NEXT: ;;#ASMEND 8392; GFX90A-NEXT: s_mov_b32 s8, s10 8393; GFX90A-NEXT: s_mov_b32 s10, s4 8394; GFX90A-NEXT: s_mov_b32 s11, s4 8395; GFX90A-NEXT: ;;#ASMSTART 8396; GFX90A-NEXT: ; use s[8:11] 8397; GFX90A-NEXT: ;;#ASMEND 8398; GFX90A-NEXT: s_setpc_b64 s[30:31] 8399; 8400; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_u_0_0: 8401; GFX940: ; %bb.0: 8402; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8403; GFX940-NEXT: ;;#ASMSTART 8404; GFX940-NEXT: ; def s[0:2] 8405; GFX940-NEXT: ;;#ASMEND 8406; GFX940-NEXT: ;;#ASMSTART 8407; GFX940-NEXT: ; def s[4:6] 8408; GFX940-NEXT: ;;#ASMEND 8409; GFX940-NEXT: s_mov_b32 s8, s6 8410; GFX940-NEXT: s_mov_b32 s10, s0 8411; GFX940-NEXT: s_mov_b32 s11, s0 8412; GFX940-NEXT: ;;#ASMSTART 8413; GFX940-NEXT: ; use s[8:11] 8414; GFX940-NEXT: ;;#ASMEND 8415; GFX940-NEXT: s_setpc_b64 s[30:31] 8416 %vec0 = call <3 x i32> asm "; def $0", "=s"() 8417 %vec1 = call <3 x i32> asm "; def $0", "=s"() 8418 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 poison, i32 0, i32 0> 8419 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 8420 ret void 8421} 8422 8423define void @s_shuffle_v4i32_v3i32__5_1_0_0() { 8424; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_1_0_0: 8425; GFX900: ; %bb.0: 8426; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8427; GFX900-NEXT: ;;#ASMSTART 8428; GFX900-NEXT: ; def s[8:10] 8429; GFX900-NEXT: ;;#ASMEND 8430; GFX900-NEXT: 
;;#ASMSTART 8431; GFX900-NEXT: ; def s[4:6] 8432; GFX900-NEXT: ;;#ASMEND 8433; GFX900-NEXT: s_mov_b32 s8, s10 8434; GFX900-NEXT: s_mov_b32 s9, s5 8435; GFX900-NEXT: s_mov_b32 s10, s4 8436; GFX900-NEXT: s_mov_b32 s11, s4 8437; GFX900-NEXT: ;;#ASMSTART 8438; GFX900-NEXT: ; use s[8:11] 8439; GFX900-NEXT: ;;#ASMEND 8440; GFX900-NEXT: s_setpc_b64 s[30:31] 8441; 8442; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_1_0_0: 8443; GFX90A: ; %bb.0: 8444; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8445; GFX90A-NEXT: ;;#ASMSTART 8446; GFX90A-NEXT: ; def s[8:10] 8447; GFX90A-NEXT: ;;#ASMEND 8448; GFX90A-NEXT: ;;#ASMSTART 8449; GFX90A-NEXT: ; def s[4:6] 8450; GFX90A-NEXT: ;;#ASMEND 8451; GFX90A-NEXT: s_mov_b32 s8, s10 8452; GFX90A-NEXT: s_mov_b32 s9, s5 8453; GFX90A-NEXT: s_mov_b32 s10, s4 8454; GFX90A-NEXT: s_mov_b32 s11, s4 8455; GFX90A-NEXT: ;;#ASMSTART 8456; GFX90A-NEXT: ; use s[8:11] 8457; GFX90A-NEXT: ;;#ASMEND 8458; GFX90A-NEXT: s_setpc_b64 s[30:31] 8459; 8460; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_1_0_0: 8461; GFX940: ; %bb.0: 8462; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8463; GFX940-NEXT: ;;#ASMSTART 8464; GFX940-NEXT: ; def s[0:2] 8465; GFX940-NEXT: ;;#ASMEND 8466; GFX940-NEXT: ;;#ASMSTART 8467; GFX940-NEXT: ; def s[4:6] 8468; GFX940-NEXT: ;;#ASMEND 8469; GFX940-NEXT: s_mov_b32 s8, s6 8470; GFX940-NEXT: s_mov_b32 s9, s1 8471; GFX940-NEXT: s_mov_b32 s10, s0 8472; GFX940-NEXT: s_mov_b32 s11, s0 8473; GFX940-NEXT: ;;#ASMSTART 8474; GFX940-NEXT: ; use s[8:11] 8475; GFX940-NEXT: ;;#ASMEND 8476; GFX940-NEXT: s_setpc_b64 s[30:31] 8477 %vec0 = call <3 x i32> asm "; def $0", "=s"() 8478 %vec1 = call <3 x i32> asm "; def $0", "=s"() 8479 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 1, i32 0, i32 0> 8480 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 8481 ret void 8482} 8483 8484define void @s_shuffle_v4i32_v3i32__5_2_0_0() { 8485; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_2_0_0: 8486; GFX900: ; %bb.0: 8487; 
GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8488; GFX900-NEXT: ;;#ASMSTART 8489; GFX900-NEXT: ; def s[8:10] 8490; GFX900-NEXT: ;;#ASMEND 8491; GFX900-NEXT: ;;#ASMSTART 8492; GFX900-NEXT: ; def s[4:6] 8493; GFX900-NEXT: ;;#ASMEND 8494; GFX900-NEXT: s_mov_b32 s8, s10 8495; GFX900-NEXT: s_mov_b32 s9, s6 8496; GFX900-NEXT: s_mov_b32 s10, s4 8497; GFX900-NEXT: s_mov_b32 s11, s4 8498; GFX900-NEXT: ;;#ASMSTART 8499; GFX900-NEXT: ; use s[8:11] 8500; GFX900-NEXT: ;;#ASMEND 8501; GFX900-NEXT: s_setpc_b64 s[30:31] 8502; 8503; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_2_0_0: 8504; GFX90A: ; %bb.0: 8505; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8506; GFX90A-NEXT: ;;#ASMSTART 8507; GFX90A-NEXT: ; def s[8:10] 8508; GFX90A-NEXT: ;;#ASMEND 8509; GFX90A-NEXT: ;;#ASMSTART 8510; GFX90A-NEXT: ; def s[4:6] 8511; GFX90A-NEXT: ;;#ASMEND 8512; GFX90A-NEXT: s_mov_b32 s8, s10 8513; GFX90A-NEXT: s_mov_b32 s9, s6 8514; GFX90A-NEXT: s_mov_b32 s10, s4 8515; GFX90A-NEXT: s_mov_b32 s11, s4 8516; GFX90A-NEXT: ;;#ASMSTART 8517; GFX90A-NEXT: ; use s[8:11] 8518; GFX90A-NEXT: ;;#ASMEND 8519; GFX90A-NEXT: s_setpc_b64 s[30:31] 8520; 8521; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_2_0_0: 8522; GFX940: ; %bb.0: 8523; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8524; GFX940-NEXT: ;;#ASMSTART 8525; GFX940-NEXT: ; def s[0:2] 8526; GFX940-NEXT: ;;#ASMEND 8527; GFX940-NEXT: ;;#ASMSTART 8528; GFX940-NEXT: ; def s[4:6] 8529; GFX940-NEXT: ;;#ASMEND 8530; GFX940-NEXT: s_mov_b32 s8, s6 8531; GFX940-NEXT: s_mov_b32 s9, s2 8532; GFX940-NEXT: s_mov_b32 s10, s0 8533; GFX940-NEXT: s_mov_b32 s11, s0 8534; GFX940-NEXT: ;;#ASMSTART 8535; GFX940-NEXT: ; use s[8:11] 8536; GFX940-NEXT: ;;#ASMEND 8537; GFX940-NEXT: s_setpc_b64 s[30:31] 8538 %vec0 = call <3 x i32> asm "; def $0", "=s"() 8539 %vec1 = call <3 x i32> asm "; def $0", "=s"() 8540 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 2, i32 0, i32 0> 8541 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 
8542 ret void 8543} 8544 8545define void @s_shuffle_v4i32_v3i32__5_3_0_0() { 8546; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_3_0_0: 8547; GFX900: ; %bb.0: 8548; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8549; GFX900-NEXT: ;;#ASMSTART 8550; GFX900-NEXT: ; def s[4:6] 8551; GFX900-NEXT: ;;#ASMEND 8552; GFX900-NEXT: ;;#ASMSTART 8553; GFX900-NEXT: ; def s[12:14] 8554; GFX900-NEXT: ;;#ASMEND 8555; GFX900-NEXT: s_mov_b32 s8, s14 8556; GFX900-NEXT: s_mov_b32 s9, s12 8557; GFX900-NEXT: s_mov_b32 s10, s4 8558; GFX900-NEXT: s_mov_b32 s11, s4 8559; GFX900-NEXT: ;;#ASMSTART 8560; GFX900-NEXT: ; use s[8:11] 8561; GFX900-NEXT: ;;#ASMEND 8562; GFX900-NEXT: s_setpc_b64 s[30:31] 8563; 8564; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_3_0_0: 8565; GFX90A: ; %bb.0: 8566; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8567; GFX90A-NEXT: ;;#ASMSTART 8568; GFX90A-NEXT: ; def s[4:6] 8569; GFX90A-NEXT: ;;#ASMEND 8570; GFX90A-NEXT: ;;#ASMSTART 8571; GFX90A-NEXT: ; def s[12:14] 8572; GFX90A-NEXT: ;;#ASMEND 8573; GFX90A-NEXT: s_mov_b32 s8, s14 8574; GFX90A-NEXT: s_mov_b32 s9, s12 8575; GFX90A-NEXT: s_mov_b32 s10, s4 8576; GFX90A-NEXT: s_mov_b32 s11, s4 8577; GFX90A-NEXT: ;;#ASMSTART 8578; GFX90A-NEXT: ; use s[8:11] 8579; GFX90A-NEXT: ;;#ASMEND 8580; GFX90A-NEXT: s_setpc_b64 s[30:31] 8581; 8582; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_3_0_0: 8583; GFX940: ; %bb.0: 8584; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8585; GFX940-NEXT: ;;#ASMSTART 8586; GFX940-NEXT: ; def s[0:2] 8587; GFX940-NEXT: ;;#ASMEND 8588; GFX940-NEXT: ;;#ASMSTART 8589; GFX940-NEXT: ; def s[4:6] 8590; GFX940-NEXT: ;;#ASMEND 8591; GFX940-NEXT: s_mov_b32 s8, s6 8592; GFX940-NEXT: s_mov_b32 s9, s4 8593; GFX940-NEXT: s_mov_b32 s10, s0 8594; GFX940-NEXT: s_mov_b32 s11, s0 8595; GFX940-NEXT: ;;#ASMSTART 8596; GFX940-NEXT: ; use s[8:11] 8597; GFX940-NEXT: ;;#ASMEND 8598; GFX940-NEXT: s_setpc_b64 s[30:31] 8599 %vec0 = call <3 x i32> asm "; def $0", "=s"() 8600 %vec1 = call <3 x i32> asm "; def $0", "=s"() 8601 
%shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 3, i32 0, i32 0> 8602 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 8603 ret void 8604} 8605 8606define void @s_shuffle_v4i32_v3i32__5_4_0_0() { 8607; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_4_0_0: 8608; GFX900: ; %bb.0: 8609; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8610; GFX900-NEXT: ;;#ASMSTART 8611; GFX900-NEXT: ; def s[8:10] 8612; GFX900-NEXT: ;;#ASMEND 8613; GFX900-NEXT: ;;#ASMSTART 8614; GFX900-NEXT: ; def s[4:6] 8615; GFX900-NEXT: ;;#ASMEND 8616; GFX900-NEXT: s_mov_b32 s8, s10 8617; GFX900-NEXT: s_mov_b32 s10, s4 8618; GFX900-NEXT: s_mov_b32 s11, s4 8619; GFX900-NEXT: ;;#ASMSTART 8620; GFX900-NEXT: ; use s[8:11] 8621; GFX900-NEXT: ;;#ASMEND 8622; GFX900-NEXT: s_setpc_b64 s[30:31] 8623; 8624; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_4_0_0: 8625; GFX90A: ; %bb.0: 8626; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8627; GFX90A-NEXT: ;;#ASMSTART 8628; GFX90A-NEXT: ; def s[8:10] 8629; GFX90A-NEXT: ;;#ASMEND 8630; GFX90A-NEXT: ;;#ASMSTART 8631; GFX90A-NEXT: ; def s[4:6] 8632; GFX90A-NEXT: ;;#ASMEND 8633; GFX90A-NEXT: s_mov_b32 s8, s10 8634; GFX90A-NEXT: s_mov_b32 s10, s4 8635; GFX90A-NEXT: s_mov_b32 s11, s4 8636; GFX90A-NEXT: ;;#ASMSTART 8637; GFX90A-NEXT: ; use s[8:11] 8638; GFX90A-NEXT: ;;#ASMEND 8639; GFX90A-NEXT: s_setpc_b64 s[30:31] 8640; 8641; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_4_0_0: 8642; GFX940: ; %bb.0: 8643; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8644; GFX940-NEXT: ;;#ASMSTART 8645; GFX940-NEXT: ; def s[8:10] 8646; GFX940-NEXT: ;;#ASMEND 8647; GFX940-NEXT: ;;#ASMSTART 8648; GFX940-NEXT: ; def s[0:2] 8649; GFX940-NEXT: ;;#ASMEND 8650; GFX940-NEXT: s_mov_b32 s8, s10 8651; GFX940-NEXT: s_mov_b32 s10, s0 8652; GFX940-NEXT: s_mov_b32 s11, s0 8653; GFX940-NEXT: ;;#ASMSTART 8654; GFX940-NEXT: ; use s[8:11] 8655; GFX940-NEXT: ;;#ASMEND 8656; GFX940-NEXT: s_setpc_b64 s[30:31] 8657 %vec0 = call <3 x i32> asm "; def $0", "=s"() 8658 
%vec1 = call <3 x i32> asm "; def $0", "=s"() 8659 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 4, i32 0, i32 0> 8660 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 8661 ret void 8662} 8663 8664define void @s_shuffle_v4i32_v3i32__5_5_0_0() { 8665; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_5_0_0: 8666; GFX900: ; %bb.0: 8667; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8668; GFX900-NEXT: ;;#ASMSTART 8669; GFX900-NEXT: ; def s[8:10] 8670; GFX900-NEXT: ;;#ASMEND 8671; GFX900-NEXT: ;;#ASMSTART 8672; GFX900-NEXT: ; def s[4:6] 8673; GFX900-NEXT: ;;#ASMEND 8674; GFX900-NEXT: s_mov_b32 s8, s10 8675; GFX900-NEXT: s_mov_b32 s9, s10 8676; GFX900-NEXT: s_mov_b32 s10, s4 8677; GFX900-NEXT: s_mov_b32 s11, s4 8678; GFX900-NEXT: ;;#ASMSTART 8679; GFX900-NEXT: ; use s[8:11] 8680; GFX900-NEXT: ;;#ASMEND 8681; GFX900-NEXT: s_setpc_b64 s[30:31] 8682; 8683; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_5_0_0: 8684; GFX90A: ; %bb.0: 8685; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8686; GFX90A-NEXT: ;;#ASMSTART 8687; GFX90A-NEXT: ; def s[8:10] 8688; GFX90A-NEXT: ;;#ASMEND 8689; GFX90A-NEXT: ;;#ASMSTART 8690; GFX90A-NEXT: ; def s[4:6] 8691; GFX90A-NEXT: ;;#ASMEND 8692; GFX90A-NEXT: s_mov_b32 s8, s10 8693; GFX90A-NEXT: s_mov_b32 s9, s10 8694; GFX90A-NEXT: s_mov_b32 s10, s4 8695; GFX90A-NEXT: s_mov_b32 s11, s4 8696; GFX90A-NEXT: ;;#ASMSTART 8697; GFX90A-NEXT: ; use s[8:11] 8698; GFX90A-NEXT: ;;#ASMEND 8699; GFX90A-NEXT: s_setpc_b64 s[30:31] 8700; 8701; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_0_0: 8702; GFX940: ; %bb.0: 8703; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8704; GFX940-NEXT: ;;#ASMSTART 8705; GFX940-NEXT: ; def s[0:2] 8706; GFX940-NEXT: ;;#ASMEND 8707; GFX940-NEXT: ;;#ASMSTART 8708; GFX940-NEXT: ; def s[4:6] 8709; GFX940-NEXT: ;;#ASMEND 8710; GFX940-NEXT: s_mov_b32 s8, s6 8711; GFX940-NEXT: s_mov_b32 s9, s6 8712; GFX940-NEXT: s_mov_b32 s10, s0 8713; GFX940-NEXT: s_mov_b32 s11, s0 8714; GFX940-NEXT: 
;;#ASMSTART 8715; GFX940-NEXT: ; use s[8:11] 8716; GFX940-NEXT: ;;#ASMEND 8717; GFX940-NEXT: s_setpc_b64 s[30:31] 8718 %vec0 = call <3 x i32> asm "; def $0", "=s"() 8719 %vec1 = call <3 x i32> asm "; def $0", "=s"() 8720 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 0, i32 0> 8721 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 8722 ret void 8723} 8724 8725define void @s_shuffle_v4i32_v3i32__5_5_u_0() { 8726; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_5_u_0: 8727; GFX900: ; %bb.0: 8728; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8729; GFX900-NEXT: ;;#ASMSTART 8730; GFX900-NEXT: ; def s[8:10] 8731; GFX900-NEXT: ;;#ASMEND 8732; GFX900-NEXT: ;;#ASMSTART 8733; GFX900-NEXT: ; def s[4:6] 8734; GFX900-NEXT: ;;#ASMEND 8735; GFX900-NEXT: s_mov_b32 s8, s10 8736; GFX900-NEXT: s_mov_b32 s9, s10 8737; GFX900-NEXT: s_mov_b32 s11, s4 8738; GFX900-NEXT: ;;#ASMSTART 8739; GFX900-NEXT: ; use s[8:11] 8740; GFX900-NEXT: ;;#ASMEND 8741; GFX900-NEXT: s_setpc_b64 s[30:31] 8742; 8743; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_5_u_0: 8744; GFX90A: ; %bb.0: 8745; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8746; GFX90A-NEXT: ;;#ASMSTART 8747; GFX90A-NEXT: ; def s[8:10] 8748; GFX90A-NEXT: ;;#ASMEND 8749; GFX90A-NEXT: ;;#ASMSTART 8750; GFX90A-NEXT: ; def s[4:6] 8751; GFX90A-NEXT: ;;#ASMEND 8752; GFX90A-NEXT: s_mov_b32 s8, s10 8753; GFX90A-NEXT: s_mov_b32 s9, s10 8754; GFX90A-NEXT: s_mov_b32 s11, s4 8755; GFX90A-NEXT: ;;#ASMSTART 8756; GFX90A-NEXT: ; use s[8:11] 8757; GFX90A-NEXT: ;;#ASMEND 8758; GFX90A-NEXT: s_setpc_b64 s[30:31] 8759; 8760; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_u_0: 8761; GFX940: ; %bb.0: 8762; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8763; GFX940-NEXT: ;;#ASMSTART 8764; GFX940-NEXT: ; def s[0:2] 8765; GFX940-NEXT: ;;#ASMEND 8766; GFX940-NEXT: ;;#ASMSTART 8767; GFX940-NEXT: ; def s[4:6] 8768; GFX940-NEXT: ;;#ASMEND 8769; GFX940-NEXT: s_mov_b32 s8, s6 8770; GFX940-NEXT: s_mov_b32 s9, s6 8771; 
GFX940-NEXT: s_mov_b32 s11, s0 8772; GFX940-NEXT: ;;#ASMSTART 8773; GFX940-NEXT: ; use s[8:11] 8774; GFX940-NEXT: ;;#ASMEND 8775; GFX940-NEXT: s_setpc_b64 s[30:31] 8776 %vec0 = call <3 x i32> asm "; def $0", "=s"() 8777 %vec1 = call <3 x i32> asm "; def $0", "=s"() 8778 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 poison, i32 0> 8779 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 8780 ret void 8781} 8782 8783define void @s_shuffle_v4i32_v3i32__5_5_1_0() { 8784; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_5_1_0: 8785; GFX900: ; %bb.0: 8786; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8787; GFX900-NEXT: ;;#ASMSTART 8788; GFX900-NEXT: ; def s[8:10] 8789; GFX900-NEXT: ;;#ASMEND 8790; GFX900-NEXT: ;;#ASMSTART 8791; GFX900-NEXT: ; def s[4:6] 8792; GFX900-NEXT: ;;#ASMEND 8793; GFX900-NEXT: s_mov_b32 s8, s10 8794; GFX900-NEXT: s_mov_b32 s9, s10 8795; GFX900-NEXT: s_mov_b32 s10, s5 8796; GFX900-NEXT: s_mov_b32 s11, s4 8797; GFX900-NEXT: ;;#ASMSTART 8798; GFX900-NEXT: ; use s[8:11] 8799; GFX900-NEXT: ;;#ASMEND 8800; GFX900-NEXT: s_setpc_b64 s[30:31] 8801; 8802; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_5_1_0: 8803; GFX90A: ; %bb.0: 8804; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8805; GFX90A-NEXT: ;;#ASMSTART 8806; GFX90A-NEXT: ; def s[8:10] 8807; GFX90A-NEXT: ;;#ASMEND 8808; GFX90A-NEXT: ;;#ASMSTART 8809; GFX90A-NEXT: ; def s[4:6] 8810; GFX90A-NEXT: ;;#ASMEND 8811; GFX90A-NEXT: s_mov_b32 s8, s10 8812; GFX90A-NEXT: s_mov_b32 s9, s10 8813; GFX90A-NEXT: s_mov_b32 s10, s5 8814; GFX90A-NEXT: s_mov_b32 s11, s4 8815; GFX90A-NEXT: ;;#ASMSTART 8816; GFX90A-NEXT: ; use s[8:11] 8817; GFX90A-NEXT: ;;#ASMEND 8818; GFX90A-NEXT: s_setpc_b64 s[30:31] 8819; 8820; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_1_0: 8821; GFX940: ; %bb.0: 8822; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8823; GFX940-NEXT: ;;#ASMSTART 8824; GFX940-NEXT: ; def s[0:2] 8825; GFX940-NEXT: ;;#ASMEND 8826; GFX940-NEXT: ;;#ASMSTART 8827; 
GFX940-NEXT: ; def s[4:6] 8828; GFX940-NEXT: ;;#ASMEND 8829; GFX940-NEXT: s_mov_b32 s8, s6 8830; GFX940-NEXT: s_mov_b32 s9, s6 8831; GFX940-NEXT: s_mov_b32 s10, s1 8832; GFX940-NEXT: s_mov_b32 s11, s0 8833; GFX940-NEXT: ;;#ASMSTART 8834; GFX940-NEXT: ; use s[8:11] 8835; GFX940-NEXT: ;;#ASMEND 8836; GFX940-NEXT: s_setpc_b64 s[30:31] 8837 %vec0 = call <3 x i32> asm "; def $0", "=s"() 8838 %vec1 = call <3 x i32> asm "; def $0", "=s"() 8839 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 1, i32 0> 8840 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 8841 ret void 8842} 8843 8844define void @s_shuffle_v4i32_v3i32__5_5_2_0() { 8845; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_5_2_0: 8846; GFX900: ; %bb.0: 8847; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8848; GFX900-NEXT: ;;#ASMSTART 8849; GFX900-NEXT: ; def s[8:10] 8850; GFX900-NEXT: ;;#ASMEND 8851; GFX900-NEXT: ;;#ASMSTART 8852; GFX900-NEXT: ; def s[4:6] 8853; GFX900-NEXT: ;;#ASMEND 8854; GFX900-NEXT: s_mov_b32 s8, s10 8855; GFX900-NEXT: s_mov_b32 s9, s10 8856; GFX900-NEXT: s_mov_b32 s10, s6 8857; GFX900-NEXT: s_mov_b32 s11, s4 8858; GFX900-NEXT: ;;#ASMSTART 8859; GFX900-NEXT: ; use s[8:11] 8860; GFX900-NEXT: ;;#ASMEND 8861; GFX900-NEXT: s_setpc_b64 s[30:31] 8862; 8863; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_5_2_0: 8864; GFX90A: ; %bb.0: 8865; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8866; GFX90A-NEXT: ;;#ASMSTART 8867; GFX90A-NEXT: ; def s[8:10] 8868; GFX90A-NEXT: ;;#ASMEND 8869; GFX90A-NEXT: ;;#ASMSTART 8870; GFX90A-NEXT: ; def s[4:6] 8871; GFX90A-NEXT: ;;#ASMEND 8872; GFX90A-NEXT: s_mov_b32 s8, s10 8873; GFX90A-NEXT: s_mov_b32 s9, s10 8874; GFX90A-NEXT: s_mov_b32 s10, s6 8875; GFX90A-NEXT: s_mov_b32 s11, s4 8876; GFX90A-NEXT: ;;#ASMSTART 8877; GFX90A-NEXT: ; use s[8:11] 8878; GFX90A-NEXT: ;;#ASMEND 8879; GFX90A-NEXT: s_setpc_b64 s[30:31] 8880; 8881; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_2_0: 8882; GFX940: ; %bb.0: 8883; GFX940-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) 8884; GFX940-NEXT: ;;#ASMSTART 8885; GFX940-NEXT: ; def s[0:2] 8886; GFX940-NEXT: ;;#ASMEND 8887; GFX940-NEXT: ;;#ASMSTART 8888; GFX940-NEXT: ; def s[4:6] 8889; GFX940-NEXT: ;;#ASMEND 8890; GFX940-NEXT: s_mov_b32 s8, s6 8891; GFX940-NEXT: s_mov_b32 s9, s6 8892; GFX940-NEXT: s_mov_b32 s10, s2 8893; GFX940-NEXT: s_mov_b32 s11, s0 8894; GFX940-NEXT: ;;#ASMSTART 8895; GFX940-NEXT: ; use s[8:11] 8896; GFX940-NEXT: ;;#ASMEND 8897; GFX940-NEXT: s_setpc_b64 s[30:31] 8898 %vec0 = call <3 x i32> asm "; def $0", "=s"() 8899 %vec1 = call <3 x i32> asm "; def $0", "=s"() 8900 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 2, i32 0> 8901 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 8902 ret void 8903} 8904 8905define void @s_shuffle_v4i32_v3i32__5_5_3_0() { 8906; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_5_3_0: 8907; GFX900: ; %bb.0: 8908; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8909; GFX900-NEXT: ;;#ASMSTART 8910; GFX900-NEXT: ; def s[4:6] 8911; GFX900-NEXT: ;;#ASMEND 8912; GFX900-NEXT: ;;#ASMSTART 8913; GFX900-NEXT: ; def s[12:14] 8914; GFX900-NEXT: ;;#ASMEND 8915; GFX900-NEXT: s_mov_b32 s8, s14 8916; GFX900-NEXT: s_mov_b32 s9, s14 8917; GFX900-NEXT: s_mov_b32 s10, s12 8918; GFX900-NEXT: s_mov_b32 s11, s4 8919; GFX900-NEXT: ;;#ASMSTART 8920; GFX900-NEXT: ; use s[8:11] 8921; GFX900-NEXT: ;;#ASMEND 8922; GFX900-NEXT: s_setpc_b64 s[30:31] 8923; 8924; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_5_3_0: 8925; GFX90A: ; %bb.0: 8926; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8927; GFX90A-NEXT: ;;#ASMSTART 8928; GFX90A-NEXT: ; def s[4:6] 8929; GFX90A-NEXT: ;;#ASMEND 8930; GFX90A-NEXT: ;;#ASMSTART 8931; GFX90A-NEXT: ; def s[12:14] 8932; GFX90A-NEXT: ;;#ASMEND 8933; GFX90A-NEXT: s_mov_b32 s8, s14 8934; GFX90A-NEXT: s_mov_b32 s9, s14 8935; GFX90A-NEXT: s_mov_b32 s10, s12 8936; GFX90A-NEXT: s_mov_b32 s11, s4 8937; GFX90A-NEXT: ;;#ASMSTART 8938; GFX90A-NEXT: ; use s[8:11] 8939; 
GFX90A-NEXT: ;;#ASMEND 8940; GFX90A-NEXT: s_setpc_b64 s[30:31] 8941; 8942; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_3_0: 8943; GFX940: ; %bb.0: 8944; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8945; GFX940-NEXT: ;;#ASMSTART 8946; GFX940-NEXT: ; def s[0:2] 8947; GFX940-NEXT: ;;#ASMEND 8948; GFX940-NEXT: ;;#ASMSTART 8949; GFX940-NEXT: ; def s[4:6] 8950; GFX940-NEXT: ;;#ASMEND 8951; GFX940-NEXT: s_mov_b32 s8, s6 8952; GFX940-NEXT: s_mov_b32 s9, s6 8953; GFX940-NEXT: s_mov_b32 s10, s4 8954; GFX940-NEXT: s_mov_b32 s11, s0 8955; GFX940-NEXT: ;;#ASMSTART 8956; GFX940-NEXT: ; use s[8:11] 8957; GFX940-NEXT: ;;#ASMEND 8958; GFX940-NEXT: s_setpc_b64 s[30:31] 8959 %vec0 = call <3 x i32> asm "; def $0", "=s"() 8960 %vec1 = call <3 x i32> asm "; def $0", "=s"() 8961 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 3, i32 0> 8962 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 8963 ret void 8964} 8965 8966define void @s_shuffle_v4i32_v3i32__5_5_4_0() { 8967; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_5_4_0: 8968; GFX900: ; %bb.0: 8969; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8970; GFX900-NEXT: ;;#ASMSTART 8971; GFX900-NEXT: ; def s[4:6] 8972; GFX900-NEXT: ;;#ASMEND 8973; GFX900-NEXT: ;;#ASMSTART 8974; GFX900-NEXT: ; def s[12:14] 8975; GFX900-NEXT: ;;#ASMEND 8976; GFX900-NEXT: s_mov_b32 s8, s14 8977; GFX900-NEXT: s_mov_b32 s9, s14 8978; GFX900-NEXT: s_mov_b32 s10, s13 8979; GFX900-NEXT: s_mov_b32 s11, s4 8980; GFX900-NEXT: ;;#ASMSTART 8981; GFX900-NEXT: ; use s[8:11] 8982; GFX900-NEXT: ;;#ASMEND 8983; GFX900-NEXT: s_setpc_b64 s[30:31] 8984; 8985; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_5_4_0: 8986; GFX90A: ; %bb.0: 8987; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8988; GFX90A-NEXT: ;;#ASMSTART 8989; GFX90A-NEXT: ; def s[4:6] 8990; GFX90A-NEXT: ;;#ASMEND 8991; GFX90A-NEXT: ;;#ASMSTART 8992; GFX90A-NEXT: ; def s[12:14] 8993; GFX90A-NEXT: ;;#ASMEND 8994; GFX90A-NEXT: s_mov_b32 s8, s14 8995; GFX90A-NEXT: 
s_mov_b32 s9, s14 8996; GFX90A-NEXT: s_mov_b32 s10, s13 8997; GFX90A-NEXT: s_mov_b32 s11, s4 8998; GFX90A-NEXT: ;;#ASMSTART 8999; GFX90A-NEXT: ; use s[8:11] 9000; GFX90A-NEXT: ;;#ASMEND 9001; GFX90A-NEXT: s_setpc_b64 s[30:31] 9002; 9003; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_4_0: 9004; GFX940: ; %bb.0: 9005; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9006; GFX940-NEXT: ;;#ASMSTART 9007; GFX940-NEXT: ; def s[0:2] 9008; GFX940-NEXT: ;;#ASMEND 9009; GFX940-NEXT: ;;#ASMSTART 9010; GFX940-NEXT: ; def s[4:6] 9011; GFX940-NEXT: ;;#ASMEND 9012; GFX940-NEXT: s_mov_b32 s8, s6 9013; GFX940-NEXT: s_mov_b32 s9, s6 9014; GFX940-NEXT: s_mov_b32 s10, s5 9015; GFX940-NEXT: s_mov_b32 s11, s0 9016; GFX940-NEXT: ;;#ASMSTART 9017; GFX940-NEXT: ; use s[8:11] 9018; GFX940-NEXT: ;;#ASMEND 9019; GFX940-NEXT: s_setpc_b64 s[30:31] 9020 %vec0 = call <3 x i32> asm "; def $0", "=s"() 9021 %vec1 = call <3 x i32> asm "; def $0", "=s"() 9022 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 4, i32 0> 9023 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 9024 ret void 9025} 9026 9027define void @s_shuffle_v4i32_v3i32__u_1_1_1() { 9028; GFX9-LABEL: s_shuffle_v4i32_v3i32__u_1_1_1: 9029; GFX9: ; %bb.0: 9030; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9031; GFX9-NEXT: ;;#ASMSTART 9032; GFX9-NEXT: ; def s[8:10] 9033; GFX9-NEXT: ;;#ASMEND 9034; GFX9-NEXT: s_mov_b32 s10, s9 9035; GFX9-NEXT: s_mov_b32 s11, s9 9036; GFX9-NEXT: ;;#ASMSTART 9037; GFX9-NEXT: ; use s[8:11] 9038; GFX9-NEXT: ;;#ASMEND 9039; GFX9-NEXT: s_setpc_b64 s[30:31] 9040 %vec0 = call <3 x i32> asm "; def $0", "=s"() 9041 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> <i32 poison, i32 1, i32 1, i32 1> 9042 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 9043 ret void 9044} 9045 9046define void @s_shuffle_v4i32_v3i32__0_1_1_1() { 9047; GFX9-LABEL: s_shuffle_v4i32_v3i32__0_1_1_1: 9048; GFX9: ; %bb.0: 9049; GFX9-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) 9050; GFX9-NEXT: ;;#ASMSTART 9051; GFX9-NEXT: ; def s[8:10] 9052; GFX9-NEXT: ;;#ASMEND 9053; GFX9-NEXT: s_mov_b32 s10, s9 9054; GFX9-NEXT: s_mov_b32 s11, s9 9055; GFX9-NEXT: ;;#ASMSTART 9056; GFX9-NEXT: ; use s[8:11] 9057; GFX9-NEXT: ;;#ASMEND 9058; GFX9-NEXT: s_setpc_b64 s[30:31] 9059 %vec0 = call <3 x i32> asm "; def $0", "=s"() 9060 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 1> 9061 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 9062 ret void 9063} 9064 9065define void @s_shuffle_v4i32_v3i32__1_1_1_1() { 9066; GFX9-LABEL: s_shuffle_v4i32_v3i32__1_1_1_1: 9067; GFX9: ; %bb.0: 9068; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9069; GFX9-NEXT: ;;#ASMSTART 9070; GFX9-NEXT: ; def s[8:10] 9071; GFX9-NEXT: ;;#ASMEND 9072; GFX9-NEXT: s_mov_b32 s8, s9 9073; GFX9-NEXT: s_mov_b32 s10, s9 9074; GFX9-NEXT: s_mov_b32 s11, s9 9075; GFX9-NEXT: ;;#ASMSTART 9076; GFX9-NEXT: ; use s[8:11] 9077; GFX9-NEXT: ;;#ASMEND 9078; GFX9-NEXT: s_setpc_b64 s[30:31] 9079 %vec0 = call <3 x i32> asm "; def $0", "=s"() 9080 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1> 9081 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 9082 ret void 9083} 9084 9085define void @s_shuffle_v4i32_v3i32__2_1_1_1() { 9086; GFX9-LABEL: s_shuffle_v4i32_v3i32__2_1_1_1: 9087; GFX9: ; %bb.0: 9088; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9089; GFX9-NEXT: ;;#ASMSTART 9090; GFX9-NEXT: ; def s[8:10] 9091; GFX9-NEXT: ;;#ASMEND 9092; GFX9-NEXT: s_mov_b32 s8, s10 9093; GFX9-NEXT: s_mov_b32 s10, s9 9094; GFX9-NEXT: s_mov_b32 s11, s9 9095; GFX9-NEXT: ;;#ASMSTART 9096; GFX9-NEXT: ; use s[8:11] 9097; GFX9-NEXT: ;;#ASMEND 9098; GFX9-NEXT: s_setpc_b64 s[30:31] 9099 %vec0 = call <3 x i32> asm "; def $0", "=s"() 9100 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> <i32 2, i32 1, i32 1, i32 1> 9101 call void asm sideeffect "; use $0", 
"{s[8:11]}"(<4 x i32> %shuf) 9102 ret void 9103} 9104 9105define void @s_shuffle_v4i32_v3i32__3_1_1_1() { 9106; GFX9-LABEL: s_shuffle_v4i32_v3i32__3_1_1_1: 9107; GFX9: ; %bb.0: 9108; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9109; GFX9-NEXT: ;;#ASMSTART 9110; GFX9-NEXT: ; def s[8:10] 9111; GFX9-NEXT: ;;#ASMEND 9112; GFX9-NEXT: s_mov_b32 s10, s9 9113; GFX9-NEXT: s_mov_b32 s11, s9 9114; GFX9-NEXT: ;;#ASMSTART 9115; GFX9-NEXT: ; use s[8:11] 9116; GFX9-NEXT: ;;#ASMEND 9117; GFX9-NEXT: s_setpc_b64 s[30:31] 9118 %vec0 = call <3 x i32> asm "; def $0", "=s"() 9119 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> <i32 3, i32 1, i32 1, i32 1> 9120 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 9121 ret void 9122} 9123 9124define void @s_shuffle_v4i32_v3i32__4_1_1_1() { 9125; GFX900-LABEL: s_shuffle_v4i32_v3i32__4_1_1_1: 9126; GFX900: ; %bb.0: 9127; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9128; GFX900-NEXT: ;;#ASMSTART 9129; GFX900-NEXT: ; def s[8:10] 9130; GFX900-NEXT: ;;#ASMEND 9131; GFX900-NEXT: ;;#ASMSTART 9132; GFX900-NEXT: ; def s[4:6] 9133; GFX900-NEXT: ;;#ASMEND 9134; GFX900-NEXT: s_mov_b32 s8, s5 9135; GFX900-NEXT: s_mov_b32 s10, s9 9136; GFX900-NEXT: s_mov_b32 s11, s9 9137; GFX900-NEXT: ;;#ASMSTART 9138; GFX900-NEXT: ; use s[8:11] 9139; GFX900-NEXT: ;;#ASMEND 9140; GFX900-NEXT: s_setpc_b64 s[30:31] 9141; 9142; GFX90A-LABEL: s_shuffle_v4i32_v3i32__4_1_1_1: 9143; GFX90A: ; %bb.0: 9144; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9145; GFX90A-NEXT: ;;#ASMSTART 9146; GFX90A-NEXT: ; def s[8:10] 9147; GFX90A-NEXT: ;;#ASMEND 9148; GFX90A-NEXT: ;;#ASMSTART 9149; GFX90A-NEXT: ; def s[4:6] 9150; GFX90A-NEXT: ;;#ASMEND 9151; GFX90A-NEXT: s_mov_b32 s8, s5 9152; GFX90A-NEXT: s_mov_b32 s10, s9 9153; GFX90A-NEXT: s_mov_b32 s11, s9 9154; GFX90A-NEXT: ;;#ASMSTART 9155; GFX90A-NEXT: ; use s[8:11] 9156; GFX90A-NEXT: ;;#ASMEND 9157; GFX90A-NEXT: s_setpc_b64 s[30:31] 9158; 9159; GFX940-LABEL: 
s_shuffle_v4i32_v3i32__4_1_1_1: 9160; GFX940: ; %bb.0: 9161; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9162; GFX940-NEXT: ;;#ASMSTART 9163; GFX940-NEXT: ; def s[8:10] 9164; GFX940-NEXT: ;;#ASMEND 9165; GFX940-NEXT: ;;#ASMSTART 9166; GFX940-NEXT: ; def s[0:2] 9167; GFX940-NEXT: ;;#ASMEND 9168; GFX940-NEXT: s_mov_b32 s8, s1 9169; GFX940-NEXT: s_mov_b32 s10, s9 9170; GFX940-NEXT: s_mov_b32 s11, s9 9171; GFX940-NEXT: ;;#ASMSTART 9172; GFX940-NEXT: ; use s[8:11] 9173; GFX940-NEXT: ;;#ASMEND 9174; GFX940-NEXT: s_setpc_b64 s[30:31] 9175 %vec0 = call <3 x i32> asm "; def $0", "=s"() 9176 %vec1 = call <3 x i32> asm "; def $0", "=s"() 9177 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 4, i32 1, i32 1, i32 1> 9178 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 9179 ret void 9180} 9181 9182define void @s_shuffle_v4i32_v3i32__5_1_1_1() { 9183; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_1_1_1: 9184; GFX900: ; %bb.0: 9185; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9186; GFX900-NEXT: ;;#ASMSTART 9187; GFX900-NEXT: ; def s[8:10] 9188; GFX900-NEXT: ;;#ASMEND 9189; GFX900-NEXT: ;;#ASMSTART 9190; GFX900-NEXT: ; def s[4:6] 9191; GFX900-NEXT: ;;#ASMEND 9192; GFX900-NEXT: s_mov_b32 s8, s6 9193; GFX900-NEXT: s_mov_b32 s10, s9 9194; GFX900-NEXT: s_mov_b32 s11, s9 9195; GFX900-NEXT: ;;#ASMSTART 9196; GFX900-NEXT: ; use s[8:11] 9197; GFX900-NEXT: ;;#ASMEND 9198; GFX900-NEXT: s_setpc_b64 s[30:31] 9199; 9200; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_1_1_1: 9201; GFX90A: ; %bb.0: 9202; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9203; GFX90A-NEXT: ;;#ASMSTART 9204; GFX90A-NEXT: ; def s[8:10] 9205; GFX90A-NEXT: ;;#ASMEND 9206; GFX90A-NEXT: ;;#ASMSTART 9207; GFX90A-NEXT: ; def s[4:6] 9208; GFX90A-NEXT: ;;#ASMEND 9209; GFX90A-NEXT: s_mov_b32 s8, s6 9210; GFX90A-NEXT: s_mov_b32 s10, s9 9211; GFX90A-NEXT: s_mov_b32 s11, s9 9212; GFX90A-NEXT: ;;#ASMSTART 9213; GFX90A-NEXT: ; use s[8:11] 9214; GFX90A-NEXT: ;;#ASMEND 9215; 
GFX90A-NEXT: s_setpc_b64 s[30:31] 9216; 9217; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_1_1_1: 9218; GFX940: ; %bb.0: 9219; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9220; GFX940-NEXT: ;;#ASMSTART 9221; GFX940-NEXT: ; def s[8:10] 9222; GFX940-NEXT: ;;#ASMEND 9223; GFX940-NEXT: ;;#ASMSTART 9224; GFX940-NEXT: ; def s[0:2] 9225; GFX940-NEXT: ;;#ASMEND 9226; GFX940-NEXT: s_mov_b32 s8, s2 9227; GFX940-NEXT: s_mov_b32 s10, s9 9228; GFX940-NEXT: s_mov_b32 s11, s9 9229; GFX940-NEXT: ;;#ASMSTART 9230; GFX940-NEXT: ; use s[8:11] 9231; GFX940-NEXT: ;;#ASMEND 9232; GFX940-NEXT: s_setpc_b64 s[30:31] 9233 %vec0 = call <3 x i32> asm "; def $0", "=s"() 9234 %vec1 = call <3 x i32> asm "; def $0", "=s"() 9235 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 1, i32 1, i32 1> 9236 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 9237 ret void 9238} 9239 9240define void @s_shuffle_v4i32_v3i32__5_u_1_1() { 9241; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_u_1_1: 9242; GFX900: ; %bb.0: 9243; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9244; GFX900-NEXT: ;;#ASMSTART 9245; GFX900-NEXT: ; def s[8:10] 9246; GFX900-NEXT: ;;#ASMEND 9247; GFX900-NEXT: ;;#ASMSTART 9248; GFX900-NEXT: ; def s[4:6] 9249; GFX900-NEXT: ;;#ASMEND 9250; GFX900-NEXT: s_mov_b32 s8, s10 9251; GFX900-NEXT: s_mov_b32 s10, s5 9252; GFX900-NEXT: s_mov_b32 s11, s5 9253; GFX900-NEXT: ;;#ASMSTART 9254; GFX900-NEXT: ; use s[8:11] 9255; GFX900-NEXT: ;;#ASMEND 9256; GFX900-NEXT: s_setpc_b64 s[30:31] 9257; 9258; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_u_1_1: 9259; GFX90A: ; %bb.0: 9260; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9261; GFX90A-NEXT: ;;#ASMSTART 9262; GFX90A-NEXT: ; def s[8:10] 9263; GFX90A-NEXT: ;;#ASMEND 9264; GFX90A-NEXT: ;;#ASMSTART 9265; GFX90A-NEXT: ; def s[4:6] 9266; GFX90A-NEXT: ;;#ASMEND 9267; GFX90A-NEXT: s_mov_b32 s8, s10 9268; GFX90A-NEXT: s_mov_b32 s10, s5 9269; GFX90A-NEXT: s_mov_b32 s11, s5 9270; GFX90A-NEXT: ;;#ASMSTART 9271; 
GFX90A-NEXT: ; use s[8:11] 9272; GFX90A-NEXT: ;;#ASMEND 9273; GFX90A-NEXT: s_setpc_b64 s[30:31] 9274; 9275; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_u_1_1: 9276; GFX940: ; %bb.0: 9277; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9278; GFX940-NEXT: ;;#ASMSTART 9279; GFX940-NEXT: ; def s[0:2] 9280; GFX940-NEXT: ;;#ASMEND 9281; GFX940-NEXT: ;;#ASMSTART 9282; GFX940-NEXT: ; def s[4:6] 9283; GFX940-NEXT: ;;#ASMEND 9284; GFX940-NEXT: s_mov_b32 s8, s6 9285; GFX940-NEXT: s_mov_b32 s10, s1 9286; GFX940-NEXT: s_mov_b32 s11, s1 9287; GFX940-NEXT: ;;#ASMSTART 9288; GFX940-NEXT: ; use s[8:11] 9289; GFX940-NEXT: ;;#ASMEND 9290; GFX940-NEXT: s_setpc_b64 s[30:31] 9291 %vec0 = call <3 x i32> asm "; def $0", "=s"() 9292 %vec1 = call <3 x i32> asm "; def $0", "=s"() 9293 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 poison, i32 1, i32 1> 9294 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 9295 ret void 9296} 9297 9298define void @s_shuffle_v4i32_v3i32__5_0_1_1() { 9299; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_0_1_1: 9300; GFX900: ; %bb.0: 9301; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9302; GFX900-NEXT: ;;#ASMSTART 9303; GFX900-NEXT: ; def s[8:10] 9304; GFX900-NEXT: ;;#ASMEND 9305; GFX900-NEXT: ;;#ASMSTART 9306; GFX900-NEXT: ; def s[4:6] 9307; GFX900-NEXT: ;;#ASMEND 9308; GFX900-NEXT: s_mov_b32 s8, s10 9309; GFX900-NEXT: s_mov_b32 s9, s4 9310; GFX900-NEXT: s_mov_b32 s10, s5 9311; GFX900-NEXT: s_mov_b32 s11, s5 9312; GFX900-NEXT: ;;#ASMSTART 9313; GFX900-NEXT: ; use s[8:11] 9314; GFX900-NEXT: ;;#ASMEND 9315; GFX900-NEXT: s_setpc_b64 s[30:31] 9316; 9317; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_0_1_1: 9318; GFX90A: ; %bb.0: 9319; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9320; GFX90A-NEXT: ;;#ASMSTART 9321; GFX90A-NEXT: ; def s[8:10] 9322; GFX90A-NEXT: ;;#ASMEND 9323; GFX90A-NEXT: ;;#ASMSTART 9324; GFX90A-NEXT: ; def s[4:6] 9325; GFX90A-NEXT: ;;#ASMEND 9326; GFX90A-NEXT: s_mov_b32 s8, s10 9327; GFX90A-NEXT: 
s_mov_b32 s9, s4 9328; GFX90A-NEXT: s_mov_b32 s10, s5 9329; GFX90A-NEXT: s_mov_b32 s11, s5 9330; GFX90A-NEXT: ;;#ASMSTART 9331; GFX90A-NEXT: ; use s[8:11] 9332; GFX90A-NEXT: ;;#ASMEND 9333; GFX90A-NEXT: s_setpc_b64 s[30:31] 9334; 9335; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_0_1_1: 9336; GFX940: ; %bb.0: 9337; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9338; GFX940-NEXT: ;;#ASMSTART 9339; GFX940-NEXT: ; def s[0:2] 9340; GFX940-NEXT: ;;#ASMEND 9341; GFX940-NEXT: ;;#ASMSTART 9342; GFX940-NEXT: ; def s[4:6] 9343; GFX940-NEXT: ;;#ASMEND 9344; GFX940-NEXT: s_mov_b32 s8, s6 9345; GFX940-NEXT: s_mov_b32 s9, s0 9346; GFX940-NEXT: s_mov_b32 s10, s1 9347; GFX940-NEXT: s_mov_b32 s11, s1 9348; GFX940-NEXT: ;;#ASMSTART 9349; GFX940-NEXT: ; use s[8:11] 9350; GFX940-NEXT: ;;#ASMEND 9351; GFX940-NEXT: s_setpc_b64 s[30:31] 9352 %vec0 = call <3 x i32> asm "; def $0", "=s"() 9353 %vec1 = call <3 x i32> asm "; def $0", "=s"() 9354 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 0, i32 1, i32 1> 9355 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 9356 ret void 9357} 9358 9359define void @s_shuffle_v4i32_v3i32__5_2_1_1() { 9360; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_2_1_1: 9361; GFX900: ; %bb.0: 9362; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9363; GFX900-NEXT: ;;#ASMSTART 9364; GFX900-NEXT: ; def s[8:10] 9365; GFX900-NEXT: ;;#ASMEND 9366; GFX900-NEXT: ;;#ASMSTART 9367; GFX900-NEXT: ; def s[4:6] 9368; GFX900-NEXT: ;;#ASMEND 9369; GFX900-NEXT: s_mov_b32 s8, s10 9370; GFX900-NEXT: s_mov_b32 s9, s6 9371; GFX900-NEXT: s_mov_b32 s10, s5 9372; GFX900-NEXT: s_mov_b32 s11, s5 9373; GFX900-NEXT: ;;#ASMSTART 9374; GFX900-NEXT: ; use s[8:11] 9375; GFX900-NEXT: ;;#ASMEND 9376; GFX900-NEXT: s_setpc_b64 s[30:31] 9377; 9378; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_2_1_1: 9379; GFX90A: ; %bb.0: 9380; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9381; GFX90A-NEXT: ;;#ASMSTART 9382; GFX90A-NEXT: ; def s[8:10] 9383; GFX90A-NEXT: 
;;#ASMEND 9384; GFX90A-NEXT: ;;#ASMSTART 9385; GFX90A-NEXT: ; def s[4:6] 9386; GFX90A-NEXT: ;;#ASMEND 9387; GFX90A-NEXT: s_mov_b32 s8, s10 9388; GFX90A-NEXT: s_mov_b32 s9, s6 9389; GFX90A-NEXT: s_mov_b32 s10, s5 9390; GFX90A-NEXT: s_mov_b32 s11, s5 9391; GFX90A-NEXT: ;;#ASMSTART 9392; GFX90A-NEXT: ; use s[8:11] 9393; GFX90A-NEXT: ;;#ASMEND 9394; GFX90A-NEXT: s_setpc_b64 s[30:31] 9395; 9396; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_2_1_1: 9397; GFX940: ; %bb.0: 9398; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9399; GFX940-NEXT: ;;#ASMSTART 9400; GFX940-NEXT: ; def s[0:2] 9401; GFX940-NEXT: ;;#ASMEND 9402; GFX940-NEXT: ;;#ASMSTART 9403; GFX940-NEXT: ; def s[4:6] 9404; GFX940-NEXT: ;;#ASMEND 9405; GFX940-NEXT: s_mov_b32 s8, s6 9406; GFX940-NEXT: s_mov_b32 s9, s2 9407; GFX940-NEXT: s_mov_b32 s10, s1 9408; GFX940-NEXT: s_mov_b32 s11, s1 9409; GFX940-NEXT: ;;#ASMSTART 9410; GFX940-NEXT: ; use s[8:11] 9411; GFX940-NEXT: ;;#ASMEND 9412; GFX940-NEXT: s_setpc_b64 s[30:31] 9413 %vec0 = call <3 x i32> asm "; def $0", "=s"() 9414 %vec1 = call <3 x i32> asm "; def $0", "=s"() 9415 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 2, i32 1, i32 1> 9416 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 9417 ret void 9418} 9419 9420define void @s_shuffle_v4i32_v3i32__5_3_1_1() { 9421; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_3_1_1: 9422; GFX900: ; %bb.0: 9423; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9424; GFX900-NEXT: ;;#ASMSTART 9425; GFX900-NEXT: ; def s[4:6] 9426; GFX900-NEXT: ;;#ASMEND 9427; GFX900-NEXT: ;;#ASMSTART 9428; GFX900-NEXT: ; def s[12:14] 9429; GFX900-NEXT: ;;#ASMEND 9430; GFX900-NEXT: s_mov_b32 s8, s14 9431; GFX900-NEXT: s_mov_b32 s9, s12 9432; GFX900-NEXT: s_mov_b32 s10, s5 9433; GFX900-NEXT: s_mov_b32 s11, s5 9434; GFX900-NEXT: ;;#ASMSTART 9435; GFX900-NEXT: ; use s[8:11] 9436; GFX900-NEXT: ;;#ASMEND 9437; GFX900-NEXT: s_setpc_b64 s[30:31] 9438; 9439; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_3_1_1: 9440; 
GFX90A: ; %bb.0: 9441; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9442; GFX90A-NEXT: ;;#ASMSTART 9443; GFX90A-NEXT: ; def s[4:6] 9444; GFX90A-NEXT: ;;#ASMEND 9445; GFX90A-NEXT: ;;#ASMSTART 9446; GFX90A-NEXT: ; def s[12:14] 9447; GFX90A-NEXT: ;;#ASMEND 9448; GFX90A-NEXT: s_mov_b32 s8, s14 9449; GFX90A-NEXT: s_mov_b32 s9, s12 9450; GFX90A-NEXT: s_mov_b32 s10, s5 9451; GFX90A-NEXT: s_mov_b32 s11, s5 9452; GFX90A-NEXT: ;;#ASMSTART 9453; GFX90A-NEXT: ; use s[8:11] 9454; GFX90A-NEXT: ;;#ASMEND 9455; GFX90A-NEXT: s_setpc_b64 s[30:31] 9456; 9457; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_3_1_1: 9458; GFX940: ; %bb.0: 9459; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9460; GFX940-NEXT: ;;#ASMSTART 9461; GFX940-NEXT: ; def s[0:2] 9462; GFX940-NEXT: ;;#ASMEND 9463; GFX940-NEXT: ;;#ASMSTART 9464; GFX940-NEXT: ; def s[4:6] 9465; GFX940-NEXT: ;;#ASMEND 9466; GFX940-NEXT: s_mov_b32 s8, s6 9467; GFX940-NEXT: s_mov_b32 s9, s4 9468; GFX940-NEXT: s_mov_b32 s10, s1 9469; GFX940-NEXT: s_mov_b32 s11, s1 9470; GFX940-NEXT: ;;#ASMSTART 9471; GFX940-NEXT: ; use s[8:11] 9472; GFX940-NEXT: ;;#ASMEND 9473; GFX940-NEXT: s_setpc_b64 s[30:31] 9474 %vec0 = call <3 x i32> asm "; def $0", "=s"() 9475 %vec1 = call <3 x i32> asm "; def $0", "=s"() 9476 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 3, i32 1, i32 1> 9477 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 9478 ret void 9479} 9480 9481define void @s_shuffle_v4i32_v3i32__5_4_1_1() { 9482; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_4_1_1: 9483; GFX900: ; %bb.0: 9484; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9485; GFX900-NEXT: ;;#ASMSTART 9486; GFX900-NEXT: ; def s[8:10] 9487; GFX900-NEXT: ;;#ASMEND 9488; GFX900-NEXT: ;;#ASMSTART 9489; GFX900-NEXT: ; def s[4:6] 9490; GFX900-NEXT: ;;#ASMEND 9491; GFX900-NEXT: s_mov_b32 s8, s10 9492; GFX900-NEXT: s_mov_b32 s10, s5 9493; GFX900-NEXT: s_mov_b32 s11, s5 9494; GFX900-NEXT: ;;#ASMSTART 9495; GFX900-NEXT: ; use s[8:11] 9496; 
GFX900-NEXT: ;;#ASMEND 9497; GFX900-NEXT: s_setpc_b64 s[30:31] 9498; 9499; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_4_1_1: 9500; GFX90A: ; %bb.0: 9501; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9502; GFX90A-NEXT: ;;#ASMSTART 9503; GFX90A-NEXT: ; def s[8:10] 9504; GFX90A-NEXT: ;;#ASMEND 9505; GFX90A-NEXT: ;;#ASMSTART 9506; GFX90A-NEXT: ; def s[4:6] 9507; GFX90A-NEXT: ;;#ASMEND 9508; GFX90A-NEXT: s_mov_b32 s8, s10 9509; GFX90A-NEXT: s_mov_b32 s10, s5 9510; GFX90A-NEXT: s_mov_b32 s11, s5 9511; GFX90A-NEXT: ;;#ASMSTART 9512; GFX90A-NEXT: ; use s[8:11] 9513; GFX90A-NEXT: ;;#ASMEND 9514; GFX90A-NEXT: s_setpc_b64 s[30:31] 9515; 9516; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_4_1_1: 9517; GFX940: ; %bb.0: 9518; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9519; GFX940-NEXT: ;;#ASMSTART 9520; GFX940-NEXT: ; def s[8:10] 9521; GFX940-NEXT: ;;#ASMEND 9522; GFX940-NEXT: ;;#ASMSTART 9523; GFX940-NEXT: ; def s[0:2] 9524; GFX940-NEXT: ;;#ASMEND 9525; GFX940-NEXT: s_mov_b32 s8, s10 9526; GFX940-NEXT: s_mov_b32 s10, s1 9527; GFX940-NEXT: s_mov_b32 s11, s1 9528; GFX940-NEXT: ;;#ASMSTART 9529; GFX940-NEXT: ; use s[8:11] 9530; GFX940-NEXT: ;;#ASMEND 9531; GFX940-NEXT: s_setpc_b64 s[30:31] 9532 %vec0 = call <3 x i32> asm "; def $0", "=s"() 9533 %vec1 = call <3 x i32> asm "; def $0", "=s"() 9534 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 4, i32 1, i32 1> 9535 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 9536 ret void 9537} 9538 9539define void @s_shuffle_v4i32_v3i32__5_5_1_1() { 9540; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_5_1_1: 9541; GFX900: ; %bb.0: 9542; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9543; GFX900-NEXT: ;;#ASMSTART 9544; GFX900-NEXT: ; def s[8:10] 9545; GFX900-NEXT: ;;#ASMEND 9546; GFX900-NEXT: ;;#ASMSTART 9547; GFX900-NEXT: ; def s[4:6] 9548; GFX900-NEXT: ;;#ASMEND 9549; GFX900-NEXT: s_mov_b32 s8, s10 9550; GFX900-NEXT: s_mov_b32 s9, s10 9551; GFX900-NEXT: s_mov_b32 s10, s5 9552; GFX900-NEXT: 
s_mov_b32 s11, s5 9553; GFX900-NEXT: ;;#ASMSTART 9554; GFX900-NEXT: ; use s[8:11] 9555; GFX900-NEXT: ;;#ASMEND 9556; GFX900-NEXT: s_setpc_b64 s[30:31] 9557; 9558; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_5_1_1: 9559; GFX90A: ; %bb.0: 9560; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9561; GFX90A-NEXT: ;;#ASMSTART 9562; GFX90A-NEXT: ; def s[8:10] 9563; GFX90A-NEXT: ;;#ASMEND 9564; GFX90A-NEXT: ;;#ASMSTART 9565; GFX90A-NEXT: ; def s[4:6] 9566; GFX90A-NEXT: ;;#ASMEND 9567; GFX90A-NEXT: s_mov_b32 s8, s10 9568; GFX90A-NEXT: s_mov_b32 s9, s10 9569; GFX90A-NEXT: s_mov_b32 s10, s5 9570; GFX90A-NEXT: s_mov_b32 s11, s5 9571; GFX90A-NEXT: ;;#ASMSTART 9572; GFX90A-NEXT: ; use s[8:11] 9573; GFX90A-NEXT: ;;#ASMEND 9574; GFX90A-NEXT: s_setpc_b64 s[30:31] 9575; 9576; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_1_1: 9577; GFX940: ; %bb.0: 9578; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9579; GFX940-NEXT: ;;#ASMSTART 9580; GFX940-NEXT: ; def s[0:2] 9581; GFX940-NEXT: ;;#ASMEND 9582; GFX940-NEXT: ;;#ASMSTART 9583; GFX940-NEXT: ; def s[4:6] 9584; GFX940-NEXT: ;;#ASMEND 9585; GFX940-NEXT: s_mov_b32 s8, s6 9586; GFX940-NEXT: s_mov_b32 s9, s6 9587; GFX940-NEXT: s_mov_b32 s10, s1 9588; GFX940-NEXT: s_mov_b32 s11, s1 9589; GFX940-NEXT: ;;#ASMSTART 9590; GFX940-NEXT: ; use s[8:11] 9591; GFX940-NEXT: ;;#ASMEND 9592; GFX940-NEXT: s_setpc_b64 s[30:31] 9593 %vec0 = call <3 x i32> asm "; def $0", "=s"() 9594 %vec1 = call <3 x i32> asm "; def $0", "=s"() 9595 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 1, i32 1> 9596 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 9597 ret void 9598} 9599 9600define void @s_shuffle_v4i32_v3i32__5_5_u_1() { 9601; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_5_u_1: 9602; GFX900: ; %bb.0: 9603; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9604; GFX900-NEXT: ;;#ASMSTART 9605; GFX900-NEXT: ; def s[8:10] 9606; GFX900-NEXT: ;;#ASMEND 9607; GFX900-NEXT: ;;#ASMSTART 9608; GFX900-NEXT: ; def s[4:6] 
9609; GFX900-NEXT: ;;#ASMEND 9610; GFX900-NEXT: s_mov_b32 s8, s10 9611; GFX900-NEXT: s_mov_b32 s9, s10 9612; GFX900-NEXT: s_mov_b32 s11, s5 9613; GFX900-NEXT: ;;#ASMSTART 9614; GFX900-NEXT: ; use s[8:11] 9615; GFX900-NEXT: ;;#ASMEND 9616; GFX900-NEXT: s_setpc_b64 s[30:31] 9617; 9618; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_5_u_1: 9619; GFX90A: ; %bb.0: 9620; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9621; GFX90A-NEXT: ;;#ASMSTART 9622; GFX90A-NEXT: ; def s[8:10] 9623; GFX90A-NEXT: ;;#ASMEND 9624; GFX90A-NEXT: ;;#ASMSTART 9625; GFX90A-NEXT: ; def s[4:6] 9626; GFX90A-NEXT: ;;#ASMEND 9627; GFX90A-NEXT: s_mov_b32 s8, s10 9628; GFX90A-NEXT: s_mov_b32 s9, s10 9629; GFX90A-NEXT: s_mov_b32 s11, s5 9630; GFX90A-NEXT: ;;#ASMSTART 9631; GFX90A-NEXT: ; use s[8:11] 9632; GFX90A-NEXT: ;;#ASMEND 9633; GFX90A-NEXT: s_setpc_b64 s[30:31] 9634; 9635; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_u_1: 9636; GFX940: ; %bb.0: 9637; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9638; GFX940-NEXT: ;;#ASMSTART 9639; GFX940-NEXT: ; def s[0:2] 9640; GFX940-NEXT: ;;#ASMEND 9641; GFX940-NEXT: ;;#ASMSTART 9642; GFX940-NEXT: ; def s[4:6] 9643; GFX940-NEXT: ;;#ASMEND 9644; GFX940-NEXT: s_mov_b32 s8, s6 9645; GFX940-NEXT: s_mov_b32 s9, s6 9646; GFX940-NEXT: s_mov_b32 s11, s1 9647; GFX940-NEXT: ;;#ASMSTART 9648; GFX940-NEXT: ; use s[8:11] 9649; GFX940-NEXT: ;;#ASMEND 9650; GFX940-NEXT: s_setpc_b64 s[30:31] 9651 %vec0 = call <3 x i32> asm "; def $0", "=s"() 9652 %vec1 = call <3 x i32> asm "; def $0", "=s"() 9653 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 poison, i32 1> 9654 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 9655 ret void 9656} 9657 9658define void @s_shuffle_v4i32_v3i32__5_5_0_1() { 9659; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_5_0_1: 9660; GFX900: ; %bb.0: 9661; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9662; GFX900-NEXT: ;;#ASMSTART 9663; GFX900-NEXT: ; def s[8:10] 9664; GFX900-NEXT: ;;#ASMEND 9665; 
GFX900-NEXT: ;;#ASMSTART 9666; GFX900-NEXT: ; def s[4:6] 9667; GFX900-NEXT: ;;#ASMEND 9668; GFX900-NEXT: s_mov_b32 s8, s10 9669; GFX900-NEXT: s_mov_b32 s9, s10 9670; GFX900-NEXT: s_mov_b32 s10, s4 9671; GFX900-NEXT: s_mov_b32 s11, s5 9672; GFX900-NEXT: ;;#ASMSTART 9673; GFX900-NEXT: ; use s[8:11] 9674; GFX900-NEXT: ;;#ASMEND 9675; GFX900-NEXT: s_setpc_b64 s[30:31] 9676; 9677; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_5_0_1: 9678; GFX90A: ; %bb.0: 9679; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9680; GFX90A-NEXT: ;;#ASMSTART 9681; GFX90A-NEXT: ; def s[8:10] 9682; GFX90A-NEXT: ;;#ASMEND 9683; GFX90A-NEXT: ;;#ASMSTART 9684; GFX90A-NEXT: ; def s[4:6] 9685; GFX90A-NEXT: ;;#ASMEND 9686; GFX90A-NEXT: s_mov_b32 s8, s10 9687; GFX90A-NEXT: s_mov_b32 s9, s10 9688; GFX90A-NEXT: s_mov_b32 s10, s4 9689; GFX90A-NEXT: s_mov_b32 s11, s5 9690; GFX90A-NEXT: ;;#ASMSTART 9691; GFX90A-NEXT: ; use s[8:11] 9692; GFX90A-NEXT: ;;#ASMEND 9693; GFX90A-NEXT: s_setpc_b64 s[30:31] 9694; 9695; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_0_1: 9696; GFX940: ; %bb.0: 9697; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9698; GFX940-NEXT: ;;#ASMSTART 9699; GFX940-NEXT: ; def s[0:2] 9700; GFX940-NEXT: ;;#ASMEND 9701; GFX940-NEXT: ;;#ASMSTART 9702; GFX940-NEXT: ; def s[4:6] 9703; GFX940-NEXT: ;;#ASMEND 9704; GFX940-NEXT: s_mov_b32 s8, s6 9705; GFX940-NEXT: s_mov_b32 s9, s6 9706; GFX940-NEXT: s_mov_b32 s10, s0 9707; GFX940-NEXT: s_mov_b32 s11, s1 9708; GFX940-NEXT: ;;#ASMSTART 9709; GFX940-NEXT: ; use s[8:11] 9710; GFX940-NEXT: ;;#ASMEND 9711; GFX940-NEXT: s_setpc_b64 s[30:31] 9712 %vec0 = call <3 x i32> asm "; def $0", "=s"() 9713 %vec1 = call <3 x i32> asm "; def $0", "=s"() 9714 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 0, i32 1> 9715 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 9716 ret void 9717} 9718 9719define void @s_shuffle_v4i32_v3i32__5_5_2_1() { 9720; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_5_2_1: 9721; GFX900: ; 
%bb.0: 9722; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9723; GFX900-NEXT: ;;#ASMSTART 9724; GFX900-NEXT: ; def s[8:10] 9725; GFX900-NEXT: ;;#ASMEND 9726; GFX900-NEXT: ;;#ASMSTART 9727; GFX900-NEXT: ; def s[4:6] 9728; GFX900-NEXT: ;;#ASMEND 9729; GFX900-NEXT: s_mov_b32 s8, s10 9730; GFX900-NEXT: s_mov_b32 s9, s10 9731; GFX900-NEXT: s_mov_b32 s10, s6 9732; GFX900-NEXT: s_mov_b32 s11, s5 9733; GFX900-NEXT: ;;#ASMSTART 9734; GFX900-NEXT: ; use s[8:11] 9735; GFX900-NEXT: ;;#ASMEND 9736; GFX900-NEXT: s_setpc_b64 s[30:31] 9737; 9738; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_5_2_1: 9739; GFX90A: ; %bb.0: 9740; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9741; GFX90A-NEXT: ;;#ASMSTART 9742; GFX90A-NEXT: ; def s[8:10] 9743; GFX90A-NEXT: ;;#ASMEND 9744; GFX90A-NEXT: ;;#ASMSTART 9745; GFX90A-NEXT: ; def s[4:6] 9746; GFX90A-NEXT: ;;#ASMEND 9747; GFX90A-NEXT: s_mov_b32 s8, s10 9748; GFX90A-NEXT: s_mov_b32 s9, s10 9749; GFX90A-NEXT: s_mov_b32 s10, s6 9750; GFX90A-NEXT: s_mov_b32 s11, s5 9751; GFX90A-NEXT: ;;#ASMSTART 9752; GFX90A-NEXT: ; use s[8:11] 9753; GFX90A-NEXT: ;;#ASMEND 9754; GFX90A-NEXT: s_setpc_b64 s[30:31] 9755; 9756; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_2_1: 9757; GFX940: ; %bb.0: 9758; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9759; GFX940-NEXT: ;;#ASMSTART 9760; GFX940-NEXT: ; def s[0:2] 9761; GFX940-NEXT: ;;#ASMEND 9762; GFX940-NEXT: ;;#ASMSTART 9763; GFX940-NEXT: ; def s[4:6] 9764; GFX940-NEXT: ;;#ASMEND 9765; GFX940-NEXT: s_mov_b32 s8, s6 9766; GFX940-NEXT: s_mov_b32 s9, s6 9767; GFX940-NEXT: s_mov_b32 s10, s2 9768; GFX940-NEXT: s_mov_b32 s11, s1 9769; GFX940-NEXT: ;;#ASMSTART 9770; GFX940-NEXT: ; use s[8:11] 9771; GFX940-NEXT: ;;#ASMEND 9772; GFX940-NEXT: s_setpc_b64 s[30:31] 9773 %vec0 = call <3 x i32> asm "; def $0", "=s"() 9774 %vec1 = call <3 x i32> asm "; def $0", "=s"() 9775 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 2, i32 1> 9776 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 
x i32> %shuf) 9777 ret void 9778} 9779
; Mask <5, 5, 3, 1> (indices 0-2 select from %vec0, 3-5 from %vec1): result = { %vec1[2], %vec1[2], %vec1[0], %vec0[1] }; the "{s[8:11]}" constraint pins the result to s[8:11].
9780define void @s_shuffle_v4i32_v3i32__5_5_3_1() { 9781; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_5_3_1: 9782; GFX900: ; %bb.0: 9783; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9784; GFX900-NEXT: ;;#ASMSTART 9785; GFX900-NEXT: ; def s[4:6] 9786; GFX900-NEXT: ;;#ASMEND 9787; GFX900-NEXT: ;;#ASMSTART 9788; GFX900-NEXT: ; def s[12:14] 9789; GFX900-NEXT: ;;#ASMEND 9790; GFX900-NEXT: s_mov_b32 s8, s14 9791; GFX900-NEXT: s_mov_b32 s9, s14 9792; GFX900-NEXT: s_mov_b32 s10, s12 9793; GFX900-NEXT: s_mov_b32 s11, s5 9794; GFX900-NEXT: ;;#ASMSTART 9795; GFX900-NEXT: ; use s[8:11] 9796; GFX900-NEXT: ;;#ASMEND 9797; GFX900-NEXT: s_setpc_b64 s[30:31] 9798; 9799; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_5_3_1: 9800; GFX90A: ; %bb.0: 9801; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9802; GFX90A-NEXT: ;;#ASMSTART 9803; GFX90A-NEXT: ; def s[4:6] 9804; GFX90A-NEXT: ;;#ASMEND 9805; GFX90A-NEXT: ;;#ASMSTART 9806; GFX90A-NEXT: ; def s[12:14] 9807; GFX90A-NEXT: ;;#ASMEND 9808; GFX90A-NEXT: s_mov_b32 s8, s14 9809; GFX90A-NEXT: s_mov_b32 s9, s14 9810; GFX90A-NEXT: s_mov_b32 s10, s12 9811; GFX90A-NEXT: s_mov_b32 s11, s5 9812; GFX90A-NEXT: ;;#ASMSTART 9813; GFX90A-NEXT: ; use s[8:11] 9814; GFX90A-NEXT: ;;#ASMEND 9815; GFX90A-NEXT: s_setpc_b64 s[30:31] 9816; 9817; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_3_1: 9818; GFX940: ; %bb.0: 9819; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9820; GFX940-NEXT: ;;#ASMSTART 9821; GFX940-NEXT: ; def s[0:2] 9822; GFX940-NEXT: ;;#ASMEND 9823; GFX940-NEXT: ;;#ASMSTART 9824; GFX940-NEXT: ; def s[4:6] 9825; GFX940-NEXT: ;;#ASMEND 9826; GFX940-NEXT: s_mov_b32 s8, s6 9827; GFX940-NEXT: s_mov_b32 s9, s6 9828; GFX940-NEXT: s_mov_b32 s10, s4 9829; GFX940-NEXT: s_mov_b32 s11, s1 9830; GFX940-NEXT: ;;#ASMSTART 9831; GFX940-NEXT: ; use s[8:11] 9832; GFX940-NEXT: ;;#ASMEND 9833; GFX940-NEXT: s_setpc_b64 s[30:31] 9834 %vec0 = call <3 x i32> asm "; def $0", "=s"() 9835 %vec1 = call <3 x i32> asm "; def $0",
"=s"() 9836 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 3, i32 1> 9837 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 9838 ret void 9839} 9840
; Mask <5, 5, 4, 1>: result = { %vec1[2], %vec1[2], %vec1[1], %vec0[1] }, pinned to s[8:11] by the inline-asm use.
9841define void @s_shuffle_v4i32_v3i32__5_5_4_1() { 9842; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_5_4_1: 9843; GFX900: ; %bb.0: 9844; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9845; GFX900-NEXT: ;;#ASMSTART 9846; GFX900-NEXT: ; def s[4:6] 9847; GFX900-NEXT: ;;#ASMEND 9848; GFX900-NEXT: ;;#ASMSTART 9849; GFX900-NEXT: ; def s[12:14] 9850; GFX900-NEXT: ;;#ASMEND 9851; GFX900-NEXT: s_mov_b32 s8, s14 9852; GFX900-NEXT: s_mov_b32 s9, s14 9853; GFX900-NEXT: s_mov_b32 s10, s13 9854; GFX900-NEXT: s_mov_b32 s11, s5 9855; GFX900-NEXT: ;;#ASMSTART 9856; GFX900-NEXT: ; use s[8:11] 9857; GFX900-NEXT: ;;#ASMEND 9858; GFX900-NEXT: s_setpc_b64 s[30:31] 9859; 9860; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_5_4_1: 9861; GFX90A: ; %bb.0: 9862; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9863; GFX90A-NEXT: ;;#ASMSTART 9864; GFX90A-NEXT: ; def s[4:6] 9865; GFX90A-NEXT: ;;#ASMEND 9866; GFX90A-NEXT: ;;#ASMSTART 9867; GFX90A-NEXT: ; def s[12:14] 9868; GFX90A-NEXT: ;;#ASMEND 9869; GFX90A-NEXT: s_mov_b32 s8, s14 9870; GFX90A-NEXT: s_mov_b32 s9, s14 9871; GFX90A-NEXT: s_mov_b32 s10, s13 9872; GFX90A-NEXT: s_mov_b32 s11, s5 9873; GFX90A-NEXT: ;;#ASMSTART 9874; GFX90A-NEXT: ; use s[8:11] 9875; GFX90A-NEXT: ;;#ASMEND 9876; GFX90A-NEXT: s_setpc_b64 s[30:31] 9877; 9878; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_4_1: 9879; GFX940: ; %bb.0: 9880; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9881; GFX940-NEXT: ;;#ASMSTART 9882; GFX940-NEXT: ; def s[0:2] 9883; GFX940-NEXT: ;;#ASMEND 9884; GFX940-NEXT: ;;#ASMSTART 9885; GFX940-NEXT: ; def s[4:6] 9886; GFX940-NEXT: ;;#ASMEND 9887; GFX940-NEXT: s_mov_b32 s8, s6 9888; GFX940-NEXT: s_mov_b32 s9, s6 9889; GFX940-NEXT: s_mov_b32 s10, s5 9890; GFX940-NEXT: s_mov_b32 s11, s1 9891; GFX940-NEXT: ;;#ASMSTART 9892; GFX940-NEXT: ; use s[8:11]
9893; GFX940-NEXT: ;;#ASMEND 9894; GFX940-NEXT: s_setpc_b64 s[30:31] 9895 %vec0 = call <3 x i32> asm "; def $0", "=s"() 9896 %vec1 = call <3 x i32> asm "; def $0", "=s"() 9897 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 4, i32 1> 9898 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 9899 ret void 9900} 9901
; Single-source mask <poison, 2, 2, 2> (second operand is poison): lane 0 is unconstrained, lanes 1-3 = %vec0[2]; result pinned to s[8:11].
9902define void @s_shuffle_v4i32_v3i32__u_2_2_2() { 9903; GFX9-LABEL: s_shuffle_v4i32_v3i32__u_2_2_2: 9904; GFX9: ; %bb.0: 9905; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9906; GFX9-NEXT: ;;#ASMSTART 9907; GFX9-NEXT: ; def s[8:10] 9908; GFX9-NEXT: ;;#ASMEND 9909; GFX9-NEXT: s_mov_b32 s9, s10 9910; GFX9-NEXT: s_mov_b32 s11, s10 9911; GFX9-NEXT: ;;#ASMSTART 9912; GFX9-NEXT: ; use s[8:11] 9913; GFX9-NEXT: ;;#ASMEND 9914; GFX9-NEXT: s_setpc_b64 s[30:31] 9915 %vec0 = call <3 x i32> asm "; def $0", "=s"() 9916 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> <i32 poison, i32 2, i32 2, i32 2> 9917 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 9918 ret void 9919} 9920
; Single-source mask <0, 2, 2, 2>: result = { %vec0[0], %vec0[2], %vec0[2], %vec0[2] }, pinned to s[8:11].
9921define void @s_shuffle_v4i32_v3i32__0_2_2_2() { 9922; GFX9-LABEL: s_shuffle_v4i32_v3i32__0_2_2_2: 9923; GFX9: ; %bb.0: 9924; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9925; GFX9-NEXT: ;;#ASMSTART 9926; GFX9-NEXT: ; def s[8:10] 9927; GFX9-NEXT: ;;#ASMEND 9928; GFX9-NEXT: s_mov_b32 s9, s10 9929; GFX9-NEXT: s_mov_b32 s11, s10 9930; GFX9-NEXT: ;;#ASMSTART 9931; GFX9-NEXT: ; use s[8:11] 9932; GFX9-NEXT: ;;#ASMEND 9933; GFX9-NEXT: s_setpc_b64 s[30:31] 9934 %vec0 = call <3 x i32> asm "; def $0", "=s"() 9935 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> <i32 0, i32 2, i32 2, i32 2> 9936 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 9937 ret void 9938} 9939
; Single-source mask <1, 2, 2, 2>: result = { %vec0[1], %vec0[2], %vec0[2], %vec0[2] }, pinned to s[8:11].
9940define void @s_shuffle_v4i32_v3i32__1_2_2_2() { 9941; GFX9-LABEL: s_shuffle_v4i32_v3i32__1_2_2_2: 9942; GFX9: ; %bb.0: 9943; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9944; GFX9-NEXT:
;;#ASMSTART 9945; GFX9-NEXT: ; def s[8:10] 9946; GFX9-NEXT: ;;#ASMEND 9947; GFX9-NEXT: s_mov_b32 s8, s9 9948; GFX9-NEXT: s_mov_b32 s9, s10 9949; GFX9-NEXT: s_mov_b32 s11, s10 9950; GFX9-NEXT: ;;#ASMSTART 9951; GFX9-NEXT: ; use s[8:11] 9952; GFX9-NEXT: ;;#ASMEND 9953; GFX9-NEXT: s_setpc_b64 s[30:31] 9954 %vec0 = call <3 x i32> asm "; def $0", "=s"() 9955 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> <i32 1, i32 2, i32 2, i32 2> 9956 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 9957 ret void 9958} 9959
; Single-source mask <2, 2, 2, 2>: every result lane is %vec0[2]; result pinned to s[8:11].
9960define void @s_shuffle_v4i32_v3i32__2_2_2_2() { 9961; GFX9-LABEL: s_shuffle_v4i32_v3i32__2_2_2_2: 9962; GFX9: ; %bb.0: 9963; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9964; GFX9-NEXT: ;;#ASMSTART 9965; GFX9-NEXT: ; def s[8:10] 9966; GFX9-NEXT: ;;#ASMEND 9967; GFX9-NEXT: s_mov_b32 s8, s10 9968; GFX9-NEXT: s_mov_b32 s9, s10 9969; GFX9-NEXT: s_mov_b32 s11, s10 9970; GFX9-NEXT: ;;#ASMSTART 9971; GFX9-NEXT: ; use s[8:11] 9972; GFX9-NEXT: ;;#ASMEND 9973; GFX9-NEXT: s_setpc_b64 s[30:31] 9974 %vec0 = call <3 x i32> asm "; def $0", "=s"() 9975 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2> 9976 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 9977 ret void 9978} 9979
; Mask <3, 2, 2, 2> with a poison second operand: index 3 selects lane 0 of that poison vector, so result lane 0 is poison; lanes 1-3 = %vec0[2]; result pinned to s[8:11].
9980define void @s_shuffle_v4i32_v3i32__3_2_2_2() { 9981; GFX9-LABEL: s_shuffle_v4i32_v3i32__3_2_2_2: 9982; GFX9: ; %bb.0: 9983; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9984; GFX9-NEXT: ;;#ASMSTART 9985; GFX9-NEXT: ; def s[8:10] 9986; GFX9-NEXT: ;;#ASMEND 9987; GFX9-NEXT: s_mov_b32 s9, s10 9988; GFX9-NEXT: s_mov_b32 s11, s10 9989; GFX9-NEXT: ;;#ASMSTART 9990; GFX9-NEXT: ; use s[8:11] 9991; GFX9-NEXT: ;;#ASMEND 9992; GFX9-NEXT: s_setpc_b64 s[30:31] 9993 %vec0 = call <3 x i32> asm "; def $0", "=s"() 9994 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> <i32 3, i32 2, i32 2, i32 2> 9995 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 9996 ret void 9997}
9998
; Mask <4, 2, 2, 2> (indices 0-2 select from %vec0, 3-5 from %vec1): result = { %vec1[1], %vec0[2], %vec0[2], %vec0[2] }; the "{s[8:11]}" constraint pins the result to s[8:11].
9999define void @s_shuffle_v4i32_v3i32__4_2_2_2() { 10000; GFX900-LABEL: s_shuffle_v4i32_v3i32__4_2_2_2: 10001; GFX900: ; %bb.0: 10002; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10003; GFX900-NEXT: ;;#ASMSTART 10004; GFX900-NEXT: ; def s[8:10] 10005; GFX900-NEXT: ;;#ASMEND 10006; GFX900-NEXT: ;;#ASMSTART 10007; GFX900-NEXT: ; def s[4:6] 10008; GFX900-NEXT: ;;#ASMEND 10009; GFX900-NEXT: s_mov_b32 s8, s5 10010; GFX900-NEXT: s_mov_b32 s9, s10 10011; GFX900-NEXT: s_mov_b32 s11, s10 10012; GFX900-NEXT: ;;#ASMSTART 10013; GFX900-NEXT: ; use s[8:11] 10014; GFX900-NEXT: ;;#ASMEND 10015; GFX900-NEXT: s_setpc_b64 s[30:31] 10016; 10017; GFX90A-LABEL: s_shuffle_v4i32_v3i32__4_2_2_2: 10018; GFX90A: ; %bb.0: 10019; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10020; GFX90A-NEXT: ;;#ASMSTART 10021; GFX90A-NEXT: ; def s[8:10] 10022; GFX90A-NEXT: ;;#ASMEND 10023; GFX90A-NEXT: ;;#ASMSTART 10024; GFX90A-NEXT: ; def s[4:6] 10025; GFX90A-NEXT: ;;#ASMEND 10026; GFX90A-NEXT: s_mov_b32 s8, s5 10027; GFX90A-NEXT: s_mov_b32 s9, s10 10028; GFX90A-NEXT: s_mov_b32 s11, s10 10029; GFX90A-NEXT: ;;#ASMSTART 10030; GFX90A-NEXT: ; use s[8:11] 10031; GFX90A-NEXT: ;;#ASMEND 10032; GFX90A-NEXT: s_setpc_b64 s[30:31] 10033; 10034; GFX940-LABEL: s_shuffle_v4i32_v3i32__4_2_2_2: 10035; GFX940: ; %bb.0: 10036; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10037; GFX940-NEXT: ;;#ASMSTART 10038; GFX940-NEXT: ; def s[8:10] 10039; GFX940-NEXT: ;;#ASMEND 10040; GFX940-NEXT: ;;#ASMSTART 10041; GFX940-NEXT: ; def s[0:2] 10042; GFX940-NEXT: ;;#ASMEND 10043; GFX940-NEXT: s_mov_b32 s8, s1 10044; GFX940-NEXT: s_mov_b32 s9, s10 10045; GFX940-NEXT: s_mov_b32 s11, s10 10046; GFX940-NEXT: ;;#ASMSTART 10047; GFX940-NEXT: ; use s[8:11] 10048; GFX940-NEXT: ;;#ASMEND 10049; GFX940-NEXT: s_setpc_b64 s[30:31] 10050 %vec0 = call <3 x i32> asm "; def $0", "=s"() 10051 %vec1 = call <3 x i32> asm "; def $0", "=s"() 10052 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 4, i32 2,
i32 2, i32 2> 10053 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 10054 ret void 10055} 10056
; Mask <5, 2, 2, 2>: result = { %vec1[2], %vec0[2], %vec0[2], %vec0[2] }, pinned to s[8:11] by the inline-asm use.
10057define void @s_shuffle_v4i32_v3i32__5_2_2_2() { 10058; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_2_2_2: 10059; GFX900: ; %bb.0: 10060; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10061; GFX900-NEXT: ;;#ASMSTART 10062; GFX900-NEXT: ; def s[8:10] 10063; GFX900-NEXT: ;;#ASMEND 10064; GFX900-NEXT: ;;#ASMSTART 10065; GFX900-NEXT: ; def s[4:6] 10066; GFX900-NEXT: ;;#ASMEND 10067; GFX900-NEXT: s_mov_b32 s8, s6 10068; GFX900-NEXT: s_mov_b32 s9, s10 10069; GFX900-NEXT: s_mov_b32 s11, s10 10070; GFX900-NEXT: ;;#ASMSTART 10071; GFX900-NEXT: ; use s[8:11] 10072; GFX900-NEXT: ;;#ASMEND 10073; GFX900-NEXT: s_setpc_b64 s[30:31] 10074; 10075; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_2_2_2: 10076; GFX90A: ; %bb.0: 10077; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10078; GFX90A-NEXT: ;;#ASMSTART 10079; GFX90A-NEXT: ; def s[8:10] 10080; GFX90A-NEXT: ;;#ASMEND 10081; GFX90A-NEXT: ;;#ASMSTART 10082; GFX90A-NEXT: ; def s[4:6] 10083; GFX90A-NEXT: ;;#ASMEND 10084; GFX90A-NEXT: s_mov_b32 s8, s6 10085; GFX90A-NEXT: s_mov_b32 s9, s10 10086; GFX90A-NEXT: s_mov_b32 s11, s10 10087; GFX90A-NEXT: ;;#ASMSTART 10088; GFX90A-NEXT: ; use s[8:11] 10089; GFX90A-NEXT: ;;#ASMEND 10090; GFX90A-NEXT: s_setpc_b64 s[30:31] 10091; 10092; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_2_2_2: 10093; GFX940: ; %bb.0: 10094; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10095; GFX940-NEXT: ;;#ASMSTART 10096; GFX940-NEXT: ; def s[8:10] 10097; GFX940-NEXT: ;;#ASMEND 10098; GFX940-NEXT: ;;#ASMSTART 10099; GFX940-NEXT: ; def s[0:2] 10100; GFX940-NEXT: ;;#ASMEND 10101; GFX940-NEXT: s_mov_b32 s8, s2 10102; GFX940-NEXT: s_mov_b32 s9, s10 10103; GFX940-NEXT: s_mov_b32 s11, s10 10104; GFX940-NEXT: ;;#ASMSTART 10105; GFX940-NEXT: ; use s[8:11] 10106; GFX940-NEXT: ;;#ASMEND 10107; GFX940-NEXT: s_setpc_b64 s[30:31] 10108 %vec0 = call <3 x i32> asm "; def $0", "=s"() 10109 %vec1 = call <3 x i32>
asm "; def $0", "=s"() 10110 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 2, i32 2, i32 2> 10111 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 10112 ret void 10113} 10114
; Mask <5, poison, 2, 2>: result = { %vec1[2], unconstrained, %vec0[2], %vec0[2] }, pinned to s[8:11].
10115define void @s_shuffle_v4i32_v3i32__5_u_2_2() { 10116; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_u_2_2: 10117; GFX900: ; %bb.0: 10118; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10119; GFX900-NEXT: ;;#ASMSTART 10120; GFX900-NEXT: ; def s[8:10] 10121; GFX900-NEXT: ;;#ASMEND 10122; GFX900-NEXT: ;;#ASMSTART 10123; GFX900-NEXT: ; def s[4:6] 10124; GFX900-NEXT: ;;#ASMEND 10125; GFX900-NEXT: s_mov_b32 s8, s6 10126; GFX900-NEXT: s_mov_b32 s11, s10 10127; GFX900-NEXT: ;;#ASMSTART 10128; GFX900-NEXT: ; use s[8:11] 10129; GFX900-NEXT: ;;#ASMEND 10130; GFX900-NEXT: s_setpc_b64 s[30:31] 10131; 10132; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_u_2_2: 10133; GFX90A: ; %bb.0: 10134; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10135; GFX90A-NEXT: ;;#ASMSTART 10136; GFX90A-NEXT: ; def s[8:10] 10137; GFX90A-NEXT: ;;#ASMEND 10138; GFX90A-NEXT: ;;#ASMSTART 10139; GFX90A-NEXT: ; def s[4:6] 10140; GFX90A-NEXT: ;;#ASMEND 10141; GFX90A-NEXT: s_mov_b32 s8, s6 10142; GFX90A-NEXT: s_mov_b32 s11, s10 10143; GFX90A-NEXT: ;;#ASMSTART 10144; GFX90A-NEXT: ; use s[8:11] 10145; GFX90A-NEXT: ;;#ASMEND 10146; GFX90A-NEXT: s_setpc_b64 s[30:31] 10147; 10148; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_u_2_2: 10149; GFX940: ; %bb.0: 10150; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10151; GFX940-NEXT: ;;#ASMSTART 10152; GFX940-NEXT: ; def s[8:10] 10153; GFX940-NEXT: ;;#ASMEND 10154; GFX940-NEXT: ;;#ASMSTART 10155; GFX940-NEXT: ; def s[0:2] 10156; GFX940-NEXT: ;;#ASMEND 10157; GFX940-NEXT: s_mov_b32 s8, s2 10158; GFX940-NEXT: s_mov_b32 s11, s10 10159; GFX940-NEXT: ;;#ASMSTART 10160; GFX940-NEXT: ; use s[8:11] 10161; GFX940-NEXT: ;;#ASMEND 10162; GFX940-NEXT: s_setpc_b64 s[30:31] 10163 %vec0 = call <3 x i32> asm "; def $0", "=s"() 10164 %vec1 = call <3 x i32> asm
"; def $0", "=s"() 10165 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 poison, i32 2, i32 2> 10166 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 10167 ret void 10168} 10169
; Mask <5, 0, 2, 2>: result = { %vec1[2], %vec0[0], %vec0[2], %vec0[2] }, pinned to s[8:11].
10170define void @s_shuffle_v4i32_v3i32__5_0_2_2() { 10171; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_0_2_2: 10172; GFX900: ; %bb.0: 10173; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10174; GFX900-NEXT: ;;#ASMSTART 10175; GFX900-NEXT: ; def s[8:10] 10176; GFX900-NEXT: ;;#ASMEND 10177; GFX900-NEXT: ;;#ASMSTART 10178; GFX900-NEXT: ; def s[4:6] 10179; GFX900-NEXT: ;;#ASMEND 10180; GFX900-NEXT: s_mov_b32 s8, s10 10181; GFX900-NEXT: s_mov_b32 s9, s4 10182; GFX900-NEXT: s_mov_b32 s10, s6 10183; GFX900-NEXT: s_mov_b32 s11, s6 10184; GFX900-NEXT: ;;#ASMSTART 10185; GFX900-NEXT: ; use s[8:11] 10186; GFX900-NEXT: ;;#ASMEND 10187; GFX900-NEXT: s_setpc_b64 s[30:31] 10188; 10189; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_0_2_2: 10190; GFX90A: ; %bb.0: 10191; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10192; GFX90A-NEXT: ;;#ASMSTART 10193; GFX90A-NEXT: ; def s[8:10] 10194; GFX90A-NEXT: ;;#ASMEND 10195; GFX90A-NEXT: ;;#ASMSTART 10196; GFX90A-NEXT: ; def s[4:6] 10197; GFX90A-NEXT: ;;#ASMEND 10198; GFX90A-NEXT: s_mov_b32 s8, s10 10199; GFX90A-NEXT: s_mov_b32 s9, s4 10200; GFX90A-NEXT: s_mov_b32 s10, s6 10201; GFX90A-NEXT: s_mov_b32 s11, s6 10202; GFX90A-NEXT: ;;#ASMSTART 10203; GFX90A-NEXT: ; use s[8:11] 10204; GFX90A-NEXT: ;;#ASMEND 10205; GFX90A-NEXT: s_setpc_b64 s[30:31] 10206; 10207; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_0_2_2: 10208; GFX940: ; %bb.0: 10209; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10210; GFX940-NEXT: ;;#ASMSTART 10211; GFX940-NEXT: ; def s[0:2] 10212; GFX940-NEXT: ;;#ASMEND 10213; GFX940-NEXT: ;;#ASMSTART 10214; GFX940-NEXT: ; def s[4:6] 10215; GFX940-NEXT: ;;#ASMEND 10216; GFX940-NEXT: s_mov_b32 s8, s6 10217; GFX940-NEXT: s_mov_b32 s9, s0 10218; GFX940-NEXT: s_mov_b32 s10, s2 10219; GFX940-NEXT: s_mov_b32 s11,
s2 10220; GFX940-NEXT: ;;#ASMSTART 10221; GFX940-NEXT: ; use s[8:11] 10222; GFX940-NEXT: ;;#ASMEND 10223; GFX940-NEXT: s_setpc_b64 s[30:31] 10224 %vec0 = call <3 x i32> asm "; def $0", "=s"() 10225 %vec1 = call <3 x i32> asm "; def $0", "=s"() 10226 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 0, i32 2, i32 2> 10227 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 10228 ret void 10229} 10230
; Mask <5, 1, 2, 2>: result = { %vec1[2], %vec0[1], %vec0[2], %vec0[2] }, pinned to s[8:11].
10231define void @s_shuffle_v4i32_v3i32__5_1_2_2() { 10232; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_1_2_2: 10233; GFX900: ; %bb.0: 10234; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10235; GFX900-NEXT: ;;#ASMSTART 10236; GFX900-NEXT: ; def s[8:10] 10237; GFX900-NEXT: ;;#ASMEND 10238; GFX900-NEXT: ;;#ASMSTART 10239; GFX900-NEXT: ; def s[4:6] 10240; GFX900-NEXT: ;;#ASMEND 10241; GFX900-NEXT: s_mov_b32 s8, s6 10242; GFX900-NEXT: s_mov_b32 s11, s10 10243; GFX900-NEXT: ;;#ASMSTART 10244; GFX900-NEXT: ; use s[8:11] 10245; GFX900-NEXT: ;;#ASMEND 10246; GFX900-NEXT: s_setpc_b64 s[30:31] 10247; 10248; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_1_2_2: 10249; GFX90A: ; %bb.0: 10250; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10251; GFX90A-NEXT: ;;#ASMSTART 10252; GFX90A-NEXT: ; def s[8:10] 10253; GFX90A-NEXT: ;;#ASMEND 10254; GFX90A-NEXT: ;;#ASMSTART 10255; GFX90A-NEXT: ; def s[4:6] 10256; GFX90A-NEXT: ;;#ASMEND 10257; GFX90A-NEXT: s_mov_b32 s8, s6 10258; GFX90A-NEXT: s_mov_b32 s11, s10 10259; GFX90A-NEXT: ;;#ASMSTART 10260; GFX90A-NEXT: ; use s[8:11] 10261; GFX90A-NEXT: ;;#ASMEND 10262; GFX90A-NEXT: s_setpc_b64 s[30:31] 10263; 10264; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_1_2_2: 10265; GFX940: ; %bb.0: 10266; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10267; GFX940-NEXT: ;;#ASMSTART 10268; GFX940-NEXT: ; def s[8:10] 10269; GFX940-NEXT: ;;#ASMEND 10270; GFX940-NEXT: ;;#ASMSTART 10271; GFX940-NEXT: ; def s[0:2] 10272; GFX940-NEXT: ;;#ASMEND 10273; GFX940-NEXT: s_mov_b32 s8, s2 10274; GFX940-NEXT: s_mov_b32 s11, s10
10275; GFX940-NEXT: ;;#ASMSTART 10276; GFX940-NEXT: ; use s[8:11] 10277; GFX940-NEXT: ;;#ASMEND 10278; GFX940-NEXT: s_setpc_b64 s[30:31] 10279 %vec0 = call <3 x i32> asm "; def $0", "=s"() 10280 %vec1 = call <3 x i32> asm "; def $0", "=s"() 10281 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 1, i32 2, i32 2> 10282 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 10283 ret void 10284} 10285
; Mask <5, 3, 2, 2>: result = { %vec1[2], %vec1[0], %vec0[2], %vec0[2] }, pinned to s[8:11].
10286define void @s_shuffle_v4i32_v3i32__5_3_2_2() { 10287; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_3_2_2: 10288; GFX900: ; %bb.0: 10289; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10290; GFX900-NEXT: ;;#ASMSTART 10291; GFX900-NEXT: ; def s[8:10] 10292; GFX900-NEXT: ;;#ASMEND 10293; GFX900-NEXT: ;;#ASMSTART 10294; GFX900-NEXT: ; def s[4:6] 10295; GFX900-NEXT: ;;#ASMEND 10296; GFX900-NEXT: s_mov_b32 s8, s6 10297; GFX900-NEXT: s_mov_b32 s9, s4 10298; GFX900-NEXT: s_mov_b32 s11, s10 10299; GFX900-NEXT: ;;#ASMSTART 10300; GFX900-NEXT: ; use s[8:11] 10301; GFX900-NEXT: ;;#ASMEND 10302; GFX900-NEXT: s_setpc_b64 s[30:31] 10303; 10304; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_3_2_2: 10305; GFX90A: ; %bb.0: 10306; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10307; GFX90A-NEXT: ;;#ASMSTART 10308; GFX90A-NEXT: ; def s[8:10] 10309; GFX90A-NEXT: ;;#ASMEND 10310; GFX90A-NEXT: ;;#ASMSTART 10311; GFX90A-NEXT: ; def s[4:6] 10312; GFX90A-NEXT: ;;#ASMEND 10313; GFX90A-NEXT: s_mov_b32 s8, s6 10314; GFX90A-NEXT: s_mov_b32 s9, s4 10315; GFX90A-NEXT: s_mov_b32 s11, s10 10316; GFX90A-NEXT: ;;#ASMSTART 10317; GFX90A-NEXT: ; use s[8:11] 10318; GFX90A-NEXT: ;;#ASMEND 10319; GFX90A-NEXT: s_setpc_b64 s[30:31] 10320; 10321; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_3_2_2: 10322; GFX940: ; %bb.0: 10323; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10324; GFX940-NEXT: ;;#ASMSTART 10325; GFX940-NEXT: ; def s[8:10] 10326; GFX940-NEXT: ;;#ASMEND 10327; GFX940-NEXT: ;;#ASMSTART 10328; GFX940-NEXT: ; def s[0:2] 10329; GFX940-NEXT: ;;#ASMEND 10330;
GFX940-NEXT: s_mov_b32 s8, s2 10331; GFX940-NEXT: s_mov_b32 s9, s0 10332; GFX940-NEXT: s_mov_b32 s11, s10 10333; GFX940-NEXT: ;;#ASMSTART 10334; GFX940-NEXT: ; use s[8:11] 10335; GFX940-NEXT: ;;#ASMEND 10336; GFX940-NEXT: s_setpc_b64 s[30:31] 10337 %vec0 = call <3 x i32> asm "; def $0", "=s"() 10338 %vec1 = call <3 x i32> asm "; def $0", "=s"() 10339 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 3, i32 2, i32 2> 10340 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 10341 ret void 10342} 10343
; Mask <5, 4, 2, 2>: result = { %vec1[2], %vec1[1], %vec0[2], %vec0[2] }, pinned to s[8:11].
10344define void @s_shuffle_v4i32_v3i32__5_4_2_2() { 10345; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_4_2_2: 10346; GFX900: ; %bb.0: 10347; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10348; GFX900-NEXT: ;;#ASMSTART 10349; GFX900-NEXT: ; def s[8:10] 10350; GFX900-NEXT: ;;#ASMEND 10351; GFX900-NEXT: ;;#ASMSTART 10352; GFX900-NEXT: ; def s[4:6] 10353; GFX900-NEXT: ;;#ASMEND 10354; GFX900-NEXT: s_mov_b32 s8, s10 10355; GFX900-NEXT: s_mov_b32 s10, s6 10356; GFX900-NEXT: s_mov_b32 s11, s6 10357; GFX900-NEXT: ;;#ASMSTART 10358; GFX900-NEXT: ; use s[8:11] 10359; GFX900-NEXT: ;;#ASMEND 10360; GFX900-NEXT: s_setpc_b64 s[30:31] 10361; 10362; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_4_2_2: 10363; GFX90A: ; %bb.0: 10364; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10365; GFX90A-NEXT: ;;#ASMSTART 10366; GFX90A-NEXT: ; def s[8:10] 10367; GFX90A-NEXT: ;;#ASMEND 10368; GFX90A-NEXT: ;;#ASMSTART 10369; GFX90A-NEXT: ; def s[4:6] 10370; GFX90A-NEXT: ;;#ASMEND 10371; GFX90A-NEXT: s_mov_b32 s8, s10 10372; GFX90A-NEXT: s_mov_b32 s10, s6 10373; GFX90A-NEXT: s_mov_b32 s11, s6 10374; GFX90A-NEXT: ;;#ASMSTART 10375; GFX90A-NEXT: ; use s[8:11] 10376; GFX90A-NEXT: ;;#ASMEND 10377; GFX90A-NEXT: s_setpc_b64 s[30:31] 10378; 10379; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_4_2_2: 10380; GFX940: ; %bb.0: 10381; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10382; GFX940-NEXT: ;;#ASMSTART 10383; GFX940-NEXT: ; def s[8:10] 10384; GFX940-NEXT:
;;#ASMEND 10385; GFX940-NEXT: ;;#ASMSTART 10386; GFX940-NEXT: ; def s[0:2] 10387; GFX940-NEXT: ;;#ASMEND 10388; GFX940-NEXT: s_mov_b32 s8, s10 10389; GFX940-NEXT: s_mov_b32 s10, s2 10390; GFX940-NEXT: s_mov_b32 s11, s2 10391; GFX940-NEXT: ;;#ASMSTART 10392; GFX940-NEXT: ; use s[8:11] 10393; GFX940-NEXT: ;;#ASMEND 10394; GFX940-NEXT: s_setpc_b64 s[30:31] 10395 %vec0 = call <3 x i32> asm "; def $0", "=s"() 10396 %vec1 = call <3 x i32> asm "; def $0", "=s"() 10397 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 4, i32 2, i32 2> 10398 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 10399 ret void 10400} 10401
; Mask <5, 5, 2, 2>: result = { %vec1[2], %vec1[2], %vec0[2], %vec0[2] }, pinned to s[8:11].
10402define void @s_shuffle_v4i32_v3i32__5_5_2_2() { 10403; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_5_2_2: 10404; GFX900: ; %bb.0: 10405; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10406; GFX900-NEXT: ;;#ASMSTART 10407; GFX900-NEXT: ; def s[8:10] 10408; GFX900-NEXT: ;;#ASMEND 10409; GFX900-NEXT: ;;#ASMSTART 10410; GFX900-NEXT: ; def s[4:6] 10411; GFX900-NEXT: ;;#ASMEND 10412; GFX900-NEXT: s_mov_b32 s8, s6 10413; GFX900-NEXT: s_mov_b32 s9, s6 10414; GFX900-NEXT: s_mov_b32 s11, s10 10415; GFX900-NEXT: ;;#ASMSTART 10416; GFX900-NEXT: ; use s[8:11] 10417; GFX900-NEXT: ;;#ASMEND 10418; GFX900-NEXT: s_setpc_b64 s[30:31] 10419; 10420; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_5_2_2: 10421; GFX90A: ; %bb.0: 10422; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10423; GFX90A-NEXT: ;;#ASMSTART 10424; GFX90A-NEXT: ; def s[8:10] 10425; GFX90A-NEXT: ;;#ASMEND 10426; GFX90A-NEXT: ;;#ASMSTART 10427; GFX90A-NEXT: ; def s[4:6] 10428; GFX90A-NEXT: ;;#ASMEND 10429; GFX90A-NEXT: s_mov_b32 s8, s6 10430; GFX90A-NEXT: s_mov_b32 s9, s6 10431; GFX90A-NEXT: s_mov_b32 s11, s10 10432; GFX90A-NEXT: ;;#ASMSTART 10433; GFX90A-NEXT: ; use s[8:11] 10434; GFX90A-NEXT: ;;#ASMEND 10435; GFX90A-NEXT: s_setpc_b64 s[30:31] 10436; 10437; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_2_2: 10438; GFX940: ; %bb.0: 10439; GFX940-NEXT: s_waitcnt vmcnt(0)
expcnt(0) lgkmcnt(0) 10440; GFX940-NEXT: ;;#ASMSTART 10441; GFX940-NEXT: ; def s[8:10] 10442; GFX940-NEXT: ;;#ASMEND 10443; GFX940-NEXT: ;;#ASMSTART 10444; GFX940-NEXT: ; def s[0:2] 10445; GFX940-NEXT: ;;#ASMEND 10446; GFX940-NEXT: s_mov_b32 s8, s2 10447; GFX940-NEXT: s_mov_b32 s9, s2 10448; GFX940-NEXT: s_mov_b32 s11, s10 10449; GFX940-NEXT: ;;#ASMSTART 10450; GFX940-NEXT: ; use s[8:11] 10451; GFX940-NEXT: ;;#ASMEND 10452; GFX940-NEXT: s_setpc_b64 s[30:31] 10453 %vec0 = call <3 x i32> asm "; def $0", "=s"() 10454 %vec1 = call <3 x i32> asm "; def $0", "=s"() 10455 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 2, i32 2> 10456 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 10457 ret void 10458} 10459
; Mask <5, 5, poison, 2>: result = { %vec1[2], %vec1[2], unconstrained, %vec0[2] }, pinned to s[8:11].
10460define void @s_shuffle_v4i32_v3i32__5_5_u_2() { 10461; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_5_u_2: 10462; GFX900: ; %bb.0: 10463; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10464; GFX900-NEXT: ;;#ASMSTART 10465; GFX900-NEXT: ; def s[8:10] 10466; GFX900-NEXT: ;;#ASMEND 10467; GFX900-NEXT: ;;#ASMSTART 10468; GFX900-NEXT: ; def s[4:6] 10469; GFX900-NEXT: ;;#ASMEND 10470; GFX900-NEXT: s_mov_b32 s8, s10 10471; GFX900-NEXT: s_mov_b32 s9, s10 10472; GFX900-NEXT: s_mov_b32 s11, s6 10473; GFX900-NEXT: ;;#ASMSTART 10474; GFX900-NEXT: ; use s[8:11] 10475; GFX900-NEXT: ;;#ASMEND 10476; GFX900-NEXT: s_setpc_b64 s[30:31] 10477; 10478; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_5_u_2: 10479; GFX90A: ; %bb.0: 10480; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10481; GFX90A-NEXT: ;;#ASMSTART 10482; GFX90A-NEXT: ; def s[8:10] 10483; GFX90A-NEXT: ;;#ASMEND 10484; GFX90A-NEXT: ;;#ASMSTART 10485; GFX90A-NEXT: ; def s[4:6] 10486; GFX90A-NEXT: ;;#ASMEND 10487; GFX90A-NEXT: s_mov_b32 s8, s10 10488; GFX90A-NEXT: s_mov_b32 s9, s10 10489; GFX90A-NEXT: s_mov_b32 s11, s6 10490; GFX90A-NEXT: ;;#ASMSTART 10491; GFX90A-NEXT: ; use s[8:11] 10492; GFX90A-NEXT: ;;#ASMEND 10493; GFX90A-NEXT: s_setpc_b64 s[30:31] 10494; 10495;
GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_u_2: 10496; GFX940: ; %bb.0: 10497; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10498; GFX940-NEXT: ;;#ASMSTART 10499; GFX940-NEXT: ; def s[0:2] 10500; GFX940-NEXT: ;;#ASMEND 10501; GFX940-NEXT: ;;#ASMSTART 10502; GFX940-NEXT: ; def s[4:6] 10503; GFX940-NEXT: ;;#ASMEND 10504; GFX940-NEXT: s_mov_b32 s8, s6 10505; GFX940-NEXT: s_mov_b32 s9, s6 10506; GFX940-NEXT: s_mov_b32 s11, s2 10507; GFX940-NEXT: ;;#ASMSTART 10508; GFX940-NEXT: ; use s[8:11] 10509; GFX940-NEXT: ;;#ASMEND 10510; GFX940-NEXT: s_setpc_b64 s[30:31] 10511 %vec0 = call <3 x i32> asm "; def $0", "=s"() 10512 %vec1 = call <3 x i32> asm "; def $0", "=s"() 10513 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 poison, i32 2> 10514 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 10515 ret void 10516} 10517
; Mask <5, 5, 0, 2>: result = { %vec1[2], %vec1[2], %vec0[0], %vec0[2] }, pinned to s[8:11].
10518define void @s_shuffle_v4i32_v3i32__5_5_0_2() { 10519; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_5_0_2: 10520; GFX900: ; %bb.0: 10521; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10522; GFX900-NEXT: ;;#ASMSTART 10523; GFX900-NEXT: ; def s[8:10] 10524; GFX900-NEXT: ;;#ASMEND 10525; GFX900-NEXT: ;;#ASMSTART 10526; GFX900-NEXT: ; def s[4:6] 10527; GFX900-NEXT: ;;#ASMEND 10528; GFX900-NEXT: s_mov_b32 s8, s10 10529; GFX900-NEXT: s_mov_b32 s9, s10 10530; GFX900-NEXT: s_mov_b32 s10, s4 10531; GFX900-NEXT: s_mov_b32 s11, s6 10532; GFX900-NEXT: ;;#ASMSTART 10533; GFX900-NEXT: ; use s[8:11] 10534; GFX900-NEXT: ;;#ASMEND 10535; GFX900-NEXT: s_setpc_b64 s[30:31] 10536; 10537; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_5_0_2: 10538; GFX90A: ; %bb.0: 10539; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10540; GFX90A-NEXT: ;;#ASMSTART 10541; GFX90A-NEXT: ; def s[8:10] 10542; GFX90A-NEXT: ;;#ASMEND 10543; GFX90A-NEXT: ;;#ASMSTART 10544; GFX90A-NEXT: ; def s[4:6] 10545; GFX90A-NEXT: ;;#ASMEND 10546; GFX90A-NEXT: s_mov_b32 s8, s10 10547; GFX90A-NEXT: s_mov_b32 s9, s10 10548; GFX90A-NEXT: s_mov_b32 s10, s4
10549; GFX90A-NEXT: s_mov_b32 s11, s6 10550; GFX90A-NEXT: ;;#ASMSTART 10551; GFX90A-NEXT: ; use s[8:11] 10552; GFX90A-NEXT: ;;#ASMEND 10553; GFX90A-NEXT: s_setpc_b64 s[30:31] 10554; 10555; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_0_2: 10556; GFX940: ; %bb.0: 10557; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10558; GFX940-NEXT: ;;#ASMSTART 10559; GFX940-NEXT: ; def s[0:2] 10560; GFX940-NEXT: ;;#ASMEND 10561; GFX940-NEXT: ;;#ASMSTART 10562; GFX940-NEXT: ; def s[4:6] 10563; GFX940-NEXT: ;;#ASMEND 10564; GFX940-NEXT: s_mov_b32 s8, s6 10565; GFX940-NEXT: s_mov_b32 s9, s6 10566; GFX940-NEXT: s_mov_b32 s10, s0 10567; GFX940-NEXT: s_mov_b32 s11, s2 10568; GFX940-NEXT: ;;#ASMSTART 10569; GFX940-NEXT: ; use s[8:11] 10570; GFX940-NEXT: ;;#ASMEND 10571; GFX940-NEXT: s_setpc_b64 s[30:31] 10572 %vec0 = call <3 x i32> asm "; def $0", "=s"() 10573 %vec1 = call <3 x i32> asm "; def $0", "=s"() 10574 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 0, i32 2> 10575 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 10576 ret void 10577} 10578
; Mask <5, 5, 1, 2>: result = { %vec1[2], %vec1[2], %vec0[1], %vec0[2] }, pinned to s[8:11].
10579define void @s_shuffle_v4i32_v3i32__5_5_1_2() { 10580; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_5_1_2: 10581; GFX900: ; %bb.0: 10582; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10583; GFX900-NEXT: ;;#ASMSTART 10584; GFX900-NEXT: ; def s[8:10] 10585; GFX900-NEXT: ;;#ASMEND 10586; GFX900-NEXT: ;;#ASMSTART 10587; GFX900-NEXT: ; def s[4:6] 10588; GFX900-NEXT: ;;#ASMEND 10589; GFX900-NEXT: s_mov_b32 s8, s10 10590; GFX900-NEXT: s_mov_b32 s9, s10 10591; GFX900-NEXT: s_mov_b32 s10, s5 10592; GFX900-NEXT: s_mov_b32 s11, s6 10593; GFX900-NEXT: ;;#ASMSTART 10594; GFX900-NEXT: ; use s[8:11] 10595; GFX900-NEXT: ;;#ASMEND 10596; GFX900-NEXT: s_setpc_b64 s[30:31] 10597; 10598; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_5_1_2: 10599; GFX90A: ; %bb.0: 10600; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10601; GFX90A-NEXT: ;;#ASMSTART 10602; GFX90A-NEXT: ; def s[8:10] 10603;
GFX90A-NEXT: ;;#ASMEND 10604; GFX90A-NEXT: ;;#ASMSTART 10605; GFX90A-NEXT: ; def s[4:6] 10606; GFX90A-NEXT: ;;#ASMEND 10607; GFX90A-NEXT: s_mov_b32 s8, s10 10608; GFX90A-NEXT: s_mov_b32 s9, s10 10609; GFX90A-NEXT: s_mov_b32 s10, s5 10610; GFX90A-NEXT: s_mov_b32 s11, s6 10611; GFX90A-NEXT: ;;#ASMSTART 10612; GFX90A-NEXT: ; use s[8:11] 10613; GFX90A-NEXT: ;;#ASMEND 10614; GFX90A-NEXT: s_setpc_b64 s[30:31] 10615; 10616; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_1_2: 10617; GFX940: ; %bb.0: 10618; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10619; GFX940-NEXT: ;;#ASMSTART 10620; GFX940-NEXT: ; def s[0:2] 10621; GFX940-NEXT: ;;#ASMEND 10622; GFX940-NEXT: ;;#ASMSTART 10623; GFX940-NEXT: ; def s[4:6] 10624; GFX940-NEXT: ;;#ASMEND 10625; GFX940-NEXT: s_mov_b32 s8, s6 10626; GFX940-NEXT: s_mov_b32 s9, s6 10627; GFX940-NEXT: s_mov_b32 s10, s1 10628; GFX940-NEXT: s_mov_b32 s11, s2 10629; GFX940-NEXT: ;;#ASMSTART 10630; GFX940-NEXT: ; use s[8:11] 10631; GFX940-NEXT: ;;#ASMEND 10632; GFX940-NEXT: s_setpc_b64 s[30:31] 10633 %vec0 = call <3 x i32> asm "; def $0", "=s"() 10634 %vec1 = call <3 x i32> asm "; def $0", "=s"() 10635 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 1, i32 2> 10636 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 10637 ret void 10638} 10639
; Mask <5, 5, 3, 2>: result = { %vec1[2], %vec1[2], %vec1[0], %vec0[2] }, pinned to s[8:11].
10640define void @s_shuffle_v4i32_v3i32__5_5_3_2() { 10641; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_5_3_2: 10642; GFX900: ; %bb.0: 10643; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10644; GFX900-NEXT: ;;#ASMSTART 10645; GFX900-NEXT: ; def s[4:6] 10646; GFX900-NEXT: ;;#ASMEND 10647; GFX900-NEXT: ;;#ASMSTART 10648; GFX900-NEXT: ; def s[12:14] 10649; GFX900-NEXT: ;;#ASMEND 10650; GFX900-NEXT: s_mov_b32 s8, s14 10651; GFX900-NEXT: s_mov_b32 s9, s14 10652; GFX900-NEXT: s_mov_b32 s10, s12 10653; GFX900-NEXT: s_mov_b32 s11, s6 10654; GFX900-NEXT: ;;#ASMSTART 10655; GFX900-NEXT: ; use s[8:11] 10656; GFX900-NEXT: ;;#ASMEND 10657; GFX900-NEXT: s_setpc_b64
s[30:31] 10658; 10659; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_5_3_2: 10660; GFX90A: ; %bb.0: 10661; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10662; GFX90A-NEXT: ;;#ASMSTART 10663; GFX90A-NEXT: ; def s[4:6] 10664; GFX90A-NEXT: ;;#ASMEND 10665; GFX90A-NEXT: ;;#ASMSTART 10666; GFX90A-NEXT: ; def s[12:14] 10667; GFX90A-NEXT: ;;#ASMEND 10668; GFX90A-NEXT: s_mov_b32 s8, s14 10669; GFX90A-NEXT: s_mov_b32 s9, s14 10670; GFX90A-NEXT: s_mov_b32 s10, s12 10671; GFX90A-NEXT: s_mov_b32 s11, s6 10672; GFX90A-NEXT: ;;#ASMSTART 10673; GFX90A-NEXT: ; use s[8:11] 10674; GFX90A-NEXT: ;;#ASMEND 10675; GFX90A-NEXT: s_setpc_b64 s[30:31] 10676; 10677; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_3_2: 10678; GFX940: ; %bb.0: 10679; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10680; GFX940-NEXT: ;;#ASMSTART 10681; GFX940-NEXT: ; def s[0:2] 10682; GFX940-NEXT: ;;#ASMEND 10683; GFX940-NEXT: ;;#ASMSTART 10684; GFX940-NEXT: ; def s[4:6] 10685; GFX940-NEXT: ;;#ASMEND 10686; GFX940-NEXT: s_mov_b32 s8, s6 10687; GFX940-NEXT: s_mov_b32 s9, s6 10688; GFX940-NEXT: s_mov_b32 s10, s4 10689; GFX940-NEXT: s_mov_b32 s11, s2 10690; GFX940-NEXT: ;;#ASMSTART 10691; GFX940-NEXT: ; use s[8:11] 10692; GFX940-NEXT: ;;#ASMEND 10693; GFX940-NEXT: s_setpc_b64 s[30:31] 10694 %vec0 = call <3 x i32> asm "; def $0", "=s"() 10695 %vec1 = call <3 x i32> asm "; def $0", "=s"() 10696 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 3, i32 2> 10697 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 10698 ret void 10699} 10700
; Mask <5, 5, 4, 2>: result = { %vec1[2], %vec1[2], %vec1[1], %vec0[2] }, pinned to s[8:11].
10701define void @s_shuffle_v4i32_v3i32__5_5_4_2() { 10702; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_5_4_2: 10703; GFX900: ; %bb.0: 10704; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10705; GFX900-NEXT: ;;#ASMSTART 10706; GFX900-NEXT: ; def s[4:6] 10707; GFX900-NEXT: ;;#ASMEND 10708; GFX900-NEXT: ;;#ASMSTART 10709; GFX900-NEXT: ; def s[12:14] 10710; GFX900-NEXT: ;;#ASMEND 10711; GFX900-NEXT: s_mov_b32 s8, s14 10712;
GFX900-NEXT: s_mov_b32 s9, s14 10713; GFX900-NEXT: s_mov_b32 s10, s13 10714; GFX900-NEXT: s_mov_b32 s11, s6 10715; GFX900-NEXT: ;;#ASMSTART 10716; GFX900-NEXT: ; use s[8:11] 10717; GFX900-NEXT: ;;#ASMEND 10718; GFX900-NEXT: s_setpc_b64 s[30:31] 10719; 10720; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_5_4_2: 10721; GFX90A: ; %bb.0: 10722; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10723; GFX90A-NEXT: ;;#ASMSTART 10724; GFX90A-NEXT: ; def s[4:6] 10725; GFX90A-NEXT: ;;#ASMEND 10726; GFX90A-NEXT: ;;#ASMSTART 10727; GFX90A-NEXT: ; def s[12:14] 10728; GFX90A-NEXT: ;;#ASMEND 10729; GFX90A-NEXT: s_mov_b32 s8, s14 10730; GFX90A-NEXT: s_mov_b32 s9, s14 10731; GFX90A-NEXT: s_mov_b32 s10, s13 10732; GFX90A-NEXT: s_mov_b32 s11, s6 10733; GFX90A-NEXT: ;;#ASMSTART 10734; GFX90A-NEXT: ; use s[8:11] 10735; GFX90A-NEXT: ;;#ASMEND 10736; GFX90A-NEXT: s_setpc_b64 s[30:31] 10737; 10738; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_4_2: 10739; GFX940: ; %bb.0: 10740; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10741; GFX940-NEXT: ;;#ASMSTART 10742; GFX940-NEXT: ; def s[0:2] 10743; GFX940-NEXT: ;;#ASMEND 10744; GFX940-NEXT: ;;#ASMSTART 10745; GFX940-NEXT: ; def s[4:6] 10746; GFX940-NEXT: ;;#ASMEND 10747; GFX940-NEXT: s_mov_b32 s8, s6 10748; GFX940-NEXT: s_mov_b32 s9, s6 10749; GFX940-NEXT: s_mov_b32 s10, s5 10750; GFX940-NEXT: s_mov_b32 s11, s2 10751; GFX940-NEXT: ;;#ASMSTART 10752; GFX940-NEXT: ; use s[8:11] 10753; GFX940-NEXT: ;;#ASMEND 10754; GFX940-NEXT: s_setpc_b64 s[30:31] 10755 %vec0 = call <3 x i32> asm "; def $0", "=s"() 10756 %vec1 = call <3 x i32> asm "; def $0", "=s"() 10757 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 4, i32 2> 10758 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 10759 ret void 10760} 10761 10762define void @s_shuffle_v4i32_v3i32__u_3_3_3() { 10763; GFX9-LABEL: s_shuffle_v4i32_v3i32__u_3_3_3: 10764; GFX9: ; %bb.0: 10765; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10766; GFX9-NEXT:
;;#ASMSTART 10767; GFX9-NEXT: ; use s[8:11] 10768; GFX9-NEXT: ;;#ASMEND 10769; GFX9-NEXT: s_setpc_b64 s[30:31] 10770 %vec0 = call <3 x i32> asm "; def $0", "=s"() 10771 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> <i32 poison, i32 3, i32 3, i32 3> 10772 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 10773 ret void 10774} 10775 10776define void @s_shuffle_v4i32_v3i32__0_3_3_3() { 10777; GFX900-LABEL: s_shuffle_v4i32_v3i32__0_3_3_3: 10778; GFX900: ; %bb.0: 10779; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10780; GFX900-NEXT: ;;#ASMSTART 10781; GFX900-NEXT: ; def s[8:10] 10782; GFX900-NEXT: ;;#ASMEND 10783; GFX900-NEXT: ;;#ASMSTART 10784; GFX900-NEXT: ; use s[8:11] 10785; GFX900-NEXT: ;;#ASMEND 10786; GFX900-NEXT: s_setpc_b64 s[30:31] 10787; 10788; GFX90A-LABEL: s_shuffle_v4i32_v3i32__0_3_3_3: 10789; GFX90A: ; %bb.0: 10790; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10791; GFX90A-NEXT: ;;#ASMSTART 10792; GFX90A-NEXT: ; def s[8:10] 10793; GFX90A-NEXT: ;;#ASMEND 10794; GFX90A-NEXT: ;;#ASMSTART 10795; GFX90A-NEXT: ; use s[8:11] 10796; GFX90A-NEXT: ;;#ASMEND 10797; GFX90A-NEXT: s_setpc_b64 s[30:31] 10798; 10799; GFX940-LABEL: s_shuffle_v4i32_v3i32__0_3_3_3: 10800; GFX940: ; %bb.0: 10801; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10802; GFX940-NEXT: ;;#ASMSTART 10803; GFX940-NEXT: ; def s[8:10] 10804; GFX940-NEXT: ;;#ASMEND 10805; GFX940-NEXT: s_nop 0 10806; GFX940-NEXT: ;;#ASMSTART 10807; GFX940-NEXT: ; use s[8:11] 10808; GFX940-NEXT: ;;#ASMEND 10809; GFX940-NEXT: s_setpc_b64 s[30:31] 10810 %vec0 = call <3 x i32> asm "; def $0", "=s"() 10811 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> <i32 0, i32 3, i32 3, i32 3> 10812 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 10813 ret void 10814} 10815 10816define void @s_shuffle_v4i32_v3i32__1_3_3_3() { 10817; GFX900-LABEL: s_shuffle_v4i32_v3i32__1_3_3_3: 10818; GFX900: ; %bb.0: 10819; GFX900-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) 10820; GFX900-NEXT: ;;#ASMSTART 10821; GFX900-NEXT: ; def s[4:6] 10822; GFX900-NEXT: ;;#ASMEND 10823; GFX900-NEXT: s_mov_b32 s8, s5 10824; GFX900-NEXT: ;;#ASMSTART 10825; GFX900-NEXT: ; use s[8:11] 10826; GFX900-NEXT: ;;#ASMEND 10827; GFX900-NEXT: s_setpc_b64 s[30:31] 10828; 10829; GFX90A-LABEL: s_shuffle_v4i32_v3i32__1_3_3_3: 10830; GFX90A: ; %bb.0: 10831; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10832; GFX90A-NEXT: ;;#ASMSTART 10833; GFX90A-NEXT: ; def s[4:6] 10834; GFX90A-NEXT: ;;#ASMEND 10835; GFX90A-NEXT: s_mov_b32 s8, s5 10836; GFX90A-NEXT: ;;#ASMSTART 10837; GFX90A-NEXT: ; use s[8:11] 10838; GFX90A-NEXT: ;;#ASMEND 10839; GFX90A-NEXT: s_setpc_b64 s[30:31] 10840; 10841; GFX940-LABEL: s_shuffle_v4i32_v3i32__1_3_3_3: 10842; GFX940: ; %bb.0: 10843; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10844; GFX940-NEXT: ;;#ASMSTART 10845; GFX940-NEXT: ; def s[0:2] 10846; GFX940-NEXT: ;;#ASMEND 10847; GFX940-NEXT: s_mov_b32 s8, s1 10848; GFX940-NEXT: ;;#ASMSTART 10849; GFX940-NEXT: ; use s[8:11] 10850; GFX940-NEXT: ;;#ASMEND 10851; GFX940-NEXT: s_setpc_b64 s[30:31] 10852 %vec0 = call <3 x i32> asm "; def $0", "=s"() 10853 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> <i32 1, i32 3, i32 3, i32 3> 10854 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 10855 ret void 10856} 10857 10858define void @s_shuffle_v4i32_v3i32__2_3_3_3() { 10859; GFX900-LABEL: s_shuffle_v4i32_v3i32__2_3_3_3: 10860; GFX900: ; %bb.0: 10861; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10862; GFX900-NEXT: ;;#ASMSTART 10863; GFX900-NEXT: ; def s[4:6] 10864; GFX900-NEXT: ;;#ASMEND 10865; GFX900-NEXT: s_mov_b32 s8, s6 10866; GFX900-NEXT: ;;#ASMSTART 10867; GFX900-NEXT: ; use s[8:11] 10868; GFX900-NEXT: ;;#ASMEND 10869; GFX900-NEXT: s_setpc_b64 s[30:31] 10870; 10871; GFX90A-LABEL: s_shuffle_v4i32_v3i32__2_3_3_3: 10872; GFX90A: ; %bb.0: 10873; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10874; GFX90A-NEXT: 
;;#ASMSTART 10875; GFX90A-NEXT: ; def s[4:6] 10876; GFX90A-NEXT: ;;#ASMEND 10877; GFX90A-NEXT: s_mov_b32 s8, s6 10878; GFX90A-NEXT: ;;#ASMSTART 10879; GFX90A-NEXT: ; use s[8:11] 10880; GFX90A-NEXT: ;;#ASMEND 10881; GFX90A-NEXT: s_setpc_b64 s[30:31] 10882; 10883; GFX940-LABEL: s_shuffle_v4i32_v3i32__2_3_3_3: 10884; GFX940: ; %bb.0: 10885; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10886; GFX940-NEXT: ;;#ASMSTART 10887; GFX940-NEXT: ; def s[0:2] 10888; GFX940-NEXT: ;;#ASMEND 10889; GFX940-NEXT: s_mov_b32 s8, s2 10890; GFX940-NEXT: ;;#ASMSTART 10891; GFX940-NEXT: ; use s[8:11] 10892; GFX940-NEXT: ;;#ASMEND 10893; GFX940-NEXT: s_setpc_b64 s[30:31] 10894 %vec0 = call <3 x i32> asm "; def $0", "=s"() 10895 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> <i32 2, i32 3, i32 3, i32 3> 10896 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 10897 ret void 10898} 10899 10900define void @s_shuffle_v4i32_v3i32__3_3_3_3() { 10901; GFX9-LABEL: s_shuffle_v4i32_v3i32__3_3_3_3: 10902; GFX9: ; %bb.0: 10903; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10904; GFX9-NEXT: ;;#ASMSTART 10905; GFX9-NEXT: ; use s[8:11] 10906; GFX9-NEXT: ;;#ASMEND 10907; GFX9-NEXT: s_setpc_b64 s[30:31] 10908 %vec0 = call <3 x i32> asm "; def $0", "=s"() 10909 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 10910 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 10911 ret void 10912} 10913 10914define void @s_shuffle_v4i32_v3i32__4_3_3_3() { 10915; GFX900-LABEL: s_shuffle_v4i32_v3i32__4_3_3_3: 10916; GFX900: ; %bb.0: 10917; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10918; GFX900-NEXT: ;;#ASMSTART 10919; GFX900-NEXT: ; def s[4:6] 10920; GFX900-NEXT: ;;#ASMEND 10921; GFX900-NEXT: s_mov_b32 s8, s5 10922; GFX900-NEXT: s_mov_b32 s9, s4 10923; GFX900-NEXT: s_mov_b32 s10, s4 10924; GFX900-NEXT: s_mov_b32 s11, s4 10925; GFX900-NEXT: ;;#ASMSTART 10926; GFX900-NEXT: ; use s[8:11] 10927; 
GFX900-NEXT: ;;#ASMEND 10928; GFX900-NEXT: s_setpc_b64 s[30:31] 10929; 10930; GFX90A-LABEL: s_shuffle_v4i32_v3i32__4_3_3_3: 10931; GFX90A: ; %bb.0: 10932; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10933; GFX90A-NEXT: ;;#ASMSTART 10934; GFX90A-NEXT: ; def s[4:6] 10935; GFX90A-NEXT: ;;#ASMEND 10936; GFX90A-NEXT: s_mov_b32 s8, s5 10937; GFX90A-NEXT: s_mov_b32 s9, s4 10938; GFX90A-NEXT: s_mov_b32 s10, s4 10939; GFX90A-NEXT: s_mov_b32 s11, s4 10940; GFX90A-NEXT: ;;#ASMSTART 10941; GFX90A-NEXT: ; use s[8:11] 10942; GFX90A-NEXT: ;;#ASMEND 10943; GFX90A-NEXT: s_setpc_b64 s[30:31] 10944; 10945; GFX940-LABEL: s_shuffle_v4i32_v3i32__4_3_3_3: 10946; GFX940: ; %bb.0: 10947; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10948; GFX940-NEXT: ;;#ASMSTART 10949; GFX940-NEXT: ; def s[0:2] 10950; GFX940-NEXT: ;;#ASMEND 10951; GFX940-NEXT: s_mov_b32 s8, s1 10952; GFX940-NEXT: s_mov_b32 s9, s0 10953; GFX940-NEXT: s_mov_b32 s10, s0 10954; GFX940-NEXT: s_mov_b32 s11, s0 10955; GFX940-NEXT: ;;#ASMSTART 10956; GFX940-NEXT: ; use s[8:11] 10957; GFX940-NEXT: ;;#ASMEND 10958; GFX940-NEXT: s_setpc_b64 s[30:31] 10959 %vec0 = call <3 x i32> asm "; def $0", "=s"() 10960 %vec1 = call <3 x i32> asm "; def $0", "=s"() 10961 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 4, i32 3, i32 3, i32 3> 10962 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 10963 ret void 10964} 10965 10966define void @s_shuffle_v4i32_v3i32__5_3_3_3() { 10967; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_3_3_3: 10968; GFX900: ; %bb.0: 10969; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10970; GFX900-NEXT: ;;#ASMSTART 10971; GFX900-NEXT: ; def s[4:6] 10972; GFX900-NEXT: ;;#ASMEND 10973; GFX900-NEXT: s_mov_b32 s8, s6 10974; GFX900-NEXT: s_mov_b32 s9, s4 10975; GFX900-NEXT: s_mov_b32 s10, s4 10976; GFX900-NEXT: s_mov_b32 s11, s4 10977; GFX900-NEXT: ;;#ASMSTART 10978; GFX900-NEXT: ; use s[8:11] 10979; GFX900-NEXT: ;;#ASMEND 10980; GFX900-NEXT: s_setpc_b64 s[30:31] 10981; 
10982; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_3_3_3: 10983; GFX90A: ; %bb.0: 10984; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10985; GFX90A-NEXT: ;;#ASMSTART 10986; GFX90A-NEXT: ; def s[4:6] 10987; GFX90A-NEXT: ;;#ASMEND 10988; GFX90A-NEXT: s_mov_b32 s8, s6 10989; GFX90A-NEXT: s_mov_b32 s9, s4 10990; GFX90A-NEXT: s_mov_b32 s10, s4 10991; GFX90A-NEXT: s_mov_b32 s11, s4 10992; GFX90A-NEXT: ;;#ASMSTART 10993; GFX90A-NEXT: ; use s[8:11] 10994; GFX90A-NEXT: ;;#ASMEND 10995; GFX90A-NEXT: s_setpc_b64 s[30:31] 10996; 10997; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_3_3_3: 10998; GFX940: ; %bb.0: 10999; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11000; GFX940-NEXT: ;;#ASMSTART 11001; GFX940-NEXT: ; def s[0:2] 11002; GFX940-NEXT: ;;#ASMEND 11003; GFX940-NEXT: s_mov_b32 s8, s2 11004; GFX940-NEXT: s_mov_b32 s9, s0 11005; GFX940-NEXT: s_mov_b32 s10, s0 11006; GFX940-NEXT: s_mov_b32 s11, s0 11007; GFX940-NEXT: ;;#ASMSTART 11008; GFX940-NEXT: ; use s[8:11] 11009; GFX940-NEXT: ;;#ASMEND 11010; GFX940-NEXT: s_setpc_b64 s[30:31] 11011 %vec0 = call <3 x i32> asm "; def $0", "=s"() 11012 %vec1 = call <3 x i32> asm "; def $0", "=s"() 11013 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 3, i32 3, i32 3> 11014 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 11015 ret void 11016} 11017 11018define void @s_shuffle_v4i32_v3i32__5_u_3_3() { 11019; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_u_3_3: 11020; GFX900: ; %bb.0: 11021; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11022; GFX900-NEXT: ;;#ASMSTART 11023; GFX900-NEXT: ; def s[4:6] 11024; GFX900-NEXT: ;;#ASMEND 11025; GFX900-NEXT: s_mov_b32 s8, s6 11026; GFX900-NEXT: s_mov_b32 s10, s4 11027; GFX900-NEXT: s_mov_b32 s11, s4 11028; GFX900-NEXT: ;;#ASMSTART 11029; GFX900-NEXT: ; use s[8:11] 11030; GFX900-NEXT: ;;#ASMEND 11031; GFX900-NEXT: s_setpc_b64 s[30:31] 11032; 11033; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_u_3_3: 11034; GFX90A: ; %bb.0: 11035; GFX90A-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) 11036; GFX90A-NEXT: ;;#ASMSTART 11037; GFX90A-NEXT: ; def s[4:6] 11038; GFX90A-NEXT: ;;#ASMEND 11039; GFX90A-NEXT: s_mov_b32 s8, s6 11040; GFX90A-NEXT: s_mov_b32 s10, s4 11041; GFX90A-NEXT: s_mov_b32 s11, s4 11042; GFX90A-NEXT: ;;#ASMSTART 11043; GFX90A-NEXT: ; use s[8:11] 11044; GFX90A-NEXT: ;;#ASMEND 11045; GFX90A-NEXT: s_setpc_b64 s[30:31] 11046; 11047; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_u_3_3: 11048; GFX940: ; %bb.0: 11049; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11050; GFX940-NEXT: ;;#ASMSTART 11051; GFX940-NEXT: ; def s[0:2] 11052; GFX940-NEXT: ;;#ASMEND 11053; GFX940-NEXT: s_mov_b32 s8, s2 11054; GFX940-NEXT: s_mov_b32 s10, s0 11055; GFX940-NEXT: s_mov_b32 s11, s0 11056; GFX940-NEXT: ;;#ASMSTART 11057; GFX940-NEXT: ; use s[8:11] 11058; GFX940-NEXT: ;;#ASMEND 11059; GFX940-NEXT: s_setpc_b64 s[30:31] 11060 %vec0 = call <3 x i32> asm "; def $0", "=s"() 11061 %vec1 = call <3 x i32> asm "; def $0", "=s"() 11062 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 poison, i32 3, i32 3> 11063 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 11064 ret void 11065} 11066 11067define void @s_shuffle_v4i32_v3i32__5_0_3_3() { 11068; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_0_3_3: 11069; GFX900: ; %bb.0: 11070; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11071; GFX900-NEXT: ;;#ASMSTART 11072; GFX900-NEXT: ; def s[4:6] 11073; GFX900-NEXT: ;;#ASMEND 11074; GFX900-NEXT: ;;#ASMSTART 11075; GFX900-NEXT: ; def s[12:14] 11076; GFX900-NEXT: ;;#ASMEND 11077; GFX900-NEXT: s_mov_b32 s8, s14 11078; GFX900-NEXT: s_mov_b32 s9, s4 11079; GFX900-NEXT: s_mov_b32 s10, s12 11080; GFX900-NEXT: s_mov_b32 s11, s12 11081; GFX900-NEXT: ;;#ASMSTART 11082; GFX900-NEXT: ; use s[8:11] 11083; GFX900-NEXT: ;;#ASMEND 11084; GFX900-NEXT: s_setpc_b64 s[30:31] 11085; 11086; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_0_3_3: 11087; GFX90A: ; %bb.0: 11088; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11089; 
GFX90A-NEXT: ;;#ASMSTART 11090; GFX90A-NEXT: ; def s[4:6] 11091; GFX90A-NEXT: ;;#ASMEND 11092; GFX90A-NEXT: ;;#ASMSTART 11093; GFX90A-NEXT: ; def s[12:14] 11094; GFX90A-NEXT: ;;#ASMEND 11095; GFX90A-NEXT: s_mov_b32 s8, s14 11096; GFX90A-NEXT: s_mov_b32 s9, s4 11097; GFX90A-NEXT: s_mov_b32 s10, s12 11098; GFX90A-NEXT: s_mov_b32 s11, s12 11099; GFX90A-NEXT: ;;#ASMSTART 11100; GFX90A-NEXT: ; use s[8:11] 11101; GFX90A-NEXT: ;;#ASMEND 11102; GFX90A-NEXT: s_setpc_b64 s[30:31] 11103; 11104; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_0_3_3: 11105; GFX940: ; %bb.0: 11106; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11107; GFX940-NEXT: ;;#ASMSTART 11108; GFX940-NEXT: ; def s[0:2] 11109; GFX940-NEXT: ;;#ASMEND 11110; GFX940-NEXT: ;;#ASMSTART 11111; GFX940-NEXT: ; def s[4:6] 11112; GFX940-NEXT: ;;#ASMEND 11113; GFX940-NEXT: s_mov_b32 s8, s6 11114; GFX940-NEXT: s_mov_b32 s9, s0 11115; GFX940-NEXT: s_mov_b32 s10, s4 11116; GFX940-NEXT: s_mov_b32 s11, s4 11117; GFX940-NEXT: ;;#ASMSTART 11118; GFX940-NEXT: ; use s[8:11] 11119; GFX940-NEXT: ;;#ASMEND 11120; GFX940-NEXT: s_setpc_b64 s[30:31] 11121 %vec0 = call <3 x i32> asm "; def $0", "=s"() 11122 %vec1 = call <3 x i32> asm "; def $0", "=s"() 11123 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 0, i32 3, i32 3> 11124 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 11125 ret void 11126} 11127 11128define void @s_shuffle_v4i32_v3i32__5_1_3_3() { 11129; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_1_3_3: 11130; GFX900: ; %bb.0: 11131; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11132; GFX900-NEXT: ;;#ASMSTART 11133; GFX900-NEXT: ; def s[8:10] 11134; GFX900-NEXT: ;;#ASMEND 11135; GFX900-NEXT: ;;#ASMSTART 11136; GFX900-NEXT: ; def s[4:6] 11137; GFX900-NEXT: ;;#ASMEND 11138; GFX900-NEXT: s_mov_b32 s8, s6 11139; GFX900-NEXT: s_mov_b32 s10, s4 11140; GFX900-NEXT: s_mov_b32 s11, s4 11141; GFX900-NEXT: ;;#ASMSTART 11142; GFX900-NEXT: ; use s[8:11] 11143; GFX900-NEXT: ;;#ASMEND 11144; 
GFX900-NEXT: s_setpc_b64 s[30:31] 11145; 11146; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_1_3_3: 11147; GFX90A: ; %bb.0: 11148; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11149; GFX90A-NEXT: ;;#ASMSTART 11150; GFX90A-NEXT: ; def s[8:10] 11151; GFX90A-NEXT: ;;#ASMEND 11152; GFX90A-NEXT: ;;#ASMSTART 11153; GFX90A-NEXT: ; def s[4:6] 11154; GFX90A-NEXT: ;;#ASMEND 11155; GFX90A-NEXT: s_mov_b32 s8, s6 11156; GFX90A-NEXT: s_mov_b32 s10, s4 11157; GFX90A-NEXT: s_mov_b32 s11, s4 11158; GFX90A-NEXT: ;;#ASMSTART 11159; GFX90A-NEXT: ; use s[8:11] 11160; GFX90A-NEXT: ;;#ASMEND 11161; GFX90A-NEXT: s_setpc_b64 s[30:31] 11162; 11163; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_1_3_3: 11164; GFX940: ; %bb.0: 11165; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11166; GFX940-NEXT: ;;#ASMSTART 11167; GFX940-NEXT: ; def s[8:10] 11168; GFX940-NEXT: ;;#ASMEND 11169; GFX940-NEXT: ;;#ASMSTART 11170; GFX940-NEXT: ; def s[0:2] 11171; GFX940-NEXT: ;;#ASMEND 11172; GFX940-NEXT: s_mov_b32 s8, s2 11173; GFX940-NEXT: s_mov_b32 s10, s0 11174; GFX940-NEXT: s_mov_b32 s11, s0 11175; GFX940-NEXT: ;;#ASMSTART 11176; GFX940-NEXT: ; use s[8:11] 11177; GFX940-NEXT: ;;#ASMEND 11178; GFX940-NEXT: s_setpc_b64 s[30:31] 11179 %vec0 = call <3 x i32> asm "; def $0", "=s"() 11180 %vec1 = call <3 x i32> asm "; def $0", "=s"() 11181 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 1, i32 3, i32 3> 11182 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 11183 ret void 11184} 11185 11186define void @s_shuffle_v4i32_v3i32__5_2_3_3() { 11187; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_2_3_3: 11188; GFX900: ; %bb.0: 11189; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11190; GFX900-NEXT: ;;#ASMSTART 11191; GFX900-NEXT: ; def s[4:6] 11192; GFX900-NEXT: ;;#ASMEND 11193; GFX900-NEXT: ;;#ASMSTART 11194; GFX900-NEXT: ; def s[12:14] 11195; GFX900-NEXT: ;;#ASMEND 11196; GFX900-NEXT: s_mov_b32 s8, s14 11197; GFX900-NEXT: s_mov_b32 s9, s6 11198; GFX900-NEXT: s_mov_b32 
s10, s12 11199; GFX900-NEXT: s_mov_b32 s11, s12 11200; GFX900-NEXT: ;;#ASMSTART 11201; GFX900-NEXT: ; use s[8:11] 11202; GFX900-NEXT: ;;#ASMEND 11203; GFX900-NEXT: s_setpc_b64 s[30:31] 11204; 11205; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_2_3_3: 11206; GFX90A: ; %bb.0: 11207; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11208; GFX90A-NEXT: ;;#ASMSTART 11209; GFX90A-NEXT: ; def s[4:6] 11210; GFX90A-NEXT: ;;#ASMEND 11211; GFX90A-NEXT: ;;#ASMSTART 11212; GFX90A-NEXT: ; def s[12:14] 11213; GFX90A-NEXT: ;;#ASMEND 11214; GFX90A-NEXT: s_mov_b32 s8, s14 11215; GFX90A-NEXT: s_mov_b32 s9, s6 11216; GFX90A-NEXT: s_mov_b32 s10, s12 11217; GFX90A-NEXT: s_mov_b32 s11, s12 11218; GFX90A-NEXT: ;;#ASMSTART 11219; GFX90A-NEXT: ; use s[8:11] 11220; GFX90A-NEXT: ;;#ASMEND 11221; GFX90A-NEXT: s_setpc_b64 s[30:31] 11222; 11223; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_2_3_3: 11224; GFX940: ; %bb.0: 11225; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11226; GFX940-NEXT: ;;#ASMSTART 11227; GFX940-NEXT: ; def s[0:2] 11228; GFX940-NEXT: ;;#ASMEND 11229; GFX940-NEXT: ;;#ASMSTART 11230; GFX940-NEXT: ; def s[4:6] 11231; GFX940-NEXT: ;;#ASMEND 11232; GFX940-NEXT: s_mov_b32 s8, s6 11233; GFX940-NEXT: s_mov_b32 s9, s2 11234; GFX940-NEXT: s_mov_b32 s10, s4 11235; GFX940-NEXT: s_mov_b32 s11, s4 11236; GFX940-NEXT: ;;#ASMSTART 11237; GFX940-NEXT: ; use s[8:11] 11238; GFX940-NEXT: ;;#ASMEND 11239; GFX940-NEXT: s_setpc_b64 s[30:31] 11240 %vec0 = call <3 x i32> asm "; def $0", "=s"() 11241 %vec1 = call <3 x i32> asm "; def $0", "=s"() 11242 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 2, i32 3, i32 3> 11243 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 11244 ret void 11245} 11246 11247define void @s_shuffle_v4i32_v3i32__5_4_3_3() { 11248; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_4_3_3: 11249; GFX900: ; %bb.0: 11250; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11251; GFX900-NEXT: ;;#ASMSTART 11252; GFX900-NEXT: ; def s[4:6] 11253; 
GFX900-NEXT: ;;#ASMEND 11254; GFX900-NEXT: s_mov_b32 s8, s6 11255; GFX900-NEXT: s_mov_b32 s9, s5 11256; GFX900-NEXT: s_mov_b32 s10, s4 11257; GFX900-NEXT: s_mov_b32 s11, s4 11258; GFX900-NEXT: ;;#ASMSTART 11259; GFX900-NEXT: ; use s[8:11] 11260; GFX900-NEXT: ;;#ASMEND 11261; GFX900-NEXT: s_setpc_b64 s[30:31] 11262; 11263; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_4_3_3: 11264; GFX90A: ; %bb.0: 11265; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11266; GFX90A-NEXT: ;;#ASMSTART 11267; GFX90A-NEXT: ; def s[4:6] 11268; GFX90A-NEXT: ;;#ASMEND 11269; GFX90A-NEXT: s_mov_b32 s8, s6 11270; GFX90A-NEXT: s_mov_b32 s9, s5 11271; GFX90A-NEXT: s_mov_b32 s10, s4 11272; GFX90A-NEXT: s_mov_b32 s11, s4 11273; GFX90A-NEXT: ;;#ASMSTART 11274; GFX90A-NEXT: ; use s[8:11] 11275; GFX90A-NEXT: ;;#ASMEND 11276; GFX90A-NEXT: s_setpc_b64 s[30:31] 11277; 11278; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_4_3_3: 11279; GFX940: ; %bb.0: 11280; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11281; GFX940-NEXT: ;;#ASMSTART 11282; GFX940-NEXT: ; def s[0:2] 11283; GFX940-NEXT: ;;#ASMEND 11284; GFX940-NEXT: s_mov_b32 s8, s2 11285; GFX940-NEXT: s_mov_b32 s9, s1 11286; GFX940-NEXT: s_mov_b32 s10, s0 11287; GFX940-NEXT: s_mov_b32 s11, s0 11288; GFX940-NEXT: ;;#ASMSTART 11289; GFX940-NEXT: ; use s[8:11] 11290; GFX940-NEXT: ;;#ASMEND 11291; GFX940-NEXT: s_setpc_b64 s[30:31] 11292 %vec0 = call <3 x i32> asm "; def $0", "=s"() 11293 %vec1 = call <3 x i32> asm "; def $0", "=s"() 11294 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 4, i32 3, i32 3> 11295 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 11296 ret void 11297} 11298 11299define void @s_shuffle_v4i32_v3i32__5_5_3_3() { 11300; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_5_3_3: 11301; GFX900: ; %bb.0: 11302; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11303; GFX900-NEXT: ;;#ASMSTART 11304; GFX900-NEXT: ; def s[4:6] 11305; GFX900-NEXT: ;;#ASMEND 11306; GFX900-NEXT: s_mov_b32 s8, s6 11307; 
GFX900-NEXT: s_mov_b32 s9, s6 11308; GFX900-NEXT: s_mov_b32 s10, s4 11309; GFX900-NEXT: s_mov_b32 s11, s4 11310; GFX900-NEXT: ;;#ASMSTART 11311; GFX900-NEXT: ; use s[8:11] 11312; GFX900-NEXT: ;;#ASMEND 11313; GFX900-NEXT: s_setpc_b64 s[30:31] 11314; 11315; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_5_3_3: 11316; GFX90A: ; %bb.0: 11317; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11318; GFX90A-NEXT: ;;#ASMSTART 11319; GFX90A-NEXT: ; def s[4:6] 11320; GFX90A-NEXT: ;;#ASMEND 11321; GFX90A-NEXT: s_mov_b32 s8, s6 11322; GFX90A-NEXT: s_mov_b32 s9, s6 11323; GFX90A-NEXT: s_mov_b32 s10, s4 11324; GFX90A-NEXT: s_mov_b32 s11, s4 11325; GFX90A-NEXT: ;;#ASMSTART 11326; GFX90A-NEXT: ; use s[8:11] 11327; GFX90A-NEXT: ;;#ASMEND 11328; GFX90A-NEXT: s_setpc_b64 s[30:31] 11329; 11330; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_3_3: 11331; GFX940: ; %bb.0: 11332; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11333; GFX940-NEXT: ;;#ASMSTART 11334; GFX940-NEXT: ; def s[0:2] 11335; GFX940-NEXT: ;;#ASMEND 11336; GFX940-NEXT: s_mov_b32 s8, s2 11337; GFX940-NEXT: s_mov_b32 s9, s2 11338; GFX940-NEXT: s_mov_b32 s10, s0 11339; GFX940-NEXT: s_mov_b32 s11, s0 11340; GFX940-NEXT: ;;#ASMSTART 11341; GFX940-NEXT: ; use s[8:11] 11342; GFX940-NEXT: ;;#ASMEND 11343; GFX940-NEXT: s_setpc_b64 s[30:31] 11344 %vec0 = call <3 x i32> asm "; def $0", "=s"() 11345 %vec1 = call <3 x i32> asm "; def $0", "=s"() 11346 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 3, i32 3> 11347 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 11348 ret void 11349} 11350 11351define void @s_shuffle_v4i32_v3i32__5_5_u_3() { 11352; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_5_u_3: 11353; GFX900: ; %bb.0: 11354; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11355; GFX900-NEXT: ;;#ASMSTART 11356; GFX900-NEXT: ; def s[4:6] 11357; GFX900-NEXT: ;;#ASMEND 11358; GFX900-NEXT: s_mov_b32 s8, s6 11359; GFX900-NEXT: s_mov_b32 s9, s6 11360; GFX900-NEXT: s_mov_b32 s11, s4 
11361; GFX900-NEXT: ;;#ASMSTART 11362; GFX900-NEXT: ; use s[8:11] 11363; GFX900-NEXT: ;;#ASMEND 11364; GFX900-NEXT: s_setpc_b64 s[30:31] 11365; 11366; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_5_u_3: 11367; GFX90A: ; %bb.0: 11368; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11369; GFX90A-NEXT: ;;#ASMSTART 11370; GFX90A-NEXT: ; def s[4:6] 11371; GFX90A-NEXT: ;;#ASMEND 11372; GFX90A-NEXT: s_mov_b32 s8, s6 11373; GFX90A-NEXT: s_mov_b32 s9, s6 11374; GFX90A-NEXT: s_mov_b32 s11, s4 11375; GFX90A-NEXT: ;;#ASMSTART 11376; GFX90A-NEXT: ; use s[8:11] 11377; GFX90A-NEXT: ;;#ASMEND 11378; GFX90A-NEXT: s_setpc_b64 s[30:31] 11379; 11380; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_u_3: 11381; GFX940: ; %bb.0: 11382; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11383; GFX940-NEXT: ;;#ASMSTART 11384; GFX940-NEXT: ; def s[0:2] 11385; GFX940-NEXT: ;;#ASMEND 11386; GFX940-NEXT: s_mov_b32 s8, s2 11387; GFX940-NEXT: s_mov_b32 s9, s2 11388; GFX940-NEXT: s_mov_b32 s11, s0 11389; GFX940-NEXT: ;;#ASMSTART 11390; GFX940-NEXT: ; use s[8:11] 11391; GFX940-NEXT: ;;#ASMEND 11392; GFX940-NEXT: s_setpc_b64 s[30:31] 11393 %vec0 = call <3 x i32> asm "; def $0", "=s"() 11394 %vec1 = call <3 x i32> asm "; def $0", "=s"() 11395 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 poison, i32 3> 11396 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 11397 ret void 11398} 11399 11400define void @s_shuffle_v4i32_v3i32__5_5_0_3() { 11401; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_5_0_3: 11402; GFX900: ; %bb.0: 11403; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11404; GFX900-NEXT: ;;#ASMSTART 11405; GFX900-NEXT: ; def s[4:6] 11406; GFX900-NEXT: ;;#ASMEND 11407; GFX900-NEXT: ;;#ASMSTART 11408; GFX900-NEXT: ; def s[12:14] 11409; GFX900-NEXT: ;;#ASMEND 11410; GFX900-NEXT: s_mov_b32 s8, s14 11411; GFX900-NEXT: s_mov_b32 s9, s14 11412; GFX900-NEXT: s_mov_b32 s10, s4 11413; GFX900-NEXT: s_mov_b32 s11, s12 11414; GFX900-NEXT: ;;#ASMSTART 11415; 
GFX900-NEXT: ; use s[8:11] 11416; GFX900-NEXT: ;;#ASMEND 11417; GFX900-NEXT: s_setpc_b64 s[30:31] 11418; 11419; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_5_0_3: 11420; GFX90A: ; %bb.0: 11421; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11422; GFX90A-NEXT: ;;#ASMSTART 11423; GFX90A-NEXT: ; def s[4:6] 11424; GFX90A-NEXT: ;;#ASMEND 11425; GFX90A-NEXT: ;;#ASMSTART 11426; GFX90A-NEXT: ; def s[12:14] 11427; GFX90A-NEXT: ;;#ASMEND 11428; GFX90A-NEXT: s_mov_b32 s8, s14 11429; GFX90A-NEXT: s_mov_b32 s9, s14 11430; GFX90A-NEXT: s_mov_b32 s10, s4 11431; GFX90A-NEXT: s_mov_b32 s11, s12 11432; GFX90A-NEXT: ;;#ASMSTART 11433; GFX90A-NEXT: ; use s[8:11] 11434; GFX90A-NEXT: ;;#ASMEND 11435; GFX90A-NEXT: s_setpc_b64 s[30:31] 11436; 11437; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_0_3: 11438; GFX940: ; %bb.0: 11439; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11440; GFX940-NEXT: ;;#ASMSTART 11441; GFX940-NEXT: ; def s[0:2] 11442; GFX940-NEXT: ;;#ASMEND 11443; GFX940-NEXT: ;;#ASMSTART 11444; GFX940-NEXT: ; def s[4:6] 11445; GFX940-NEXT: ;;#ASMEND 11446; GFX940-NEXT: s_mov_b32 s8, s6 11447; GFX940-NEXT: s_mov_b32 s9, s6 11448; GFX940-NEXT: s_mov_b32 s10, s0 11449; GFX940-NEXT: s_mov_b32 s11, s4 11450; GFX940-NEXT: ;;#ASMSTART 11451; GFX940-NEXT: ; use s[8:11] 11452; GFX940-NEXT: ;;#ASMEND 11453; GFX940-NEXT: s_setpc_b64 s[30:31] 11454 %vec0 = call <3 x i32> asm "; def $0", "=s"() 11455 %vec1 = call <3 x i32> asm "; def $0", "=s"() 11456 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 0, i32 3> 11457 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 11458 ret void 11459} 11460 11461define void @s_shuffle_v4i32_v3i32__5_5_1_3() { 11462; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_5_1_3: 11463; GFX900: ; %bb.0: 11464; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11465; GFX900-NEXT: ;;#ASMSTART 11466; GFX900-NEXT: ; def s[4:6] 11467; GFX900-NEXT: ;;#ASMEND 11468; GFX900-NEXT: ;;#ASMSTART 11469; GFX900-NEXT: ; def 
s[12:14] 11470; GFX900-NEXT: ;;#ASMEND 11471; GFX900-NEXT: s_mov_b32 s8, s14 11472; GFX900-NEXT: s_mov_b32 s9, s14 11473; GFX900-NEXT: s_mov_b32 s10, s5 11474; GFX900-NEXT: s_mov_b32 s11, s12 11475; GFX900-NEXT: ;;#ASMSTART 11476; GFX900-NEXT: ; use s[8:11] 11477; GFX900-NEXT: ;;#ASMEND 11478; GFX900-NEXT: s_setpc_b64 s[30:31] 11479; 11480; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_5_1_3: 11481; GFX90A: ; %bb.0: 11482; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11483; GFX90A-NEXT: ;;#ASMSTART 11484; GFX90A-NEXT: ; def s[4:6] 11485; GFX90A-NEXT: ;;#ASMEND 11486; GFX90A-NEXT: ;;#ASMSTART 11487; GFX90A-NEXT: ; def s[12:14] 11488; GFX90A-NEXT: ;;#ASMEND 11489; GFX90A-NEXT: s_mov_b32 s8, s14 11490; GFX90A-NEXT: s_mov_b32 s9, s14 11491; GFX90A-NEXT: s_mov_b32 s10, s5 11492; GFX90A-NEXT: s_mov_b32 s11, s12 11493; GFX90A-NEXT: ;;#ASMSTART 11494; GFX90A-NEXT: ; use s[8:11] 11495; GFX90A-NEXT: ;;#ASMEND 11496; GFX90A-NEXT: s_setpc_b64 s[30:31] 11497; 11498; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_1_3: 11499; GFX940: ; %bb.0: 11500; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11501; GFX940-NEXT: ;;#ASMSTART 11502; GFX940-NEXT: ; def s[0:2] 11503; GFX940-NEXT: ;;#ASMEND 11504; GFX940-NEXT: ;;#ASMSTART 11505; GFX940-NEXT: ; def s[4:6] 11506; GFX940-NEXT: ;;#ASMEND 11507; GFX940-NEXT: s_mov_b32 s8, s6 11508; GFX940-NEXT: s_mov_b32 s9, s6 11509; GFX940-NEXT: s_mov_b32 s10, s1 11510; GFX940-NEXT: s_mov_b32 s11, s4 11511; GFX940-NEXT: ;;#ASMSTART 11512; GFX940-NEXT: ; use s[8:11] 11513; GFX940-NEXT: ;;#ASMEND 11514; GFX940-NEXT: s_setpc_b64 s[30:31] 11515 %vec0 = call <3 x i32> asm "; def $0", "=s"() 11516 %vec1 = call <3 x i32> asm "; def $0", "=s"() 11517 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 1, i32 3> 11518 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 11519 ret void 11520} 11521 11522define void @s_shuffle_v4i32_v3i32__5_5_2_3() { 11523; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_5_2_3: 11524; 
GFX900: ; %bb.0: 11525; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11526; GFX900-NEXT: ;;#ASMSTART 11527; GFX900-NEXT: ; def s[8:10] 11528; GFX900-NEXT: ;;#ASMEND 11529; GFX900-NEXT: ;;#ASMSTART 11530; GFX900-NEXT: ; def s[4:6] 11531; GFX900-NEXT: ;;#ASMEND 11532; GFX900-NEXT: s_mov_b32 s8, s6 11533; GFX900-NEXT: s_mov_b32 s9, s6 11534; GFX900-NEXT: s_mov_b32 s11, s4 11535; GFX900-NEXT: ;;#ASMSTART 11536; GFX900-NEXT: ; use s[8:11] 11537; GFX900-NEXT: ;;#ASMEND 11538; GFX900-NEXT: s_setpc_b64 s[30:31] 11539; 11540; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_5_2_3: 11541; GFX90A: ; %bb.0: 11542; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11543; GFX90A-NEXT: ;;#ASMSTART 11544; GFX90A-NEXT: ; def s[8:10] 11545; GFX90A-NEXT: ;;#ASMEND 11546; GFX90A-NEXT: ;;#ASMSTART 11547; GFX90A-NEXT: ; def s[4:6] 11548; GFX90A-NEXT: ;;#ASMEND 11549; GFX90A-NEXT: s_mov_b32 s8, s6 11550; GFX90A-NEXT: s_mov_b32 s9, s6 11551; GFX90A-NEXT: s_mov_b32 s11, s4 11552; GFX90A-NEXT: ;;#ASMSTART 11553; GFX90A-NEXT: ; use s[8:11] 11554; GFX90A-NEXT: ;;#ASMEND 11555; GFX90A-NEXT: s_setpc_b64 s[30:31] 11556; 11557; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_2_3: 11558; GFX940: ; %bb.0: 11559; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11560; GFX940-NEXT: ;;#ASMSTART 11561; GFX940-NEXT: ; def s[8:10] 11562; GFX940-NEXT: ;;#ASMEND 11563; GFX940-NEXT: ;;#ASMSTART 11564; GFX940-NEXT: ; def s[0:2] 11565; GFX940-NEXT: ;;#ASMEND 11566; GFX940-NEXT: s_mov_b32 s8, s2 11567; GFX940-NEXT: s_mov_b32 s9, s2 11568; GFX940-NEXT: s_mov_b32 s11, s0 11569; GFX940-NEXT: ;;#ASMSTART 11570; GFX940-NEXT: ; use s[8:11] 11571; GFX940-NEXT: ;;#ASMEND 11572; GFX940-NEXT: s_setpc_b64 s[30:31] 11573 %vec0 = call <3 x i32> asm "; def $0", "=s"() 11574 %vec1 = call <3 x i32> asm "; def $0", "=s"() 11575 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 2, i32 3> 11576 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 11577 ret void 11578} 11579 
11580define void @s_shuffle_v4i32_v3i32__5_5_4_3() { 11581; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_5_4_3: 11582; GFX900: ; %bb.0: 11583; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11584; GFX900-NEXT: ;;#ASMSTART 11585; GFX900-NEXT: ; def s[4:6] 11586; GFX900-NEXT: ;;#ASMEND 11587; GFX900-NEXT: s_mov_b32 s8, s6 11588; GFX900-NEXT: s_mov_b32 s9, s6 11589; GFX900-NEXT: s_mov_b32 s10, s5 11590; GFX900-NEXT: s_mov_b32 s11, s4 11591; GFX900-NEXT: ;;#ASMSTART 11592; GFX900-NEXT: ; use s[8:11] 11593; GFX900-NEXT: ;;#ASMEND 11594; GFX900-NEXT: s_setpc_b64 s[30:31] 11595; 11596; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_5_4_3: 11597; GFX90A: ; %bb.0: 11598; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11599; GFX90A-NEXT: ;;#ASMSTART 11600; GFX90A-NEXT: ; def s[4:6] 11601; GFX90A-NEXT: ;;#ASMEND 11602; GFX90A-NEXT: s_mov_b32 s8, s6 11603; GFX90A-NEXT: s_mov_b32 s9, s6 11604; GFX90A-NEXT: s_mov_b32 s10, s5 11605; GFX90A-NEXT: s_mov_b32 s11, s4 11606; GFX90A-NEXT: ;;#ASMSTART 11607; GFX90A-NEXT: ; use s[8:11] 11608; GFX90A-NEXT: ;;#ASMEND 11609; GFX90A-NEXT: s_setpc_b64 s[30:31] 11610; 11611; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_4_3: 11612; GFX940: ; %bb.0: 11613; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11614; GFX940-NEXT: ;;#ASMSTART 11615; GFX940-NEXT: ; def s[0:2] 11616; GFX940-NEXT: ;;#ASMEND 11617; GFX940-NEXT: s_mov_b32 s8, s2 11618; GFX940-NEXT: s_mov_b32 s9, s2 11619; GFX940-NEXT: s_mov_b32 s10, s1 11620; GFX940-NEXT: s_mov_b32 s11, s0 11621; GFX940-NEXT: ;;#ASMSTART 11622; GFX940-NEXT: ; use s[8:11] 11623; GFX940-NEXT: ;;#ASMEND 11624; GFX940-NEXT: s_setpc_b64 s[30:31] 11625 %vec0 = call <3 x i32> asm "; def $0", "=s"() 11626 %vec1 = call <3 x i32> asm "; def $0", "=s"() 11627 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 4, i32 3> 11628 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 11629 ret void 11630} 11631 11632define void @s_shuffle_v4i32_v3i32__u_4_4_4() { 11633; 
GFX9-LABEL: s_shuffle_v4i32_v3i32__u_4_4_4: 11634; GFX9: ; %bb.0: 11635; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11636; GFX9-NEXT: ;;#ASMSTART 11637; GFX9-NEXT: ; def s[8:10] 11638; GFX9-NEXT: ;;#ASMEND 11639; GFX9-NEXT: s_mov_b32 s10, s9 11640; GFX9-NEXT: s_mov_b32 s11, s9 11641; GFX9-NEXT: ;;#ASMSTART 11642; GFX9-NEXT: ; use s[8:11] 11643; GFX9-NEXT: ;;#ASMEND 11644; GFX9-NEXT: s_setpc_b64 s[30:31] 11645 %vec0 = call <3 x i32> asm "; def $0", "=s"() 11646 %vec1 = call <3 x i32> asm "; def $0", "=s"() 11647 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 poison, i32 4, i32 4, i32 4> 11648 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 11649 ret void 11650} 11651 11652define void @s_shuffle_v4i32_v3i32__0_4_4_4() { 11653; GFX900-LABEL: s_shuffle_v4i32_v3i32__0_4_4_4: 11654; GFX900: ; %bb.0: 11655; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11656; GFX900-NEXT: ;;#ASMSTART 11657; GFX900-NEXT: ; def s[8:10] 11658; GFX900-NEXT: ;;#ASMEND 11659; GFX900-NEXT: ;;#ASMSTART 11660; GFX900-NEXT: ; def s[4:6] 11661; GFX900-NEXT: ;;#ASMEND 11662; GFX900-NEXT: s_mov_b32 s9, s5 11663; GFX900-NEXT: s_mov_b32 s10, s5 11664; GFX900-NEXT: s_mov_b32 s11, s5 11665; GFX900-NEXT: ;;#ASMSTART 11666; GFX900-NEXT: ; use s[8:11] 11667; GFX900-NEXT: ;;#ASMEND 11668; GFX900-NEXT: s_setpc_b64 s[30:31] 11669; 11670; GFX90A-LABEL: s_shuffle_v4i32_v3i32__0_4_4_4: 11671; GFX90A: ; %bb.0: 11672; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11673; GFX90A-NEXT: ;;#ASMSTART 11674; GFX90A-NEXT: ; def s[8:10] 11675; GFX90A-NEXT: ;;#ASMEND 11676; GFX90A-NEXT: ;;#ASMSTART 11677; GFX90A-NEXT: ; def s[4:6] 11678; GFX90A-NEXT: ;;#ASMEND 11679; GFX90A-NEXT: s_mov_b32 s9, s5 11680; GFX90A-NEXT: s_mov_b32 s10, s5 11681; GFX90A-NEXT: s_mov_b32 s11, s5 11682; GFX90A-NEXT: ;;#ASMSTART 11683; GFX90A-NEXT: ; use s[8:11] 11684; GFX90A-NEXT: ;;#ASMEND 11685; GFX90A-NEXT: s_setpc_b64 s[30:31] 11686; 11687; GFX940-LABEL: 
s_shuffle_v4i32_v3i32__0_4_4_4: 11688; GFX940: ; %bb.0: 11689; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11690; GFX940-NEXT: ;;#ASMSTART 11691; GFX940-NEXT: ; def s[8:10] 11692; GFX940-NEXT: ;;#ASMEND 11693; GFX940-NEXT: ;;#ASMSTART 11694; GFX940-NEXT: ; def s[0:2] 11695; GFX940-NEXT: ;;#ASMEND 11696; GFX940-NEXT: s_mov_b32 s9, s1 11697; GFX940-NEXT: s_mov_b32 s10, s1 11698; GFX940-NEXT: s_mov_b32 s11, s1 11699; GFX940-NEXT: ;;#ASMSTART 11700; GFX940-NEXT: ; use s[8:11] 11701; GFX940-NEXT: ;;#ASMEND 11702; GFX940-NEXT: s_setpc_b64 s[30:31] 11703 %vec0 = call <3 x i32> asm "; def $0", "=s"() 11704 %vec1 = call <3 x i32> asm "; def $0", "=s"() 11705 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 0, i32 4, i32 4, i32 4> 11706 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 11707 ret void 11708} 11709 11710define void @s_shuffle_v4i32_v3i32__1_4_4_4() { 11711; GFX900-LABEL: s_shuffle_v4i32_v3i32__1_4_4_4: 11712; GFX900: ; %bb.0: 11713; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11714; GFX900-NEXT: ;;#ASMSTART 11715; GFX900-NEXT: ; def s[8:10] 11716; GFX900-NEXT: ;;#ASMEND 11717; GFX900-NEXT: ;;#ASMSTART 11718; GFX900-NEXT: ; def s[4:6] 11719; GFX900-NEXT: ;;#ASMEND 11720; GFX900-NEXT: s_mov_b32 s8, s5 11721; GFX900-NEXT: s_mov_b32 s10, s9 11722; GFX900-NEXT: s_mov_b32 s11, s9 11723; GFX900-NEXT: ;;#ASMSTART 11724; GFX900-NEXT: ; use s[8:11] 11725; GFX900-NEXT: ;;#ASMEND 11726; GFX900-NEXT: s_setpc_b64 s[30:31] 11727; 11728; GFX90A-LABEL: s_shuffle_v4i32_v3i32__1_4_4_4: 11729; GFX90A: ; %bb.0: 11730; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11731; GFX90A-NEXT: ;;#ASMSTART 11732; GFX90A-NEXT: ; def s[8:10] 11733; GFX90A-NEXT: ;;#ASMEND 11734; GFX90A-NEXT: ;;#ASMSTART 11735; GFX90A-NEXT: ; def s[4:6] 11736; GFX90A-NEXT: ;;#ASMEND 11737; GFX90A-NEXT: s_mov_b32 s8, s5 11738; GFX90A-NEXT: s_mov_b32 s10, s9 11739; GFX90A-NEXT: s_mov_b32 s11, s9 11740; GFX90A-NEXT: ;;#ASMSTART 11741; GFX90A-NEXT: ; use 
s[8:11] 11742; GFX90A-NEXT: ;;#ASMEND 11743; GFX90A-NEXT: s_setpc_b64 s[30:31] 11744; 11745; GFX940-LABEL: s_shuffle_v4i32_v3i32__1_4_4_4: 11746; GFX940: ; %bb.0: 11747; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11748; GFX940-NEXT: ;;#ASMSTART 11749; GFX940-NEXT: ; def s[8:10] 11750; GFX940-NEXT: ;;#ASMEND 11751; GFX940-NEXT: ;;#ASMSTART 11752; GFX940-NEXT: ; def s[0:2] 11753; GFX940-NEXT: ;;#ASMEND 11754; GFX940-NEXT: s_mov_b32 s8, s1 11755; GFX940-NEXT: s_mov_b32 s10, s9 11756; GFX940-NEXT: s_mov_b32 s11, s9 11757; GFX940-NEXT: ;;#ASMSTART 11758; GFX940-NEXT: ; use s[8:11] 11759; GFX940-NEXT: ;;#ASMEND 11760; GFX940-NEXT: s_setpc_b64 s[30:31] 11761 %vec0 = call <3 x i32> asm "; def $0", "=s"() 11762 %vec1 = call <3 x i32> asm "; def $0", "=s"() 11763 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 1, i32 4, i32 4, i32 4> 11764 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 11765 ret void 11766} 11767 11768define void @s_shuffle_v4i32_v3i32__2_4_4_4() { 11769; GFX900-LABEL: s_shuffle_v4i32_v3i32__2_4_4_4: 11770; GFX900: ; %bb.0: 11771; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11772; GFX900-NEXT: ;;#ASMSTART 11773; GFX900-NEXT: ; def s[8:10] 11774; GFX900-NEXT: ;;#ASMEND 11775; GFX900-NEXT: ;;#ASMSTART 11776; GFX900-NEXT: ; def s[4:6] 11777; GFX900-NEXT: ;;#ASMEND 11778; GFX900-NEXT: s_mov_b32 s8, s6 11779; GFX900-NEXT: s_mov_b32 s10, s9 11780; GFX900-NEXT: s_mov_b32 s11, s9 11781; GFX900-NEXT: ;;#ASMSTART 11782; GFX900-NEXT: ; use s[8:11] 11783; GFX900-NEXT: ;;#ASMEND 11784; GFX900-NEXT: s_setpc_b64 s[30:31] 11785; 11786; GFX90A-LABEL: s_shuffle_v4i32_v3i32__2_4_4_4: 11787; GFX90A: ; %bb.0: 11788; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11789; GFX90A-NEXT: ;;#ASMSTART 11790; GFX90A-NEXT: ; def s[8:10] 11791; GFX90A-NEXT: ;;#ASMEND 11792; GFX90A-NEXT: ;;#ASMSTART 11793; GFX90A-NEXT: ; def s[4:6] 11794; GFX90A-NEXT: ;;#ASMEND 11795; GFX90A-NEXT: s_mov_b32 s8, s6 11796; GFX90A-NEXT: 
s_mov_b32 s10, s9 11797; GFX90A-NEXT: s_mov_b32 s11, s9 11798; GFX90A-NEXT: ;;#ASMSTART 11799; GFX90A-NEXT: ; use s[8:11] 11800; GFX90A-NEXT: ;;#ASMEND 11801; GFX90A-NEXT: s_setpc_b64 s[30:31] 11802; 11803; GFX940-LABEL: s_shuffle_v4i32_v3i32__2_4_4_4: 11804; GFX940: ; %bb.0: 11805; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11806; GFX940-NEXT: ;;#ASMSTART 11807; GFX940-NEXT: ; def s[8:10] 11808; GFX940-NEXT: ;;#ASMEND 11809; GFX940-NEXT: ;;#ASMSTART 11810; GFX940-NEXT: ; def s[0:2] 11811; GFX940-NEXT: ;;#ASMEND 11812; GFX940-NEXT: s_mov_b32 s8, s2 11813; GFX940-NEXT: s_mov_b32 s10, s9 11814; GFX940-NEXT: s_mov_b32 s11, s9 11815; GFX940-NEXT: ;;#ASMSTART 11816; GFX940-NEXT: ; use s[8:11] 11817; GFX940-NEXT: ;;#ASMEND 11818; GFX940-NEXT: s_setpc_b64 s[30:31] 11819 %vec0 = call <3 x i32> asm "; def $0", "=s"() 11820 %vec1 = call <3 x i32> asm "; def $0", "=s"() 11821 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 2, i32 4, i32 4, i32 4> 11822 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 11823 ret void 11824} 11825 11826define void @s_shuffle_v4i32_v3i32__3_4_4_4() { 11827; GFX9-LABEL: s_shuffle_v4i32_v3i32__3_4_4_4: 11828; GFX9: ; %bb.0: 11829; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11830; GFX9-NEXT: ;;#ASMSTART 11831; GFX9-NEXT: ; def s[8:10] 11832; GFX9-NEXT: ;;#ASMEND 11833; GFX9-NEXT: s_mov_b32 s10, s9 11834; GFX9-NEXT: s_mov_b32 s11, s9 11835; GFX9-NEXT: ;;#ASMSTART 11836; GFX9-NEXT: ; use s[8:11] 11837; GFX9-NEXT: ;;#ASMEND 11838; GFX9-NEXT: s_setpc_b64 s[30:31] 11839 %vec0 = call <3 x i32> asm "; def $0", "=s"() 11840 %vec1 = call <3 x i32> asm "; def $0", "=s"() 11841 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 3, i32 4, i32 4, i32 4> 11842 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 11843 ret void 11844} 11845 11846define void @s_shuffle_v4i32_v3i32__4_4_4_4() { 11847; GFX9-LABEL: s_shuffle_v4i32_v3i32__4_4_4_4: 11848; GFX9: ; %bb.0: 11849; 
GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11850; GFX9-NEXT: ;;#ASMSTART 11851; GFX9-NEXT: ; def s[8:10] 11852; GFX9-NEXT: ;;#ASMEND 11853; GFX9-NEXT: s_mov_b32 s8, s9 11854; GFX9-NEXT: s_mov_b32 s10, s9 11855; GFX9-NEXT: s_mov_b32 s11, s9 11856; GFX9-NEXT: ;;#ASMSTART 11857; GFX9-NEXT: ; use s[8:11] 11858; GFX9-NEXT: ;;#ASMEND 11859; GFX9-NEXT: s_setpc_b64 s[30:31] 11860 %vec0 = call <3 x i32> asm "; def $0", "=s"() 11861 %vec1 = call <3 x i32> asm "; def $0", "=s"() 11862 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 4, i32 4, i32 4, i32 4> 11863 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 11864 ret void 11865} 11866 11867define void @s_shuffle_v4i32_v3i32__5_4_4_4() { 11868; GFX9-LABEL: s_shuffle_v4i32_v3i32__5_4_4_4: 11869; GFX9: ; %bb.0: 11870; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11871; GFX9-NEXT: ;;#ASMSTART 11872; GFX9-NEXT: ; def s[8:10] 11873; GFX9-NEXT: ;;#ASMEND 11874; GFX9-NEXT: s_mov_b32 s8, s10 11875; GFX9-NEXT: s_mov_b32 s10, s9 11876; GFX9-NEXT: s_mov_b32 s11, s9 11877; GFX9-NEXT: ;;#ASMSTART 11878; GFX9-NEXT: ; use s[8:11] 11879; GFX9-NEXT: ;;#ASMEND 11880; GFX9-NEXT: s_setpc_b64 s[30:31] 11881 %vec0 = call <3 x i32> asm "; def $0", "=s"() 11882 %vec1 = call <3 x i32> asm "; def $0", "=s"() 11883 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 4, i32 4, i32 4> 11884 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 11885 ret void 11886} 11887 11888define void @s_shuffle_v4i32_v3i32__5_u_4_4() { 11889; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_u_4_4: 11890; GFX900: ; %bb.0: 11891; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11892; GFX900-NEXT: ;;#ASMSTART 11893; GFX900-NEXT: ; def s[4:6] 11894; GFX900-NEXT: ;;#ASMEND 11895; GFX900-NEXT: s_mov_b32 s8, s6 11896; GFX900-NEXT: s_mov_b32 s10, s5 11897; GFX900-NEXT: s_mov_b32 s11, s5 11898; GFX900-NEXT: ;;#ASMSTART 11899; GFX900-NEXT: ; use s[8:11] 11900; GFX900-NEXT: ;;#ASMEND 11901; 
GFX900-NEXT: s_setpc_b64 s[30:31] 11902; 11903; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_u_4_4: 11904; GFX90A: ; %bb.0: 11905; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11906; GFX90A-NEXT: ;;#ASMSTART 11907; GFX90A-NEXT: ; def s[4:6] 11908; GFX90A-NEXT: ;;#ASMEND 11909; GFX90A-NEXT: s_mov_b32 s8, s6 11910; GFX90A-NEXT: s_mov_b32 s10, s5 11911; GFX90A-NEXT: s_mov_b32 s11, s5 11912; GFX90A-NEXT: ;;#ASMSTART 11913; GFX90A-NEXT: ; use s[8:11] 11914; GFX90A-NEXT: ;;#ASMEND 11915; GFX90A-NEXT: s_setpc_b64 s[30:31] 11916; 11917; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_u_4_4: 11918; GFX940: ; %bb.0: 11919; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11920; GFX940-NEXT: ;;#ASMSTART 11921; GFX940-NEXT: ; def s[0:2] 11922; GFX940-NEXT: ;;#ASMEND 11923; GFX940-NEXT: s_mov_b32 s8, s2 11924; GFX940-NEXT: s_mov_b32 s10, s1 11925; GFX940-NEXT: s_mov_b32 s11, s1 11926; GFX940-NEXT: ;;#ASMSTART 11927; GFX940-NEXT: ; use s[8:11] 11928; GFX940-NEXT: ;;#ASMEND 11929; GFX940-NEXT: s_setpc_b64 s[30:31] 11930 %vec0 = call <3 x i32> asm "; def $0", "=s"() 11931 %vec1 = call <3 x i32> asm "; def $0", "=s"() 11932 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 poison, i32 4, i32 4> 11933 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 11934 ret void 11935} 11936 11937define void @s_shuffle_v4i32_v3i32__5_0_4_4() { 11938; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_0_4_4: 11939; GFX900: ; %bb.0: 11940; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11941; GFX900-NEXT: ;;#ASMSTART 11942; GFX900-NEXT: ; def s[4:6] 11943; GFX900-NEXT: ;;#ASMEND 11944; GFX900-NEXT: ;;#ASMSTART 11945; GFX900-NEXT: ; def s[12:14] 11946; GFX900-NEXT: ;;#ASMEND 11947; GFX900-NEXT: s_mov_b32 s8, s14 11948; GFX900-NEXT: s_mov_b32 s9, s4 11949; GFX900-NEXT: s_mov_b32 s10, s13 11950; GFX900-NEXT: s_mov_b32 s11, s13 11951; GFX900-NEXT: ;;#ASMSTART 11952; GFX900-NEXT: ; use s[8:11] 11953; GFX900-NEXT: ;;#ASMEND 11954; GFX900-NEXT: s_setpc_b64 s[30:31] 
11955; 11956; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_0_4_4: 11957; GFX90A: ; %bb.0: 11958; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11959; GFX90A-NEXT: ;;#ASMSTART 11960; GFX90A-NEXT: ; def s[4:6] 11961; GFX90A-NEXT: ;;#ASMEND 11962; GFX90A-NEXT: ;;#ASMSTART 11963; GFX90A-NEXT: ; def s[12:14] 11964; GFX90A-NEXT: ;;#ASMEND 11965; GFX90A-NEXT: s_mov_b32 s8, s14 11966; GFX90A-NEXT: s_mov_b32 s9, s4 11967; GFX90A-NEXT: s_mov_b32 s10, s13 11968; GFX90A-NEXT: s_mov_b32 s11, s13 11969; GFX90A-NEXT: ;;#ASMSTART 11970; GFX90A-NEXT: ; use s[8:11] 11971; GFX90A-NEXT: ;;#ASMEND 11972; GFX90A-NEXT: s_setpc_b64 s[30:31] 11973; 11974; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_0_4_4: 11975; GFX940: ; %bb.0: 11976; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11977; GFX940-NEXT: ;;#ASMSTART 11978; GFX940-NEXT: ; def s[0:2] 11979; GFX940-NEXT: ;;#ASMEND 11980; GFX940-NEXT: ;;#ASMSTART 11981; GFX940-NEXT: ; def s[4:6] 11982; GFX940-NEXT: ;;#ASMEND 11983; GFX940-NEXT: s_mov_b32 s8, s6 11984; GFX940-NEXT: s_mov_b32 s9, s0 11985; GFX940-NEXT: s_mov_b32 s10, s5 11986; GFX940-NEXT: s_mov_b32 s11, s5 11987; GFX940-NEXT: ;;#ASMSTART 11988; GFX940-NEXT: ; use s[8:11] 11989; GFX940-NEXT: ;;#ASMEND 11990; GFX940-NEXT: s_setpc_b64 s[30:31] 11991 %vec0 = call <3 x i32> asm "; def $0", "=s"() 11992 %vec1 = call <3 x i32> asm "; def $0", "=s"() 11993 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 0, i32 4, i32 4> 11994 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 11995 ret void 11996} 11997 11998define void @s_shuffle_v4i32_v3i32__5_1_4_4() { 11999; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_1_4_4: 12000; GFX900: ; %bb.0: 12001; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12002; GFX900-NEXT: ;;#ASMSTART 12003; GFX900-NEXT: ; def s[8:10] 12004; GFX900-NEXT: ;;#ASMEND 12005; GFX900-NEXT: ;;#ASMSTART 12006; GFX900-NEXT: ; def s[4:6] 12007; GFX900-NEXT: ;;#ASMEND 12008; GFX900-NEXT: s_mov_b32 s8, s6 12009; GFX900-NEXT: s_mov_b32 
s10, s5 12010; GFX900-NEXT: s_mov_b32 s11, s5 12011; GFX900-NEXT: ;;#ASMSTART 12012; GFX900-NEXT: ; use s[8:11] 12013; GFX900-NEXT: ;;#ASMEND 12014; GFX900-NEXT: s_setpc_b64 s[30:31] 12015; 12016; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_1_4_4: 12017; GFX90A: ; %bb.0: 12018; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12019; GFX90A-NEXT: ;;#ASMSTART 12020; GFX90A-NEXT: ; def s[8:10] 12021; GFX90A-NEXT: ;;#ASMEND 12022; GFX90A-NEXT: ;;#ASMSTART 12023; GFX90A-NEXT: ; def s[4:6] 12024; GFX90A-NEXT: ;;#ASMEND 12025; GFX90A-NEXT: s_mov_b32 s8, s6 12026; GFX90A-NEXT: s_mov_b32 s10, s5 12027; GFX90A-NEXT: s_mov_b32 s11, s5 12028; GFX90A-NEXT: ;;#ASMSTART 12029; GFX90A-NEXT: ; use s[8:11] 12030; GFX90A-NEXT: ;;#ASMEND 12031; GFX90A-NEXT: s_setpc_b64 s[30:31] 12032; 12033; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_1_4_4: 12034; GFX940: ; %bb.0: 12035; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12036; GFX940-NEXT: ;;#ASMSTART 12037; GFX940-NEXT: ; def s[8:10] 12038; GFX940-NEXT: ;;#ASMEND 12039; GFX940-NEXT: ;;#ASMSTART 12040; GFX940-NEXT: ; def s[0:2] 12041; GFX940-NEXT: ;;#ASMEND 12042; GFX940-NEXT: s_mov_b32 s8, s2 12043; GFX940-NEXT: s_mov_b32 s10, s1 12044; GFX940-NEXT: s_mov_b32 s11, s1 12045; GFX940-NEXT: ;;#ASMSTART 12046; GFX940-NEXT: ; use s[8:11] 12047; GFX940-NEXT: ;;#ASMEND 12048; GFX940-NEXT: s_setpc_b64 s[30:31] 12049 %vec0 = call <3 x i32> asm "; def $0", "=s"() 12050 %vec1 = call <3 x i32> asm "; def $0", "=s"() 12051 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 1, i32 4, i32 4> 12052 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 12053 ret void 12054} 12055 12056define void @s_shuffle_v4i32_v3i32__5_2_4_4() { 12057; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_2_4_4: 12058; GFX900: ; %bb.0: 12059; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12060; GFX900-NEXT: ;;#ASMSTART 12061; GFX900-NEXT: ; def s[4:6] 12062; GFX900-NEXT: ;;#ASMEND 12063; GFX900-NEXT: ;;#ASMSTART 12064; GFX900-NEXT: ; 
def s[12:14] 12065; GFX900-NEXT: ;;#ASMEND 12066; GFX900-NEXT: s_mov_b32 s8, s14 12067; GFX900-NEXT: s_mov_b32 s9, s6 12068; GFX900-NEXT: s_mov_b32 s10, s13 12069; GFX900-NEXT: s_mov_b32 s11, s13 12070; GFX900-NEXT: ;;#ASMSTART 12071; GFX900-NEXT: ; use s[8:11] 12072; GFX900-NEXT: ;;#ASMEND 12073; GFX900-NEXT: s_setpc_b64 s[30:31] 12074; 12075; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_2_4_4: 12076; GFX90A: ; %bb.0: 12077; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12078; GFX90A-NEXT: ;;#ASMSTART 12079; GFX90A-NEXT: ; def s[4:6] 12080; GFX90A-NEXT: ;;#ASMEND 12081; GFX90A-NEXT: ;;#ASMSTART 12082; GFX90A-NEXT: ; def s[12:14] 12083; GFX90A-NEXT: ;;#ASMEND 12084; GFX90A-NEXT: s_mov_b32 s8, s14 12085; GFX90A-NEXT: s_mov_b32 s9, s6 12086; GFX90A-NEXT: s_mov_b32 s10, s13 12087; GFX90A-NEXT: s_mov_b32 s11, s13 12088; GFX90A-NEXT: ;;#ASMSTART 12089; GFX90A-NEXT: ; use s[8:11] 12090; GFX90A-NEXT: ;;#ASMEND 12091; GFX90A-NEXT: s_setpc_b64 s[30:31] 12092; 12093; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_2_4_4: 12094; GFX940: ; %bb.0: 12095; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12096; GFX940-NEXT: ;;#ASMSTART 12097; GFX940-NEXT: ; def s[0:2] 12098; GFX940-NEXT: ;;#ASMEND 12099; GFX940-NEXT: ;;#ASMSTART 12100; GFX940-NEXT: ; def s[4:6] 12101; GFX940-NEXT: ;;#ASMEND 12102; GFX940-NEXT: s_mov_b32 s8, s6 12103; GFX940-NEXT: s_mov_b32 s9, s2 12104; GFX940-NEXT: s_mov_b32 s10, s5 12105; GFX940-NEXT: s_mov_b32 s11, s5 12106; GFX940-NEXT: ;;#ASMSTART 12107; GFX940-NEXT: ; use s[8:11] 12108; GFX940-NEXT: ;;#ASMEND 12109; GFX940-NEXT: s_setpc_b64 s[30:31] 12110 %vec0 = call <3 x i32> asm "; def $0", "=s"() 12111 %vec1 = call <3 x i32> asm "; def $0", "=s"() 12112 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 2, i32 4, i32 4> 12113 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 12114 ret void 12115} 12116 12117define void @s_shuffle_v4i32_v3i32__5_3_4_4() { 12118; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_3_4_4: 12119; 
GFX900: ; %bb.0: 12120; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12121; GFX900-NEXT: ;;#ASMSTART 12122; GFX900-NEXT: ; def s[4:6] 12123; GFX900-NEXT: ;;#ASMEND 12124; GFX900-NEXT: s_mov_b32 s8, s6 12125; GFX900-NEXT: s_mov_b32 s9, s4 12126; GFX900-NEXT: s_mov_b32 s10, s5 12127; GFX900-NEXT: s_mov_b32 s11, s5 12128; GFX900-NEXT: ;;#ASMSTART 12129; GFX900-NEXT: ; use s[8:11] 12130; GFX900-NEXT: ;;#ASMEND 12131; GFX900-NEXT: s_setpc_b64 s[30:31] 12132; 12133; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_3_4_4: 12134; GFX90A: ; %bb.0: 12135; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12136; GFX90A-NEXT: ;;#ASMSTART 12137; GFX90A-NEXT: ; def s[4:6] 12138; GFX90A-NEXT: ;;#ASMEND 12139; GFX90A-NEXT: s_mov_b32 s8, s6 12140; GFX90A-NEXT: s_mov_b32 s9, s4 12141; GFX90A-NEXT: s_mov_b32 s10, s5 12142; GFX90A-NEXT: s_mov_b32 s11, s5 12143; GFX90A-NEXT: ;;#ASMSTART 12144; GFX90A-NEXT: ; use s[8:11] 12145; GFX90A-NEXT: ;;#ASMEND 12146; GFX90A-NEXT: s_setpc_b64 s[30:31] 12147; 12148; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_3_4_4: 12149; GFX940: ; %bb.0: 12150; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12151; GFX940-NEXT: ;;#ASMSTART 12152; GFX940-NEXT: ; def s[0:2] 12153; GFX940-NEXT: ;;#ASMEND 12154; GFX940-NEXT: s_mov_b32 s8, s2 12155; GFX940-NEXT: s_mov_b32 s9, s0 12156; GFX940-NEXT: s_mov_b32 s10, s1 12157; GFX940-NEXT: s_mov_b32 s11, s1 12158; GFX940-NEXT: ;;#ASMSTART 12159; GFX940-NEXT: ; use s[8:11] 12160; GFX940-NEXT: ;;#ASMEND 12161; GFX940-NEXT: s_setpc_b64 s[30:31] 12162 %vec0 = call <3 x i32> asm "; def $0", "=s"() 12163 %vec1 = call <3 x i32> asm "; def $0", "=s"() 12164 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 3, i32 4, i32 4> 12165 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 12166 ret void 12167} 12168 12169define void @s_shuffle_v4i32_v3i32__5_5_4_4() { 12170; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_5_4_4: 12171; GFX900: ; %bb.0: 12172; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) 12173; GFX900-NEXT: ;;#ASMSTART 12174; GFX900-NEXT: ; def s[4:6] 12175; GFX900-NEXT: ;;#ASMEND 12176; GFX900-NEXT: s_mov_b32 s8, s6 12177; GFX900-NEXT: s_mov_b32 s9, s6 12178; GFX900-NEXT: s_mov_b32 s10, s5 12179; GFX900-NEXT: s_mov_b32 s11, s5 12180; GFX900-NEXT: ;;#ASMSTART 12181; GFX900-NEXT: ; use s[8:11] 12182; GFX900-NEXT: ;;#ASMEND 12183; GFX900-NEXT: s_setpc_b64 s[30:31] 12184; 12185; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_5_4_4: 12186; GFX90A: ; %bb.0: 12187; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12188; GFX90A-NEXT: ;;#ASMSTART 12189; GFX90A-NEXT: ; def s[4:6] 12190; GFX90A-NEXT: ;;#ASMEND 12191; GFX90A-NEXT: s_mov_b32 s8, s6 12192; GFX90A-NEXT: s_mov_b32 s9, s6 12193; GFX90A-NEXT: s_mov_b32 s10, s5 12194; GFX90A-NEXT: s_mov_b32 s11, s5 12195; GFX90A-NEXT: ;;#ASMSTART 12196; GFX90A-NEXT: ; use s[8:11] 12197; GFX90A-NEXT: ;;#ASMEND 12198; GFX90A-NEXT: s_setpc_b64 s[30:31] 12199; 12200; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_4_4: 12201; GFX940: ; %bb.0: 12202; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12203; GFX940-NEXT: ;;#ASMSTART 12204; GFX940-NEXT: ; def s[0:2] 12205; GFX940-NEXT: ;;#ASMEND 12206; GFX940-NEXT: s_mov_b32 s8, s2 12207; GFX940-NEXT: s_mov_b32 s9, s2 12208; GFX940-NEXT: s_mov_b32 s10, s1 12209; GFX940-NEXT: s_mov_b32 s11, s1 12210; GFX940-NEXT: ;;#ASMSTART 12211; GFX940-NEXT: ; use s[8:11] 12212; GFX940-NEXT: ;;#ASMEND 12213; GFX940-NEXT: s_setpc_b64 s[30:31] 12214 %vec0 = call <3 x i32> asm "; def $0", "=s"() 12215 %vec1 = call <3 x i32> asm "; def $0", "=s"() 12216 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 4, i32 4> 12217 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 12218 ret void 12219} 12220 12221define void @s_shuffle_v4i32_v3i32__5_5_u_4() { 12222; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_5_u_4: 12223; GFX900: ; %bb.0: 12224; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12225; GFX900-NEXT: ;;#ASMSTART 12226; GFX900-NEXT: ; def 
s[4:6] 12227; GFX900-NEXT: ;;#ASMEND 12228; GFX900-NEXT: s_mov_b32 s8, s6 12229; GFX900-NEXT: s_mov_b32 s9, s6 12230; GFX900-NEXT: s_mov_b32 s11, s5 12231; GFX900-NEXT: ;;#ASMSTART 12232; GFX900-NEXT: ; use s[8:11] 12233; GFX900-NEXT: ;;#ASMEND 12234; GFX900-NEXT: s_setpc_b64 s[30:31] 12235; 12236; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_5_u_4: 12237; GFX90A: ; %bb.0: 12238; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12239; GFX90A-NEXT: ;;#ASMSTART 12240; GFX90A-NEXT: ; def s[4:6] 12241; GFX90A-NEXT: ;;#ASMEND 12242; GFX90A-NEXT: s_mov_b32 s8, s6 12243; GFX90A-NEXT: s_mov_b32 s9, s6 12244; GFX90A-NEXT: s_mov_b32 s11, s5 12245; GFX90A-NEXT: ;;#ASMSTART 12246; GFX90A-NEXT: ; use s[8:11] 12247; GFX90A-NEXT: ;;#ASMEND 12248; GFX90A-NEXT: s_setpc_b64 s[30:31] 12249; 12250; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_u_4: 12251; GFX940: ; %bb.0: 12252; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12253; GFX940-NEXT: ;;#ASMSTART 12254; GFX940-NEXT: ; def s[0:2] 12255; GFX940-NEXT: ;;#ASMEND 12256; GFX940-NEXT: s_mov_b32 s8, s2 12257; GFX940-NEXT: s_mov_b32 s9, s2 12258; GFX940-NEXT: s_mov_b32 s11, s1 12259; GFX940-NEXT: ;;#ASMSTART 12260; GFX940-NEXT: ; use s[8:11] 12261; GFX940-NEXT: ;;#ASMEND 12262; GFX940-NEXT: s_setpc_b64 s[30:31] 12263 %vec0 = call <3 x i32> asm "; def $0", "=s"() 12264 %vec1 = call <3 x i32> asm "; def $0", "=s"() 12265 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 poison, i32 4> 12266 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 12267 ret void 12268} 12269 12270define void @s_shuffle_v4i32_v3i32__5_5_0_4() { 12271; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_5_0_4: 12272; GFX900: ; %bb.0: 12273; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12274; GFX900-NEXT: ;;#ASMSTART 12275; GFX900-NEXT: ; def s[4:6] 12276; GFX900-NEXT: ;;#ASMEND 12277; GFX900-NEXT: ;;#ASMSTART 12278; GFX900-NEXT: ; def s[12:14] 12279; GFX900-NEXT: ;;#ASMEND 12280; GFX900-NEXT: s_mov_b32 s8, s14 12281; 
GFX900-NEXT: s_mov_b32 s9, s14 12282; GFX900-NEXT: s_mov_b32 s10, s4 12283; GFX900-NEXT: s_mov_b32 s11, s13 12284; GFX900-NEXT: ;;#ASMSTART 12285; GFX900-NEXT: ; use s[8:11] 12286; GFX900-NEXT: ;;#ASMEND 12287; GFX900-NEXT: s_setpc_b64 s[30:31] 12288; 12289; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_5_0_4: 12290; GFX90A: ; %bb.0: 12291; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12292; GFX90A-NEXT: ;;#ASMSTART 12293; GFX90A-NEXT: ; def s[4:6] 12294; GFX90A-NEXT: ;;#ASMEND 12295; GFX90A-NEXT: ;;#ASMSTART 12296; GFX90A-NEXT: ; def s[12:14] 12297; GFX90A-NEXT: ;;#ASMEND 12298; GFX90A-NEXT: s_mov_b32 s8, s14 12299; GFX90A-NEXT: s_mov_b32 s9, s14 12300; GFX90A-NEXT: s_mov_b32 s10, s4 12301; GFX90A-NEXT: s_mov_b32 s11, s13 12302; GFX90A-NEXT: ;;#ASMSTART 12303; GFX90A-NEXT: ; use s[8:11] 12304; GFX90A-NEXT: ;;#ASMEND 12305; GFX90A-NEXT: s_setpc_b64 s[30:31] 12306; 12307; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_0_4: 12308; GFX940: ; %bb.0: 12309; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12310; GFX940-NEXT: ;;#ASMSTART 12311; GFX940-NEXT: ; def s[0:2] 12312; GFX940-NEXT: ;;#ASMEND 12313; GFX940-NEXT: ;;#ASMSTART 12314; GFX940-NEXT: ; def s[4:6] 12315; GFX940-NEXT: ;;#ASMEND 12316; GFX940-NEXT: s_mov_b32 s8, s6 12317; GFX940-NEXT: s_mov_b32 s9, s6 12318; GFX940-NEXT: s_mov_b32 s10, s0 12319; GFX940-NEXT: s_mov_b32 s11, s5 12320; GFX940-NEXT: ;;#ASMSTART 12321; GFX940-NEXT: ; use s[8:11] 12322; GFX940-NEXT: ;;#ASMEND 12323; GFX940-NEXT: s_setpc_b64 s[30:31] 12324 %vec0 = call <3 x i32> asm "; def $0", "=s"() 12325 %vec1 = call <3 x i32> asm "; def $0", "=s"() 12326 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 0, i32 4> 12327 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 12328 ret void 12329} 12330 12331define void @s_shuffle_v4i32_v3i32__5_5_1_4() { 12332; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_5_1_4: 12333; GFX900: ; %bb.0: 12334; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12335; 
GFX900-NEXT: ;;#ASMSTART 12336; GFX900-NEXT: ; def s[4:6] 12337; GFX900-NEXT: ;;#ASMEND 12338; GFX900-NEXT: ;;#ASMSTART 12339; GFX900-NEXT: ; def s[12:14] 12340; GFX900-NEXT: ;;#ASMEND 12341; GFX900-NEXT: s_mov_b32 s8, s14 12342; GFX900-NEXT: s_mov_b32 s9, s14 12343; GFX900-NEXT: s_mov_b32 s10, s5 12344; GFX900-NEXT: s_mov_b32 s11, s13 12345; GFX900-NEXT: ;;#ASMSTART 12346; GFX900-NEXT: ; use s[8:11] 12347; GFX900-NEXT: ;;#ASMEND 12348; GFX900-NEXT: s_setpc_b64 s[30:31] 12349; 12350; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_5_1_4: 12351; GFX90A: ; %bb.0: 12352; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12353; GFX90A-NEXT: ;;#ASMSTART 12354; GFX90A-NEXT: ; def s[4:6] 12355; GFX90A-NEXT: ;;#ASMEND 12356; GFX90A-NEXT: ;;#ASMSTART 12357; GFX90A-NEXT: ; def s[12:14] 12358; GFX90A-NEXT: ;;#ASMEND 12359; GFX90A-NEXT: s_mov_b32 s8, s14 12360; GFX90A-NEXT: s_mov_b32 s9, s14 12361; GFX90A-NEXT: s_mov_b32 s10, s5 12362; GFX90A-NEXT: s_mov_b32 s11, s13 12363; GFX90A-NEXT: ;;#ASMSTART 12364; GFX90A-NEXT: ; use s[8:11] 12365; GFX90A-NEXT: ;;#ASMEND 12366; GFX90A-NEXT: s_setpc_b64 s[30:31] 12367; 12368; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_1_4: 12369; GFX940: ; %bb.0: 12370; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12371; GFX940-NEXT: ;;#ASMSTART 12372; GFX940-NEXT: ; def s[0:2] 12373; GFX940-NEXT: ;;#ASMEND 12374; GFX940-NEXT: ;;#ASMSTART 12375; GFX940-NEXT: ; def s[4:6] 12376; GFX940-NEXT: ;;#ASMEND 12377; GFX940-NEXT: s_mov_b32 s8, s6 12378; GFX940-NEXT: s_mov_b32 s9, s6 12379; GFX940-NEXT: s_mov_b32 s10, s1 12380; GFX940-NEXT: s_mov_b32 s11, s5 12381; GFX940-NEXT: ;;#ASMSTART 12382; GFX940-NEXT: ; use s[8:11] 12383; GFX940-NEXT: ;;#ASMEND 12384; GFX940-NEXT: s_setpc_b64 s[30:31] 12385 %vec0 = call <3 x i32> asm "; def $0", "=s"() 12386 %vec1 = call <3 x i32> asm "; def $0", "=s"() 12387 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 1, i32 4> 12388 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 
12389 ret void 12390} 12391 12392define void @s_shuffle_v4i32_v3i32__5_5_2_4() { 12393; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_5_2_4: 12394; GFX900: ; %bb.0: 12395; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12396; GFX900-NEXT: ;;#ASMSTART 12397; GFX900-NEXT: ; def s[8:10] 12398; GFX900-NEXT: ;;#ASMEND 12399; GFX900-NEXT: ;;#ASMSTART 12400; GFX900-NEXT: ; def s[4:6] 12401; GFX900-NEXT: ;;#ASMEND 12402; GFX900-NEXT: s_mov_b32 s8, s6 12403; GFX900-NEXT: s_mov_b32 s9, s6 12404; GFX900-NEXT: s_mov_b32 s11, s5 12405; GFX900-NEXT: ;;#ASMSTART 12406; GFX900-NEXT: ; use s[8:11] 12407; GFX900-NEXT: ;;#ASMEND 12408; GFX900-NEXT: s_setpc_b64 s[30:31] 12409; 12410; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_5_2_4: 12411; GFX90A: ; %bb.0: 12412; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12413; GFX90A-NEXT: ;;#ASMSTART 12414; GFX90A-NEXT: ; def s[8:10] 12415; GFX90A-NEXT: ;;#ASMEND 12416; GFX90A-NEXT: ;;#ASMSTART 12417; GFX90A-NEXT: ; def s[4:6] 12418; GFX90A-NEXT: ;;#ASMEND 12419; GFX90A-NEXT: s_mov_b32 s8, s6 12420; GFX90A-NEXT: s_mov_b32 s9, s6 12421; GFX90A-NEXT: s_mov_b32 s11, s5 12422; GFX90A-NEXT: ;;#ASMSTART 12423; GFX90A-NEXT: ; use s[8:11] 12424; GFX90A-NEXT: ;;#ASMEND 12425; GFX90A-NEXT: s_setpc_b64 s[30:31] 12426; 12427; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_2_4: 12428; GFX940: ; %bb.0: 12429; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12430; GFX940-NEXT: ;;#ASMSTART 12431; GFX940-NEXT: ; def s[8:10] 12432; GFX940-NEXT: ;;#ASMEND 12433; GFX940-NEXT: ;;#ASMSTART 12434; GFX940-NEXT: ; def s[0:2] 12435; GFX940-NEXT: ;;#ASMEND 12436; GFX940-NEXT: s_mov_b32 s8, s2 12437; GFX940-NEXT: s_mov_b32 s9, s2 12438; GFX940-NEXT: s_mov_b32 s11, s1 12439; GFX940-NEXT: ;;#ASMSTART 12440; GFX940-NEXT: ; use s[8:11] 12441; GFX940-NEXT: ;;#ASMEND 12442; GFX940-NEXT: s_setpc_b64 s[30:31] 12443 %vec0 = call <3 x i32> asm "; def $0", "=s"() 12444 %vec1 = call <3 x i32> asm "; def $0", "=s"() 12445 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x 
i32> <i32 5, i32 5, i32 2, i32 4> 12446 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 12447 ret void 12448} 12449 12450define void @s_shuffle_v4i32_v3i32__5_5_3_4() { 12451; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_5_3_4: 12452; GFX900: ; %bb.0: 12453; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12454; GFX900-NEXT: ;;#ASMSTART 12455; GFX900-NEXT: ; def s[4:6] 12456; GFX900-NEXT: ;;#ASMEND 12457; GFX900-NEXT: s_mov_b32 s8, s6 12458; GFX900-NEXT: s_mov_b32 s9, s6 12459; GFX900-NEXT: s_mov_b32 s10, s4 12460; GFX900-NEXT: s_mov_b32 s11, s5 12461; GFX900-NEXT: ;;#ASMSTART 12462; GFX900-NEXT: ; use s[8:11] 12463; GFX900-NEXT: ;;#ASMEND 12464; GFX900-NEXT: s_setpc_b64 s[30:31] 12465; 12466; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_5_3_4: 12467; GFX90A: ; %bb.0: 12468; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12469; GFX90A-NEXT: ;;#ASMSTART 12470; GFX90A-NEXT: ; def s[4:6] 12471; GFX90A-NEXT: ;;#ASMEND 12472; GFX90A-NEXT: s_mov_b32 s8, s6 12473; GFX90A-NEXT: s_mov_b32 s9, s6 12474; GFX90A-NEXT: s_mov_b32 s10, s4 12475; GFX90A-NEXT: s_mov_b32 s11, s5 12476; GFX90A-NEXT: ;;#ASMSTART 12477; GFX90A-NEXT: ; use s[8:11] 12478; GFX90A-NEXT: ;;#ASMEND 12479; GFX90A-NEXT: s_setpc_b64 s[30:31] 12480; 12481; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_3_4: 12482; GFX940: ; %bb.0: 12483; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12484; GFX940-NEXT: ;;#ASMSTART 12485; GFX940-NEXT: ; def s[0:2] 12486; GFX940-NEXT: ;;#ASMEND 12487; GFX940-NEXT: s_mov_b32 s8, s2 12488; GFX940-NEXT: s_mov_b32 s9, s2 12489; GFX940-NEXT: s_mov_b32 s10, s0 12490; GFX940-NEXT: s_mov_b32 s11, s1 12491; GFX940-NEXT: ;;#ASMSTART 12492; GFX940-NEXT: ; use s[8:11] 12493; GFX940-NEXT: ;;#ASMEND 12494; GFX940-NEXT: s_setpc_b64 s[30:31] 12495 %vec0 = call <3 x i32> asm "; def $0", "=s"() 12496 %vec1 = call <3 x i32> asm "; def $0", "=s"() 12497 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 3, i32 4> 12498 call void asm sideeffect "; 
use $0", "{s[8:11]}"(<4 x i32> %shuf) 12499 ret void 12500} 12501 12502define void @s_shuffle_v4i32_v3i32__u_5_5_5() { 12503; GFX9-LABEL: s_shuffle_v4i32_v3i32__u_5_5_5: 12504; GFX9: ; %bb.0: 12505; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12506; GFX9-NEXT: ;;#ASMSTART 12507; GFX9-NEXT: ; def s[8:10] 12508; GFX9-NEXT: ;;#ASMEND 12509; GFX9-NEXT: s_mov_b32 s9, s10 12510; GFX9-NEXT: s_mov_b32 s11, s10 12511; GFX9-NEXT: ;;#ASMSTART 12512; GFX9-NEXT: ; use s[8:11] 12513; GFX9-NEXT: ;;#ASMEND 12514; GFX9-NEXT: s_setpc_b64 s[30:31] 12515 %vec0 = call <3 x i32> asm "; def $0", "=s"() 12516 %vec1 = call <3 x i32> asm "; def $0", "=s"() 12517 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 poison, i32 5, i32 5, i32 5> 12518 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 12519 ret void 12520} 12521 12522define void @s_shuffle_v4i32_v3i32__0_5_5_5() { 12523; GFX900-LABEL: s_shuffle_v4i32_v3i32__0_5_5_5: 12524; GFX900: ; %bb.0: 12525; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12526; GFX900-NEXT: ;;#ASMSTART 12527; GFX900-NEXT: ; def s[8:10] 12528; GFX900-NEXT: ;;#ASMEND 12529; GFX900-NEXT: ;;#ASMSTART 12530; GFX900-NEXT: ; def s[4:6] 12531; GFX900-NEXT: ;;#ASMEND 12532; GFX900-NEXT: s_mov_b32 s9, s6 12533; GFX900-NEXT: s_mov_b32 s10, s6 12534; GFX900-NEXT: s_mov_b32 s11, s6 12535; GFX900-NEXT: ;;#ASMSTART 12536; GFX900-NEXT: ; use s[8:11] 12537; GFX900-NEXT: ;;#ASMEND 12538; GFX900-NEXT: s_setpc_b64 s[30:31] 12539; 12540; GFX90A-LABEL: s_shuffle_v4i32_v3i32__0_5_5_5: 12541; GFX90A: ; %bb.0: 12542; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12543; GFX90A-NEXT: ;;#ASMSTART 12544; GFX90A-NEXT: ; def s[8:10] 12545; GFX90A-NEXT: ;;#ASMEND 12546; GFX90A-NEXT: ;;#ASMSTART 12547; GFX90A-NEXT: ; def s[4:6] 12548; GFX90A-NEXT: ;;#ASMEND 12549; GFX90A-NEXT: s_mov_b32 s9, s6 12550; GFX90A-NEXT: s_mov_b32 s10, s6 12551; GFX90A-NEXT: s_mov_b32 s11, s6 12552; GFX90A-NEXT: ;;#ASMSTART 12553; GFX90A-NEXT: ; use s[8:11] 
12554; GFX90A-NEXT: ;;#ASMEND 12555; GFX90A-NEXT: s_setpc_b64 s[30:31] 12556; 12557; GFX940-LABEL: s_shuffle_v4i32_v3i32__0_5_5_5: 12558; GFX940: ; %bb.0: 12559; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12560; GFX940-NEXT: ;;#ASMSTART 12561; GFX940-NEXT: ; def s[8:10] 12562; GFX940-NEXT: ;;#ASMEND 12563; GFX940-NEXT: ;;#ASMSTART 12564; GFX940-NEXT: ; def s[0:2] 12565; GFX940-NEXT: ;;#ASMEND 12566; GFX940-NEXT: s_mov_b32 s9, s2 12567; GFX940-NEXT: s_mov_b32 s10, s2 12568; GFX940-NEXT: s_mov_b32 s11, s2 12569; GFX940-NEXT: ;;#ASMSTART 12570; GFX940-NEXT: ; use s[8:11] 12571; GFX940-NEXT: ;;#ASMEND 12572; GFX940-NEXT: s_setpc_b64 s[30:31] 12573 %vec0 = call <3 x i32> asm "; def $0", "=s"() 12574 %vec1 = call <3 x i32> asm "; def $0", "=s"() 12575 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 0, i32 5, i32 5, i32 5> 12576 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 12577 ret void 12578} 12579 12580define void @s_shuffle_v4i32_v3i32__1_5_5_5() { 12581; GFX900-LABEL: s_shuffle_v4i32_v3i32__1_5_5_5: 12582; GFX900: ; %bb.0: 12583; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12584; GFX900-NEXT: ;;#ASMSTART 12585; GFX900-NEXT: ; def s[8:10] 12586; GFX900-NEXT: ;;#ASMEND 12587; GFX900-NEXT: ;;#ASMSTART 12588; GFX900-NEXT: ; def s[4:6] 12589; GFX900-NEXT: ;;#ASMEND 12590; GFX900-NEXT: s_mov_b32 s8, s5 12591; GFX900-NEXT: s_mov_b32 s9, s10 12592; GFX900-NEXT: s_mov_b32 s11, s10 12593; GFX900-NEXT: ;;#ASMSTART 12594; GFX900-NEXT: ; use s[8:11] 12595; GFX900-NEXT: ;;#ASMEND 12596; GFX900-NEXT: s_setpc_b64 s[30:31] 12597; 12598; GFX90A-LABEL: s_shuffle_v4i32_v3i32__1_5_5_5: 12599; GFX90A: ; %bb.0: 12600; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12601; GFX90A-NEXT: ;;#ASMSTART 12602; GFX90A-NEXT: ; def s[8:10] 12603; GFX90A-NEXT: ;;#ASMEND 12604; GFX90A-NEXT: ;;#ASMSTART 12605; GFX90A-NEXT: ; def s[4:6] 12606; GFX90A-NEXT: ;;#ASMEND 12607; GFX90A-NEXT: s_mov_b32 s8, s5 12608; GFX90A-NEXT: s_mov_b32 s9, 
s10 12609; GFX90A-NEXT: s_mov_b32 s11, s10 12610; GFX90A-NEXT: ;;#ASMSTART 12611; GFX90A-NEXT: ; use s[8:11] 12612; GFX90A-NEXT: ;;#ASMEND 12613; GFX90A-NEXT: s_setpc_b64 s[30:31] 12614; 12615; GFX940-LABEL: s_shuffle_v4i32_v3i32__1_5_5_5: 12616; GFX940: ; %bb.0: 12617; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12618; GFX940-NEXT: ;;#ASMSTART 12619; GFX940-NEXT: ; def s[8:10] 12620; GFX940-NEXT: ;;#ASMEND 12621; GFX940-NEXT: ;;#ASMSTART 12622; GFX940-NEXT: ; def s[0:2] 12623; GFX940-NEXT: ;;#ASMEND 12624; GFX940-NEXT: s_mov_b32 s8, s1 12625; GFX940-NEXT: s_mov_b32 s9, s10 12626; GFX940-NEXT: s_mov_b32 s11, s10 12627; GFX940-NEXT: ;;#ASMSTART 12628; GFX940-NEXT: ; use s[8:11] 12629; GFX940-NEXT: ;;#ASMEND 12630; GFX940-NEXT: s_setpc_b64 s[30:31] 12631 %vec0 = call <3 x i32> asm "; def $0", "=s"() 12632 %vec1 = call <3 x i32> asm "; def $0", "=s"() 12633 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 1, i32 5, i32 5, i32 5> 12634 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 12635 ret void 12636} 12637 12638define void @s_shuffle_v4i32_v3i32__2_5_5_5() { 12639; GFX900-LABEL: s_shuffle_v4i32_v3i32__2_5_5_5: 12640; GFX900: ; %bb.0: 12641; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12642; GFX900-NEXT: ;;#ASMSTART 12643; GFX900-NEXT: ; def s[8:10] 12644; GFX900-NEXT: ;;#ASMEND 12645; GFX900-NEXT: ;;#ASMSTART 12646; GFX900-NEXT: ; def s[4:6] 12647; GFX900-NEXT: ;;#ASMEND 12648; GFX900-NEXT: s_mov_b32 s8, s6 12649; GFX900-NEXT: s_mov_b32 s9, s10 12650; GFX900-NEXT: s_mov_b32 s11, s10 12651; GFX900-NEXT: ;;#ASMSTART 12652; GFX900-NEXT: ; use s[8:11] 12653; GFX900-NEXT: ;;#ASMEND 12654; GFX900-NEXT: s_setpc_b64 s[30:31] 12655; 12656; GFX90A-LABEL: s_shuffle_v4i32_v3i32__2_5_5_5: 12657; GFX90A: ; %bb.0: 12658; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12659; GFX90A-NEXT: ;;#ASMSTART 12660; GFX90A-NEXT: ; def s[8:10] 12661; GFX90A-NEXT: ;;#ASMEND 12662; GFX90A-NEXT: ;;#ASMSTART 12663; GFX90A-NEXT: ; 
def s[4:6] 12664; GFX90A-NEXT: ;;#ASMEND 12665; GFX90A-NEXT: s_mov_b32 s8, s6 12666; GFX90A-NEXT: s_mov_b32 s9, s10 12667; GFX90A-NEXT: s_mov_b32 s11, s10 12668; GFX90A-NEXT: ;;#ASMSTART 12669; GFX90A-NEXT: ; use s[8:11] 12670; GFX90A-NEXT: ;;#ASMEND 12671; GFX90A-NEXT: s_setpc_b64 s[30:31] 12672; 12673; GFX940-LABEL: s_shuffle_v4i32_v3i32__2_5_5_5: 12674; GFX940: ; %bb.0: 12675; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12676; GFX940-NEXT: ;;#ASMSTART 12677; GFX940-NEXT: ; def s[8:10] 12678; GFX940-NEXT: ;;#ASMEND 12679; GFX940-NEXT: ;;#ASMSTART 12680; GFX940-NEXT: ; def s[0:2] 12681; GFX940-NEXT: ;;#ASMEND 12682; GFX940-NEXT: s_mov_b32 s8, s2 12683; GFX940-NEXT: s_mov_b32 s9, s10 12684; GFX940-NEXT: s_mov_b32 s11, s10 12685; GFX940-NEXT: ;;#ASMSTART 12686; GFX940-NEXT: ; use s[8:11] 12687; GFX940-NEXT: ;;#ASMEND 12688; GFX940-NEXT: s_setpc_b64 s[30:31] 12689 %vec0 = call <3 x i32> asm "; def $0", "=s"() 12690 %vec1 = call <3 x i32> asm "; def $0", "=s"() 12691 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 2, i32 5, i32 5, i32 5> 12692 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 12693 ret void 12694} 12695 12696define void @s_shuffle_v4i32_v3i32__3_5_5_5() { 12697; GFX9-LABEL: s_shuffle_v4i32_v3i32__3_5_5_5: 12698; GFX9: ; %bb.0: 12699; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12700; GFX9-NEXT: ;;#ASMSTART 12701; GFX9-NEXT: ; def s[8:10] 12702; GFX9-NEXT: ;;#ASMEND 12703; GFX9-NEXT: s_mov_b32 s9, s10 12704; GFX9-NEXT: s_mov_b32 s11, s10 12705; GFX9-NEXT: ;;#ASMSTART 12706; GFX9-NEXT: ; use s[8:11] 12707; GFX9-NEXT: ;;#ASMEND 12708; GFX9-NEXT: s_setpc_b64 s[30:31] 12709 %vec0 = call <3 x i32> asm "; def $0", "=s"() 12710 %vec1 = call <3 x i32> asm "; def $0", "=s"() 12711 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 3, i32 5, i32 5, i32 5> 12712 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 12713 ret void 12714} 12715 12716define void 
@s_shuffle_v4i32_v3i32__4_5_5_5() { 12717; GFX9-LABEL: s_shuffle_v4i32_v3i32__4_5_5_5: 12718; GFX9: ; %bb.0: 12719; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12720; GFX9-NEXT: ;;#ASMSTART 12721; GFX9-NEXT: ; def s[8:10] 12722; GFX9-NEXT: ;;#ASMEND 12723; GFX9-NEXT: s_mov_b32 s8, s9 12724; GFX9-NEXT: s_mov_b32 s9, s10 12725; GFX9-NEXT: s_mov_b32 s11, s10 12726; GFX9-NEXT: ;;#ASMSTART 12727; GFX9-NEXT: ; use s[8:11] 12728; GFX9-NEXT: ;;#ASMEND 12729; GFX9-NEXT: s_setpc_b64 s[30:31] 12730 %vec0 = call <3 x i32> asm "; def $0", "=s"() 12731 %vec1 = call <3 x i32> asm "; def $0", "=s"() 12732 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 4, i32 5, i32 5, i32 5> 12733 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 12734 ret void 12735} 12736 12737define void @s_shuffle_v4i32_v3i32__5_u_5_5() { 12738; GFX9-LABEL: s_shuffle_v4i32_v3i32__5_u_5_5: 12739; GFX9: ; %bb.0: 12740; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12741; GFX9-NEXT: ;;#ASMSTART 12742; GFX9-NEXT: ; def s[8:10] 12743; GFX9-NEXT: ;;#ASMEND 12744; GFX9-NEXT: s_mov_b32 s8, s10 12745; GFX9-NEXT: s_mov_b32 s11, s10 12746; GFX9-NEXT: ;;#ASMSTART 12747; GFX9-NEXT: ; use s[8:11] 12748; GFX9-NEXT: ;;#ASMEND 12749; GFX9-NEXT: s_setpc_b64 s[30:31] 12750 %vec0 = call <3 x i32> asm "; def $0", "=s"() 12751 %vec1 = call <3 x i32> asm "; def $0", "=s"() 12752 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 poison, i32 5, i32 5> 12753 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 12754 ret void 12755} 12756 12757define void @s_shuffle_v4i32_v3i32__5_0_5_5() { 12758; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_0_5_5: 12759; GFX900: ; %bb.0: 12760; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12761; GFX900-NEXT: ;;#ASMSTART 12762; GFX900-NEXT: ; def s[8:10] 12763; GFX900-NEXT: ;;#ASMEND 12764; GFX900-NEXT: ;;#ASMSTART 12765; GFX900-NEXT: ; def s[4:6] 12766; GFX900-NEXT: ;;#ASMEND 12767; GFX900-NEXT: s_mov_b32 s8, 
s10 12768; GFX900-NEXT: s_mov_b32 s9, s4 12769; GFX900-NEXT: s_mov_b32 s11, s10 12770; GFX900-NEXT: ;;#ASMSTART 12771; GFX900-NEXT: ; use s[8:11] 12772; GFX900-NEXT: ;;#ASMEND 12773; GFX900-NEXT: s_setpc_b64 s[30:31] 12774; 12775; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_0_5_5: 12776; GFX90A: ; %bb.0: 12777; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12778; GFX90A-NEXT: ;;#ASMSTART 12779; GFX90A-NEXT: ; def s[8:10] 12780; GFX90A-NEXT: ;;#ASMEND 12781; GFX90A-NEXT: ;;#ASMSTART 12782; GFX90A-NEXT: ; def s[4:6] 12783; GFX90A-NEXT: ;;#ASMEND 12784; GFX90A-NEXT: s_mov_b32 s8, s10 12785; GFX90A-NEXT: s_mov_b32 s9, s4 12786; GFX90A-NEXT: s_mov_b32 s11, s10 12787; GFX90A-NEXT: ;;#ASMSTART 12788; GFX90A-NEXT: ; use s[8:11] 12789; GFX90A-NEXT: ;;#ASMEND 12790; GFX90A-NEXT: s_setpc_b64 s[30:31] 12791; 12792; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_0_5_5: 12793; GFX940: ; %bb.0: 12794; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12795; GFX940-NEXT: ;;#ASMSTART 12796; GFX940-NEXT: ; def s[8:10] 12797; GFX940-NEXT: ;;#ASMEND 12798; GFX940-NEXT: ;;#ASMSTART 12799; GFX940-NEXT: ; def s[0:2] 12800; GFX940-NEXT: ;;#ASMEND 12801; GFX940-NEXT: s_mov_b32 s8, s10 12802; GFX940-NEXT: s_mov_b32 s9, s0 12803; GFX940-NEXT: s_mov_b32 s11, s10 12804; GFX940-NEXT: ;;#ASMSTART 12805; GFX940-NEXT: ; use s[8:11] 12806; GFX940-NEXT: ;;#ASMEND 12807; GFX940-NEXT: s_setpc_b64 s[30:31] 12808 %vec0 = call <3 x i32> asm "; def $0", "=s"() 12809 %vec1 = call <3 x i32> asm "; def $0", "=s"() 12810 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 0, i32 5, i32 5> 12811 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 12812 ret void 12813} 12814 12815define void @s_shuffle_v4i32_v3i32__5_1_5_5() { 12816; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_1_5_5: 12817; GFX900: ; %bb.0: 12818; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12819; GFX900-NEXT: ;;#ASMSTART 12820; GFX900-NEXT: ; def s[8:10] 12821; GFX900-NEXT: ;;#ASMEND 12822; GFX900-NEXT: 
;;#ASMSTART 12823; GFX900-NEXT: ; def s[4:6] 12824; GFX900-NEXT: ;;#ASMEND 12825; GFX900-NEXT: s_mov_b32 s8, s6 12826; GFX900-NEXT: s_mov_b32 s10, s6 12827; GFX900-NEXT: s_mov_b32 s11, s6 12828; GFX900-NEXT: ;;#ASMSTART 12829; GFX900-NEXT: ; use s[8:11] 12830; GFX900-NEXT: ;;#ASMEND 12831; GFX900-NEXT: s_setpc_b64 s[30:31] 12832; 12833; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_1_5_5: 12834; GFX90A: ; %bb.0: 12835; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12836; GFX90A-NEXT: ;;#ASMSTART 12837; GFX90A-NEXT: ; def s[8:10] 12838; GFX90A-NEXT: ;;#ASMEND 12839; GFX90A-NEXT: ;;#ASMSTART 12840; GFX90A-NEXT: ; def s[4:6] 12841; GFX90A-NEXT: ;;#ASMEND 12842; GFX90A-NEXT: s_mov_b32 s8, s6 12843; GFX90A-NEXT: s_mov_b32 s10, s6 12844; GFX90A-NEXT: s_mov_b32 s11, s6 12845; GFX90A-NEXT: ;;#ASMSTART 12846; GFX90A-NEXT: ; use s[8:11] 12847; GFX90A-NEXT: ;;#ASMEND 12848; GFX90A-NEXT: s_setpc_b64 s[30:31] 12849; 12850; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_1_5_5: 12851; GFX940: ; %bb.0: 12852; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12853; GFX940-NEXT: ;;#ASMSTART 12854; GFX940-NEXT: ; def s[8:10] 12855; GFX940-NEXT: ;;#ASMEND 12856; GFX940-NEXT: ;;#ASMSTART 12857; GFX940-NEXT: ; def s[0:2] 12858; GFX940-NEXT: ;;#ASMEND 12859; GFX940-NEXT: s_mov_b32 s8, s2 12860; GFX940-NEXT: s_mov_b32 s10, s2 12861; GFX940-NEXT: s_mov_b32 s11, s2 12862; GFX940-NEXT: ;;#ASMSTART 12863; GFX940-NEXT: ; use s[8:11] 12864; GFX940-NEXT: ;;#ASMEND 12865; GFX940-NEXT: s_setpc_b64 s[30:31] 12866 %vec0 = call <3 x i32> asm "; def $0", "=s"() 12867 %vec1 = call <3 x i32> asm "; def $0", "=s"() 12868 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 1, i32 5, i32 5> 12869 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 12870 ret void 12871} 12872 12873define void @s_shuffle_v4i32_v3i32__5_2_5_5() { 12874; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_2_5_5: 12875; GFX900: ; %bb.0: 12876; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12877; 
GFX900-NEXT: ;;#ASMSTART 12878; GFX900-NEXT: ; def s[8:10] 12879; GFX900-NEXT: ;;#ASMEND 12880; GFX900-NEXT: ;;#ASMSTART 12881; GFX900-NEXT: ; def s[4:6] 12882; GFX900-NEXT: ;;#ASMEND 12883; GFX900-NEXT: s_mov_b32 s8, s10 12884; GFX900-NEXT: s_mov_b32 s9, s6 12885; GFX900-NEXT: s_mov_b32 s11, s10 12886; GFX900-NEXT: ;;#ASMSTART 12887; GFX900-NEXT: ; use s[8:11] 12888; GFX900-NEXT: ;;#ASMEND 12889; GFX900-NEXT: s_setpc_b64 s[30:31] 12890; 12891; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_2_5_5: 12892; GFX90A: ; %bb.0: 12893; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12894; GFX90A-NEXT: ;;#ASMSTART 12895; GFX90A-NEXT: ; def s[8:10] 12896; GFX90A-NEXT: ;;#ASMEND 12897; GFX90A-NEXT: ;;#ASMSTART 12898; GFX90A-NEXT: ; def s[4:6] 12899; GFX90A-NEXT: ;;#ASMEND 12900; GFX90A-NEXT: s_mov_b32 s8, s10 12901; GFX90A-NEXT: s_mov_b32 s9, s6 12902; GFX90A-NEXT: s_mov_b32 s11, s10 12903; GFX90A-NEXT: ;;#ASMSTART 12904; GFX90A-NEXT: ; use s[8:11] 12905; GFX90A-NEXT: ;;#ASMEND 12906; GFX90A-NEXT: s_setpc_b64 s[30:31] 12907; 12908; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_2_5_5: 12909; GFX940: ; %bb.0: 12910; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12911; GFX940-NEXT: ;;#ASMSTART 12912; GFX940-NEXT: ; def s[8:10] 12913; GFX940-NEXT: ;;#ASMEND 12914; GFX940-NEXT: ;;#ASMSTART 12915; GFX940-NEXT: ; def s[0:2] 12916; GFX940-NEXT: ;;#ASMEND 12917; GFX940-NEXT: s_mov_b32 s8, s10 12918; GFX940-NEXT: s_mov_b32 s9, s2 12919; GFX940-NEXT: s_mov_b32 s11, s10 12920; GFX940-NEXT: ;;#ASMSTART 12921; GFX940-NEXT: ; use s[8:11] 12922; GFX940-NEXT: ;;#ASMEND 12923; GFX940-NEXT: s_setpc_b64 s[30:31] 12924 %vec0 = call <3 x i32> asm "; def $0", "=s"() 12925 %vec1 = call <3 x i32> asm "; def $0", "=s"() 12926 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 2, i32 5, i32 5> 12927 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 12928 ret void 12929} 12930 12931define void @s_shuffle_v4i32_v3i32__5_3_5_5() { 12932; GFX900-LABEL: 
s_shuffle_v4i32_v3i32__5_3_5_5: 12933; GFX900: ; %bb.0: 12934; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12935; GFX900-NEXT: ;;#ASMSTART 12936; GFX900-NEXT: ; def s[4:6] 12937; GFX900-NEXT: ;;#ASMEND 12938; GFX900-NEXT: s_mov_b32 s8, s6 12939; GFX900-NEXT: s_mov_b32 s9, s4 12940; GFX900-NEXT: s_mov_b32 s10, s6 12941; GFX900-NEXT: s_mov_b32 s11, s6 12942; GFX900-NEXT: ;;#ASMSTART 12943; GFX900-NEXT: ; use s[8:11] 12944; GFX900-NEXT: ;;#ASMEND 12945; GFX900-NEXT: s_setpc_b64 s[30:31] 12946; 12947; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_3_5_5: 12948; GFX90A: ; %bb.0: 12949; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12950; GFX90A-NEXT: ;;#ASMSTART 12951; GFX90A-NEXT: ; def s[4:6] 12952; GFX90A-NEXT: ;;#ASMEND 12953; GFX90A-NEXT: s_mov_b32 s8, s6 12954; GFX90A-NEXT: s_mov_b32 s9, s4 12955; GFX90A-NEXT: s_mov_b32 s10, s6 12956; GFX90A-NEXT: s_mov_b32 s11, s6 12957; GFX90A-NEXT: ;;#ASMSTART 12958; GFX90A-NEXT: ; use s[8:11] 12959; GFX90A-NEXT: ;;#ASMEND 12960; GFX90A-NEXT: s_setpc_b64 s[30:31] 12961; 12962; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_3_5_5: 12963; GFX940: ; %bb.0: 12964; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12965; GFX940-NEXT: ;;#ASMSTART 12966; GFX940-NEXT: ; def s[0:2] 12967; GFX940-NEXT: ;;#ASMEND 12968; GFX940-NEXT: s_mov_b32 s8, s2 12969; GFX940-NEXT: s_mov_b32 s9, s0 12970; GFX940-NEXT: s_mov_b32 s10, s2 12971; GFX940-NEXT: s_mov_b32 s11, s2 12972; GFX940-NEXT: ;;#ASMSTART 12973; GFX940-NEXT: ; use s[8:11] 12974; GFX940-NEXT: ;;#ASMEND 12975; GFX940-NEXT: s_setpc_b64 s[30:31] 12976 %vec0 = call <3 x i32> asm "; def $0", "=s"() 12977 %vec1 = call <3 x i32> asm "; def $0", "=s"() 12978 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 3, i32 5, i32 5> 12979 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 12980 ret void 12981} 12982 12983define void @s_shuffle_v4i32_v3i32__5_4_5_5() { 12984; GFX9-LABEL: s_shuffle_v4i32_v3i32__5_4_5_5: 12985; GFX9: ; %bb.0: 12986; GFX9-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12987; GFX9-NEXT: ;;#ASMSTART 12988; GFX9-NEXT: ; def s[8:10] 12989; GFX9-NEXT: ;;#ASMEND 12990; GFX9-NEXT: s_mov_b32 s8, s10 12991; GFX9-NEXT: s_mov_b32 s11, s10 12992; GFX9-NEXT: ;;#ASMSTART 12993; GFX9-NEXT: ; use s[8:11] 12994; GFX9-NEXT: ;;#ASMEND 12995; GFX9-NEXT: s_setpc_b64 s[30:31] 12996 %vec0 = call <3 x i32> asm "; def $0", "=s"() 12997 %vec1 = call <3 x i32> asm "; def $0", "=s"() 12998 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 4, i32 5, i32 5> 12999 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 13000 ret void 13001} 13002 13003define void @s_shuffle_v4i32_v3i32__5_5_u_5() { 13004; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_5_u_5: 13005; GFX900: ; %bb.0: 13006; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13007; GFX900-NEXT: ;;#ASMSTART 13008; GFX900-NEXT: ; def s[4:6] 13009; GFX900-NEXT: ;;#ASMEND 13010; GFX900-NEXT: s_mov_b32 s8, s6 13011; GFX900-NEXT: s_mov_b32 s9, s6 13012; GFX900-NEXT: s_mov_b32 s11, s6 13013; GFX900-NEXT: ;;#ASMSTART 13014; GFX900-NEXT: ; use s[8:11] 13015; GFX900-NEXT: ;;#ASMEND 13016; GFX900-NEXT: s_setpc_b64 s[30:31] 13017; 13018; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_5_u_5: 13019; GFX90A: ; %bb.0: 13020; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13021; GFX90A-NEXT: ;;#ASMSTART 13022; GFX90A-NEXT: ; def s[4:6] 13023; GFX90A-NEXT: ;;#ASMEND 13024; GFX90A-NEXT: s_mov_b32 s8, s6 13025; GFX90A-NEXT: s_mov_b32 s9, s6 13026; GFX90A-NEXT: s_mov_b32 s11, s6 13027; GFX90A-NEXT: ;;#ASMSTART 13028; GFX90A-NEXT: ; use s[8:11] 13029; GFX90A-NEXT: ;;#ASMEND 13030; GFX90A-NEXT: s_setpc_b64 s[30:31] 13031; 13032; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_u_5: 13033; GFX940: ; %bb.0: 13034; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13035; GFX940-NEXT: ;;#ASMSTART 13036; GFX940-NEXT: ; def s[0:2] 13037; GFX940-NEXT: ;;#ASMEND 13038; GFX940-NEXT: s_mov_b32 s8, s2 13039; GFX940-NEXT: s_mov_b32 s9, s2 13040; GFX940-NEXT: 
s_mov_b32 s11, s2 13041; GFX940-NEXT: ;;#ASMSTART 13042; GFX940-NEXT: ; use s[8:11] 13043; GFX940-NEXT: ;;#ASMEND 13044; GFX940-NEXT: s_setpc_b64 s[30:31] 13045 %vec0 = call <3 x i32> asm "; def $0", "=s"() 13046 %vec1 = call <3 x i32> asm "; def $0", "=s"() 13047 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 poison, i32 5> 13048 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 13049 ret void 13050} 13051 13052define void @s_shuffle_v4i32_v3i32__5_5_0_5() { 13053; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_5_0_5: 13054; GFX900: ; %bb.0: 13055; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13056; GFX900-NEXT: ;;#ASMSTART 13057; GFX900-NEXT: ; def s[4:6] 13058; GFX900-NEXT: ;;#ASMEND 13059; GFX900-NEXT: ;;#ASMSTART 13060; GFX900-NEXT: ; def s[12:14] 13061; GFX900-NEXT: ;;#ASMEND 13062; GFX900-NEXT: s_mov_b32 s8, s14 13063; GFX900-NEXT: s_mov_b32 s9, s14 13064; GFX900-NEXT: s_mov_b32 s10, s4 13065; GFX900-NEXT: s_mov_b32 s11, s14 13066; GFX900-NEXT: ;;#ASMSTART 13067; GFX900-NEXT: ; use s[8:11] 13068; GFX900-NEXT: ;;#ASMEND 13069; GFX900-NEXT: s_setpc_b64 s[30:31] 13070; 13071; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_5_0_5: 13072; GFX90A: ; %bb.0: 13073; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13074; GFX90A-NEXT: ;;#ASMSTART 13075; GFX90A-NEXT: ; def s[4:6] 13076; GFX90A-NEXT: ;;#ASMEND 13077; GFX90A-NEXT: ;;#ASMSTART 13078; GFX90A-NEXT: ; def s[12:14] 13079; GFX90A-NEXT: ;;#ASMEND 13080; GFX90A-NEXT: s_mov_b32 s8, s14 13081; GFX90A-NEXT: s_mov_b32 s9, s14 13082; GFX90A-NEXT: s_mov_b32 s10, s4 13083; GFX90A-NEXT: s_mov_b32 s11, s14 13084; GFX90A-NEXT: ;;#ASMSTART 13085; GFX90A-NEXT: ; use s[8:11] 13086; GFX90A-NEXT: ;;#ASMEND 13087; GFX90A-NEXT: s_setpc_b64 s[30:31] 13088; 13089; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_0_5: 13090; GFX940: ; %bb.0: 13091; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13092; GFX940-NEXT: ;;#ASMSTART 13093; GFX940-NEXT: ; def s[0:2] 13094; GFX940-NEXT: 
;;#ASMEND 13095; GFX940-NEXT: ;;#ASMSTART 13096; GFX940-NEXT: ; def s[4:6] 13097; GFX940-NEXT: ;;#ASMEND 13098; GFX940-NEXT: s_mov_b32 s8, s6 13099; GFX940-NEXT: s_mov_b32 s9, s6 13100; GFX940-NEXT: s_mov_b32 s10, s0 13101; GFX940-NEXT: s_mov_b32 s11, s6 13102; GFX940-NEXT: ;;#ASMSTART 13103; GFX940-NEXT: ; use s[8:11] 13104; GFX940-NEXT: ;;#ASMEND 13105; GFX940-NEXT: s_setpc_b64 s[30:31] 13106 %vec0 = call <3 x i32> asm "; def $0", "=s"() 13107 %vec1 = call <3 x i32> asm "; def $0", "=s"() 13108 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 0, i32 5> 13109 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 13110 ret void 13111} 13112 13113define void @s_shuffle_v4i32_v3i32__5_5_1_5() { 13114; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_5_1_5: 13115; GFX900: ; %bb.0: 13116; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13117; GFX900-NEXT: ;;#ASMSTART 13118; GFX900-NEXT: ; def s[4:6] 13119; GFX900-NEXT: ;;#ASMEND 13120; GFX900-NEXT: ;;#ASMSTART 13121; GFX900-NEXT: ; def s[12:14] 13122; GFX900-NEXT: ;;#ASMEND 13123; GFX900-NEXT: s_mov_b32 s8, s14 13124; GFX900-NEXT: s_mov_b32 s9, s14 13125; GFX900-NEXT: s_mov_b32 s10, s5 13126; GFX900-NEXT: s_mov_b32 s11, s14 13127; GFX900-NEXT: ;;#ASMSTART 13128; GFX900-NEXT: ; use s[8:11] 13129; GFX900-NEXT: ;;#ASMEND 13130; GFX900-NEXT: s_setpc_b64 s[30:31] 13131; 13132; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_5_1_5: 13133; GFX90A: ; %bb.0: 13134; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13135; GFX90A-NEXT: ;;#ASMSTART 13136; GFX90A-NEXT: ; def s[4:6] 13137; GFX90A-NEXT: ;;#ASMEND 13138; GFX90A-NEXT: ;;#ASMSTART 13139; GFX90A-NEXT: ; def s[12:14] 13140; GFX90A-NEXT: ;;#ASMEND 13141; GFX90A-NEXT: s_mov_b32 s8, s14 13142; GFX90A-NEXT: s_mov_b32 s9, s14 13143; GFX90A-NEXT: s_mov_b32 s10, s5 13144; GFX90A-NEXT: s_mov_b32 s11, s14 13145; GFX90A-NEXT: ;;#ASMSTART 13146; GFX90A-NEXT: ; use s[8:11] 13147; GFX90A-NEXT: ;;#ASMEND 13148; GFX90A-NEXT: s_setpc_b64 s[30:31] 
13149; 13150; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_1_5: 13151; GFX940: ; %bb.0: 13152; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13153; GFX940-NEXT: ;;#ASMSTART 13154; GFX940-NEXT: ; def s[0:2] 13155; GFX940-NEXT: ;;#ASMEND 13156; GFX940-NEXT: ;;#ASMSTART 13157; GFX940-NEXT: ; def s[4:6] 13158; GFX940-NEXT: ;;#ASMEND 13159; GFX940-NEXT: s_mov_b32 s8, s6 13160; GFX940-NEXT: s_mov_b32 s9, s6 13161; GFX940-NEXT: s_mov_b32 s10, s1 13162; GFX940-NEXT: s_mov_b32 s11, s6 13163; GFX940-NEXT: ;;#ASMSTART 13164; GFX940-NEXT: ; use s[8:11] 13165; GFX940-NEXT: ;;#ASMEND 13166; GFX940-NEXT: s_setpc_b64 s[30:31] 13167 %vec0 = call <3 x i32> asm "; def $0", "=s"() 13168 %vec1 = call <3 x i32> asm "; def $0", "=s"() 13169 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 1, i32 5> 13170 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 13171 ret void 13172} 13173 13174define void @s_shuffle_v4i32_v3i32__5_5_2_5() { 13175; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_5_2_5: 13176; GFX900: ; %bb.0: 13177; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13178; GFX900-NEXT: ;;#ASMSTART 13179; GFX900-NEXT: ; def s[8:10] 13180; GFX900-NEXT: ;;#ASMEND 13181; GFX900-NEXT: ;;#ASMSTART 13182; GFX900-NEXT: ; def s[4:6] 13183; GFX900-NEXT: ;;#ASMEND 13184; GFX900-NEXT: s_mov_b32 s8, s6 13185; GFX900-NEXT: s_mov_b32 s9, s6 13186; GFX900-NEXT: s_mov_b32 s11, s6 13187; GFX900-NEXT: ;;#ASMSTART 13188; GFX900-NEXT: ; use s[8:11] 13189; GFX900-NEXT: ;;#ASMEND 13190; GFX900-NEXT: s_setpc_b64 s[30:31] 13191; 13192; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_5_2_5: 13193; GFX90A: ; %bb.0: 13194; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13195; GFX90A-NEXT: ;;#ASMSTART 13196; GFX90A-NEXT: ; def s[8:10] 13197; GFX90A-NEXT: ;;#ASMEND 13198; GFX90A-NEXT: ;;#ASMSTART 13199; GFX90A-NEXT: ; def s[4:6] 13200; GFX90A-NEXT: ;;#ASMEND 13201; GFX90A-NEXT: s_mov_b32 s8, s6 13202; GFX90A-NEXT: s_mov_b32 s9, s6 13203; GFX90A-NEXT: s_mov_b32 s11, 
s6 13204; GFX90A-NEXT: ;;#ASMSTART 13205; GFX90A-NEXT: ; use s[8:11] 13206; GFX90A-NEXT: ;;#ASMEND 13207; GFX90A-NEXT: s_setpc_b64 s[30:31] 13208; 13209; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_2_5: 13210; GFX940: ; %bb.0: 13211; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13212; GFX940-NEXT: ;;#ASMSTART 13213; GFX940-NEXT: ; def s[8:10] 13214; GFX940-NEXT: ;;#ASMEND 13215; GFX940-NEXT: ;;#ASMSTART 13216; GFX940-NEXT: ; def s[0:2] 13217; GFX940-NEXT: ;;#ASMEND 13218; GFX940-NEXT: s_mov_b32 s8, s2 13219; GFX940-NEXT: s_mov_b32 s9, s2 13220; GFX940-NEXT: s_mov_b32 s11, s2 13221; GFX940-NEXT: ;;#ASMSTART 13222; GFX940-NEXT: ; use s[8:11] 13223; GFX940-NEXT: ;;#ASMEND 13224; GFX940-NEXT: s_setpc_b64 s[30:31] 13225 %vec0 = call <3 x i32> asm "; def $0", "=s"() 13226 %vec1 = call <3 x i32> asm "; def $0", "=s"() 13227 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 2, i32 5> 13228 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 13229 ret void 13230} 13231 13232define void @s_shuffle_v4i32_v3i32__5_5_3_5() { 13233; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_5_3_5: 13234; GFX900: ; %bb.0: 13235; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13236; GFX900-NEXT: ;;#ASMSTART 13237; GFX900-NEXT: ; def s[4:6] 13238; GFX900-NEXT: ;;#ASMEND 13239; GFX900-NEXT: s_mov_b32 s8, s6 13240; GFX900-NEXT: s_mov_b32 s9, s6 13241; GFX900-NEXT: s_mov_b32 s10, s4 13242; GFX900-NEXT: s_mov_b32 s11, s6 13243; GFX900-NEXT: ;;#ASMSTART 13244; GFX900-NEXT: ; use s[8:11] 13245; GFX900-NEXT: ;;#ASMEND 13246; GFX900-NEXT: s_setpc_b64 s[30:31] 13247; 13248; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_5_3_5: 13249; GFX90A: ; %bb.0: 13250; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13251; GFX90A-NEXT: ;;#ASMSTART 13252; GFX90A-NEXT: ; def s[4:6] 13253; GFX90A-NEXT: ;;#ASMEND 13254; GFX90A-NEXT: s_mov_b32 s8, s6 13255; GFX90A-NEXT: s_mov_b32 s9, s6 13256; GFX90A-NEXT: s_mov_b32 s10, s4 13257; GFX90A-NEXT: s_mov_b32 s11, s6 13258; 
GFX90A-NEXT: ;;#ASMSTART 13259; GFX90A-NEXT: ; use s[8:11] 13260; GFX90A-NEXT: ;;#ASMEND 13261; GFX90A-NEXT: s_setpc_b64 s[30:31] 13262; 13263; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_3_5: 13264; GFX940: ; %bb.0: 13265; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13266; GFX940-NEXT: ;;#ASMSTART 13267; GFX940-NEXT: ; def s[0:2] 13268; GFX940-NEXT: ;;#ASMEND 13269; GFX940-NEXT: s_mov_b32 s8, s2 13270; GFX940-NEXT: s_mov_b32 s9, s2 13271; GFX940-NEXT: s_mov_b32 s10, s0 13272; GFX940-NEXT: s_mov_b32 s11, s2 13273; GFX940-NEXT: ;;#ASMSTART 13274; GFX940-NEXT: ; use s[8:11] 13275; GFX940-NEXT: ;;#ASMEND 13276; GFX940-NEXT: s_setpc_b64 s[30:31] 13277 %vec0 = call <3 x i32> asm "; def $0", "=s"() 13278 %vec1 = call <3 x i32> asm "; def $0", "=s"() 13279 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 3, i32 5> 13280 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 13281 ret void 13282} 13283 13284define void @s_shuffle_v4i32_v3i32__5_5_4_5() { 13285; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_5_4_5: 13286; GFX900: ; %bb.0: 13287; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13288; GFX900-NEXT: ;;#ASMSTART 13289; GFX900-NEXT: ; def s[4:6] 13290; GFX900-NEXT: ;;#ASMEND 13291; GFX900-NEXT: s_mov_b32 s8, s6 13292; GFX900-NEXT: s_mov_b32 s9, s6 13293; GFX900-NEXT: s_mov_b32 s10, s5 13294; GFX900-NEXT: s_mov_b32 s11, s6 13295; GFX900-NEXT: ;;#ASMSTART 13296; GFX900-NEXT: ; use s[8:11] 13297; GFX900-NEXT: ;;#ASMEND 13298; GFX900-NEXT: s_setpc_b64 s[30:31] 13299; 13300; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_5_4_5: 13301; GFX90A: ; %bb.0: 13302; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13303; GFX90A-NEXT: ;;#ASMSTART 13304; GFX90A-NEXT: ; def s[4:6] 13305; GFX90A-NEXT: ;;#ASMEND 13306; GFX90A-NEXT: s_mov_b32 s8, s6 13307; GFX90A-NEXT: s_mov_b32 s9, s6 13308; GFX90A-NEXT: s_mov_b32 s10, s5 13309; GFX90A-NEXT: s_mov_b32 s11, s6 13310; GFX90A-NEXT: ;;#ASMSTART 13311; GFX90A-NEXT: ; use s[8:11] 13312; 
GFX90A-NEXT: ;;#ASMEND 13313; GFX90A-NEXT: s_setpc_b64 s[30:31] 13314; 13315; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_4_5: 13316; GFX940: ; %bb.0: 13317; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13318; GFX940-NEXT: ;;#ASMSTART 13319; GFX940-NEXT: ; def s[0:2] 13320; GFX940-NEXT: ;;#ASMEND 13321; GFX940-NEXT: s_mov_b32 s8, s2 13322; GFX940-NEXT: s_mov_b32 s9, s2 13323; GFX940-NEXT: s_mov_b32 s10, s1 13324; GFX940-NEXT: s_mov_b32 s11, s2 13325; GFX940-NEXT: ;;#ASMSTART 13326; GFX940-NEXT: ; use s[8:11] 13327; GFX940-NEXT: ;;#ASMEND 13328; GFX940-NEXT: s_setpc_b64 s[30:31] 13329 %vec0 = call <3 x i32> asm "; def $0", "=s"() 13330 %vec1 = call <3 x i32> asm "; def $0", "=s"() 13331 %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 4, i32 5> 13332 call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf) 13333 ret void 13334} 13335;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: 13336; GFX90APLUS: {{.*}} 13337