1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 2; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s 3; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX90A %s 4; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX940 %s 5 6 7define void @v_shuffle_v3i32_v4i32__u_u_u(ptr addrspace(1) inreg %ptr) { 8; GFX9-LABEL: v_shuffle_v3i32_v4i32__u_u_u: 9; GFX9: ; %bb.0: 10; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11; GFX9-NEXT: s_setpc_b64 s[30:31] 12 %vec0 = call <4 x i32> asm "; def $0", "=v"() 13 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> poison 14 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 15 ret void 16} 17 18define void @v_shuffle_v3i32_v4i32__0_u_u(ptr addrspace(1) inreg %ptr) { 19; GFX900-LABEL: v_shuffle_v3i32_v4i32__0_u_u: 20; GFX900: ; %bb.0: 21; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22; GFX900-NEXT: v_mov_b32_e32 v4, 0 23; GFX900-NEXT: ;;#ASMSTART 24; GFX900-NEXT: ; def v[0:3] 25; GFX900-NEXT: ;;#ASMEND 26; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17] 27; GFX900-NEXT: s_waitcnt vmcnt(0) 28; GFX900-NEXT: s_setpc_b64 s[30:31] 29; 30; GFX90A-LABEL: v_shuffle_v3i32_v4i32__0_u_u: 31; GFX90A: ; %bb.0: 32; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 33; GFX90A-NEXT: v_mov_b32_e32 v4, 0 34; GFX90A-NEXT: ;;#ASMSTART 35; GFX90A-NEXT: ; def v[0:3] 36; GFX90A-NEXT: ;;#ASMEND 37; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17] 38; GFX90A-NEXT: s_waitcnt vmcnt(0) 39; GFX90A-NEXT: s_setpc_b64 s[30:31] 40; 41; GFX940-LABEL: v_shuffle_v3i32_v4i32__0_u_u: 42; GFX940: ; %bb.0: 43; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 44; GFX940-NEXT: v_mov_b32_e32 v4, 0 45; GFX940-NEXT: ;;#ASMSTART 46; GFX940-NEXT: ; def v[0:3] 47; GFX940-NEXT: ;;#ASMEND 48; GFX940-NEXT: global_store_dwordx3 v4, 
v[0:2], s[0:1] sc0 sc1 49; GFX940-NEXT: s_waitcnt vmcnt(0) 50; GFX940-NEXT: s_setpc_b64 s[30:31] 51 %vec0 = call <4 x i32> asm "; def $0", "=v"() 52 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> <i32 0, i32 poison, i32 poison> 53 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 54 ret void 55} 56 57define void @v_shuffle_v3i32_v4i32__1_u_u(ptr addrspace(1) inreg %ptr) { 58; GFX900-LABEL: v_shuffle_v3i32_v4i32__1_u_u: 59; GFX900: ; %bb.0: 60; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 61; GFX900-NEXT: v_mov_b32_e32 v4, 0 62; GFX900-NEXT: ;;#ASMSTART 63; GFX900-NEXT: ; def v[0:3] 64; GFX900-NEXT: ;;#ASMEND 65; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17] 66; GFX900-NEXT: s_waitcnt vmcnt(0) 67; GFX900-NEXT: s_setpc_b64 s[30:31] 68; 69; GFX90A-LABEL: v_shuffle_v3i32_v4i32__1_u_u: 70; GFX90A: ; %bb.0: 71; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 72; GFX90A-NEXT: ;;#ASMSTART 73; GFX90A-NEXT: ; def v[0:3] 74; GFX90A-NEXT: ;;#ASMEND 75; GFX90A-NEXT: v_mov_b32_e32 v4, 0 76; GFX90A-NEXT: v_mov_b32_e32 v0, v1 77; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17] 78; GFX90A-NEXT: s_waitcnt vmcnt(0) 79; GFX90A-NEXT: s_setpc_b64 s[30:31] 80; 81; GFX940-LABEL: v_shuffle_v3i32_v4i32__1_u_u: 82; GFX940: ; %bb.0: 83; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 84; GFX940-NEXT: ;;#ASMSTART 85; GFX940-NEXT: ; def v[0:3] 86; GFX940-NEXT: ;;#ASMEND 87; GFX940-NEXT: v_mov_b32_e32 v4, 0 88; GFX940-NEXT: v_mov_b32_e32 v0, v1 89; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 90; GFX940-NEXT: s_waitcnt vmcnt(0) 91; GFX940-NEXT: s_setpc_b64 s[30:31] 92 %vec0 = call <4 x i32> asm "; def $0", "=v"() 93 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> <i32 1, i32 poison, i32 poison> 94 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 95 ret void 96} 97 98define void @v_shuffle_v3i32_v4i32__2_u_u(ptr addrspace(1) inreg %ptr) { 99; GFX900-LABEL: v_shuffle_v3i32_v4i32__2_u_u: 100; GFX900: 
; %bb.0: 101; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 102; GFX900-NEXT: ;;#ASMSTART 103; GFX900-NEXT: ; def v[0:3] 104; GFX900-NEXT: ;;#ASMEND 105; GFX900-NEXT: v_mov_b32_e32 v4, 0 106; GFX900-NEXT: v_mov_b32_e32 v0, v2 107; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17] 108; GFX900-NEXT: s_waitcnt vmcnt(0) 109; GFX900-NEXT: s_setpc_b64 s[30:31] 110; 111; GFX90A-LABEL: v_shuffle_v3i32_v4i32__2_u_u: 112; GFX90A: ; %bb.0: 113; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 114; GFX90A-NEXT: ;;#ASMSTART 115; GFX90A-NEXT: ; def v[0:3] 116; GFX90A-NEXT: ;;#ASMEND 117; GFX90A-NEXT: v_mov_b32_e32 v4, 0 118; GFX90A-NEXT: v_mov_b32_e32 v0, v2 119; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17] 120; GFX90A-NEXT: s_waitcnt vmcnt(0) 121; GFX90A-NEXT: s_setpc_b64 s[30:31] 122; 123; GFX940-LABEL: v_shuffle_v3i32_v4i32__2_u_u: 124; GFX940: ; %bb.0: 125; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 126; GFX940-NEXT: ;;#ASMSTART 127; GFX940-NEXT: ; def v[0:3] 128; GFX940-NEXT: ;;#ASMEND 129; GFX940-NEXT: v_mov_b32_e32 v4, 0 130; GFX940-NEXT: v_mov_b32_e32 v0, v2 131; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 132; GFX940-NEXT: s_waitcnt vmcnt(0) 133; GFX940-NEXT: s_setpc_b64 s[30:31] 134 %vec0 = call <4 x i32> asm "; def $0", "=v"() 135 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> <i32 2, i32 poison, i32 poison> 136 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 137 ret void 138} 139 140define void @v_shuffle_v3i32_v4i32__3_u_u(ptr addrspace(1) inreg %ptr) { 141; GFX900-LABEL: v_shuffle_v3i32_v4i32__3_u_u: 142; GFX900: ; %bb.0: 143; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 144; GFX900-NEXT: ;;#ASMSTART 145; GFX900-NEXT: ; def v[0:3] 146; GFX900-NEXT: ;;#ASMEND 147; GFX900-NEXT: v_mov_b32_e32 v4, 0 148; GFX900-NEXT: v_mov_b32_e32 v0, v3 149; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17] 150; GFX900-NEXT: s_waitcnt vmcnt(0) 151; GFX900-NEXT: s_setpc_b64 s[30:31] 152; 153; 
GFX90A-LABEL: v_shuffle_v3i32_v4i32__3_u_u: 154; GFX90A: ; %bb.0: 155; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 156; GFX90A-NEXT: ;;#ASMSTART 157; GFX90A-NEXT: ; def v[0:3] 158; GFX90A-NEXT: ;;#ASMEND 159; GFX90A-NEXT: v_mov_b32_e32 v4, 0 160; GFX90A-NEXT: v_mov_b32_e32 v0, v3 161; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17] 162; GFX90A-NEXT: s_waitcnt vmcnt(0) 163; GFX90A-NEXT: s_setpc_b64 s[30:31] 164; 165; GFX940-LABEL: v_shuffle_v3i32_v4i32__3_u_u: 166; GFX940: ; %bb.0: 167; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 168; GFX940-NEXT: ;;#ASMSTART 169; GFX940-NEXT: ; def v[0:3] 170; GFX940-NEXT: ;;#ASMEND 171; GFX940-NEXT: v_mov_b32_e32 v4, 0 172; GFX940-NEXT: v_mov_b32_e32 v0, v3 173; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 174; GFX940-NEXT: s_waitcnt vmcnt(0) 175; GFX940-NEXT: s_setpc_b64 s[30:31] 176 %vec0 = call <4 x i32> asm "; def $0", "=v"() 177 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> <i32 3, i32 poison, i32 poison> 178 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 179 ret void 180} 181 182define void @v_shuffle_v3i32_v4i32__4_u_u(ptr addrspace(1) inreg %ptr) { 183; GFX9-LABEL: v_shuffle_v3i32_v4i32__4_u_u: 184; GFX9: ; %bb.0: 185; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 186; GFX9-NEXT: s_setpc_b64 s[30:31] 187 %vec0 = call <4 x i32> asm "; def $0", "=v"() 188 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> <i32 4, i32 poison, i32 poison> 189 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 190 ret void 191} 192 193define void @v_shuffle_v3i32_v4i32__5_u_u(ptr addrspace(1) inreg %ptr) { 194; GFX900-LABEL: v_shuffle_v3i32_v4i32__5_u_u: 195; GFX900: ; %bb.0: 196; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 197; GFX900-NEXT: v_mov_b32_e32 v4, 0 198; GFX900-NEXT: ;;#ASMSTART 199; GFX900-NEXT: ; def v[0:3] 200; GFX900-NEXT: ;;#ASMEND 201; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17] 202; GFX900-NEXT: s_waitcnt 
vmcnt(0) 203; GFX900-NEXT: s_setpc_b64 s[30:31] 204; 205; GFX90A-LABEL: v_shuffle_v3i32_v4i32__5_u_u: 206; GFX90A: ; %bb.0: 207; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 208; GFX90A-NEXT: ;;#ASMSTART 209; GFX90A-NEXT: ; def v[0:3] 210; GFX90A-NEXT: ;;#ASMEND 211; GFX90A-NEXT: v_mov_b32_e32 v4, 0 212; GFX90A-NEXT: v_mov_b32_e32 v0, v1 213; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17] 214; GFX90A-NEXT: s_waitcnt vmcnt(0) 215; GFX90A-NEXT: s_setpc_b64 s[30:31] 216; 217; GFX940-LABEL: v_shuffle_v3i32_v4i32__5_u_u: 218; GFX940: ; %bb.0: 219; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 220; GFX940-NEXT: ;;#ASMSTART 221; GFX940-NEXT: ; def v[0:3] 222; GFX940-NEXT: ;;#ASMEND 223; GFX940-NEXT: v_mov_b32_e32 v4, 0 224; GFX940-NEXT: v_mov_b32_e32 v0, v1 225; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 226; GFX940-NEXT: s_waitcnt vmcnt(0) 227; GFX940-NEXT: s_setpc_b64 s[30:31] 228 %vec0 = call <4 x i32> asm "; def $0", "=v"() 229 %vec1 = call <4 x i32> asm "; def $0", "=v"() 230 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 5, i32 poison, i32 poison> 231 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 232 ret void 233} 234 235define void @v_shuffle_v3i32_v4i32__6_u_u(ptr addrspace(1) inreg %ptr) { 236; GFX900-LABEL: v_shuffle_v3i32_v4i32__6_u_u: 237; GFX900: ; %bb.0: 238; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 239; GFX900-NEXT: ;;#ASMSTART 240; GFX900-NEXT: ; def v[0:3] 241; GFX900-NEXT: ;;#ASMEND 242; GFX900-NEXT: v_mov_b32_e32 v4, 0 243; GFX900-NEXT: v_mov_b32_e32 v0, v2 244; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17] 245; GFX900-NEXT: s_waitcnt vmcnt(0) 246; GFX900-NEXT: s_setpc_b64 s[30:31] 247; 248; GFX90A-LABEL: v_shuffle_v3i32_v4i32__6_u_u: 249; GFX90A: ; %bb.0: 250; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 251; GFX90A-NEXT: ;;#ASMSTART 252; GFX90A-NEXT: ; def v[0:3] 253; GFX90A-NEXT: ;;#ASMEND 254; GFX90A-NEXT: v_mov_b32_e32 v4, 0 255; GFX90A-NEXT: 
v_mov_b32_e32 v0, v2 256; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17] 257; GFX90A-NEXT: s_waitcnt vmcnt(0) 258; GFX90A-NEXT: s_setpc_b64 s[30:31] 259; 260; GFX940-LABEL: v_shuffle_v3i32_v4i32__6_u_u: 261; GFX940: ; %bb.0: 262; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 263; GFX940-NEXT: ;;#ASMSTART 264; GFX940-NEXT: ; def v[0:3] 265; GFX940-NEXT: ;;#ASMEND 266; GFX940-NEXT: v_mov_b32_e32 v4, 0 267; GFX940-NEXT: v_mov_b32_e32 v0, v2 268; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 269; GFX940-NEXT: s_waitcnt vmcnt(0) 270; GFX940-NEXT: s_setpc_b64 s[30:31] 271 %vec0 = call <4 x i32> asm "; def $0", "=v"() 272 %vec1 = call <4 x i32> asm "; def $0", "=v"() 273 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 6, i32 poison, i32 poison> 274 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 275 ret void 276} 277 278define void @v_shuffle_v3i32_v4i32__7_u_u(ptr addrspace(1) inreg %ptr) { 279; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_u_u: 280; GFX900: ; %bb.0: 281; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 282; GFX900-NEXT: ;;#ASMSTART 283; GFX900-NEXT: ; def v[0:3] 284; GFX900-NEXT: ;;#ASMEND 285; GFX900-NEXT: v_mov_b32_e32 v4, 0 286; GFX900-NEXT: v_mov_b32_e32 v0, v3 287; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17] 288; GFX900-NEXT: s_waitcnt vmcnt(0) 289; GFX900-NEXT: s_setpc_b64 s[30:31] 290; 291; GFX90A-LABEL: v_shuffle_v3i32_v4i32__7_u_u: 292; GFX90A: ; %bb.0: 293; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 294; GFX90A-NEXT: ;;#ASMSTART 295; GFX90A-NEXT: ; def v[0:3] 296; GFX90A-NEXT: ;;#ASMEND 297; GFX90A-NEXT: v_mov_b32_e32 v4, 0 298; GFX90A-NEXT: v_mov_b32_e32 v0, v3 299; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17] 300; GFX90A-NEXT: s_waitcnt vmcnt(0) 301; GFX90A-NEXT: s_setpc_b64 s[30:31] 302; 303; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_u_u: 304; GFX940: ; %bb.0: 305; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 306; GFX940-NEXT: ;;#ASMSTART 307; 
GFX940-NEXT: ; def v[0:3] 308; GFX940-NEXT: ;;#ASMEND 309; GFX940-NEXT: v_mov_b32_e32 v4, 0 310; GFX940-NEXT: v_mov_b32_e32 v0, v3 311; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 312; GFX940-NEXT: s_waitcnt vmcnt(0) 313; GFX940-NEXT: s_setpc_b64 s[30:31] 314 %vec0 = call <4 x i32> asm "; def $0", "=v"() 315 %vec1 = call <4 x i32> asm "; def $0", "=v"() 316 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 poison, i32 poison> 317 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 318 ret void 319} 320 321define void @v_shuffle_v3i32_v4i32__7_0_u(ptr addrspace(1) inreg %ptr) { 322; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_0_u: 323; GFX900: ; %bb.0: 324; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 325; GFX900-NEXT: ;;#ASMSTART 326; GFX900-NEXT: ; def v[0:3] 327; GFX900-NEXT: ;;#ASMEND 328; GFX900-NEXT: ;;#ASMSTART 329; GFX900-NEXT: ; def v[1:4] 330; GFX900-NEXT: ;;#ASMEND 331; GFX900-NEXT: v_mov_b32_e32 v5, 0 332; GFX900-NEXT: v_mov_b32_e32 v1, v4 333; GFX900-NEXT: v_mov_b32_e32 v2, v0 334; GFX900-NEXT: global_store_dwordx3 v5, v[1:3], s[16:17] 335; GFX900-NEXT: s_waitcnt vmcnt(0) 336; GFX900-NEXT: s_setpc_b64 s[30:31] 337; 338; GFX90A-LABEL: v_shuffle_v3i32_v4i32__7_0_u: 339; GFX90A: ; %bb.0: 340; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 341; GFX90A-NEXT: ;;#ASMSTART 342; GFX90A-NEXT: ; def v[0:3] 343; GFX90A-NEXT: ;;#ASMEND 344; GFX90A-NEXT: ;;#ASMSTART 345; GFX90A-NEXT: ; def v[2:5] 346; GFX90A-NEXT: ;;#ASMEND 347; GFX90A-NEXT: v_mov_b32_e32 v6, 0 348; GFX90A-NEXT: v_mov_b32_e32 v2, v5 349; GFX90A-NEXT: v_mov_b32_e32 v3, v0 350; GFX90A-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17] 351; GFX90A-NEXT: s_waitcnt vmcnt(0) 352; GFX90A-NEXT: s_setpc_b64 s[30:31] 353; 354; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_0_u: 355; GFX940: ; %bb.0: 356; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 357; GFX940-NEXT: ;;#ASMSTART 358; GFX940-NEXT: ; def v[0:3] 359; GFX940-NEXT: ;;#ASMEND 360; GFX940-NEXT: 
v_mov_b32_e32 v6, 0 361; GFX940-NEXT: ;;#ASMSTART 362; GFX940-NEXT: ; def v[2:5] 363; GFX940-NEXT: ;;#ASMEND 364; GFX940-NEXT: s_nop 0 365; GFX940-NEXT: v_mov_b32_e32 v2, v5 366; GFX940-NEXT: v_mov_b32_e32 v3, v0 367; GFX940-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] sc0 sc1 368; GFX940-NEXT: s_waitcnt vmcnt(0) 369; GFX940-NEXT: s_setpc_b64 s[30:31] 370 %vec0 = call <4 x i32> asm "; def $0", "=v"() 371 %vec1 = call <4 x i32> asm "; def $0", "=v"() 372 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 0, i32 poison> 373 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 374 ret void 375} 376 377define void @v_shuffle_v3i32_v4i32__7_1_u(ptr addrspace(1) inreg %ptr) { 378; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_1_u: 379; GFX900: ; %bb.0: 380; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 381; GFX900-NEXT: ;;#ASMSTART 382; GFX900-NEXT: ; def v[0:3] 383; GFX900-NEXT: ;;#ASMEND 384; GFX900-NEXT: v_mov_b32_e32 v6, 0 385; GFX900-NEXT: ;;#ASMSTART 386; GFX900-NEXT: ; def v[2:5] 387; GFX900-NEXT: ;;#ASMEND 388; GFX900-NEXT: v_mov_b32_e32 v0, v5 389; GFX900-NEXT: global_store_dwordx3 v6, v[0:2], s[16:17] 390; GFX900-NEXT: s_waitcnt vmcnt(0) 391; GFX900-NEXT: s_setpc_b64 s[30:31] 392; 393; GFX90A-LABEL: v_shuffle_v3i32_v4i32__7_1_u: 394; GFX90A: ; %bb.0: 395; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 396; GFX90A-NEXT: ;;#ASMSTART 397; GFX90A-NEXT: ; def v[0:3] 398; GFX90A-NEXT: ;;#ASMEND 399; GFX90A-NEXT: v_mov_b32_e32 v6, 0 400; GFX90A-NEXT: ;;#ASMSTART 401; GFX90A-NEXT: ; def v[2:5] 402; GFX90A-NEXT: ;;#ASMEND 403; GFX90A-NEXT: v_mov_b32_e32 v0, v5 404; GFX90A-NEXT: global_store_dwordx3 v6, v[0:2], s[16:17] 405; GFX90A-NEXT: s_waitcnt vmcnt(0) 406; GFX90A-NEXT: s_setpc_b64 s[30:31] 407; 408; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_1_u: 409; GFX940: ; %bb.0: 410; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 411; GFX940-NEXT: ;;#ASMSTART 412; GFX940-NEXT: ; def v[0:3] 413; GFX940-NEXT: ;;#ASMEND 414; GFX940-NEXT: 
v_mov_b32_e32 v6, 0 415; GFX940-NEXT: ;;#ASMSTART 416; GFX940-NEXT: ; def v[2:5] 417; GFX940-NEXT: ;;#ASMEND 418; GFX940-NEXT: s_nop 0 419; GFX940-NEXT: v_mov_b32_e32 v0, v5 420; GFX940-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] sc0 sc1 421; GFX940-NEXT: s_waitcnt vmcnt(0) 422; GFX940-NEXT: s_setpc_b64 s[30:31] 423 %vec0 = call <4 x i32> asm "; def $0", "=v"() 424 %vec1 = call <4 x i32> asm "; def $0", "=v"() 425 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 1, i32 poison> 426 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 427 ret void 428} 429 430define void @v_shuffle_v3i32_v4i32__7_2_u(ptr addrspace(1) inreg %ptr) { 431; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_2_u: 432; GFX900: ; %bb.0: 433; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 434; GFX900-NEXT: ;;#ASMSTART 435; GFX900-NEXT: ; def v[0:3] 436; GFX900-NEXT: ;;#ASMEND 437; GFX900-NEXT: v_mov_b32_e32 v7, 0 438; GFX900-NEXT: ;;#ASMSTART 439; GFX900-NEXT: ; def v[3:6] 440; GFX900-NEXT: ;;#ASMEND 441; GFX900-NEXT: v_mov_b32_e32 v1, v6 442; GFX900-NEXT: global_store_dwordx3 v7, v[1:3], s[16:17] 443; GFX900-NEXT: s_waitcnt vmcnt(0) 444; GFX900-NEXT: s_setpc_b64 s[30:31] 445; 446; GFX90A-LABEL: v_shuffle_v3i32_v4i32__7_2_u: 447; GFX90A: ; %bb.0: 448; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 449; GFX90A-NEXT: ;;#ASMSTART 450; GFX90A-NEXT: ; def v[0:3] 451; GFX90A-NEXT: ;;#ASMEND 452; GFX90A-NEXT: v_mov_b32_e32 v8, 0 453; GFX90A-NEXT: ;;#ASMSTART 454; GFX90A-NEXT: ; def v[4:7] 455; GFX90A-NEXT: ;;#ASMEND 456; GFX90A-NEXT: v_mov_b32_e32 v0, v7 457; GFX90A-NEXT: v_mov_b32_e32 v1, v2 458; GFX90A-NEXT: global_store_dwordx3 v8, v[0:2], s[16:17] 459; GFX90A-NEXT: s_waitcnt vmcnt(0) 460; GFX90A-NEXT: s_setpc_b64 s[30:31] 461; 462; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_2_u: 463; GFX940: ; %bb.0: 464; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 465; GFX940-NEXT: ;;#ASMSTART 466; GFX940-NEXT: ; def v[0:3] 467; GFX940-NEXT: ;;#ASMEND 468; GFX940-NEXT: 
v_mov_b32_e32 v8, 0 469; GFX940-NEXT: ;;#ASMSTART 470; GFX940-NEXT: ; def v[4:7] 471; GFX940-NEXT: ;;#ASMEND 472; GFX940-NEXT: v_mov_b32_e32 v1, v2 473; GFX940-NEXT: v_mov_b32_e32 v0, v7 474; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 475; GFX940-NEXT: s_waitcnt vmcnt(0) 476; GFX940-NEXT: s_setpc_b64 s[30:31] 477 %vec0 = call <4 x i32> asm "; def $0", "=v"() 478 %vec1 = call <4 x i32> asm "; def $0", "=v"() 479 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 2, i32 poison> 480 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 481 ret void 482} 483 484define void @v_shuffle_v3i32_v4i32__7_3_u(ptr addrspace(1) inreg %ptr) { 485; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_3_u: 486; GFX900: ; %bb.0: 487; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 488; GFX900-NEXT: ;;#ASMSTART 489; GFX900-NEXT: ; def v[0:3] 490; GFX900-NEXT: ;;#ASMEND 491; GFX900-NEXT: v_mov_b32_e32 v8, 0 492; GFX900-NEXT: ;;#ASMSTART 493; GFX900-NEXT: ; def v[4:7] 494; GFX900-NEXT: ;;#ASMEND 495; GFX900-NEXT: v_mov_b32_e32 v0, v7 496; GFX900-NEXT: v_mov_b32_e32 v1, v3 497; GFX900-NEXT: global_store_dwordx3 v8, v[0:2], s[16:17] 498; GFX900-NEXT: s_waitcnt vmcnt(0) 499; GFX900-NEXT: s_setpc_b64 s[30:31] 500; 501; GFX90A-LABEL: v_shuffle_v3i32_v4i32__7_3_u: 502; GFX90A: ; %bb.0: 503; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 504; GFX90A-NEXT: ;;#ASMSTART 505; GFX90A-NEXT: ; def v[0:3] 506; GFX90A-NEXT: ;;#ASMEND 507; GFX90A-NEXT: v_mov_b32_e32 v8, 0 508; GFX90A-NEXT: ;;#ASMSTART 509; GFX90A-NEXT: ; def v[4:7] 510; GFX90A-NEXT: ;;#ASMEND 511; GFX90A-NEXT: v_mov_b32_e32 v0, v7 512; GFX90A-NEXT: v_mov_b32_e32 v1, v3 513; GFX90A-NEXT: global_store_dwordx3 v8, v[0:2], s[16:17] 514; GFX90A-NEXT: s_waitcnt vmcnt(0) 515; GFX90A-NEXT: s_setpc_b64 s[30:31] 516; 517; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_3_u: 518; GFX940: ; %bb.0: 519; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 520; GFX940-NEXT: ;;#ASMSTART 521; GFX940-NEXT: ; def 
v[0:3] 522; GFX940-NEXT: ;;#ASMEND 523; GFX940-NEXT: v_mov_b32_e32 v8, 0 524; GFX940-NEXT: ;;#ASMSTART 525; GFX940-NEXT: ; def v[4:7] 526; GFX940-NEXT: ;;#ASMEND 527; GFX940-NEXT: v_mov_b32_e32 v1, v3 528; GFX940-NEXT: v_mov_b32_e32 v0, v7 529; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 530; GFX940-NEXT: s_waitcnt vmcnt(0) 531; GFX940-NEXT: s_setpc_b64 s[30:31] 532 %vec0 = call <4 x i32> asm "; def $0", "=v"() 533 %vec1 = call <4 x i32> asm "; def $0", "=v"() 534 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 3, i32 poison> 535 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 536 ret void 537} 538 539define void @v_shuffle_v3i32_v4i32__7_4_u(ptr addrspace(1) inreg %ptr) { 540; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_4_u: 541; GFX900: ; %bb.0: 542; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 543; GFX900-NEXT: ;;#ASMSTART 544; GFX900-NEXT: ; def v[0:3] 545; GFX900-NEXT: ;;#ASMEND 546; GFX900-NEXT: v_mov_b32_e32 v4, 0 547; GFX900-NEXT: v_mov_b32_e32 v1, v3 548; GFX900-NEXT: v_mov_b32_e32 v2, v0 549; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17] 550; GFX900-NEXT: s_waitcnt vmcnt(0) 551; GFX900-NEXT: s_setpc_b64 s[30:31] 552; 553; GFX90A-LABEL: v_shuffle_v3i32_v4i32__7_4_u: 554; GFX90A: ; %bb.0: 555; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 556; GFX90A-NEXT: ;;#ASMSTART 557; GFX90A-NEXT: ; def v[0:3] 558; GFX90A-NEXT: ;;#ASMEND 559; GFX90A-NEXT: v_mov_b32_e32 v4, 0 560; GFX90A-NEXT: v_mov_b32_e32 v2, v3 561; GFX90A-NEXT: v_mov_b32_e32 v3, v0 562; GFX90A-NEXT: global_store_dwordx3 v4, v[2:4], s[16:17] 563; GFX90A-NEXT: s_waitcnt vmcnt(0) 564; GFX90A-NEXT: s_setpc_b64 s[30:31] 565; 566; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_4_u: 567; GFX940: ; %bb.0: 568; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 569; GFX940-NEXT: ;;#ASMSTART 570; GFX940-NEXT: ; def v[0:3] 571; GFX940-NEXT: ;;#ASMEND 572; GFX940-NEXT: v_mov_b32_e32 v4, 0 573; GFX940-NEXT: v_mov_b32_e32 v2, v3 574; GFX940-NEXT: 
v_mov_b32_e32 v3, v0 575; GFX940-NEXT: global_store_dwordx3 v4, v[2:4], s[0:1] sc0 sc1 576; GFX940-NEXT: s_waitcnt vmcnt(0) 577; GFX940-NEXT: s_setpc_b64 s[30:31] 578 %vec0 = call <4 x i32> asm "; def $0", "=v"() 579 %vec1 = call <4 x i32> asm "; def $0", "=v"() 580 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 4, i32 poison> 581 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 582 ret void 583} 584 585define void @v_shuffle_v3i32_v4i32__7_5_u(ptr addrspace(1) inreg %ptr) { 586; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_5_u: 587; GFX900: ; %bb.0: 588; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 589; GFX900-NEXT: ;;#ASMSTART 590; GFX900-NEXT: ; def v[0:3] 591; GFX900-NEXT: ;;#ASMEND 592; GFX900-NEXT: v_mov_b32_e32 v4, 0 593; GFX900-NEXT: v_mov_b32_e32 v0, v3 594; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17] 595; GFX900-NEXT: s_waitcnt vmcnt(0) 596; GFX900-NEXT: s_setpc_b64 s[30:31] 597; 598; GFX90A-LABEL: v_shuffle_v3i32_v4i32__7_5_u: 599; GFX90A: ; %bb.0: 600; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 601; GFX90A-NEXT: ;;#ASMSTART 602; GFX90A-NEXT: ; def v[0:3] 603; GFX90A-NEXT: ;;#ASMEND 604; GFX90A-NEXT: v_mov_b32_e32 v4, 0 605; GFX90A-NEXT: v_mov_b32_e32 v0, v3 606; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17] 607; GFX90A-NEXT: s_waitcnt vmcnt(0) 608; GFX90A-NEXT: s_setpc_b64 s[30:31] 609; 610; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_5_u: 611; GFX940: ; %bb.0: 612; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 613; GFX940-NEXT: ;;#ASMSTART 614; GFX940-NEXT: ; def v[0:3] 615; GFX940-NEXT: ;;#ASMEND 616; GFX940-NEXT: v_mov_b32_e32 v4, 0 617; GFX940-NEXT: v_mov_b32_e32 v0, v3 618; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 619; GFX940-NEXT: s_waitcnt vmcnt(0) 620; GFX940-NEXT: s_setpc_b64 s[30:31] 621 %vec0 = call <4 x i32> asm "; def $0", "=v"() 622 %vec1 = call <4 x i32> asm "; def $0", "=v"() 623 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> 
<i32 7, i32 5, i32 poison> 624 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 625 ret void 626} 627 628define void @v_shuffle_v3i32_v4i32__7_6_u(ptr addrspace(1) inreg %ptr) { 629; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_6_u: 630; GFX900: ; %bb.0: 631; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 632; GFX900-NEXT: ;;#ASMSTART 633; GFX900-NEXT: ; def v[0:3] 634; GFX900-NEXT: ;;#ASMEND 635; GFX900-NEXT: v_mov_b32_e32 v4, 0 636; GFX900-NEXT: v_mov_b32_e32 v1, v3 637; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17] 638; GFX900-NEXT: s_waitcnt vmcnt(0) 639; GFX900-NEXT: s_setpc_b64 s[30:31] 640; 641; GFX90A-LABEL: v_shuffle_v3i32_v4i32__7_6_u: 642; GFX90A: ; %bb.0: 643; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 644; GFX90A-NEXT: ;;#ASMSTART 645; GFX90A-NEXT: ; def v[0:3] 646; GFX90A-NEXT: ;;#ASMEND 647; GFX90A-NEXT: v_mov_b32_e32 v4, 0 648; GFX90A-NEXT: v_mov_b32_e32 v0, v3 649; GFX90A-NEXT: v_mov_b32_e32 v1, v2 650; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17] 651; GFX90A-NEXT: s_waitcnt vmcnt(0) 652; GFX90A-NEXT: s_setpc_b64 s[30:31] 653; 654; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_6_u: 655; GFX940: ; %bb.0: 656; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 657; GFX940-NEXT: ;;#ASMSTART 658; GFX940-NEXT: ; def v[0:3] 659; GFX940-NEXT: ;;#ASMEND 660; GFX940-NEXT: v_mov_b32_e32 v4, 0 661; GFX940-NEXT: v_mov_b32_e32 v0, v3 662; GFX940-NEXT: v_mov_b32_e32 v1, v2 663; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 664; GFX940-NEXT: s_waitcnt vmcnt(0) 665; GFX940-NEXT: s_setpc_b64 s[30:31] 666 %vec0 = call <4 x i32> asm "; def $0", "=v"() 667 %vec1 = call <4 x i32> asm "; def $0", "=v"() 668 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 6, i32 poison> 669 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 670 ret void 671} 672 673define void @v_shuffle_v3i32_v4i32__7_7_u(ptr addrspace(1) inreg %ptr) { 674; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_7_u: 675; GFX900: ; %bb.0: 
676; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 677; GFX900-NEXT: ;;#ASMSTART 678; GFX900-NEXT: ; def v[0:3] 679; GFX900-NEXT: ;;#ASMEND 680; GFX900-NEXT: v_mov_b32_e32 v4, 0 681; GFX900-NEXT: v_mov_b32_e32 v0, v3 682; GFX900-NEXT: v_mov_b32_e32 v1, v3 683; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17] 684; GFX900-NEXT: s_waitcnt vmcnt(0) 685; GFX900-NEXT: s_setpc_b64 s[30:31] 686; 687; GFX90A-LABEL: v_shuffle_v3i32_v4i32__7_7_u: 688; GFX90A: ; %bb.0: 689; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 690; GFX90A-NEXT: ;;#ASMSTART 691; GFX90A-NEXT: ; def v[0:3] 692; GFX90A-NEXT: ;;#ASMEND 693; GFX90A-NEXT: v_mov_b32_e32 v4, 0 694; GFX90A-NEXT: v_mov_b32_e32 v0, v3 695; GFX90A-NEXT: v_mov_b32_e32 v1, v3 696; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17] 697; GFX90A-NEXT: s_waitcnt vmcnt(0) 698; GFX90A-NEXT: s_setpc_b64 s[30:31] 699; 700; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_7_u: 701; GFX940: ; %bb.0: 702; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 703; GFX940-NEXT: ;;#ASMSTART 704; GFX940-NEXT: ; def v[0:3] 705; GFX940-NEXT: ;;#ASMEND 706; GFX940-NEXT: v_mov_b32_e32 v4, 0 707; GFX940-NEXT: v_mov_b32_e32 v0, v3 708; GFX940-NEXT: v_mov_b32_e32 v1, v3 709; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 710; GFX940-NEXT: s_waitcnt vmcnt(0) 711; GFX940-NEXT: s_setpc_b64 s[30:31] 712 %vec0 = call <4 x i32> asm "; def $0", "=v"() 713 %vec1 = call <4 x i32> asm "; def $0", "=v"() 714 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 7, i32 poison> 715 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 716 ret void 717} 718 719define void @v_shuffle_v3i32_v4i32__7_7_0(ptr addrspace(1) inreg %ptr) { 720; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_7_0: 721; GFX900: ; %bb.0: 722; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 723; GFX900-NEXT: ;;#ASMSTART 724; GFX900-NEXT: ; def v[0:3] 725; GFX900-NEXT: ;;#ASMEND 726; GFX900-NEXT: ;;#ASMSTART 727; GFX900-NEXT: ; def v[1:4] 728; 
GFX900-NEXT: ;;#ASMEND 729; GFX900-NEXT: v_mov_b32_e32 v5, 0 730; GFX900-NEXT: v_mov_b32_e32 v1, v4 731; GFX900-NEXT: v_mov_b32_e32 v2, v4 732; GFX900-NEXT: v_mov_b32_e32 v3, v0 733; GFX900-NEXT: global_store_dwordx3 v5, v[1:3], s[16:17] 734; GFX900-NEXT: s_waitcnt vmcnt(0) 735; GFX900-NEXT: s_setpc_b64 s[30:31] 736; 737; GFX90A-LABEL: v_shuffle_v3i32_v4i32__7_7_0: 738; GFX90A: ; %bb.0: 739; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 740; GFX90A-NEXT: ;;#ASMSTART 741; GFX90A-NEXT: ; def v[0:3] 742; GFX90A-NEXT: ;;#ASMEND 743; GFX90A-NEXT: ;;#ASMSTART 744; GFX90A-NEXT: ; def v[2:5] 745; GFX90A-NEXT: ;;#ASMEND 746; GFX90A-NEXT: v_mov_b32_e32 v6, 0 747; GFX90A-NEXT: v_mov_b32_e32 v2, v5 748; GFX90A-NEXT: v_mov_b32_e32 v3, v5 749; GFX90A-NEXT: v_mov_b32_e32 v4, v0 750; GFX90A-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17] 751; GFX90A-NEXT: s_waitcnt vmcnt(0) 752; GFX90A-NEXT: s_setpc_b64 s[30:31] 753; 754; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_7_0: 755; GFX940: ; %bb.0: 756; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 757; GFX940-NEXT: ;;#ASMSTART 758; GFX940-NEXT: ; def v[0:3] 759; GFX940-NEXT: ;;#ASMEND 760; GFX940-NEXT: v_mov_b32_e32 v6, 0 761; GFX940-NEXT: ;;#ASMSTART 762; GFX940-NEXT: ; def v[2:5] 763; GFX940-NEXT: ;;#ASMEND 764; GFX940-NEXT: s_nop 0 765; GFX940-NEXT: v_mov_b32_e32 v2, v5 766; GFX940-NEXT: v_mov_b32_e32 v3, v5 767; GFX940-NEXT: v_mov_b32_e32 v4, v0 768; GFX940-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] sc0 sc1 769; GFX940-NEXT: s_waitcnt vmcnt(0) 770; GFX940-NEXT: s_setpc_b64 s[30:31] 771 %vec0 = call <4 x i32> asm "; def $0", "=v"() 772 %vec1 = call <4 x i32> asm "; def $0", "=v"() 773 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 7, i32 0> 774 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 775 ret void 776} 777 778define void @v_shuffle_v3i32_v4i32__7_7_1(ptr addrspace(1) inreg %ptr) { 779; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_7_1: 780; GFX900: ; %bb.0: 781; GFX900-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) 782; GFX900-NEXT: ;;#ASMSTART 783; GFX900-NEXT: ; def v[0:3] 784; GFX900-NEXT: ;;#ASMEND 785; GFX900-NEXT: ;;#ASMSTART 786; GFX900-NEXT: ; def v[2:5] 787; GFX900-NEXT: ;;#ASMEND 788; GFX900-NEXT: v_mov_b32_e32 v6, 0 789; GFX900-NEXT: v_mov_b32_e32 v2, v5 790; GFX900-NEXT: v_mov_b32_e32 v3, v5 791; GFX900-NEXT: v_mov_b32_e32 v4, v1 792; GFX900-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17] 793; GFX900-NEXT: s_waitcnt vmcnt(0) 794; GFX900-NEXT: s_setpc_b64 s[30:31] 795; 796; GFX90A-LABEL: v_shuffle_v3i32_v4i32__7_7_1: 797; GFX90A: ; %bb.0: 798; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 799; GFX90A-NEXT: ;;#ASMSTART 800; GFX90A-NEXT: ; def v[0:3] 801; GFX90A-NEXT: ;;#ASMEND 802; GFX90A-NEXT: ;;#ASMSTART 803; GFX90A-NEXT: ; def v[2:5] 804; GFX90A-NEXT: ;;#ASMEND 805; GFX90A-NEXT: v_mov_b32_e32 v6, 0 806; GFX90A-NEXT: v_mov_b32_e32 v2, v5 807; GFX90A-NEXT: v_mov_b32_e32 v3, v5 808; GFX90A-NEXT: v_mov_b32_e32 v4, v1 809; GFX90A-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17] 810; GFX90A-NEXT: s_waitcnt vmcnt(0) 811; GFX90A-NEXT: s_setpc_b64 s[30:31] 812; 813; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_7_1: 814; GFX940: ; %bb.0: 815; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 816; GFX940-NEXT: ;;#ASMSTART 817; GFX940-NEXT: ; def v[0:3] 818; GFX940-NEXT: ;;#ASMEND 819; GFX940-NEXT: v_mov_b32_e32 v6, 0 820; GFX940-NEXT: ;;#ASMSTART 821; GFX940-NEXT: ; def v[2:5] 822; GFX940-NEXT: ;;#ASMEND 823; GFX940-NEXT: s_nop 0 824; GFX940-NEXT: v_mov_b32_e32 v2, v5 825; GFX940-NEXT: v_mov_b32_e32 v3, v5 826; GFX940-NEXT: v_mov_b32_e32 v4, v1 827; GFX940-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] sc0 sc1 828; GFX940-NEXT: s_waitcnt vmcnt(0) 829; GFX940-NEXT: s_setpc_b64 s[30:31] 830 %vec0 = call <4 x i32> asm "; def $0", "=v"() 831 %vec1 = call <4 x i32> asm "; def $0", "=v"() 832 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 7, i32 1> 833 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 834 ret 
void 835} 836 837define void @v_shuffle_v3i32_v4i32__7_7_2(ptr addrspace(1) inreg %ptr) { 838; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_7_2: 839; GFX900: ; %bb.0: 840; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 841; GFX900-NEXT: ;;#ASMSTART 842; GFX900-NEXT: ; def v[0:3] 843; GFX900-NEXT: ;;#ASMEND 844; GFX900-NEXT: v_mov_b32_e32 v7, 0 845; GFX900-NEXT: ;;#ASMSTART 846; GFX900-NEXT: ; def v[3:6] 847; GFX900-NEXT: ;;#ASMEND 848; GFX900-NEXT: v_mov_b32_e32 v0, v6 849; GFX900-NEXT: v_mov_b32_e32 v1, v6 850; GFX900-NEXT: global_store_dwordx3 v7, v[0:2], s[16:17] 851; GFX900-NEXT: s_waitcnt vmcnt(0) 852; GFX900-NEXT: s_setpc_b64 s[30:31] 853; 854; GFX90A-LABEL: v_shuffle_v3i32_v4i32__7_7_2: 855; GFX90A: ; %bb.0: 856; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 857; GFX90A-NEXT: ;;#ASMSTART 858; GFX90A-NEXT: ; def v[0:3] 859; GFX90A-NEXT: ;;#ASMEND 860; GFX90A-NEXT: v_mov_b32_e32 v8, 0 861; GFX90A-NEXT: ;;#ASMSTART 862; GFX90A-NEXT: ; def v[4:7] 863; GFX90A-NEXT: ;;#ASMEND 864; GFX90A-NEXT: v_mov_b32_e32 v0, v7 865; GFX90A-NEXT: v_mov_b32_e32 v1, v7 866; GFX90A-NEXT: global_store_dwordx3 v8, v[0:2], s[16:17] 867; GFX90A-NEXT: s_waitcnt vmcnt(0) 868; GFX90A-NEXT: s_setpc_b64 s[30:31] 869; 870; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_7_2: 871; GFX940: ; %bb.0: 872; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 873; GFX940-NEXT: ;;#ASMSTART 874; GFX940-NEXT: ; def v[0:3] 875; GFX940-NEXT: ;;#ASMEND 876; GFX940-NEXT: v_mov_b32_e32 v8, 0 877; GFX940-NEXT: ;;#ASMSTART 878; GFX940-NEXT: ; def v[4:7] 879; GFX940-NEXT: ;;#ASMEND 880; GFX940-NEXT: s_nop 0 881; GFX940-NEXT: v_mov_b32_e32 v0, v7 882; GFX940-NEXT: v_mov_b32_e32 v1, v7 883; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 884; GFX940-NEXT: s_waitcnt vmcnt(0) 885; GFX940-NEXT: s_setpc_b64 s[30:31] 886 %vec0 = call <4 x i32> asm "; def $0", "=v"() 887 %vec1 = call <4 x i32> asm "; def $0", "=v"() 888 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 7, i32 
2> 889 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 890 ret void 891} 892 893define void @v_shuffle_v3i32_v4i32__7_7_3(ptr addrspace(1) inreg %ptr) { 894; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_7_3: 895; GFX900: ; %bb.0: 896; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 897; GFX900-NEXT: ;;#ASMSTART 898; GFX900-NEXT: ; def v[0:3] 899; GFX900-NEXT: ;;#ASMEND 900; GFX900-NEXT: v_mov_b32_e32 v8, 0 901; GFX900-NEXT: ;;#ASMSTART 902; GFX900-NEXT: ; def v[4:7] 903; GFX900-NEXT: ;;#ASMEND 904; GFX900-NEXT: v_mov_b32_e32 v1, v7 905; GFX900-NEXT: v_mov_b32_e32 v2, v7 906; GFX900-NEXT: global_store_dwordx3 v8, v[1:3], s[16:17] 907; GFX900-NEXT: s_waitcnt vmcnt(0) 908; GFX900-NEXT: s_setpc_b64 s[30:31] 909; 910; GFX90A-LABEL: v_shuffle_v3i32_v4i32__7_7_3: 911; GFX90A: ; %bb.0: 912; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 913; GFX90A-NEXT: ;;#ASMSTART 914; GFX90A-NEXT: ; def v[0:3] 915; GFX90A-NEXT: ;;#ASMEND 916; GFX90A-NEXT: v_mov_b32_e32 v8, 0 917; GFX90A-NEXT: ;;#ASMSTART 918; GFX90A-NEXT: ; def v[4:7] 919; GFX90A-NEXT: ;;#ASMEND 920; GFX90A-NEXT: v_mov_b32_e32 v0, v7 921; GFX90A-NEXT: v_mov_b32_e32 v1, v7 922; GFX90A-NEXT: v_mov_b32_e32 v2, v3 923; GFX90A-NEXT: global_store_dwordx3 v8, v[0:2], s[16:17] 924; GFX90A-NEXT: s_waitcnt vmcnt(0) 925; GFX90A-NEXT: s_setpc_b64 s[30:31] 926; 927; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_7_3: 928; GFX940: ; %bb.0: 929; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 930; GFX940-NEXT: ;;#ASMSTART 931; GFX940-NEXT: ; def v[0:3] 932; GFX940-NEXT: ;;#ASMEND 933; GFX940-NEXT: v_mov_b32_e32 v8, 0 934; GFX940-NEXT: ;;#ASMSTART 935; GFX940-NEXT: ; def v[4:7] 936; GFX940-NEXT: ;;#ASMEND 937; GFX940-NEXT: v_mov_b32_e32 v2, v3 938; GFX940-NEXT: v_mov_b32_e32 v0, v7 939; GFX940-NEXT: v_mov_b32_e32 v1, v7 940; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 941; GFX940-NEXT: s_waitcnt vmcnt(0) 942; GFX940-NEXT: s_setpc_b64 s[30:31] 943 %vec0 = call <4 x i32> asm "; def $0", "=v"() 944 %vec1 = call 
<4 x i32> asm "; def $0", "=v"() 945 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 7, i32 3> 946 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 947 ret void 948} 949 950define void @v_shuffle_v3i32_v4i32__7_7_4(ptr addrspace(1) inreg %ptr) { 951; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_7_4: 952; GFX900: ; %bb.0: 953; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 954; GFX900-NEXT: ;;#ASMSTART 955; GFX900-NEXT: ; def v[0:3] 956; GFX900-NEXT: ;;#ASMEND 957; GFX900-NEXT: v_mov_b32_e32 v4, 0 958; GFX900-NEXT: v_mov_b32_e32 v1, v3 959; GFX900-NEXT: v_mov_b32_e32 v2, v3 960; GFX900-NEXT: v_mov_b32_e32 v3, v0 961; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17] 962; GFX900-NEXT: s_waitcnt vmcnt(0) 963; GFX900-NEXT: s_setpc_b64 s[30:31] 964; 965; GFX90A-LABEL: v_shuffle_v3i32_v4i32__7_7_4: 966; GFX90A: ; %bb.0: 967; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 968; GFX90A-NEXT: ;;#ASMSTART 969; GFX90A-NEXT: ; def v[0:3] 970; GFX90A-NEXT: ;;#ASMEND 971; GFX90A-NEXT: v_mov_b32_e32 v5, 0 972; GFX90A-NEXT: v_mov_b32_e32 v2, v3 973; GFX90A-NEXT: v_mov_b32_e32 v4, v0 974; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17] 975; GFX90A-NEXT: s_waitcnt vmcnt(0) 976; GFX90A-NEXT: s_setpc_b64 s[30:31] 977; 978; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_7_4: 979; GFX940: ; %bb.0: 980; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 981; GFX940-NEXT: ;;#ASMSTART 982; GFX940-NEXT: ; def v[0:3] 983; GFX940-NEXT: ;;#ASMEND 984; GFX940-NEXT: v_mov_b32_e32 v5, 0 985; GFX940-NEXT: v_mov_b32_e32 v2, v3 986; GFX940-NEXT: v_mov_b32_e32 v4, v0 987; GFX940-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] sc0 sc1 988; GFX940-NEXT: s_waitcnt vmcnt(0) 989; GFX940-NEXT: s_setpc_b64 s[30:31] 990 %vec0 = call <4 x i32> asm "; def $0", "=v"() 991 %vec1 = call <4 x i32> asm "; def $0", "=v"() 992 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 7, i32 4> 993 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 
994 ret void 995} 996 997define void @v_shuffle_v3i32_v4i32__7_7_5(ptr addrspace(1) inreg %ptr) { 998; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_7_5: 999; GFX900: ; %bb.0: 1000; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1001; GFX900-NEXT: ;;#ASMSTART 1002; GFX900-NEXT: ; def v[0:3] 1003; GFX900-NEXT: ;;#ASMEND 1004; GFX900-NEXT: v_mov_b32_e32 v5, 0 1005; GFX900-NEXT: v_mov_b32_e32 v2, v3 1006; GFX900-NEXT: v_mov_b32_e32 v4, v1 1007; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17] 1008; GFX900-NEXT: s_waitcnt vmcnt(0) 1009; GFX900-NEXT: s_setpc_b64 s[30:31] 1010; 1011; GFX90A-LABEL: v_shuffle_v3i32_v4i32__7_7_5: 1012; GFX90A: ; %bb.0: 1013; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1014; GFX90A-NEXT: ;;#ASMSTART 1015; GFX90A-NEXT: ; def v[0:3] 1016; GFX90A-NEXT: ;;#ASMEND 1017; GFX90A-NEXT: v_mov_b32_e32 v5, 0 1018; GFX90A-NEXT: v_mov_b32_e32 v2, v3 1019; GFX90A-NEXT: v_mov_b32_e32 v4, v1 1020; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17] 1021; GFX90A-NEXT: s_waitcnt vmcnt(0) 1022; GFX90A-NEXT: s_setpc_b64 s[30:31] 1023; 1024; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_7_5: 1025; GFX940: ; %bb.0: 1026; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1027; GFX940-NEXT: ;;#ASMSTART 1028; GFX940-NEXT: ; def v[0:3] 1029; GFX940-NEXT: ;;#ASMEND 1030; GFX940-NEXT: v_mov_b32_e32 v5, 0 1031; GFX940-NEXT: v_mov_b32_e32 v2, v3 1032; GFX940-NEXT: v_mov_b32_e32 v4, v1 1033; GFX940-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] sc0 sc1 1034; GFX940-NEXT: s_waitcnt vmcnt(0) 1035; GFX940-NEXT: s_setpc_b64 s[30:31] 1036 %vec0 = call <4 x i32> asm "; def $0", "=v"() 1037 %vec1 = call <4 x i32> asm "; def $0", "=v"() 1038 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 7, i32 5> 1039 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 1040 ret void 1041} 1042 1043define void @v_shuffle_v3i32_v4i32__7_7_6(ptr addrspace(1) inreg %ptr) { 1044; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_7_6: 1045; GFX900: ; %bb.0: 1046; 
GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1047; GFX900-NEXT: ;;#ASMSTART 1048; GFX900-NEXT: ; def v[0:3] 1049; GFX900-NEXT: ;;#ASMEND 1050; GFX900-NEXT: v_mov_b32_e32 v4, 0 1051; GFX900-NEXT: v_mov_b32_e32 v0, v3 1052; GFX900-NEXT: v_mov_b32_e32 v1, v3 1053; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17] 1054; GFX900-NEXT: s_waitcnt vmcnt(0) 1055; GFX900-NEXT: s_setpc_b64 s[30:31] 1056; 1057; GFX90A-LABEL: v_shuffle_v3i32_v4i32__7_7_6: 1058; GFX90A: ; %bb.0: 1059; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1060; GFX90A-NEXT: ;;#ASMSTART 1061; GFX90A-NEXT: ; def v[0:3] 1062; GFX90A-NEXT: ;;#ASMEND 1063; GFX90A-NEXT: v_mov_b32_e32 v4, 0 1064; GFX90A-NEXT: v_mov_b32_e32 v0, v3 1065; GFX90A-NEXT: v_mov_b32_e32 v1, v3 1066; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17] 1067; GFX90A-NEXT: s_waitcnt vmcnt(0) 1068; GFX90A-NEXT: s_setpc_b64 s[30:31] 1069; 1070; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_7_6: 1071; GFX940: ; %bb.0: 1072; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1073; GFX940-NEXT: ;;#ASMSTART 1074; GFX940-NEXT: ; def v[0:3] 1075; GFX940-NEXT: ;;#ASMEND 1076; GFX940-NEXT: v_mov_b32_e32 v4, 0 1077; GFX940-NEXT: v_mov_b32_e32 v0, v3 1078; GFX940-NEXT: v_mov_b32_e32 v1, v3 1079; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 1080; GFX940-NEXT: s_waitcnt vmcnt(0) 1081; GFX940-NEXT: s_setpc_b64 s[30:31] 1082 %vec0 = call <4 x i32> asm "; def $0", "=v"() 1083 %vec1 = call <4 x i32> asm "; def $0", "=v"() 1084 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 7, i32 6> 1085 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 1086 ret void 1087} 1088 1089define void @v_shuffle_v3i32_v4i32__7_7_7(ptr addrspace(1) inreg %ptr) { 1090; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_7_7: 1091; GFX900: ; %bb.0: 1092; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1093; GFX900-NEXT: ;;#ASMSTART 1094; GFX900-NEXT: ; def v[0:3] 1095; GFX900-NEXT: ;;#ASMEND 1096; GFX900-NEXT: v_mov_b32_e32 
v4, 0 1097; GFX900-NEXT: v_mov_b32_e32 v1, v3 1098; GFX900-NEXT: v_mov_b32_e32 v2, v3 1099; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17] 1100; GFX900-NEXT: s_waitcnt vmcnt(0) 1101; GFX900-NEXT: s_setpc_b64 s[30:31] 1102; 1103; GFX90A-LABEL: v_shuffle_v3i32_v4i32__7_7_7: 1104; GFX90A: ; %bb.0: 1105; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1106; GFX90A-NEXT: ;;#ASMSTART 1107; GFX90A-NEXT: ; def v[0:3] 1108; GFX90A-NEXT: ;;#ASMEND 1109; GFX90A-NEXT: v_mov_b32_e32 v4, 0 1110; GFX90A-NEXT: v_mov_b32_e32 v0, v3 1111; GFX90A-NEXT: v_mov_b32_e32 v1, v3 1112; GFX90A-NEXT: v_mov_b32_e32 v2, v3 1113; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17] 1114; GFX90A-NEXT: s_waitcnt vmcnt(0) 1115; GFX90A-NEXT: s_setpc_b64 s[30:31] 1116; 1117; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_7_7: 1118; GFX940: ; %bb.0: 1119; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1120; GFX940-NEXT: ;;#ASMSTART 1121; GFX940-NEXT: ; def v[0:3] 1122; GFX940-NEXT: ;;#ASMEND 1123; GFX940-NEXT: v_mov_b32_e32 v4, 0 1124; GFX940-NEXT: v_mov_b32_e32 v0, v3 1125; GFX940-NEXT: v_mov_b32_e32 v1, v3 1126; GFX940-NEXT: v_mov_b32_e32 v2, v3 1127; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 1128; GFX940-NEXT: s_waitcnt vmcnt(0) 1129; GFX940-NEXT: s_setpc_b64 s[30:31] 1130 %vec0 = call <4 x i32> asm "; def $0", "=v"() 1131 %vec1 = call <4 x i32> asm "; def $0", "=v"() 1132 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 7, i32 7> 1133 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 1134 ret void 1135} 1136 1137define void @v_shuffle_v3i32_v4i32__u_0_0(ptr addrspace(1) inreg %ptr) { 1138; GFX900-LABEL: v_shuffle_v3i32_v4i32__u_0_0: 1139; GFX900: ; %bb.0: 1140; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1141; GFX900-NEXT: ;;#ASMSTART 1142; GFX900-NEXT: ; def v[0:3] 1143; GFX900-NEXT: ;;#ASMEND 1144; GFX900-NEXT: v_mov_b32_e32 v4, 0 1145; GFX900-NEXT: v_mov_b32_e32 v1, v0 1146; GFX900-NEXT: v_mov_b32_e32 v2, v0 1147; 
GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17] 1148; GFX900-NEXT: s_waitcnt vmcnt(0) 1149; GFX900-NEXT: s_setpc_b64 s[30:31] 1150; 1151; GFX90A-LABEL: v_shuffle_v3i32_v4i32__u_0_0: 1152; GFX90A: ; %bb.0: 1153; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1154; GFX90A-NEXT: ;;#ASMSTART 1155; GFX90A-NEXT: ; def v[0:3] 1156; GFX90A-NEXT: ;;#ASMEND 1157; GFX90A-NEXT: v_mov_b32_e32 v4, 0 1158; GFX90A-NEXT: v_mov_b32_e32 v1, v0 1159; GFX90A-NEXT: v_mov_b32_e32 v2, v0 1160; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17] 1161; GFX90A-NEXT: s_waitcnt vmcnt(0) 1162; GFX90A-NEXT: s_setpc_b64 s[30:31] 1163; 1164; GFX940-LABEL: v_shuffle_v3i32_v4i32__u_0_0: 1165; GFX940: ; %bb.0: 1166; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1167; GFX940-NEXT: ;;#ASMSTART 1168; GFX940-NEXT: ; def v[0:3] 1169; GFX940-NEXT: ;;#ASMEND 1170; GFX940-NEXT: v_mov_b32_e32 v4, 0 1171; GFX940-NEXT: v_mov_b32_e32 v1, v0 1172; GFX940-NEXT: v_mov_b32_e32 v2, v0 1173; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 1174; GFX940-NEXT: s_waitcnt vmcnt(0) 1175; GFX940-NEXT: s_setpc_b64 s[30:31] 1176 %vec0 = call <4 x i32> asm "; def $0", "=v"() 1177 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> <i32 poison, i32 0, i32 0> 1178 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 1179 ret void 1180} 1181 1182define void @v_shuffle_v3i32_v4i32__0_0_0(ptr addrspace(1) inreg %ptr) { 1183; GFX900-LABEL: v_shuffle_v3i32_v4i32__0_0_0: 1184; GFX900: ; %bb.0: 1185; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1186; GFX900-NEXT: ;;#ASMSTART 1187; GFX900-NEXT: ; def v[0:3] 1188; GFX900-NEXT: ;;#ASMEND 1189; GFX900-NEXT: v_mov_b32_e32 v4, 0 1190; GFX900-NEXT: v_mov_b32_e32 v1, v0 1191; GFX900-NEXT: v_mov_b32_e32 v2, v0 1192; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17] 1193; GFX900-NEXT: s_waitcnt vmcnt(0) 1194; GFX900-NEXT: s_setpc_b64 s[30:31] 1195; 1196; GFX90A-LABEL: v_shuffle_v3i32_v4i32__0_0_0: 1197; GFX90A: ; %bb.0: 1198; 
GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1199; GFX90A-NEXT: ;;#ASMSTART 1200; GFX90A-NEXT: ; def v[0:3] 1201; GFX90A-NEXT: ;;#ASMEND 1202; GFX90A-NEXT: v_mov_b32_e32 v4, 0 1203; GFX90A-NEXT: v_mov_b32_e32 v1, v0 1204; GFX90A-NEXT: v_mov_b32_e32 v2, v0 1205; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17] 1206; GFX90A-NEXT: s_waitcnt vmcnt(0) 1207; GFX90A-NEXT: s_setpc_b64 s[30:31] 1208; 1209; GFX940-LABEL: v_shuffle_v3i32_v4i32__0_0_0: 1210; GFX940: ; %bb.0: 1211; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1212; GFX940-NEXT: ;;#ASMSTART 1213; GFX940-NEXT: ; def v[0:3] 1214; GFX940-NEXT: ;;#ASMEND 1215; GFX940-NEXT: v_mov_b32_e32 v4, 0 1216; GFX940-NEXT: v_mov_b32_e32 v1, v0 1217; GFX940-NEXT: v_mov_b32_e32 v2, v0 1218; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 1219; GFX940-NEXT: s_waitcnt vmcnt(0) 1220; GFX940-NEXT: s_setpc_b64 s[30:31] 1221 %vec0 = call <4 x i32> asm "; def $0", "=v"() 1222 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> zeroinitializer 1223 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 1224 ret void 1225} 1226 1227define void @v_shuffle_v3i32_v4i32__1_0_0(ptr addrspace(1) inreg %ptr) { 1228; GFX900-LABEL: v_shuffle_v3i32_v4i32__1_0_0: 1229; GFX900: ; %bb.0: 1230; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1231; GFX900-NEXT: ;;#ASMSTART 1232; GFX900-NEXT: ; def v[0:3] 1233; GFX900-NEXT: ;;#ASMEND 1234; GFX900-NEXT: v_mov_b32_e32 v4, 0 1235; GFX900-NEXT: v_mov_b32_e32 v2, v0 1236; GFX900-NEXT: v_mov_b32_e32 v3, v0 1237; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17] 1238; GFX900-NEXT: s_waitcnt vmcnt(0) 1239; GFX900-NEXT: s_setpc_b64 s[30:31] 1240; 1241; GFX90A-LABEL: v_shuffle_v3i32_v4i32__1_0_0: 1242; GFX90A: ; %bb.0: 1243; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1244; GFX90A-NEXT: ;;#ASMSTART 1245; GFX90A-NEXT: ; def v[0:3] 1246; GFX90A-NEXT: ;;#ASMEND 1247; GFX90A-NEXT: v_mov_b32_e32 v5, 0 1248; GFX90A-NEXT: v_mov_b32_e32 v2, v1 1249; 
GFX90A-NEXT: v_mov_b32_e32 v3, v0 1250; GFX90A-NEXT: v_mov_b32_e32 v4, v0 1251; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17] 1252; GFX90A-NEXT: s_waitcnt vmcnt(0) 1253; GFX90A-NEXT: s_setpc_b64 s[30:31] 1254; 1255; GFX940-LABEL: v_shuffle_v3i32_v4i32__1_0_0: 1256; GFX940: ; %bb.0: 1257; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1258; GFX940-NEXT: ;;#ASMSTART 1259; GFX940-NEXT: ; def v[0:3] 1260; GFX940-NEXT: ;;#ASMEND 1261; GFX940-NEXT: v_mov_b32_e32 v5, 0 1262; GFX940-NEXT: v_mov_b32_e32 v2, v1 1263; GFX940-NEXT: v_mov_b32_e32 v3, v0 1264; GFX940-NEXT: v_mov_b32_e32 v4, v0 1265; GFX940-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] sc0 sc1 1266; GFX940-NEXT: s_waitcnt vmcnt(0) 1267; GFX940-NEXT: s_setpc_b64 s[30:31] 1268 %vec0 = call <4 x i32> asm "; def $0", "=v"() 1269 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> <i32 1, i32 0, i32 0> 1270 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 1271 ret void 1272} 1273 1274define void @v_shuffle_v3i32_v4i32__2_0_0(ptr addrspace(1) inreg %ptr) { 1275; GFX900-LABEL: v_shuffle_v3i32_v4i32__2_0_0: 1276; GFX900: ; %bb.0: 1277; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1278; GFX900-NEXT: ;;#ASMSTART 1279; GFX900-NEXT: ; def v[0:3] 1280; GFX900-NEXT: ;;#ASMEND 1281; GFX900-NEXT: v_mov_b32_e32 v4, 0 1282; GFX900-NEXT: v_mov_b32_e32 v1, v2 1283; GFX900-NEXT: v_mov_b32_e32 v2, v0 1284; GFX900-NEXT: v_mov_b32_e32 v3, v0 1285; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17] 1286; GFX900-NEXT: s_waitcnt vmcnt(0) 1287; GFX900-NEXT: s_setpc_b64 s[30:31] 1288; 1289; GFX90A-LABEL: v_shuffle_v3i32_v4i32__2_0_0: 1290; GFX90A: ; %bb.0: 1291; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1292; GFX90A-NEXT: ;;#ASMSTART 1293; GFX90A-NEXT: ; def v[0:3] 1294; GFX90A-NEXT: ;;#ASMEND 1295; GFX90A-NEXT: v_mov_b32_e32 v5, 0 1296; GFX90A-NEXT: v_mov_b32_e32 v3, v0 1297; GFX90A-NEXT: v_mov_b32_e32 v4, v0 1298; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17] 1299; 
GFX90A-NEXT: s_waitcnt vmcnt(0) 1300; GFX90A-NEXT: s_setpc_b64 s[30:31] 1301; 1302; GFX940-LABEL: v_shuffle_v3i32_v4i32__2_0_0: 1303; GFX940: ; %bb.0: 1304; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1305; GFX940-NEXT: ;;#ASMSTART 1306; GFX940-NEXT: ; def v[0:3] 1307; GFX940-NEXT: ;;#ASMEND 1308; GFX940-NEXT: v_mov_b32_e32 v5, 0 1309; GFX940-NEXT: v_mov_b32_e32 v3, v0 1310; GFX940-NEXT: v_mov_b32_e32 v4, v0 1311; GFX940-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] sc0 sc1 1312; GFX940-NEXT: s_waitcnt vmcnt(0) 1313; GFX940-NEXT: s_setpc_b64 s[30:31] 1314 %vec0 = call <4 x i32> asm "; def $0", "=v"() 1315 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> <i32 2, i32 0, i32 0> 1316 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 1317 ret void 1318} 1319 1320define void @v_shuffle_v3i32_v4i32__3_0_0(ptr addrspace(1) inreg %ptr) { 1321; GFX900-LABEL: v_shuffle_v3i32_v4i32__3_0_0: 1322; GFX900: ; %bb.0: 1323; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1324; GFX900-NEXT: ;;#ASMSTART 1325; GFX900-NEXT: ; def v[0:3] 1326; GFX900-NEXT: ;;#ASMEND 1327; GFX900-NEXT: v_mov_b32_e32 v4, 0 1328; GFX900-NEXT: v_mov_b32_e32 v1, v3 1329; GFX900-NEXT: v_mov_b32_e32 v2, v0 1330; GFX900-NEXT: v_mov_b32_e32 v3, v0 1331; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17] 1332; GFX900-NEXT: s_waitcnt vmcnt(0) 1333; GFX900-NEXT: s_setpc_b64 s[30:31] 1334; 1335; GFX90A-LABEL: v_shuffle_v3i32_v4i32__3_0_0: 1336; GFX90A: ; %bb.0: 1337; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1338; GFX90A-NEXT: ;;#ASMSTART 1339; GFX90A-NEXT: ; def v[0:3] 1340; GFX90A-NEXT: ;;#ASMEND 1341; GFX90A-NEXT: v_mov_b32_e32 v5, 0 1342; GFX90A-NEXT: v_mov_b32_e32 v2, v3 1343; GFX90A-NEXT: v_mov_b32_e32 v3, v0 1344; GFX90A-NEXT: v_mov_b32_e32 v4, v0 1345; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17] 1346; GFX90A-NEXT: s_waitcnt vmcnt(0) 1347; GFX90A-NEXT: s_setpc_b64 s[30:31] 1348; 1349; GFX940-LABEL: v_shuffle_v3i32_v4i32__3_0_0: 1350; GFX940: ; 
%bb.0: 1351; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1352; GFX940-NEXT: ;;#ASMSTART 1353; GFX940-NEXT: ; def v[0:3] 1354; GFX940-NEXT: ;;#ASMEND 1355; GFX940-NEXT: v_mov_b32_e32 v5, 0 1356; GFX940-NEXT: v_mov_b32_e32 v2, v3 1357; GFX940-NEXT: v_mov_b32_e32 v3, v0 1358; GFX940-NEXT: v_mov_b32_e32 v4, v0 1359; GFX940-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] sc0 sc1 1360; GFX940-NEXT: s_waitcnt vmcnt(0) 1361; GFX940-NEXT: s_setpc_b64 s[30:31] 1362 %vec0 = call <4 x i32> asm "; def $0", "=v"() 1363 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> <i32 3, i32 0, i32 0> 1364 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 1365 ret void 1366} 1367 1368define void @v_shuffle_v3i32_v4i32__4_0_0(ptr addrspace(1) inreg %ptr) { 1369; GFX900-LABEL: v_shuffle_v3i32_v4i32__4_0_0: 1370; GFX900: ; %bb.0: 1371; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1372; GFX900-NEXT: ;;#ASMSTART 1373; GFX900-NEXT: ; def v[0:3] 1374; GFX900-NEXT: ;;#ASMEND 1375; GFX900-NEXT: v_mov_b32_e32 v4, 0 1376; GFX900-NEXT: v_mov_b32_e32 v1, v0 1377; GFX900-NEXT: v_mov_b32_e32 v2, v0 1378; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17] 1379; GFX900-NEXT: s_waitcnt vmcnt(0) 1380; GFX900-NEXT: s_setpc_b64 s[30:31] 1381; 1382; GFX90A-LABEL: v_shuffle_v3i32_v4i32__4_0_0: 1383; GFX90A: ; %bb.0: 1384; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1385; GFX90A-NEXT: ;;#ASMSTART 1386; GFX90A-NEXT: ; def v[0:3] 1387; GFX90A-NEXT: ;;#ASMEND 1388; GFX90A-NEXT: v_mov_b32_e32 v4, 0 1389; GFX90A-NEXT: v_mov_b32_e32 v1, v0 1390; GFX90A-NEXT: v_mov_b32_e32 v2, v0 1391; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17] 1392; GFX90A-NEXT: s_waitcnt vmcnt(0) 1393; GFX90A-NEXT: s_setpc_b64 s[30:31] 1394; 1395; GFX940-LABEL: v_shuffle_v3i32_v4i32__4_0_0: 1396; GFX940: ; %bb.0: 1397; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1398; GFX940-NEXT: ;;#ASMSTART 1399; GFX940-NEXT: ; def v[0:3] 1400; GFX940-NEXT: ;;#ASMEND 1401; GFX940-NEXT: 
v_mov_b32_e32 v4, 0 1402; GFX940-NEXT: v_mov_b32_e32 v1, v0 1403; GFX940-NEXT: v_mov_b32_e32 v2, v0 1404; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 1405; GFX940-NEXT: s_waitcnt vmcnt(0) 1406; GFX940-NEXT: s_setpc_b64 s[30:31] 1407 %vec0 = call <4 x i32> asm "; def $0", "=v"() 1408 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> <i32 4, i32 0, i32 0> 1409 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 1410 ret void 1411} 1412 1413define void @v_shuffle_v3i32_v4i32__5_0_0(ptr addrspace(1) inreg %ptr) { 1414; GFX900-LABEL: v_shuffle_v3i32_v4i32__5_0_0: 1415; GFX900: ; %bb.0: 1416; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1417; GFX900-NEXT: ;;#ASMSTART 1418; GFX900-NEXT: ; def v[0:3] 1419; GFX900-NEXT: ;;#ASMEND 1420; GFX900-NEXT: ;;#ASMSTART 1421; GFX900-NEXT: ; def v[1:4] 1422; GFX900-NEXT: ;;#ASMEND 1423; GFX900-NEXT: v_mov_b32_e32 v5, 0 1424; GFX900-NEXT: v_mov_b32_e32 v3, v0 1425; GFX900-NEXT: v_mov_b32_e32 v4, v0 1426; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17] 1427; GFX900-NEXT: s_waitcnt vmcnt(0) 1428; GFX900-NEXT: s_setpc_b64 s[30:31] 1429; 1430; GFX90A-LABEL: v_shuffle_v3i32_v4i32__5_0_0: 1431; GFX90A: ; %bb.0: 1432; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1433; GFX90A-NEXT: ;;#ASMSTART 1434; GFX90A-NEXT: ; def v[0:3] 1435; GFX90A-NEXT: ;;#ASMEND 1436; GFX90A-NEXT: ;;#ASMSTART 1437; GFX90A-NEXT: ; def v[2:5] 1438; GFX90A-NEXT: ;;#ASMEND 1439; GFX90A-NEXT: v_mov_b32_e32 v6, 0 1440; GFX90A-NEXT: v_mov_b32_e32 v2, v3 1441; GFX90A-NEXT: v_mov_b32_e32 v3, v0 1442; GFX90A-NEXT: v_mov_b32_e32 v4, v0 1443; GFX90A-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17] 1444; GFX90A-NEXT: s_waitcnt vmcnt(0) 1445; GFX90A-NEXT: s_setpc_b64 s[30:31] 1446; 1447; GFX940-LABEL: v_shuffle_v3i32_v4i32__5_0_0: 1448; GFX940: ; %bb.0: 1449; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1450; GFX940-NEXT: ;;#ASMSTART 1451; GFX940-NEXT: ; def v[0:3] 1452; GFX940-NEXT: ;;#ASMEND 1453; GFX940-NEXT: 
v_mov_b32_e32 v6, 0 1454; GFX940-NEXT: ;;#ASMSTART 1455; GFX940-NEXT: ; def v[2:5] 1456; GFX940-NEXT: ;;#ASMEND 1457; GFX940-NEXT: s_nop 0 1458; GFX940-NEXT: v_mov_b32_e32 v2, v3 1459; GFX940-NEXT: v_mov_b32_e32 v3, v0 1460; GFX940-NEXT: v_mov_b32_e32 v4, v0 1461; GFX940-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] sc0 sc1 1462; GFX940-NEXT: s_waitcnt vmcnt(0) 1463; GFX940-NEXT: s_setpc_b64 s[30:31] 1464 %vec0 = call <4 x i32> asm "; def $0", "=v"() 1465 %vec1 = call <4 x i32> asm "; def $0", "=v"() 1466 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 5, i32 0, i32 0> 1467 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 1468 ret void 1469} 1470 1471define void @v_shuffle_v3i32_v4i32__6_0_0(ptr addrspace(1) inreg %ptr) { 1472; GFX900-LABEL: v_shuffle_v3i32_v4i32__6_0_0: 1473; GFX900: ; %bb.0: 1474; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1475; GFX900-NEXT: ;;#ASMSTART 1476; GFX900-NEXT: ; def v[0:3] 1477; GFX900-NEXT: ;;#ASMEND 1478; GFX900-NEXT: ;;#ASMSTART 1479; GFX900-NEXT: ; def v[1:4] 1480; GFX900-NEXT: ;;#ASMEND 1481; GFX900-NEXT: v_mov_b32_e32 v5, 0 1482; GFX900-NEXT: v_mov_b32_e32 v1, v3 1483; GFX900-NEXT: v_mov_b32_e32 v2, v0 1484; GFX900-NEXT: v_mov_b32_e32 v3, v0 1485; GFX900-NEXT: global_store_dwordx3 v5, v[1:3], s[16:17] 1486; GFX900-NEXT: s_waitcnt vmcnt(0) 1487; GFX900-NEXT: s_setpc_b64 s[30:31] 1488; 1489; GFX90A-LABEL: v_shuffle_v3i32_v4i32__6_0_0: 1490; GFX90A: ; %bb.0: 1491; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1492; GFX90A-NEXT: ;;#ASMSTART 1493; GFX90A-NEXT: ; def v[0:3] 1494; GFX90A-NEXT: ;;#ASMEND 1495; GFX90A-NEXT: ;;#ASMSTART 1496; GFX90A-NEXT: ; def v[2:5] 1497; GFX90A-NEXT: ;;#ASMEND 1498; GFX90A-NEXT: v_mov_b32_e32 v6, 0 1499; GFX90A-NEXT: v_mov_b32_e32 v2, v4 1500; GFX90A-NEXT: v_mov_b32_e32 v3, v0 1501; GFX90A-NEXT: v_mov_b32_e32 v4, v0 1502; GFX90A-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17] 1503; GFX90A-NEXT: s_waitcnt vmcnt(0) 1504; GFX90A-NEXT: s_setpc_b64 s[30:31] 
1505; 1506; GFX940-LABEL: v_shuffle_v3i32_v4i32__6_0_0: 1507; GFX940: ; %bb.0: 1508; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1509; GFX940-NEXT: ;;#ASMSTART 1510; GFX940-NEXT: ; def v[0:3] 1511; GFX940-NEXT: ;;#ASMEND 1512; GFX940-NEXT: v_mov_b32_e32 v6, 0 1513; GFX940-NEXT: ;;#ASMSTART 1514; GFX940-NEXT: ; def v[2:5] 1515; GFX940-NEXT: ;;#ASMEND 1516; GFX940-NEXT: s_nop 0 1517; GFX940-NEXT: v_mov_b32_e32 v2, v4 1518; GFX940-NEXT: v_mov_b32_e32 v3, v0 1519; GFX940-NEXT: v_mov_b32_e32 v4, v0 1520; GFX940-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] sc0 sc1 1521; GFX940-NEXT: s_waitcnt vmcnt(0) 1522; GFX940-NEXT: s_setpc_b64 s[30:31] 1523 %vec0 = call <4 x i32> asm "; def $0", "=v"() 1524 %vec1 = call <4 x i32> asm "; def $0", "=v"() 1525 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 6, i32 0, i32 0> 1526 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 1527 ret void 1528} 1529 1530define void @v_shuffle_v3i32_v4i32__7_0_0(ptr addrspace(1) inreg %ptr) { 1531; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_0_0: 1532; GFX900: ; %bb.0: 1533; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1534; GFX900-NEXT: ;;#ASMSTART 1535; GFX900-NEXT: ; def v[0:3] 1536; GFX900-NEXT: ;;#ASMEND 1537; GFX900-NEXT: ;;#ASMSTART 1538; GFX900-NEXT: ; def v[1:4] 1539; GFX900-NEXT: ;;#ASMEND 1540; GFX900-NEXT: v_mov_b32_e32 v5, 0 1541; GFX900-NEXT: v_mov_b32_e32 v1, v4 1542; GFX900-NEXT: v_mov_b32_e32 v2, v0 1543; GFX900-NEXT: v_mov_b32_e32 v3, v0 1544; GFX900-NEXT: global_store_dwordx3 v5, v[1:3], s[16:17] 1545; GFX900-NEXT: s_waitcnt vmcnt(0) 1546; GFX900-NEXT: s_setpc_b64 s[30:31] 1547; 1548; GFX90A-LABEL: v_shuffle_v3i32_v4i32__7_0_0: 1549; GFX90A: ; %bb.0: 1550; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1551; GFX90A-NEXT: ;;#ASMSTART 1552; GFX90A-NEXT: ; def v[0:3] 1553; GFX90A-NEXT: ;;#ASMEND 1554; GFX90A-NEXT: ;;#ASMSTART 1555; GFX90A-NEXT: ; def v[2:5] 1556; GFX90A-NEXT: ;;#ASMEND 1557; GFX90A-NEXT: v_mov_b32_e32 v6, 0 1558; 
GFX90A-NEXT: v_mov_b32_e32 v2, v5 1559; GFX90A-NEXT: v_mov_b32_e32 v3, v0 1560; GFX90A-NEXT: v_mov_b32_e32 v4, v0 1561; GFX90A-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17] 1562; GFX90A-NEXT: s_waitcnt vmcnt(0) 1563; GFX90A-NEXT: s_setpc_b64 s[30:31] 1564; 1565; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_0_0: 1566; GFX940: ; %bb.0: 1567; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1568; GFX940-NEXT: ;;#ASMSTART 1569; GFX940-NEXT: ; def v[0:3] 1570; GFX940-NEXT: ;;#ASMEND 1571; GFX940-NEXT: v_mov_b32_e32 v6, 0 1572; GFX940-NEXT: ;;#ASMSTART 1573; GFX940-NEXT: ; def v[2:5] 1574; GFX940-NEXT: ;;#ASMEND 1575; GFX940-NEXT: s_nop 0 1576; GFX940-NEXT: v_mov_b32_e32 v2, v5 1577; GFX940-NEXT: v_mov_b32_e32 v3, v0 1578; GFX940-NEXT: v_mov_b32_e32 v4, v0 1579; GFX940-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] sc0 sc1 1580; GFX940-NEXT: s_waitcnt vmcnt(0) 1581; GFX940-NEXT: s_setpc_b64 s[30:31] 1582 %vec0 = call <4 x i32> asm "; def $0", "=v"() 1583 %vec1 = call <4 x i32> asm "; def $0", "=v"() 1584 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 0, i32 0> 1585 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 1586 ret void 1587} 1588 1589define void @v_shuffle_v3i32_v4i32__7_u_0(ptr addrspace(1) inreg %ptr) { 1590; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_u_0: 1591; GFX900: ; %bb.0: 1592; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1593; GFX900-NEXT: ;;#ASMSTART 1594; GFX900-NEXT: ; def v[0:3] 1595; GFX900-NEXT: ;;#ASMEND 1596; GFX900-NEXT: ;;#ASMSTART 1597; GFX900-NEXT: ; def v[1:4] 1598; GFX900-NEXT: ;;#ASMEND 1599; GFX900-NEXT: v_mov_b32_e32 v5, 0 1600; GFX900-NEXT: v_mov_b32_e32 v1, v4 1601; GFX900-NEXT: v_mov_b32_e32 v3, v0 1602; GFX900-NEXT: global_store_dwordx3 v5, v[1:3], s[16:17] 1603; GFX900-NEXT: s_waitcnt vmcnt(0) 1604; GFX900-NEXT: s_setpc_b64 s[30:31] 1605; 1606; GFX90A-LABEL: v_shuffle_v3i32_v4i32__7_u_0: 1607; GFX90A: ; %bb.0: 1608; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1609; GFX90A-NEXT: 
;;#ASMSTART 1610; GFX90A-NEXT: ; def v[0:3] 1611; GFX90A-NEXT: ;;#ASMEND 1612; GFX90A-NEXT: ;;#ASMSTART 1613; GFX90A-NEXT: ; def v[2:5] 1614; GFX90A-NEXT: ;;#ASMEND 1615; GFX90A-NEXT: v_mov_b32_e32 v6, 0 1616; GFX90A-NEXT: v_mov_b32_e32 v2, v5 1617; GFX90A-NEXT: v_mov_b32_e32 v4, v0 1618; GFX90A-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17] 1619; GFX90A-NEXT: s_waitcnt vmcnt(0) 1620; GFX90A-NEXT: s_setpc_b64 s[30:31] 1621; 1622; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_u_0: 1623; GFX940: ; %bb.0: 1624; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1625; GFX940-NEXT: ;;#ASMSTART 1626; GFX940-NEXT: ; def v[0:3] 1627; GFX940-NEXT: ;;#ASMEND 1628; GFX940-NEXT: v_mov_b32_e32 v6, 0 1629; GFX940-NEXT: ;;#ASMSTART 1630; GFX940-NEXT: ; def v[2:5] 1631; GFX940-NEXT: ;;#ASMEND 1632; GFX940-NEXT: s_nop 0 1633; GFX940-NEXT: v_mov_b32_e32 v2, v5 1634; GFX940-NEXT: v_mov_b32_e32 v4, v0 1635; GFX940-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] sc0 sc1 1636; GFX940-NEXT: s_waitcnt vmcnt(0) 1637; GFX940-NEXT: s_setpc_b64 s[30:31] 1638 %vec0 = call <4 x i32> asm "; def $0", "=v"() 1639 %vec1 = call <4 x i32> asm "; def $0", "=v"() 1640 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 poison, i32 0> 1641 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 1642 ret void 1643} 1644 1645define void @v_shuffle_v3i32_v4i32__7_1_0(ptr addrspace(1) inreg %ptr) { 1646; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_1_0: 1647; GFX900: ; %bb.0: 1648; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1649; GFX900-NEXT: ;;#ASMSTART 1650; GFX900-NEXT: ; def v[0:3] 1651; GFX900-NEXT: ;;#ASMEND 1652; GFX900-NEXT: ;;#ASMSTART 1653; GFX900-NEXT: ; def v[2:5] 1654; GFX900-NEXT: ;;#ASMEND 1655; GFX900-NEXT: v_mov_b32_e32 v6, 0 1656; GFX900-NEXT: v_mov_b32_e32 v2, v5 1657; GFX900-NEXT: v_mov_b32_e32 v3, v1 1658; GFX900-NEXT: v_mov_b32_e32 v4, v0 1659; GFX900-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17] 1660; GFX900-NEXT: s_waitcnt vmcnt(0) 1661; GFX900-NEXT: 
s_setpc_b64 s[30:31] 1662; 1663; GFX90A-LABEL: v_shuffle_v3i32_v4i32__7_1_0: 1664; GFX90A: ; %bb.0: 1665; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1666; GFX90A-NEXT: ;;#ASMSTART 1667; GFX90A-NEXT: ; def v[0:3] 1668; GFX90A-NEXT: ;;#ASMEND 1669; GFX90A-NEXT: ;;#ASMSTART 1670; GFX90A-NEXT: ; def v[2:5] 1671; GFX90A-NEXT: ;;#ASMEND 1672; GFX90A-NEXT: v_mov_b32_e32 v6, 0 1673; GFX90A-NEXT: v_mov_b32_e32 v2, v5 1674; GFX90A-NEXT: v_mov_b32_e32 v3, v1 1675; GFX90A-NEXT: v_mov_b32_e32 v4, v0 1676; GFX90A-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17] 1677; GFX90A-NEXT: s_waitcnt vmcnt(0) 1678; GFX90A-NEXT: s_setpc_b64 s[30:31] 1679; 1680; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_1_0: 1681; GFX940: ; %bb.0: 1682; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1683; GFX940-NEXT: ;;#ASMSTART 1684; GFX940-NEXT: ; def v[0:3] 1685; GFX940-NEXT: ;;#ASMEND 1686; GFX940-NEXT: v_mov_b32_e32 v6, 0 1687; GFX940-NEXT: ;;#ASMSTART 1688; GFX940-NEXT: ; def v[2:5] 1689; GFX940-NEXT: ;;#ASMEND 1690; GFX940-NEXT: s_nop 0 1691; GFX940-NEXT: v_mov_b32_e32 v2, v5 1692; GFX940-NEXT: v_mov_b32_e32 v3, v1 1693; GFX940-NEXT: v_mov_b32_e32 v4, v0 1694; GFX940-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] sc0 sc1 1695; GFX940-NEXT: s_waitcnt vmcnt(0) 1696; GFX940-NEXT: s_setpc_b64 s[30:31] 1697 %vec0 = call <4 x i32> asm "; def $0", "=v"() 1698 %vec1 = call <4 x i32> asm "; def $0", "=v"() 1699 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 1, i32 0> 1700 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 1701 ret void 1702} 1703 1704define void @v_shuffle_v3i32_v4i32__7_2_0(ptr addrspace(1) inreg %ptr) { 1705; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_2_0: 1706; GFX900: ; %bb.0: 1707; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1708; GFX900-NEXT: ;;#ASMSTART 1709; GFX900-NEXT: ; def v[0:3] 1710; GFX900-NEXT: ;;#ASMEND 1711; GFX900-NEXT: ;;#ASMSTART 1712; GFX900-NEXT: ; def v[3:6] 1713; GFX900-NEXT: ;;#ASMEND 1714; GFX900-NEXT: v_mov_b32_e32 
v7, 0 1715; GFX900-NEXT: v_mov_b32_e32 v1, v6 1716; GFX900-NEXT: v_mov_b32_e32 v3, v0 1717; GFX900-NEXT: global_store_dwordx3 v7, v[1:3], s[16:17] 1718; GFX900-NEXT: s_waitcnt vmcnt(0) 1719; GFX900-NEXT: s_setpc_b64 s[30:31] 1720; 1721; GFX90A-LABEL: v_shuffle_v3i32_v4i32__7_2_0: 1722; GFX90A: ; %bb.0: 1723; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1724; GFX90A-NEXT: ;;#ASMSTART 1725; GFX90A-NEXT: ; def v[4:7] 1726; GFX90A-NEXT: ;;#ASMEND 1727; GFX90A-NEXT: v_mov_b32_e32 v8, 0 1728; GFX90A-NEXT: ;;#ASMSTART 1729; GFX90A-NEXT: ; def v[0:3] 1730; GFX90A-NEXT: ;;#ASMEND 1731; GFX90A-NEXT: v_mov_b32_e32 v4, v7 1732; GFX90A-NEXT: v_mov_b32_e32 v5, v2 1733; GFX90A-NEXT: v_mov_b32_e32 v6, v0 1734; GFX90A-NEXT: global_store_dwordx3 v8, v[4:6], s[16:17] 1735; GFX90A-NEXT: s_waitcnt vmcnt(0) 1736; GFX90A-NEXT: s_setpc_b64 s[30:31] 1737; 1738; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_2_0: 1739; GFX940: ; %bb.0: 1740; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1741; GFX940-NEXT: ;;#ASMSTART 1742; GFX940-NEXT: ; def v[4:7] 1743; GFX940-NEXT: ;;#ASMEND 1744; GFX940-NEXT: v_mov_b32_e32 v8, 0 1745; GFX940-NEXT: ;;#ASMSTART 1746; GFX940-NEXT: ; def v[0:3] 1747; GFX940-NEXT: ;;#ASMEND 1748; GFX940-NEXT: v_mov_b32_e32 v4, v7 1749; GFX940-NEXT: v_mov_b32_e32 v5, v2 1750; GFX940-NEXT: v_mov_b32_e32 v6, v0 1751; GFX940-NEXT: global_store_dwordx3 v8, v[4:6], s[0:1] sc0 sc1 1752; GFX940-NEXT: s_waitcnt vmcnt(0) 1753; GFX940-NEXT: s_setpc_b64 s[30:31] 1754 %vec0 = call <4 x i32> asm "; def $0", "=v"() 1755 %vec1 = call <4 x i32> asm "; def $0", "=v"() 1756 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 2, i32 0> 1757 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 1758 ret void 1759} 1760 1761define void @v_shuffle_v3i32_v4i32__7_3_0(ptr addrspace(1) inreg %ptr) { 1762; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_3_0: 1763; GFX900: ; %bb.0: 1764; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1765; GFX900-NEXT: ;;#ASMSTART 1766; 
GFX900-NEXT: ; def v[0:3] 1767; GFX900-NEXT: ;;#ASMEND 1768; GFX900-NEXT: v_mov_b32_e32 v8, 0 1769; GFX900-NEXT: ;;#ASMSTART 1770; GFX900-NEXT: ; def v[4:7] 1771; GFX900-NEXT: ;;#ASMEND 1772; GFX900-NEXT: v_mov_b32_e32 v1, v7 1773; GFX900-NEXT: v_mov_b32_e32 v2, v3 1774; GFX900-NEXT: v_mov_b32_e32 v3, v0 1775; GFX900-NEXT: global_store_dwordx3 v8, v[1:3], s[16:17] 1776; GFX900-NEXT: s_waitcnt vmcnt(0) 1777; GFX900-NEXT: s_setpc_b64 s[30:31] 1778; 1779; GFX90A-LABEL: v_shuffle_v3i32_v4i32__7_3_0: 1780; GFX90A: ; %bb.0: 1781; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1782; GFX90A-NEXT: ;;#ASMSTART 1783; GFX90A-NEXT: ; def v[0:3] 1784; GFX90A-NEXT: ;;#ASMEND 1785; GFX90A-NEXT: ;;#ASMSTART 1786; GFX90A-NEXT: ; def v[4:7] 1787; GFX90A-NEXT: ;;#ASMEND 1788; GFX90A-NEXT: v_mov_b32_e32 v8, 0 1789; GFX90A-NEXT: v_mov_b32_e32 v2, v7 1790; GFX90A-NEXT: v_mov_b32_e32 v4, v0 1791; GFX90A-NEXT: global_store_dwordx3 v8, v[2:4], s[16:17] 1792; GFX90A-NEXT: s_waitcnt vmcnt(0) 1793; GFX90A-NEXT: s_setpc_b64 s[30:31] 1794; 1795; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_3_0: 1796; GFX940: ; %bb.0: 1797; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1798; GFX940-NEXT: ;;#ASMSTART 1799; GFX940-NEXT: ; def v[0:3] 1800; GFX940-NEXT: ;;#ASMEND 1801; GFX940-NEXT: ;;#ASMSTART 1802; GFX940-NEXT: ; def v[4:7] 1803; GFX940-NEXT: ;;#ASMEND 1804; GFX940-NEXT: v_mov_b32_e32 v8, 0 1805; GFX940-NEXT: v_mov_b32_e32 v2, v7 1806; GFX940-NEXT: v_mov_b32_e32 v4, v0 1807; GFX940-NEXT: global_store_dwordx3 v8, v[2:4], s[0:1] sc0 sc1 1808; GFX940-NEXT: s_waitcnt vmcnt(0) 1809; GFX940-NEXT: s_setpc_b64 s[30:31] 1810 %vec0 = call <4 x i32> asm "; def $0", "=v"() 1811 %vec1 = call <4 x i32> asm "; def $0", "=v"() 1812 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 3, i32 0> 1813 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 1814 ret void 1815} 1816 1817define void @v_shuffle_v3i32_v4i32__7_4_0(ptr addrspace(1) inreg %ptr) { 1818; GFX900-LABEL: 
v_shuffle_v3i32_v4i32__7_4_0: 1819; GFX900: ; %bb.0: 1820; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1821; GFX900-NEXT: ;;#ASMSTART 1822; GFX900-NEXT: ; def v[0:3] 1823; GFX900-NEXT: ;;#ASMEND 1824; GFX900-NEXT: ;;#ASMSTART 1825; GFX900-NEXT: ; def v[1:4] 1826; GFX900-NEXT: ;;#ASMEND 1827; GFX900-NEXT: v_mov_b32_e32 v5, 0 1828; GFX900-NEXT: v_mov_b32_e32 v2, v4 1829; GFX900-NEXT: v_mov_b32_e32 v3, v1 1830; GFX900-NEXT: v_mov_b32_e32 v4, v0 1831; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17] 1832; GFX900-NEXT: s_waitcnt vmcnt(0) 1833; GFX900-NEXT: s_setpc_b64 s[30:31] 1834; 1835; GFX90A-LABEL: v_shuffle_v3i32_v4i32__7_4_0: 1836; GFX90A: ; %bb.0: 1837; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1838; GFX90A-NEXT: ;;#ASMSTART 1839; GFX90A-NEXT: ; def v[0:3] 1840; GFX90A-NEXT: ;;#ASMEND 1841; GFX90A-NEXT: ;;#ASMSTART 1842; GFX90A-NEXT: ; def v[2:5] 1843; GFX90A-NEXT: ;;#ASMEND 1844; GFX90A-NEXT: v_mov_b32_e32 v7, 0 1845; GFX90A-NEXT: v_mov_b32_e32 v4, v5 1846; GFX90A-NEXT: v_mov_b32_e32 v5, v2 1847; GFX90A-NEXT: v_mov_b32_e32 v6, v0 1848; GFX90A-NEXT: global_store_dwordx3 v7, v[4:6], s[16:17] 1849; GFX90A-NEXT: s_waitcnt vmcnt(0) 1850; GFX90A-NEXT: s_setpc_b64 s[30:31] 1851; 1852; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_4_0: 1853; GFX940: ; %bb.0: 1854; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1855; GFX940-NEXT: ;;#ASMSTART 1856; GFX940-NEXT: ; def v[0:3] 1857; GFX940-NEXT: ;;#ASMEND 1858; GFX940-NEXT: v_mov_b32_e32 v7, 0 1859; GFX940-NEXT: ;;#ASMSTART 1860; GFX940-NEXT: ; def v[2:5] 1861; GFX940-NEXT: ;;#ASMEND 1862; GFX940-NEXT: v_mov_b32_e32 v6, v0 1863; GFX940-NEXT: v_mov_b32_e32 v4, v5 1864; GFX940-NEXT: v_mov_b32_e32 v5, v2 1865; GFX940-NEXT: global_store_dwordx3 v7, v[4:6], s[0:1] sc0 sc1 1866; GFX940-NEXT: s_waitcnt vmcnt(0) 1867; GFX940-NEXT: s_setpc_b64 s[30:31] 1868 %vec0 = call <4 x i32> asm "; def $0", "=v"() 1869 %vec1 = call <4 x i32> asm "; def $0", "=v"() 1870 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> 
%vec1, <3 x i32> <i32 7, i32 4, i32 0> 1871 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 1872 ret void 1873} 1874 1875define void @v_shuffle_v3i32_v4i32__7_5_0(ptr addrspace(1) inreg %ptr) { 1876; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_5_0: 1877; GFX900: ; %bb.0: 1878; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1879; GFX900-NEXT: ;;#ASMSTART 1880; GFX900-NEXT: ; def v[0:3] 1881; GFX900-NEXT: ;;#ASMEND 1882; GFX900-NEXT: ;;#ASMSTART 1883; GFX900-NEXT: ; def v[1:4] 1884; GFX900-NEXT: ;;#ASMEND 1885; GFX900-NEXT: v_mov_b32_e32 v5, 0 1886; GFX900-NEXT: v_mov_b32_e32 v1, v4 1887; GFX900-NEXT: v_mov_b32_e32 v3, v0 1888; GFX900-NEXT: global_store_dwordx3 v5, v[1:3], s[16:17] 1889; GFX900-NEXT: s_waitcnt vmcnt(0) 1890; GFX900-NEXT: s_setpc_b64 s[30:31] 1891; 1892; GFX90A-LABEL: v_shuffle_v3i32_v4i32__7_5_0: 1893; GFX90A: ; %bb.0: 1894; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1895; GFX90A-NEXT: ;;#ASMSTART 1896; GFX90A-NEXT: ; def v[0:3] 1897; GFX90A-NEXT: ;;#ASMEND 1898; GFX90A-NEXT: ;;#ASMSTART 1899; GFX90A-NEXT: ; def v[2:5] 1900; GFX90A-NEXT: ;;#ASMEND 1901; GFX90A-NEXT: v_mov_b32_e32 v6, 0 1902; GFX90A-NEXT: v_mov_b32_e32 v2, v5 1903; GFX90A-NEXT: v_mov_b32_e32 v4, v0 1904; GFX90A-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17] 1905; GFX90A-NEXT: s_waitcnt vmcnt(0) 1906; GFX90A-NEXT: s_setpc_b64 s[30:31] 1907; 1908; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_5_0: 1909; GFX940: ; %bb.0: 1910; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1911; GFX940-NEXT: ;;#ASMSTART 1912; GFX940-NEXT: ; def v[0:3] 1913; GFX940-NEXT: ;;#ASMEND 1914; GFX940-NEXT: v_mov_b32_e32 v6, 0 1915; GFX940-NEXT: ;;#ASMSTART 1916; GFX940-NEXT: ; def v[2:5] 1917; GFX940-NEXT: ;;#ASMEND 1918; GFX940-NEXT: s_nop 0 1919; GFX940-NEXT: v_mov_b32_e32 v2, v5 1920; GFX940-NEXT: v_mov_b32_e32 v4, v0 1921; GFX940-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] sc0 sc1 1922; GFX940-NEXT: s_waitcnt vmcnt(0) 1923; GFX940-NEXT: s_setpc_b64 s[30:31] 1924 %vec0 = call <4 x i32> asm 
"; def $0", "=v"() 1925 %vec1 = call <4 x i32> asm "; def $0", "=v"() 1926 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 5, i32 0> 1927 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 1928 ret void 1929} 1930 1931define void @v_shuffle_v3i32_v4i32__7_6_0(ptr addrspace(1) inreg %ptr) { 1932; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_6_0: 1933; GFX900: ; %bb.0: 1934; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1935; GFX900-NEXT: ;;#ASMSTART 1936; GFX900-NEXT: ; def v[0:3] 1937; GFX900-NEXT: ;;#ASMEND 1938; GFX900-NEXT: ;;#ASMSTART 1939; GFX900-NEXT: ; def v[1:4] 1940; GFX900-NEXT: ;;#ASMEND 1941; GFX900-NEXT: v_mov_b32_e32 v5, 0 1942; GFX900-NEXT: v_mov_b32_e32 v2, v4 1943; GFX900-NEXT: v_mov_b32_e32 v4, v0 1944; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17] 1945; GFX900-NEXT: s_waitcnt vmcnt(0) 1946; GFX900-NEXT: s_setpc_b64 s[30:31] 1947; 1948; GFX90A-LABEL: v_shuffle_v3i32_v4i32__7_6_0: 1949; GFX90A: ; %bb.0: 1950; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1951; GFX90A-NEXT: ;;#ASMSTART 1952; GFX90A-NEXT: ; def v[0:3] 1953; GFX90A-NEXT: ;;#ASMEND 1954; GFX90A-NEXT: ;;#ASMSTART 1955; GFX90A-NEXT: ; def v[2:5] 1956; GFX90A-NEXT: ;;#ASMEND 1957; GFX90A-NEXT: v_mov_b32_e32 v6, 0 1958; GFX90A-NEXT: v_mov_b32_e32 v2, v5 1959; GFX90A-NEXT: v_mov_b32_e32 v3, v4 1960; GFX90A-NEXT: v_mov_b32_e32 v4, v0 1961; GFX90A-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17] 1962; GFX90A-NEXT: s_waitcnt vmcnt(0) 1963; GFX90A-NEXT: s_setpc_b64 s[30:31] 1964; 1965; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_6_0: 1966; GFX940: ; %bb.0: 1967; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1968; GFX940-NEXT: ;;#ASMSTART 1969; GFX940-NEXT: ; def v[0:3] 1970; GFX940-NEXT: ;;#ASMEND 1971; GFX940-NEXT: v_mov_b32_e32 v6, 0 1972; GFX940-NEXT: ;;#ASMSTART 1973; GFX940-NEXT: ; def v[2:5] 1974; GFX940-NEXT: ;;#ASMEND 1975; GFX940-NEXT: s_nop 0 1976; GFX940-NEXT: v_mov_b32_e32 v2, v5 1977; GFX940-NEXT: v_mov_b32_e32 v3, v4 1978; 
GFX940-NEXT: v_mov_b32_e32 v4, v0 1979; GFX940-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] sc0 sc1 1980; GFX940-NEXT: s_waitcnt vmcnt(0) 1981; GFX940-NEXT: s_setpc_b64 s[30:31] 1982 %vec0 = call <4 x i32> asm "; def $0", "=v"() 1983 %vec1 = call <4 x i32> asm "; def $0", "=v"() 1984 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 6, i32 0> 1985 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 1986 ret void 1987} 1988 1989define void @v_shuffle_v3i32_v4i32__u_1_1(ptr addrspace(1) inreg %ptr) { 1990; GFX900-LABEL: v_shuffle_v3i32_v4i32__u_1_1: 1991; GFX900: ; %bb.0: 1992; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1993; GFX900-NEXT: ;;#ASMSTART 1994; GFX900-NEXT: ; def v[0:3] 1995; GFX900-NEXT: ;;#ASMEND 1996; GFX900-NEXT: v_mov_b32_e32 v4, 0 1997; GFX900-NEXT: v_mov_b32_e32 v2, v1 1998; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17] 1999; GFX900-NEXT: s_waitcnt vmcnt(0) 2000; GFX900-NEXT: s_setpc_b64 s[30:31] 2001; 2002; GFX90A-LABEL: v_shuffle_v3i32_v4i32__u_1_1: 2003; GFX90A: ; %bb.0: 2004; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2005; GFX90A-NEXT: ;;#ASMSTART 2006; GFX90A-NEXT: ; def v[0:3] 2007; GFX90A-NEXT: ;;#ASMEND 2008; GFX90A-NEXT: v_mov_b32_e32 v4, 0 2009; GFX90A-NEXT: v_mov_b32_e32 v2, v1 2010; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17] 2011; GFX90A-NEXT: s_waitcnt vmcnt(0) 2012; GFX90A-NEXT: s_setpc_b64 s[30:31] 2013; 2014; GFX940-LABEL: v_shuffle_v3i32_v4i32__u_1_1: 2015; GFX940: ; %bb.0: 2016; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2017; GFX940-NEXT: ;;#ASMSTART 2018; GFX940-NEXT: ; def v[0:3] 2019; GFX940-NEXT: ;;#ASMEND 2020; GFX940-NEXT: v_mov_b32_e32 v4, 0 2021; GFX940-NEXT: v_mov_b32_e32 v2, v1 2022; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 2023; GFX940-NEXT: s_waitcnt vmcnt(0) 2024; GFX940-NEXT: s_setpc_b64 s[30:31] 2025 %vec0 = call <4 x i32> asm "; def $0", "=v"() 2026 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x 
i32> <i32 poison, i32 1, i32 1> 2027 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 2028 ret void 2029} 2030 2031define void @v_shuffle_v3i32_v4i32__0_1_1(ptr addrspace(1) inreg %ptr) { 2032; GFX900-LABEL: v_shuffle_v3i32_v4i32__0_1_1: 2033; GFX900: ; %bb.0: 2034; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2035; GFX900-NEXT: ;;#ASMSTART 2036; GFX900-NEXT: ; def v[0:3] 2037; GFX900-NEXT: ;;#ASMEND 2038; GFX900-NEXT: v_mov_b32_e32 v4, 0 2039; GFX900-NEXT: v_mov_b32_e32 v2, v1 2040; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17] 2041; GFX900-NEXT: s_waitcnt vmcnt(0) 2042; GFX900-NEXT: s_setpc_b64 s[30:31] 2043; 2044; GFX90A-LABEL: v_shuffle_v3i32_v4i32__0_1_1: 2045; GFX90A: ; %bb.0: 2046; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2047; GFX90A-NEXT: ;;#ASMSTART 2048; GFX90A-NEXT: ; def v[0:3] 2049; GFX90A-NEXT: ;;#ASMEND 2050; GFX90A-NEXT: v_mov_b32_e32 v4, 0 2051; GFX90A-NEXT: v_mov_b32_e32 v2, v1 2052; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17] 2053; GFX90A-NEXT: s_waitcnt vmcnt(0) 2054; GFX90A-NEXT: s_setpc_b64 s[30:31] 2055; 2056; GFX940-LABEL: v_shuffle_v3i32_v4i32__0_1_1: 2057; GFX940: ; %bb.0: 2058; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2059; GFX940-NEXT: ;;#ASMSTART 2060; GFX940-NEXT: ; def v[0:3] 2061; GFX940-NEXT: ;;#ASMEND 2062; GFX940-NEXT: v_mov_b32_e32 v4, 0 2063; GFX940-NEXT: v_mov_b32_e32 v2, v1 2064; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 2065; GFX940-NEXT: s_waitcnt vmcnt(0) 2066; GFX940-NEXT: s_setpc_b64 s[30:31] 2067 %vec0 = call <4 x i32> asm "; def $0", "=v"() 2068 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> <i32 0, i32 1, i32 1> 2069 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 2070 ret void 2071} 2072 2073define void @v_shuffle_v3i32_v4i32__1_1_1(ptr addrspace(1) inreg %ptr) { 2074; GFX900-LABEL: v_shuffle_v3i32_v4i32__1_1_1: 2075; GFX900: ; %bb.0: 2076; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2077; GFX900-NEXT: 
;;#ASMSTART 2078; GFX900-NEXT: ; def v[0:3] 2079; GFX900-NEXT: ;;#ASMEND 2080; GFX900-NEXT: v_mov_b32_e32 v4, 0 2081; GFX900-NEXT: v_mov_b32_e32 v2, v1 2082; GFX900-NEXT: v_mov_b32_e32 v3, v1 2083; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17] 2084; GFX900-NEXT: s_waitcnt vmcnt(0) 2085; GFX900-NEXT: s_setpc_b64 s[30:31] 2086; 2087; GFX90A-LABEL: v_shuffle_v3i32_v4i32__1_1_1: 2088; GFX90A: ; %bb.0: 2089; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2090; GFX90A-NEXT: ;;#ASMSTART 2091; GFX90A-NEXT: ; def v[0:3] 2092; GFX90A-NEXT: ;;#ASMEND 2093; GFX90A-NEXT: v_mov_b32_e32 v4, 0 2094; GFX90A-NEXT: v_mov_b32_e32 v0, v1 2095; GFX90A-NEXT: v_mov_b32_e32 v2, v1 2096; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17] 2097; GFX90A-NEXT: s_waitcnt vmcnt(0) 2098; GFX90A-NEXT: s_setpc_b64 s[30:31] 2099; 2100; GFX940-LABEL: v_shuffle_v3i32_v4i32__1_1_1: 2101; GFX940: ; %bb.0: 2102; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2103; GFX940-NEXT: ;;#ASMSTART 2104; GFX940-NEXT: ; def v[0:3] 2105; GFX940-NEXT: ;;#ASMEND 2106; GFX940-NEXT: v_mov_b32_e32 v4, 0 2107; GFX940-NEXT: v_mov_b32_e32 v0, v1 2108; GFX940-NEXT: v_mov_b32_e32 v2, v1 2109; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 2110; GFX940-NEXT: s_waitcnt vmcnt(0) 2111; GFX940-NEXT: s_setpc_b64 s[30:31] 2112 %vec0 = call <4 x i32> asm "; def $0", "=v"() 2113 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> <i32 1, i32 1, i32 1> 2114 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 2115 ret void 2116} 2117 2118define void @v_shuffle_v3i32_v4i32__2_1_1(ptr addrspace(1) inreg %ptr) { 2119; GFX900-LABEL: v_shuffle_v3i32_v4i32__2_1_1: 2120; GFX900: ; %bb.0: 2121; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2122; GFX900-NEXT: ;;#ASMSTART 2123; GFX900-NEXT: ; def v[0:3] 2124; GFX900-NEXT: ;;#ASMEND 2125; GFX900-NEXT: v_mov_b32_e32 v4, 0 2126; GFX900-NEXT: v_mov_b32_e32 v0, v2 2127; GFX900-NEXT: v_mov_b32_e32 v2, v1 2128; GFX900-NEXT: 
global_store_dwordx3 v4, v[0:2], s[16:17] 2129; GFX900-NEXT: s_waitcnt vmcnt(0) 2130; GFX900-NEXT: s_setpc_b64 s[30:31] 2131; 2132; GFX90A-LABEL: v_shuffle_v3i32_v4i32__2_1_1: 2133; GFX90A: ; %bb.0: 2134; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2135; GFX90A-NEXT: ;;#ASMSTART 2136; GFX90A-NEXT: ; def v[0:3] 2137; GFX90A-NEXT: ;;#ASMEND 2138; GFX90A-NEXT: v_mov_b32_e32 v4, 0 2139; GFX90A-NEXT: v_mov_b32_e32 v0, v2 2140; GFX90A-NEXT: v_mov_b32_e32 v2, v1 2141; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17] 2142; GFX90A-NEXT: s_waitcnt vmcnt(0) 2143; GFX90A-NEXT: s_setpc_b64 s[30:31] 2144; 2145; GFX940-LABEL: v_shuffle_v3i32_v4i32__2_1_1: 2146; GFX940: ; %bb.0: 2147; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2148; GFX940-NEXT: ;;#ASMSTART 2149; GFX940-NEXT: ; def v[0:3] 2150; GFX940-NEXT: ;;#ASMEND 2151; GFX940-NEXT: v_mov_b32_e32 v4, 0 2152; GFX940-NEXT: v_mov_b32_e32 v0, v2 2153; GFX940-NEXT: v_mov_b32_e32 v2, v1 2154; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 2155; GFX940-NEXT: s_waitcnt vmcnt(0) 2156; GFX940-NEXT: s_setpc_b64 s[30:31] 2157 %vec0 = call <4 x i32> asm "; def $0", "=v"() 2158 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> <i32 2, i32 1, i32 1> 2159 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 2160 ret void 2161} 2162 2163define void @v_shuffle_v3i32_v4i32__3_1_1(ptr addrspace(1) inreg %ptr) { 2164; GFX900-LABEL: v_shuffle_v3i32_v4i32__3_1_1: 2165; GFX900: ; %bb.0: 2166; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2167; GFX900-NEXT: ;;#ASMSTART 2168; GFX900-NEXT: ; def v[0:3] 2169; GFX900-NEXT: ;;#ASMEND 2170; GFX900-NEXT: v_mov_b32_e32 v4, 0 2171; GFX900-NEXT: v_mov_b32_e32 v0, v3 2172; GFX900-NEXT: v_mov_b32_e32 v2, v1 2173; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17] 2174; GFX900-NEXT: s_waitcnt vmcnt(0) 2175; GFX900-NEXT: s_setpc_b64 s[30:31] 2176; 2177; GFX90A-LABEL: v_shuffle_v3i32_v4i32__3_1_1: 2178; GFX90A: ; %bb.0: 2179; GFX90A-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) 2180; GFX90A-NEXT: ;;#ASMSTART 2181; GFX90A-NEXT: ; def v[0:3] 2182; GFX90A-NEXT: ;;#ASMEND 2183; GFX90A-NEXT: v_mov_b32_e32 v4, 0 2184; GFX90A-NEXT: v_mov_b32_e32 v0, v3 2185; GFX90A-NEXT: v_mov_b32_e32 v2, v1 2186; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17] 2187; GFX90A-NEXT: s_waitcnt vmcnt(0) 2188; GFX90A-NEXT: s_setpc_b64 s[30:31] 2189; 2190; GFX940-LABEL: v_shuffle_v3i32_v4i32__3_1_1: 2191; GFX940: ; %bb.0: 2192; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2193; GFX940-NEXT: ;;#ASMSTART 2194; GFX940-NEXT: ; def v[0:3] 2195; GFX940-NEXT: ;;#ASMEND 2196; GFX940-NEXT: v_mov_b32_e32 v4, 0 2197; GFX940-NEXT: v_mov_b32_e32 v0, v3 2198; GFX940-NEXT: v_mov_b32_e32 v2, v1 2199; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 2200; GFX940-NEXT: s_waitcnt vmcnt(0) 2201; GFX940-NEXT: s_setpc_b64 s[30:31] 2202 %vec0 = call <4 x i32> asm "; def $0", "=v"() 2203 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> <i32 3, i32 1, i32 1> 2204 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 2205 ret void 2206} 2207 2208define void @v_shuffle_v3i32_v4i32__4_1_1(ptr addrspace(1) inreg %ptr) { 2209; GFX900-LABEL: v_shuffle_v3i32_v4i32__4_1_1: 2210; GFX900: ; %bb.0: 2211; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2212; GFX900-NEXT: ;;#ASMSTART 2213; GFX900-NEXT: ; def v[0:3] 2214; GFX900-NEXT: ;;#ASMEND 2215; GFX900-NEXT: v_mov_b32_e32 v4, 0 2216; GFX900-NEXT: v_mov_b32_e32 v2, v1 2217; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17] 2218; GFX900-NEXT: s_waitcnt vmcnt(0) 2219; GFX900-NEXT: s_setpc_b64 s[30:31] 2220; 2221; GFX90A-LABEL: v_shuffle_v3i32_v4i32__4_1_1: 2222; GFX90A: ; %bb.0: 2223; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2224; GFX90A-NEXT: ;;#ASMSTART 2225; GFX90A-NEXT: ; def v[0:3] 2226; GFX90A-NEXT: ;;#ASMEND 2227; GFX90A-NEXT: v_mov_b32_e32 v4, 0 2228; GFX90A-NEXT: v_mov_b32_e32 v2, v1 2229; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17] 2230; 
GFX90A-NEXT: s_waitcnt vmcnt(0) 2231; GFX90A-NEXT: s_setpc_b64 s[30:31] 2232; 2233; GFX940-LABEL: v_shuffle_v3i32_v4i32__4_1_1: 2234; GFX940: ; %bb.0: 2235; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2236; GFX940-NEXT: ;;#ASMSTART 2237; GFX940-NEXT: ; def v[0:3] 2238; GFX940-NEXT: ;;#ASMEND 2239; GFX940-NEXT: v_mov_b32_e32 v4, 0 2240; GFX940-NEXT: v_mov_b32_e32 v2, v1 2241; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 2242; GFX940-NEXT: s_waitcnt vmcnt(0) 2243; GFX940-NEXT: s_setpc_b64 s[30:31] 2244 %vec0 = call <4 x i32> asm "; def $0", "=v"() 2245 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> <i32 4, i32 1, i32 1> 2246 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 2247 ret void 2248} 2249 2250define void @v_shuffle_v3i32_v4i32__5_1_1(ptr addrspace(1) inreg %ptr) { 2251; GFX900-LABEL: v_shuffle_v3i32_v4i32__5_1_1: 2252; GFX900: ; %bb.0: 2253; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2254; GFX900-NEXT: ;;#ASMSTART 2255; GFX900-NEXT: ; def v[0:3] 2256; GFX900-NEXT: ;;#ASMEND 2257; GFX900-NEXT: ;;#ASMSTART 2258; GFX900-NEXT: ; def v[2:5] 2259; GFX900-NEXT: ;;#ASMEND 2260; GFX900-NEXT: v_mov_b32_e32 v6, 0 2261; GFX900-NEXT: v_mov_b32_e32 v4, v1 2262; GFX900-NEXT: v_mov_b32_e32 v5, v1 2263; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17] 2264; GFX900-NEXT: s_waitcnt vmcnt(0) 2265; GFX900-NEXT: s_setpc_b64 s[30:31] 2266; 2267; GFX90A-LABEL: v_shuffle_v3i32_v4i32__5_1_1: 2268; GFX90A: ; %bb.0: 2269; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2270; GFX90A-NEXT: ;;#ASMSTART 2271; GFX90A-NEXT: ; def v[0:3] 2272; GFX90A-NEXT: ;;#ASMEND 2273; GFX90A-NEXT: ;;#ASMSTART 2274; GFX90A-NEXT: ; def v[2:5] 2275; GFX90A-NEXT: ;;#ASMEND 2276; GFX90A-NEXT: v_mov_b32_e32 v6, 0 2277; GFX90A-NEXT: v_mov_b32_e32 v0, v3 2278; GFX90A-NEXT: v_mov_b32_e32 v2, v1 2279; GFX90A-NEXT: global_store_dwordx3 v6, v[0:2], s[16:17] 2280; GFX90A-NEXT: s_waitcnt vmcnt(0) 2281; GFX90A-NEXT: s_setpc_b64 s[30:31] 2282; 2283; 
GFX940-LABEL: v_shuffle_v3i32_v4i32__5_1_1: 2284; GFX940: ; %bb.0: 2285; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2286; GFX940-NEXT: ;;#ASMSTART 2287; GFX940-NEXT: ; def v[0:3] 2288; GFX940-NEXT: ;;#ASMEND 2289; GFX940-NEXT: v_mov_b32_e32 v6, 0 2290; GFX940-NEXT: ;;#ASMSTART 2291; GFX940-NEXT: ; def v[2:5] 2292; GFX940-NEXT: ;;#ASMEND 2293; GFX940-NEXT: s_nop 0 2294; GFX940-NEXT: v_mov_b32_e32 v0, v3 2295; GFX940-NEXT: v_mov_b32_e32 v2, v1 2296; GFX940-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] sc0 sc1 2297; GFX940-NEXT: s_waitcnt vmcnt(0) 2298; GFX940-NEXT: s_setpc_b64 s[30:31] 2299 %vec0 = call <4 x i32> asm "; def $0", "=v"() 2300 %vec1 = call <4 x i32> asm "; def $0", "=v"() 2301 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 5, i32 1, i32 1> 2302 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 2303 ret void 2304} 2305 2306define void @v_shuffle_v3i32_v4i32__6_1_1(ptr addrspace(1) inreg %ptr) { 2307; GFX900-LABEL: v_shuffle_v3i32_v4i32__6_1_1: 2308; GFX900: ; %bb.0: 2309; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2310; GFX900-NEXT: ;;#ASMSTART 2311; GFX900-NEXT: ; def v[0:3] 2312; GFX900-NEXT: ;;#ASMEND 2313; GFX900-NEXT: ;;#ASMSTART 2314; GFX900-NEXT: ; def v[2:5] 2315; GFX900-NEXT: ;;#ASMEND 2316; GFX900-NEXT: v_mov_b32_e32 v6, 0 2317; GFX900-NEXT: v_mov_b32_e32 v0, v4 2318; GFX900-NEXT: v_mov_b32_e32 v2, v1 2319; GFX900-NEXT: global_store_dwordx3 v6, v[0:2], s[16:17] 2320; GFX900-NEXT: s_waitcnt vmcnt(0) 2321; GFX900-NEXT: s_setpc_b64 s[30:31] 2322; 2323; GFX90A-LABEL: v_shuffle_v3i32_v4i32__6_1_1: 2324; GFX90A: ; %bb.0: 2325; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2326; GFX90A-NEXT: ;;#ASMSTART 2327; GFX90A-NEXT: ; def v[0:3] 2328; GFX90A-NEXT: ;;#ASMEND 2329; GFX90A-NEXT: ;;#ASMSTART 2330; GFX90A-NEXT: ; def v[2:5] 2331; GFX90A-NEXT: ;;#ASMEND 2332; GFX90A-NEXT: v_mov_b32_e32 v6, 0 2333; GFX90A-NEXT: v_mov_b32_e32 v0, v4 2334; GFX90A-NEXT: v_mov_b32_e32 v2, v1 2335; GFX90A-NEXT: 
global_store_dwordx3 v6, v[0:2], s[16:17] 2336; GFX90A-NEXT: s_waitcnt vmcnt(0) 2337; GFX90A-NEXT: s_setpc_b64 s[30:31] 2338; 2339; GFX940-LABEL: v_shuffle_v3i32_v4i32__6_1_1: 2340; GFX940: ; %bb.0: 2341; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2342; GFX940-NEXT: ;;#ASMSTART 2343; GFX940-NEXT: ; def v[0:3] 2344; GFX940-NEXT: ;;#ASMEND 2345; GFX940-NEXT: v_mov_b32_e32 v6, 0 2346; GFX940-NEXT: ;;#ASMSTART 2347; GFX940-NEXT: ; def v[2:5] 2348; GFX940-NEXT: ;;#ASMEND 2349; GFX940-NEXT: s_nop 0 2350; GFX940-NEXT: v_mov_b32_e32 v0, v4 2351; GFX940-NEXT: v_mov_b32_e32 v2, v1 2352; GFX940-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] sc0 sc1 2353; GFX940-NEXT: s_waitcnt vmcnt(0) 2354; GFX940-NEXT: s_setpc_b64 s[30:31] 2355 %vec0 = call <4 x i32> asm "; def $0", "=v"() 2356 %vec1 = call <4 x i32> asm "; def $0", "=v"() 2357 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 6, i32 1, i32 1> 2358 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 2359 ret void 2360} 2361 2362define void @v_shuffle_v3i32_v4i32__7_1_1(ptr addrspace(1) inreg %ptr) { 2363; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_1_1: 2364; GFX900: ; %bb.0: 2365; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2366; GFX900-NEXT: ;;#ASMSTART 2367; GFX900-NEXT: ; def v[0:3] 2368; GFX900-NEXT: ;;#ASMEND 2369; GFX900-NEXT: ;;#ASMSTART 2370; GFX900-NEXT: ; def v[2:5] 2371; GFX900-NEXT: ;;#ASMEND 2372; GFX900-NEXT: v_mov_b32_e32 v6, 0 2373; GFX900-NEXT: v_mov_b32_e32 v0, v5 2374; GFX900-NEXT: v_mov_b32_e32 v2, v1 2375; GFX900-NEXT: global_store_dwordx3 v6, v[0:2], s[16:17] 2376; GFX900-NEXT: s_waitcnt vmcnt(0) 2377; GFX900-NEXT: s_setpc_b64 s[30:31] 2378; 2379; GFX90A-LABEL: v_shuffle_v3i32_v4i32__7_1_1: 2380; GFX90A: ; %bb.0: 2381; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2382; GFX90A-NEXT: ;;#ASMSTART 2383; GFX90A-NEXT: ; def v[0:3] 2384; GFX90A-NEXT: ;;#ASMEND 2385; GFX90A-NEXT: ;;#ASMSTART 2386; GFX90A-NEXT: ; def v[2:5] 2387; GFX90A-NEXT: ;;#ASMEND 2388; 
GFX90A-NEXT: v_mov_b32_e32 v6, 0 2389; GFX90A-NEXT: v_mov_b32_e32 v0, v5 2390; GFX90A-NEXT: v_mov_b32_e32 v2, v1 2391; GFX90A-NEXT: global_store_dwordx3 v6, v[0:2], s[16:17] 2392; GFX90A-NEXT: s_waitcnt vmcnt(0) 2393; GFX90A-NEXT: s_setpc_b64 s[30:31] 2394; 2395; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_1_1: 2396; GFX940: ; %bb.0: 2397; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2398; GFX940-NEXT: ;;#ASMSTART 2399; GFX940-NEXT: ; def v[0:3] 2400; GFX940-NEXT: ;;#ASMEND 2401; GFX940-NEXT: v_mov_b32_e32 v6, 0 2402; GFX940-NEXT: ;;#ASMSTART 2403; GFX940-NEXT: ; def v[2:5] 2404; GFX940-NEXT: ;;#ASMEND 2405; GFX940-NEXT: s_nop 0 2406; GFX940-NEXT: v_mov_b32_e32 v0, v5 2407; GFX940-NEXT: v_mov_b32_e32 v2, v1 2408; GFX940-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] sc0 sc1 2409; GFX940-NEXT: s_waitcnt vmcnt(0) 2410; GFX940-NEXT: s_setpc_b64 s[30:31] 2411 %vec0 = call <4 x i32> asm "; def $0", "=v"() 2412 %vec1 = call <4 x i32> asm "; def $0", "=v"() 2413 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 1, i32 1> 2414 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 2415 ret void 2416} 2417 2418define void @v_shuffle_v3i32_v4i32__7_u_1(ptr addrspace(1) inreg %ptr) { 2419; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_u_1: 2420; GFX900: ; %bb.0: 2421; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2422; GFX900-NEXT: ;;#ASMSTART 2423; GFX900-NEXT: ; def v[0:3] 2424; GFX900-NEXT: ;;#ASMEND 2425; GFX900-NEXT: ;;#ASMSTART 2426; GFX900-NEXT: ; def v[2:5] 2427; GFX900-NEXT: ;;#ASMEND 2428; GFX900-NEXT: v_mov_b32_e32 v6, 0 2429; GFX900-NEXT: v_mov_b32_e32 v0, v5 2430; GFX900-NEXT: v_mov_b32_e32 v2, v1 2431; GFX900-NEXT: global_store_dwordx3 v6, v[0:2], s[16:17] 2432; GFX900-NEXT: s_waitcnt vmcnt(0) 2433; GFX900-NEXT: s_setpc_b64 s[30:31] 2434; 2435; GFX90A-LABEL: v_shuffle_v3i32_v4i32__7_u_1: 2436; GFX90A: ; %bb.0: 2437; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2438; GFX90A-NEXT: ;;#ASMSTART 2439; GFX90A-NEXT: ; def v[0:3] 
2440; GFX90A-NEXT: ;;#ASMEND 2441; GFX90A-NEXT: ;;#ASMSTART 2442; GFX90A-NEXT: ; def v[2:5] 2443; GFX90A-NEXT: ;;#ASMEND 2444; GFX90A-NEXT: v_mov_b32_e32 v6, 0 2445; GFX90A-NEXT: v_mov_b32_e32 v0, v5 2446; GFX90A-NEXT: v_mov_b32_e32 v2, v1 2447; GFX90A-NEXT: global_store_dwordx3 v6, v[0:2], s[16:17] 2448; GFX90A-NEXT: s_waitcnt vmcnt(0) 2449; GFX90A-NEXT: s_setpc_b64 s[30:31] 2450; 2451; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_u_1: 2452; GFX940: ; %bb.0: 2453; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2454; GFX940-NEXT: ;;#ASMSTART 2455; GFX940-NEXT: ; def v[0:3] 2456; GFX940-NEXT: ;;#ASMEND 2457; GFX940-NEXT: v_mov_b32_e32 v6, 0 2458; GFX940-NEXT: ;;#ASMSTART 2459; GFX940-NEXT: ; def v[2:5] 2460; GFX940-NEXT: ;;#ASMEND 2461; GFX940-NEXT: s_nop 0 2462; GFX940-NEXT: v_mov_b32_e32 v0, v5 2463; GFX940-NEXT: v_mov_b32_e32 v2, v1 2464; GFX940-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] sc0 sc1 2465; GFX940-NEXT: s_waitcnt vmcnt(0) 2466; GFX940-NEXT: s_setpc_b64 s[30:31] 2467 %vec0 = call <4 x i32> asm "; def $0", "=v"() 2468 %vec1 = call <4 x i32> asm "; def $0", "=v"() 2469 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 poison, i32 1> 2470 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 2471 ret void 2472} 2473 2474define void @v_shuffle_v3i32_v4i32__7_0_1(ptr addrspace(1) inreg %ptr) { 2475; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_0_1: 2476; GFX900: ; %bb.0: 2477; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2478; GFX900-NEXT: ;;#ASMSTART 2479; GFX900-NEXT: ; def v[0:3] 2480; GFX900-NEXT: ;;#ASMEND 2481; GFX900-NEXT: ;;#ASMSTART 2482; GFX900-NEXT: ; def v[2:5] 2483; GFX900-NEXT: ;;#ASMEND 2484; GFX900-NEXT: v_mov_b32_e32 v6, 0 2485; GFX900-NEXT: v_mov_b32_e32 v2, v5 2486; GFX900-NEXT: v_mov_b32_e32 v3, v0 2487; GFX900-NEXT: v_mov_b32_e32 v4, v1 2488; GFX900-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17] 2489; GFX900-NEXT: s_waitcnt vmcnt(0) 2490; GFX900-NEXT: s_setpc_b64 s[30:31] 2491; 2492; GFX90A-LABEL: 
v_shuffle_v3i32_v4i32__7_0_1: 2493; GFX90A: ; %bb.0: 2494; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2495; GFX90A-NEXT: ;;#ASMSTART 2496; GFX90A-NEXT: ; def v[0:3] 2497; GFX90A-NEXT: ;;#ASMEND 2498; GFX90A-NEXT: ;;#ASMSTART 2499; GFX90A-NEXT: ; def v[2:5] 2500; GFX90A-NEXT: ;;#ASMEND 2501; GFX90A-NEXT: v_mov_b32_e32 v6, 0 2502; GFX90A-NEXT: v_mov_b32_e32 v2, v5 2503; GFX90A-NEXT: v_mov_b32_e32 v3, v0 2504; GFX90A-NEXT: v_mov_b32_e32 v4, v1 2505; GFX90A-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17] 2506; GFX90A-NEXT: s_waitcnt vmcnt(0) 2507; GFX90A-NEXT: s_setpc_b64 s[30:31] 2508; 2509; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_0_1: 2510; GFX940: ; %bb.0: 2511; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2512; GFX940-NEXT: ;;#ASMSTART 2513; GFX940-NEXT: ; def v[0:3] 2514; GFX940-NEXT: ;;#ASMEND 2515; GFX940-NEXT: v_mov_b32_e32 v6, 0 2516; GFX940-NEXT: ;;#ASMSTART 2517; GFX940-NEXT: ; def v[2:5] 2518; GFX940-NEXT: ;;#ASMEND 2519; GFX940-NEXT: s_nop 0 2520; GFX940-NEXT: v_mov_b32_e32 v2, v5 2521; GFX940-NEXT: v_mov_b32_e32 v3, v0 2522; GFX940-NEXT: v_mov_b32_e32 v4, v1 2523; GFX940-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] sc0 sc1 2524; GFX940-NEXT: s_waitcnt vmcnt(0) 2525; GFX940-NEXT: s_setpc_b64 s[30:31] 2526 %vec0 = call <4 x i32> asm "; def $0", "=v"() 2527 %vec1 = call <4 x i32> asm "; def $0", "=v"() 2528 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 0, i32 1> 2529 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 2530 ret void 2531} 2532 2533define void @v_shuffle_v3i32_v4i32__7_2_1(ptr addrspace(1) inreg %ptr) { 2534; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_2_1: 2535; GFX900: ; %bb.0: 2536; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2537; GFX900-NEXT: ;;#ASMSTART 2538; GFX900-NEXT: ; def v[0:3] 2539; GFX900-NEXT: ;;#ASMEND 2540; GFX900-NEXT: ;;#ASMSTART 2541; GFX900-NEXT: ; def v[3:6] 2542; GFX900-NEXT: ;;#ASMEND 2543; GFX900-NEXT: v_mov_b32_e32 v7, 0 2544; GFX900-NEXT: v_mov_b32_e32 v3, v6 
2545; GFX900-NEXT: v_mov_b32_e32 v4, v2 2546; GFX900-NEXT: v_mov_b32_e32 v5, v1 2547; GFX900-NEXT: global_store_dwordx3 v7, v[3:5], s[16:17] 2548; GFX900-NEXT: s_waitcnt vmcnt(0) 2549; GFX900-NEXT: s_setpc_b64 s[30:31] 2550; 2551; GFX90A-LABEL: v_shuffle_v3i32_v4i32__7_2_1: 2552; GFX90A: ; %bb.0: 2553; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2554; GFX90A-NEXT: ;;#ASMSTART 2555; GFX90A-NEXT: ; def v[4:7] 2556; GFX90A-NEXT: ;;#ASMEND 2557; GFX90A-NEXT: v_mov_b32_e32 v8, 0 2558; GFX90A-NEXT: ;;#ASMSTART 2559; GFX90A-NEXT: ; def v[0:3] 2560; GFX90A-NEXT: ;;#ASMEND 2561; GFX90A-NEXT: v_mov_b32_e32 v4, v7 2562; GFX90A-NEXT: v_mov_b32_e32 v5, v2 2563; GFX90A-NEXT: v_mov_b32_e32 v6, v1 2564; GFX90A-NEXT: global_store_dwordx3 v8, v[4:6], s[16:17] 2565; GFX90A-NEXT: s_waitcnt vmcnt(0) 2566; GFX90A-NEXT: s_setpc_b64 s[30:31] 2567; 2568; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_2_1: 2569; GFX940: ; %bb.0: 2570; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2571; GFX940-NEXT: ;;#ASMSTART 2572; GFX940-NEXT: ; def v[4:7] 2573; GFX940-NEXT: ;;#ASMEND 2574; GFX940-NEXT: v_mov_b32_e32 v8, 0 2575; GFX940-NEXT: ;;#ASMSTART 2576; GFX940-NEXT: ; def v[0:3] 2577; GFX940-NEXT: ;;#ASMEND 2578; GFX940-NEXT: v_mov_b32_e32 v4, v7 2579; GFX940-NEXT: v_mov_b32_e32 v5, v2 2580; GFX940-NEXT: v_mov_b32_e32 v6, v1 2581; GFX940-NEXT: global_store_dwordx3 v8, v[4:6], s[0:1] sc0 sc1 2582; GFX940-NEXT: s_waitcnt vmcnt(0) 2583; GFX940-NEXT: s_setpc_b64 s[30:31] 2584 %vec0 = call <4 x i32> asm "; def $0", "=v"() 2585 %vec1 = call <4 x i32> asm "; def $0", "=v"() 2586 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 2, i32 1> 2587 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 2588 ret void 2589} 2590 2591define void @v_shuffle_v3i32_v4i32__7_3_1(ptr addrspace(1) inreg %ptr) { 2592; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_3_1: 2593; GFX900: ; %bb.0: 2594; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2595; GFX900-NEXT: ;;#ASMSTART 2596; 
GFX900-NEXT: ; def v[0:3] 2597; GFX900-NEXT: ;;#ASMEND 2598; GFX900-NEXT: ;;#ASMSTART 2599; GFX900-NEXT: ; def v[4:7] 2600; GFX900-NEXT: ;;#ASMEND 2601; GFX900-NEXT: v_mov_b32_e32 v8, 0 2602; GFX900-NEXT: v_mov_b32_e32 v2, v7 2603; GFX900-NEXT: v_mov_b32_e32 v4, v1 2604; GFX900-NEXT: global_store_dwordx3 v8, v[2:4], s[16:17] 2605; GFX900-NEXT: s_waitcnt vmcnt(0) 2606; GFX900-NEXT: s_setpc_b64 s[30:31] 2607; 2608; GFX90A-LABEL: v_shuffle_v3i32_v4i32__7_3_1: 2609; GFX90A: ; %bb.0: 2610; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2611; GFX90A-NEXT: ;;#ASMSTART 2612; GFX90A-NEXT: ; def v[0:3] 2613; GFX90A-NEXT: ;;#ASMEND 2614; GFX90A-NEXT: ;;#ASMSTART 2615; GFX90A-NEXT: ; def v[4:7] 2616; GFX90A-NEXT: ;;#ASMEND 2617; GFX90A-NEXT: v_mov_b32_e32 v8, 0 2618; GFX90A-NEXT: v_mov_b32_e32 v2, v7 2619; GFX90A-NEXT: v_mov_b32_e32 v4, v1 2620; GFX90A-NEXT: global_store_dwordx3 v8, v[2:4], s[16:17] 2621; GFX90A-NEXT: s_waitcnt vmcnt(0) 2622; GFX90A-NEXT: s_setpc_b64 s[30:31] 2623; 2624; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_3_1: 2625; GFX940: ; %bb.0: 2626; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2627; GFX940-NEXT: ;;#ASMSTART 2628; GFX940-NEXT: ; def v[0:3] 2629; GFX940-NEXT: ;;#ASMEND 2630; GFX940-NEXT: ;;#ASMSTART 2631; GFX940-NEXT: ; def v[4:7] 2632; GFX940-NEXT: ;;#ASMEND 2633; GFX940-NEXT: v_mov_b32_e32 v8, 0 2634; GFX940-NEXT: v_mov_b32_e32 v2, v7 2635; GFX940-NEXT: v_mov_b32_e32 v4, v1 2636; GFX940-NEXT: global_store_dwordx3 v8, v[2:4], s[0:1] sc0 sc1 2637; GFX940-NEXT: s_waitcnt vmcnt(0) 2638; GFX940-NEXT: s_setpc_b64 s[30:31] 2639 %vec0 = call <4 x i32> asm "; def $0", "=v"() 2640 %vec1 = call <4 x i32> asm "; def $0", "=v"() 2641 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 3, i32 1> 2642 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 2643 ret void 2644} 2645 2646define void @v_shuffle_v3i32_v4i32__7_4_1(ptr addrspace(1) inreg %ptr) { 2647; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_4_1: 2648; GFX900: ; 
%bb.0: 2649; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2650; GFX900-NEXT: ;;#ASMSTART 2651; GFX900-NEXT: ; def v[0:3] 2652; GFX900-NEXT: ;;#ASMEND 2653; GFX900-NEXT: ;;#ASMSTART 2654; GFX900-NEXT: ; def v[2:5] 2655; GFX900-NEXT: ;;#ASMEND 2656; GFX900-NEXT: v_mov_b32_e32 v6, 0 2657; GFX900-NEXT: v_mov_b32_e32 v3, v5 2658; GFX900-NEXT: v_mov_b32_e32 v4, v2 2659; GFX900-NEXT: v_mov_b32_e32 v5, v1 2660; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17] 2661; GFX900-NEXT: s_waitcnt vmcnt(0) 2662; GFX900-NEXT: s_setpc_b64 s[30:31] 2663; 2664; GFX90A-LABEL: v_shuffle_v3i32_v4i32__7_4_1: 2665; GFX90A: ; %bb.0: 2666; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2667; GFX90A-NEXT: ;;#ASMSTART 2668; GFX90A-NEXT: ; def v[0:3] 2669; GFX90A-NEXT: ;;#ASMEND 2670; GFX90A-NEXT: ;;#ASMSTART 2671; GFX90A-NEXT: ; def v[2:5] 2672; GFX90A-NEXT: ;;#ASMEND 2673; GFX90A-NEXT: v_mov_b32_e32 v7, 0 2674; GFX90A-NEXT: v_mov_b32_e32 v4, v5 2675; GFX90A-NEXT: v_mov_b32_e32 v5, v2 2676; GFX90A-NEXT: v_mov_b32_e32 v6, v1 2677; GFX90A-NEXT: global_store_dwordx3 v7, v[4:6], s[16:17] 2678; GFX90A-NEXT: s_waitcnt vmcnt(0) 2679; GFX90A-NEXT: s_setpc_b64 s[30:31] 2680; 2681; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_4_1: 2682; GFX940: ; %bb.0: 2683; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2684; GFX940-NEXT: ;;#ASMSTART 2685; GFX940-NEXT: ; def v[0:3] 2686; GFX940-NEXT: ;;#ASMEND 2687; GFX940-NEXT: v_mov_b32_e32 v7, 0 2688; GFX940-NEXT: ;;#ASMSTART 2689; GFX940-NEXT: ; def v[2:5] 2690; GFX940-NEXT: ;;#ASMEND 2691; GFX940-NEXT: v_mov_b32_e32 v6, v1 2692; GFX940-NEXT: v_mov_b32_e32 v4, v5 2693; GFX940-NEXT: v_mov_b32_e32 v5, v2 2694; GFX940-NEXT: global_store_dwordx3 v7, v[4:6], s[0:1] sc0 sc1 2695; GFX940-NEXT: s_waitcnt vmcnt(0) 2696; GFX940-NEXT: s_setpc_b64 s[30:31] 2697 %vec0 = call <4 x i32> asm "; def $0", "=v"() 2698 %vec1 = call <4 x i32> asm "; def $0", "=v"() 2699 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 4, i32 1> 2700 store 
<3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 2701 ret void 2702} 2703 2704define void @v_shuffle_v3i32_v4i32__7_5_1(ptr addrspace(1) inreg %ptr) { 2705; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_5_1: 2706; GFX900: ; %bb.0: 2707; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2708; GFX900-NEXT: ;;#ASMSTART 2709; GFX900-NEXT: ; def v[0:3] 2710; GFX900-NEXT: ;;#ASMEND 2711; GFX900-NEXT: ;;#ASMSTART 2712; GFX900-NEXT: ; def v[2:5] 2713; GFX900-NEXT: ;;#ASMEND 2714; GFX900-NEXT: v_mov_b32_e32 v6, 0 2715; GFX900-NEXT: v_mov_b32_e32 v2, v5 2716; GFX900-NEXT: v_mov_b32_e32 v4, v1 2717; GFX900-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17] 2718; GFX900-NEXT: s_waitcnt vmcnt(0) 2719; GFX900-NEXT: s_setpc_b64 s[30:31] 2720; 2721; GFX90A-LABEL: v_shuffle_v3i32_v4i32__7_5_1: 2722; GFX90A: ; %bb.0: 2723; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2724; GFX90A-NEXT: ;;#ASMSTART 2725; GFX90A-NEXT: ; def v[0:3] 2726; GFX90A-NEXT: ;;#ASMEND 2727; GFX90A-NEXT: ;;#ASMSTART 2728; GFX90A-NEXT: ; def v[2:5] 2729; GFX90A-NEXT: ;;#ASMEND 2730; GFX90A-NEXT: v_mov_b32_e32 v6, 0 2731; GFX90A-NEXT: v_mov_b32_e32 v2, v5 2732; GFX90A-NEXT: v_mov_b32_e32 v4, v1 2733; GFX90A-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17] 2734; GFX90A-NEXT: s_waitcnt vmcnt(0) 2735; GFX90A-NEXT: s_setpc_b64 s[30:31] 2736; 2737; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_5_1: 2738; GFX940: ; %bb.0: 2739; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2740; GFX940-NEXT: ;;#ASMSTART 2741; GFX940-NEXT: ; def v[0:3] 2742; GFX940-NEXT: ;;#ASMEND 2743; GFX940-NEXT: v_mov_b32_e32 v6, 0 2744; GFX940-NEXT: ;;#ASMSTART 2745; GFX940-NEXT: ; def v[2:5] 2746; GFX940-NEXT: ;;#ASMEND 2747; GFX940-NEXT: s_nop 0 2748; GFX940-NEXT: v_mov_b32_e32 v2, v5 2749; GFX940-NEXT: v_mov_b32_e32 v4, v1 2750; GFX940-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] sc0 sc1 2751; GFX940-NEXT: s_waitcnt vmcnt(0) 2752; GFX940-NEXT: s_setpc_b64 s[30:31] 2753 %vec0 = call <4 x i32> asm "; def $0", "=v"() 2754 %vec1 = call <4 x i32> 
asm "; def $0", "=v"() 2755 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 5, i32 1> 2756 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 2757 ret void 2758} 2759 2760define void @v_shuffle_v3i32_v4i32__7_6_1(ptr addrspace(1) inreg %ptr) { 2761; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_6_1: 2762; GFX900: ; %bb.0: 2763; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2764; GFX900-NEXT: ;;#ASMSTART 2765; GFX900-NEXT: ; def v[0:3] 2766; GFX900-NEXT: ;;#ASMEND 2767; GFX900-NEXT: ;;#ASMSTART 2768; GFX900-NEXT: ; def v[2:5] 2769; GFX900-NEXT: ;;#ASMEND 2770; GFX900-NEXT: v_mov_b32_e32 v6, 0 2771; GFX900-NEXT: v_mov_b32_e32 v3, v5 2772; GFX900-NEXT: v_mov_b32_e32 v5, v1 2773; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17] 2774; GFX900-NEXT: s_waitcnt vmcnt(0) 2775; GFX900-NEXT: s_setpc_b64 s[30:31] 2776; 2777; GFX90A-LABEL: v_shuffle_v3i32_v4i32__7_6_1: 2778; GFX90A: ; %bb.0: 2779; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2780; GFX90A-NEXT: ;;#ASMSTART 2781; GFX90A-NEXT: ; def v[0:3] 2782; GFX90A-NEXT: ;;#ASMEND 2783; GFX90A-NEXT: ;;#ASMSTART 2784; GFX90A-NEXT: ; def v[2:5] 2785; GFX90A-NEXT: ;;#ASMEND 2786; GFX90A-NEXT: v_mov_b32_e32 v6, 0 2787; GFX90A-NEXT: v_mov_b32_e32 v2, v5 2788; GFX90A-NEXT: v_mov_b32_e32 v3, v4 2789; GFX90A-NEXT: v_mov_b32_e32 v4, v1 2790; GFX90A-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17] 2791; GFX90A-NEXT: s_waitcnt vmcnt(0) 2792; GFX90A-NEXT: s_setpc_b64 s[30:31] 2793; 2794; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_6_1: 2795; GFX940: ; %bb.0: 2796; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2797; GFX940-NEXT: ;;#ASMSTART 2798; GFX940-NEXT: ; def v[0:3] 2799; GFX940-NEXT: ;;#ASMEND 2800; GFX940-NEXT: v_mov_b32_e32 v6, 0 2801; GFX940-NEXT: ;;#ASMSTART 2802; GFX940-NEXT: ; def v[2:5] 2803; GFX940-NEXT: ;;#ASMEND 2804; GFX940-NEXT: s_nop 0 2805; GFX940-NEXT: v_mov_b32_e32 v2, v5 2806; GFX940-NEXT: v_mov_b32_e32 v3, v4 2807; GFX940-NEXT: v_mov_b32_e32 v4, v1 2808; GFX940-NEXT: 
global_store_dwordx3 v6, v[2:4], s[0:1] sc0 sc1 2809; GFX940-NEXT: s_waitcnt vmcnt(0) 2810; GFX940-NEXT: s_setpc_b64 s[30:31] 2811 %vec0 = call <4 x i32> asm "; def $0", "=v"() 2812 %vec1 = call <4 x i32> asm "; def $0", "=v"() 2813 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 6, i32 1> 2814 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 2815 ret void 2816} 2817 2818define void @v_shuffle_v3i32_v4i32__u_2_2(ptr addrspace(1) inreg %ptr) { 2819; GFX900-LABEL: v_shuffle_v3i32_v4i32__u_2_2: 2820; GFX900: ; %bb.0: 2821; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2822; GFX900-NEXT: ;;#ASMSTART 2823; GFX900-NEXT: ; def v[0:3] 2824; GFX900-NEXT: ;;#ASMEND 2825; GFX900-NEXT: v_mov_b32_e32 v4, 0 2826; GFX900-NEXT: v_mov_b32_e32 v3, v2 2827; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17] 2828; GFX900-NEXT: s_waitcnt vmcnt(0) 2829; GFX900-NEXT: s_setpc_b64 s[30:31] 2830; 2831; GFX90A-LABEL: v_shuffle_v3i32_v4i32__u_2_2: 2832; GFX90A: ; %bb.0: 2833; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2834; GFX90A-NEXT: ;;#ASMSTART 2835; GFX90A-NEXT: ; def v[0:3] 2836; GFX90A-NEXT: ;;#ASMEND 2837; GFX90A-NEXT: v_mov_b32_e32 v4, 0 2838; GFX90A-NEXT: v_mov_b32_e32 v1, v2 2839; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17] 2840; GFX90A-NEXT: s_waitcnt vmcnt(0) 2841; GFX90A-NEXT: s_setpc_b64 s[30:31] 2842; 2843; GFX940-LABEL: v_shuffle_v3i32_v4i32__u_2_2: 2844; GFX940: ; %bb.0: 2845; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2846; GFX940-NEXT: ;;#ASMSTART 2847; GFX940-NEXT: ; def v[0:3] 2848; GFX940-NEXT: ;;#ASMEND 2849; GFX940-NEXT: v_mov_b32_e32 v4, 0 2850; GFX940-NEXT: v_mov_b32_e32 v1, v2 2851; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 2852; GFX940-NEXT: s_waitcnt vmcnt(0) 2853; GFX940-NEXT: s_setpc_b64 s[30:31] 2854 %vec0 = call <4 x i32> asm "; def $0", "=v"() 2855 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> <i32 poison, i32 2, i32 2> 2856 store <3 x i32> 
%shuf, ptr addrspace(1) %ptr, align 16 2857 ret void 2858} 2859 2860define void @v_shuffle_v3i32_v4i32__0_2_2(ptr addrspace(1) inreg %ptr) { 2861; GFX900-LABEL: v_shuffle_v3i32_v4i32__0_2_2: 2862; GFX900: ; %bb.0: 2863; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2864; GFX900-NEXT: ;;#ASMSTART 2865; GFX900-NEXT: ; def v[0:3] 2866; GFX900-NEXT: ;;#ASMEND 2867; GFX900-NEXT: v_mov_b32_e32 v4, 0 2868; GFX900-NEXT: v_mov_b32_e32 v1, v2 2869; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17] 2870; GFX900-NEXT: s_waitcnt vmcnt(0) 2871; GFX900-NEXT: s_setpc_b64 s[30:31] 2872; 2873; GFX90A-LABEL: v_shuffle_v3i32_v4i32__0_2_2: 2874; GFX90A: ; %bb.0: 2875; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2876; GFX90A-NEXT: ;;#ASMSTART 2877; GFX90A-NEXT: ; def v[0:3] 2878; GFX90A-NEXT: ;;#ASMEND 2879; GFX90A-NEXT: v_mov_b32_e32 v4, 0 2880; GFX90A-NEXT: v_mov_b32_e32 v1, v2 2881; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17] 2882; GFX90A-NEXT: s_waitcnt vmcnt(0) 2883; GFX90A-NEXT: s_setpc_b64 s[30:31] 2884; 2885; GFX940-LABEL: v_shuffle_v3i32_v4i32__0_2_2: 2886; GFX940: ; %bb.0: 2887; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2888; GFX940-NEXT: ;;#ASMSTART 2889; GFX940-NEXT: ; def v[0:3] 2890; GFX940-NEXT: ;;#ASMEND 2891; GFX940-NEXT: v_mov_b32_e32 v4, 0 2892; GFX940-NEXT: v_mov_b32_e32 v1, v2 2893; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 2894; GFX940-NEXT: s_waitcnt vmcnt(0) 2895; GFX940-NEXT: s_setpc_b64 s[30:31] 2896 %vec0 = call <4 x i32> asm "; def $0", "=v"() 2897 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> <i32 0, i32 2, i32 2> 2898 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 2899 ret void 2900} 2901 2902define void @v_shuffle_v3i32_v4i32__1_2_2(ptr addrspace(1) inreg %ptr) { 2903; GFX900-LABEL: v_shuffle_v3i32_v4i32__1_2_2: 2904; GFX900: ; %bb.0: 2905; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2906; GFX900-NEXT: ;;#ASMSTART 2907; GFX900-NEXT: ; def v[0:3] 2908; 
GFX900-NEXT: ;;#ASMEND 2909; GFX900-NEXT: v_mov_b32_e32 v4, 0 2910; GFX900-NEXT: v_mov_b32_e32 v3, v2 2911; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17] 2912; GFX900-NEXT: s_waitcnt vmcnt(0) 2913; GFX900-NEXT: s_setpc_b64 s[30:31] 2914; 2915; GFX90A-LABEL: v_shuffle_v3i32_v4i32__1_2_2: 2916; GFX90A: ; %bb.0: 2917; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2918; GFX90A-NEXT: ;;#ASMSTART 2919; GFX90A-NEXT: ; def v[0:3] 2920; GFX90A-NEXT: ;;#ASMEND 2921; GFX90A-NEXT: v_mov_b32_e32 v4, 0 2922; GFX90A-NEXT: v_mov_b32_e32 v0, v1 2923; GFX90A-NEXT: v_mov_b32_e32 v1, v2 2924; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17] 2925; GFX90A-NEXT: s_waitcnt vmcnt(0) 2926; GFX90A-NEXT: s_setpc_b64 s[30:31] 2927; 2928; GFX940-LABEL: v_shuffle_v3i32_v4i32__1_2_2: 2929; GFX940: ; %bb.0: 2930; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2931; GFX940-NEXT: ;;#ASMSTART 2932; GFX940-NEXT: ; def v[0:3] 2933; GFX940-NEXT: ;;#ASMEND 2934; GFX940-NEXT: v_mov_b32_e32 v4, 0 2935; GFX940-NEXT: v_mov_b32_e32 v0, v1 2936; GFX940-NEXT: v_mov_b32_e32 v1, v2 2937; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 2938; GFX940-NEXT: s_waitcnt vmcnt(0) 2939; GFX940-NEXT: s_setpc_b64 s[30:31] 2940 %vec0 = call <4 x i32> asm "; def $0", "=v"() 2941 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> <i32 1, i32 2, i32 2> 2942 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 2943 ret void 2944} 2945 2946define void @v_shuffle_v3i32_v4i32__2_2_2(ptr addrspace(1) inreg %ptr) { 2947; GFX900-LABEL: v_shuffle_v3i32_v4i32__2_2_2: 2948; GFX900: ; %bb.0: 2949; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2950; GFX900-NEXT: ;;#ASMSTART 2951; GFX900-NEXT: ; def v[0:3] 2952; GFX900-NEXT: ;;#ASMEND 2953; GFX900-NEXT: v_mov_b32_e32 v4, 0 2954; GFX900-NEXT: v_mov_b32_e32 v1, v2 2955; GFX900-NEXT: v_mov_b32_e32 v3, v2 2956; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17] 2957; GFX900-NEXT: s_waitcnt vmcnt(0) 2958; GFX900-NEXT: 
s_setpc_b64 s[30:31] 2959; 2960; GFX90A-LABEL: v_shuffle_v3i32_v4i32__2_2_2: 2961; GFX90A: ; %bb.0: 2962; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2963; GFX90A-NEXT: ;;#ASMSTART 2964; GFX90A-NEXT: ; def v[0:3] 2965; GFX90A-NEXT: ;;#ASMEND 2966; GFX90A-NEXT: v_mov_b32_e32 v4, 0 2967; GFX90A-NEXT: v_mov_b32_e32 v0, v2 2968; GFX90A-NEXT: v_mov_b32_e32 v1, v2 2969; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17] 2970; GFX90A-NEXT: s_waitcnt vmcnt(0) 2971; GFX90A-NEXT: s_setpc_b64 s[30:31] 2972; 2973; GFX940-LABEL: v_shuffle_v3i32_v4i32__2_2_2: 2974; GFX940: ; %bb.0: 2975; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2976; GFX940-NEXT: ;;#ASMSTART 2977; GFX940-NEXT: ; def v[0:3] 2978; GFX940-NEXT: ;;#ASMEND 2979; GFX940-NEXT: v_mov_b32_e32 v4, 0 2980; GFX940-NEXT: v_mov_b32_e32 v0, v2 2981; GFX940-NEXT: v_mov_b32_e32 v1, v2 2982; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 2983; GFX940-NEXT: s_waitcnt vmcnt(0) 2984; GFX940-NEXT: s_setpc_b64 s[30:31] 2985 %vec0 = call <4 x i32> asm "; def $0", "=v"() 2986 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> <i32 2, i32 2, i32 2> 2987 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 2988 ret void 2989} 2990 2991define void @v_shuffle_v3i32_v4i32__3_2_2(ptr addrspace(1) inreg %ptr) { 2992; GFX900-LABEL: v_shuffle_v3i32_v4i32__3_2_2: 2993; GFX900: ; %bb.0: 2994; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2995; GFX900-NEXT: ;;#ASMSTART 2996; GFX900-NEXT: ; def v[0:3] 2997; GFX900-NEXT: ;;#ASMEND 2998; GFX900-NEXT: v_mov_b32_e32 v4, 0 2999; GFX900-NEXT: v_mov_b32_e32 v1, v3 3000; GFX900-NEXT: v_mov_b32_e32 v3, v2 3001; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17] 3002; GFX900-NEXT: s_waitcnt vmcnt(0) 3003; GFX900-NEXT: s_setpc_b64 s[30:31] 3004; 3005; GFX90A-LABEL: v_shuffle_v3i32_v4i32__3_2_2: 3006; GFX90A: ; %bb.0: 3007; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3008; GFX90A-NEXT: ;;#ASMSTART 3009; GFX90A-NEXT: ; def v[0:3] 3010; 
GFX90A-NEXT: ;;#ASMEND 3011; GFX90A-NEXT: v_mov_b32_e32 v4, 0 3012; GFX90A-NEXT: v_mov_b32_e32 v0, v3 3013; GFX90A-NEXT: v_mov_b32_e32 v1, v2 3014; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17] 3015; GFX90A-NEXT: s_waitcnt vmcnt(0) 3016; GFX90A-NEXT: s_setpc_b64 s[30:31] 3017; 3018; GFX940-LABEL: v_shuffle_v3i32_v4i32__3_2_2: 3019; GFX940: ; %bb.0: 3020; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3021; GFX940-NEXT: ;;#ASMSTART 3022; GFX940-NEXT: ; def v[0:3] 3023; GFX940-NEXT: ;;#ASMEND 3024; GFX940-NEXT: v_mov_b32_e32 v4, 0 3025; GFX940-NEXT: v_mov_b32_e32 v0, v3 3026; GFX940-NEXT: v_mov_b32_e32 v1, v2 3027; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 3028; GFX940-NEXT: s_waitcnt vmcnt(0) 3029; GFX940-NEXT: s_setpc_b64 s[30:31] 3030 %vec0 = call <4 x i32> asm "; def $0", "=v"() 3031 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> <i32 3, i32 2, i32 2> 3032 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 3033 ret void 3034} 3035 3036define void @v_shuffle_v3i32_v4i32__4_2_2(ptr addrspace(1) inreg %ptr) { 3037; GFX900-LABEL: v_shuffle_v3i32_v4i32__4_2_2: 3038; GFX900: ; %bb.0: 3039; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3040; GFX900-NEXT: ;;#ASMSTART 3041; GFX900-NEXT: ; def v[0:3] 3042; GFX900-NEXT: ;;#ASMEND 3043; GFX900-NEXT: v_mov_b32_e32 v4, 0 3044; GFX900-NEXT: v_mov_b32_e32 v3, v2 3045; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17] 3046; GFX900-NEXT: s_waitcnt vmcnt(0) 3047; GFX900-NEXT: s_setpc_b64 s[30:31] 3048; 3049; GFX90A-LABEL: v_shuffle_v3i32_v4i32__4_2_2: 3050; GFX90A: ; %bb.0: 3051; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3052; GFX90A-NEXT: ;;#ASMSTART 3053; GFX90A-NEXT: ; def v[0:3] 3054; GFX90A-NEXT: ;;#ASMEND 3055; GFX90A-NEXT: v_mov_b32_e32 v4, 0 3056; GFX90A-NEXT: v_mov_b32_e32 v1, v2 3057; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17] 3058; GFX90A-NEXT: s_waitcnt vmcnt(0) 3059; GFX90A-NEXT: s_setpc_b64 s[30:31] 3060; 3061; GFX940-LABEL: 
v_shuffle_v3i32_v4i32__4_2_2: 3062; GFX940: ; %bb.0: 3063; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3064; GFX940-NEXT: ;;#ASMSTART 3065; GFX940-NEXT: ; def v[0:3] 3066; GFX940-NEXT: ;;#ASMEND 3067; GFX940-NEXT: v_mov_b32_e32 v4, 0 3068; GFX940-NEXT: v_mov_b32_e32 v1, v2 3069; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 3070; GFX940-NEXT: s_waitcnt vmcnt(0) 3071; GFX940-NEXT: s_setpc_b64 s[30:31] 3072 %vec0 = call <4 x i32> asm "; def $0", "=v"() 3073 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> <i32 4, i32 2, i32 2> 3074 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 3075 ret void 3076} 3077 3078define void @v_shuffle_v3i32_v4i32__5_2_2(ptr addrspace(1) inreg %ptr) { 3079; GFX900-LABEL: v_shuffle_v3i32_v4i32__5_2_2: 3080; GFX900: ; %bb.0: 3081; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3082; GFX900-NEXT: ;;#ASMSTART 3083; GFX900-NEXT: ; def v[0:3] 3084; GFX900-NEXT: ;;#ASMEND 3085; GFX900-NEXT: ;;#ASMSTART 3086; GFX900-NEXT: ; def v[3:6] 3087; GFX900-NEXT: ;;#ASMEND 3088; GFX900-NEXT: v_mov_b32_e32 v7, 0 3089; GFX900-NEXT: v_mov_b32_e32 v5, v2 3090; GFX900-NEXT: v_mov_b32_e32 v6, v2 3091; GFX900-NEXT: global_store_dwordx3 v7, v[4:6], s[16:17] 3092; GFX900-NEXT: s_waitcnt vmcnt(0) 3093; GFX900-NEXT: s_setpc_b64 s[30:31] 3094; 3095; GFX90A-LABEL: v_shuffle_v3i32_v4i32__5_2_2: 3096; GFX90A: ; %bb.0: 3097; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3098; GFX90A-NEXT: ;;#ASMSTART 3099; GFX90A-NEXT: ; def v[0:3] 3100; GFX90A-NEXT: ;;#ASMEND 3101; GFX90A-NEXT: v_mov_b32_e32 v8, 0 3102; GFX90A-NEXT: ;;#ASMSTART 3103; GFX90A-NEXT: ; def v[4:7] 3104; GFX90A-NEXT: ;;#ASMEND 3105; GFX90A-NEXT: v_mov_b32_e32 v0, v5 3106; GFX90A-NEXT: v_mov_b32_e32 v1, v2 3107; GFX90A-NEXT: global_store_dwordx3 v8, v[0:2], s[16:17] 3108; GFX90A-NEXT: s_waitcnt vmcnt(0) 3109; GFX90A-NEXT: s_setpc_b64 s[30:31] 3110; 3111; GFX940-LABEL: v_shuffle_v3i32_v4i32__5_2_2: 3112; GFX940: ; %bb.0: 3113; GFX940-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) 3114; GFX940-NEXT: ;;#ASMSTART 3115; GFX940-NEXT: ; def v[0:3] 3116; GFX940-NEXT: ;;#ASMEND 3117; GFX940-NEXT: v_mov_b32_e32 v8, 0 3118; GFX940-NEXT: ;;#ASMSTART 3119; GFX940-NEXT: ; def v[4:7] 3120; GFX940-NEXT: ;;#ASMEND 3121; GFX940-NEXT: v_mov_b32_e32 v1, v2 3122; GFX940-NEXT: v_mov_b32_e32 v0, v5 3123; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 3124; GFX940-NEXT: s_waitcnt vmcnt(0) 3125; GFX940-NEXT: s_setpc_b64 s[30:31] 3126 %vec0 = call <4 x i32> asm "; def $0", "=v"() 3127 %vec1 = call <4 x i32> asm "; def $0", "=v"() 3128 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 5, i32 2, i32 2> 3129 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 3130 ret void 3131} 3132 3133define void @v_shuffle_v3i32_v4i32__6_2_2(ptr addrspace(1) inreg %ptr) { 3134; GFX900-LABEL: v_shuffle_v3i32_v4i32__6_2_2: 3135; GFX900: ; %bb.0: 3136; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3137; GFX900-NEXT: ;;#ASMSTART 3138; GFX900-NEXT: ; def v[0:3] 3139; GFX900-NEXT: ;;#ASMEND 3140; GFX900-NEXT: ;;#ASMSTART 3141; GFX900-NEXT: ; def v[3:6] 3142; GFX900-NEXT: ;;#ASMEND 3143; GFX900-NEXT: v_mov_b32_e32 v7, 0 3144; GFX900-NEXT: v_mov_b32_e32 v1, v5 3145; GFX900-NEXT: v_mov_b32_e32 v3, v2 3146; GFX900-NEXT: global_store_dwordx3 v7, v[1:3], s[16:17] 3147; GFX900-NEXT: s_waitcnt vmcnt(0) 3148; GFX900-NEXT: s_setpc_b64 s[30:31] 3149; 3150; GFX90A-LABEL: v_shuffle_v3i32_v4i32__6_2_2: 3151; GFX90A: ; %bb.0: 3152; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3153; GFX90A-NEXT: ;;#ASMSTART 3154; GFX90A-NEXT: ; def v[0:3] 3155; GFX90A-NEXT: ;;#ASMEND 3156; GFX90A-NEXT: v_mov_b32_e32 v8, 0 3157; GFX90A-NEXT: ;;#ASMSTART 3158; GFX90A-NEXT: ; def v[4:7] 3159; GFX90A-NEXT: ;;#ASMEND 3160; GFX90A-NEXT: v_mov_b32_e32 v0, v6 3161; GFX90A-NEXT: v_mov_b32_e32 v1, v2 3162; GFX90A-NEXT: global_store_dwordx3 v8, v[0:2], s[16:17] 3163; GFX90A-NEXT: s_waitcnt vmcnt(0) 3164; GFX90A-NEXT: s_setpc_b64 s[30:31] 3165; 
3166; GFX940-LABEL: v_shuffle_v3i32_v4i32__6_2_2: 3167; GFX940: ; %bb.0: 3168; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3169; GFX940-NEXT: ;;#ASMSTART 3170; GFX940-NEXT: ; def v[0:3] 3171; GFX940-NEXT: ;;#ASMEND 3172; GFX940-NEXT: v_mov_b32_e32 v8, 0 3173; GFX940-NEXT: ;;#ASMSTART 3174; GFX940-NEXT: ; def v[4:7] 3175; GFX940-NEXT: ;;#ASMEND 3176; GFX940-NEXT: v_mov_b32_e32 v1, v2 3177; GFX940-NEXT: v_mov_b32_e32 v0, v6 3178; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 3179; GFX940-NEXT: s_waitcnt vmcnt(0) 3180; GFX940-NEXT: s_setpc_b64 s[30:31] 3181 %vec0 = call <4 x i32> asm "; def $0", "=v"() 3182 %vec1 = call <4 x i32> asm "; def $0", "=v"() 3183 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 6, i32 2, i32 2> 3184 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 3185 ret void 3186} 3187 3188define void @v_shuffle_v3i32_v4i32__7_2_2(ptr addrspace(1) inreg %ptr) { 3189; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_2_2: 3190; GFX900: ; %bb.0: 3191; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3192; GFX900-NEXT: ;;#ASMSTART 3193; GFX900-NEXT: ; def v[0:3] 3194; GFX900-NEXT: ;;#ASMEND 3195; GFX900-NEXT: ;;#ASMSTART 3196; GFX900-NEXT: ; def v[3:6] 3197; GFX900-NEXT: ;;#ASMEND 3198; GFX900-NEXT: v_mov_b32_e32 v7, 0 3199; GFX900-NEXT: v_mov_b32_e32 v1, v6 3200; GFX900-NEXT: v_mov_b32_e32 v3, v2 3201; GFX900-NEXT: global_store_dwordx3 v7, v[1:3], s[16:17] 3202; GFX900-NEXT: s_waitcnt vmcnt(0) 3203; GFX900-NEXT: s_setpc_b64 s[30:31] 3204; 3205; GFX90A-LABEL: v_shuffle_v3i32_v4i32__7_2_2: 3206; GFX90A: ; %bb.0: 3207; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3208; GFX90A-NEXT: ;;#ASMSTART 3209; GFX90A-NEXT: ; def v[0:3] 3210; GFX90A-NEXT: ;;#ASMEND 3211; GFX90A-NEXT: v_mov_b32_e32 v8, 0 3212; GFX90A-NEXT: ;;#ASMSTART 3213; GFX90A-NEXT: ; def v[4:7] 3214; GFX90A-NEXT: ;;#ASMEND 3215; GFX90A-NEXT: v_mov_b32_e32 v0, v7 3216; GFX90A-NEXT: v_mov_b32_e32 v1, v2 3217; GFX90A-NEXT: global_store_dwordx3 v8, 
v[0:2], s[16:17] 3218; GFX90A-NEXT: s_waitcnt vmcnt(0) 3219; GFX90A-NEXT: s_setpc_b64 s[30:31] 3220; 3221; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_2_2: 3222; GFX940: ; %bb.0: 3223; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3224; GFX940-NEXT: ;;#ASMSTART 3225; GFX940-NEXT: ; def v[0:3] 3226; GFX940-NEXT: ;;#ASMEND 3227; GFX940-NEXT: v_mov_b32_e32 v8, 0 3228; GFX940-NEXT: ;;#ASMSTART 3229; GFX940-NEXT: ; def v[4:7] 3230; GFX940-NEXT: ;;#ASMEND 3231; GFX940-NEXT: v_mov_b32_e32 v1, v2 3232; GFX940-NEXT: v_mov_b32_e32 v0, v7 3233; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 3234; GFX940-NEXT: s_waitcnt vmcnt(0) 3235; GFX940-NEXT: s_setpc_b64 s[30:31] 3236 %vec0 = call <4 x i32> asm "; def $0", "=v"() 3237 %vec1 = call <4 x i32> asm "; def $0", "=v"() 3238 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 2, i32 2> 3239 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 3240 ret void 3241} 3242 3243define void @v_shuffle_v3i32_v4i32__7_u_2(ptr addrspace(1) inreg %ptr) { 3244; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_u_2: 3245; GFX900: ; %bb.0: 3246; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3247; GFX900-NEXT: ;;#ASMSTART 3248; GFX900-NEXT: ; def v[0:3] 3249; GFX900-NEXT: ;;#ASMEND 3250; GFX900-NEXT: v_mov_b32_e32 v7, 0 3251; GFX900-NEXT: ;;#ASMSTART 3252; GFX900-NEXT: ; def v[3:6] 3253; GFX900-NEXT: ;;#ASMEND 3254; GFX900-NEXT: v_mov_b32_e32 v0, v6 3255; GFX900-NEXT: global_store_dwordx3 v7, v[0:2], s[16:17] 3256; GFX900-NEXT: s_waitcnt vmcnt(0) 3257; GFX900-NEXT: s_setpc_b64 s[30:31] 3258; 3259; GFX90A-LABEL: v_shuffle_v3i32_v4i32__7_u_2: 3260; GFX90A: ; %bb.0: 3261; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3262; GFX90A-NEXT: ;;#ASMSTART 3263; GFX90A-NEXT: ; def v[0:3] 3264; GFX90A-NEXT: ;;#ASMEND 3265; GFX90A-NEXT: v_mov_b32_e32 v8, 0 3266; GFX90A-NEXT: ;;#ASMSTART 3267; GFX90A-NEXT: ; def v[4:7] 3268; GFX90A-NEXT: ;;#ASMEND 3269; GFX90A-NEXT: v_mov_b32_e32 v0, v7 3270; GFX90A-NEXT: 
global_store_dwordx3 v8, v[0:2], s[16:17] 3271; GFX90A-NEXT: s_waitcnt vmcnt(0) 3272; GFX90A-NEXT: s_setpc_b64 s[30:31] 3273; 3274; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_u_2: 3275; GFX940: ; %bb.0: 3276; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3277; GFX940-NEXT: ;;#ASMSTART 3278; GFX940-NEXT: ; def v[0:3] 3279; GFX940-NEXT: ;;#ASMEND 3280; GFX940-NEXT: v_mov_b32_e32 v8, 0 3281; GFX940-NEXT: ;;#ASMSTART 3282; GFX940-NEXT: ; def v[4:7] 3283; GFX940-NEXT: ;;#ASMEND 3284; GFX940-NEXT: s_nop 0 3285; GFX940-NEXT: v_mov_b32_e32 v0, v7 3286; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 3287; GFX940-NEXT: s_waitcnt vmcnt(0) 3288; GFX940-NEXT: s_setpc_b64 s[30:31] 3289 %vec0 = call <4 x i32> asm "; def $0", "=v"() 3290 %vec1 = call <4 x i32> asm "; def $0", "=v"() 3291 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 poison, i32 2> 3292 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 3293 ret void 3294} 3295 3296define void @v_shuffle_v3i32_v4i32__7_0_2(ptr addrspace(1) inreg %ptr) { 3297; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_0_2: 3298; GFX900: ; %bb.0: 3299; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3300; GFX900-NEXT: ;;#ASMSTART 3301; GFX900-NEXT: ; def v[0:3] 3302; GFX900-NEXT: ;;#ASMEND 3303; GFX900-NEXT: ;;#ASMSTART 3304; GFX900-NEXT: ; def v[3:6] 3305; GFX900-NEXT: ;;#ASMEND 3306; GFX900-NEXT: v_mov_b32_e32 v7, 0 3307; GFX900-NEXT: v_mov_b32_e32 v3, v6 3308; GFX900-NEXT: v_mov_b32_e32 v4, v0 3309; GFX900-NEXT: v_mov_b32_e32 v5, v2 3310; GFX900-NEXT: global_store_dwordx3 v7, v[3:5], s[16:17] 3311; GFX900-NEXT: s_waitcnt vmcnt(0) 3312; GFX900-NEXT: s_setpc_b64 s[30:31] 3313; 3314; GFX90A-LABEL: v_shuffle_v3i32_v4i32__7_0_2: 3315; GFX90A: ; %bb.0: 3316; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3317; GFX90A-NEXT: ;;#ASMSTART 3318; GFX90A-NEXT: ; def v[4:7] 3319; GFX90A-NEXT: ;;#ASMEND 3320; GFX90A-NEXT: v_mov_b32_e32 v8, 0 3321; GFX90A-NEXT: ;;#ASMSTART 3322; GFX90A-NEXT: ; def 
v[0:3] 3323; GFX90A-NEXT: ;;#ASMEND 3324; GFX90A-NEXT: v_mov_b32_e32 v4, v7 3325; GFX90A-NEXT: v_mov_b32_e32 v5, v0 3326; GFX90A-NEXT: v_mov_b32_e32 v6, v2 3327; GFX90A-NEXT: global_store_dwordx3 v8, v[4:6], s[16:17] 3328; GFX90A-NEXT: s_waitcnt vmcnt(0) 3329; GFX90A-NEXT: s_setpc_b64 s[30:31] 3330; 3331; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_0_2: 3332; GFX940: ; %bb.0: 3333; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3334; GFX940-NEXT: ;;#ASMSTART 3335; GFX940-NEXT: ; def v[4:7] 3336; GFX940-NEXT: ;;#ASMEND 3337; GFX940-NEXT: v_mov_b32_e32 v8, 0 3338; GFX940-NEXT: ;;#ASMSTART 3339; GFX940-NEXT: ; def v[0:3] 3340; GFX940-NEXT: ;;#ASMEND 3341; GFX940-NEXT: v_mov_b32_e32 v4, v7 3342; GFX940-NEXT: v_mov_b32_e32 v5, v0 3343; GFX940-NEXT: v_mov_b32_e32 v6, v2 3344; GFX940-NEXT: global_store_dwordx3 v8, v[4:6], s[0:1] sc0 sc1 3345; GFX940-NEXT: s_waitcnt vmcnt(0) 3346; GFX940-NEXT: s_setpc_b64 s[30:31] 3347 %vec0 = call <4 x i32> asm "; def $0", "=v"() 3348 %vec1 = call <4 x i32> asm "; def $0", "=v"() 3349 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 0, i32 2> 3350 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 3351 ret void 3352} 3353 3354define void @v_shuffle_v3i32_v4i32__7_1_2(ptr addrspace(1) inreg %ptr) { 3355; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_1_2: 3356; GFX900: ; %bb.0: 3357; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3358; GFX900-NEXT: ;;#ASMSTART 3359; GFX900-NEXT: ; def v[0:3] 3360; GFX900-NEXT: ;;#ASMEND 3361; GFX900-NEXT: v_mov_b32_e32 v7, 0 3362; GFX900-NEXT: ;;#ASMSTART 3363; GFX900-NEXT: ; def v[3:6] 3364; GFX900-NEXT: ;;#ASMEND 3365; GFX900-NEXT: v_mov_b32_e32 v0, v6 3366; GFX900-NEXT: global_store_dwordx3 v7, v[0:2], s[16:17] 3367; GFX900-NEXT: s_waitcnt vmcnt(0) 3368; GFX900-NEXT: s_setpc_b64 s[30:31] 3369; 3370; GFX90A-LABEL: v_shuffle_v3i32_v4i32__7_1_2: 3371; GFX90A: ; %bb.0: 3372; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3373; GFX90A-NEXT: ;;#ASMSTART 3374; GFX90A-NEXT: 
; def v[0:3] 3375; GFX90A-NEXT: ;;#ASMEND 3376; GFX90A-NEXT: v_mov_b32_e32 v8, 0 3377; GFX90A-NEXT: ;;#ASMSTART 3378; GFX90A-NEXT: ; def v[4:7] 3379; GFX90A-NEXT: ;;#ASMEND 3380; GFX90A-NEXT: v_mov_b32_e32 v0, v7 3381; GFX90A-NEXT: global_store_dwordx3 v8, v[0:2], s[16:17] 3382; GFX90A-NEXT: s_waitcnt vmcnt(0) 3383; GFX90A-NEXT: s_setpc_b64 s[30:31] 3384; 3385; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_1_2: 3386; GFX940: ; %bb.0: 3387; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3388; GFX940-NEXT: ;;#ASMSTART 3389; GFX940-NEXT: ; def v[0:3] 3390; GFX940-NEXT: ;;#ASMEND 3391; GFX940-NEXT: v_mov_b32_e32 v8, 0 3392; GFX940-NEXT: ;;#ASMSTART 3393; GFX940-NEXT: ; def v[4:7] 3394; GFX940-NEXT: ;;#ASMEND 3395; GFX940-NEXT: s_nop 0 3396; GFX940-NEXT: v_mov_b32_e32 v0, v7 3397; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 3398; GFX940-NEXT: s_waitcnt vmcnt(0) 3399; GFX940-NEXT: s_setpc_b64 s[30:31] 3400 %vec0 = call <4 x i32> asm "; def $0", "=v"() 3401 %vec1 = call <4 x i32> asm "; def $0", "=v"() 3402 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 1, i32 2> 3403 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 3404 ret void 3405} 3406 3407define void @v_shuffle_v3i32_v4i32__7_3_2(ptr addrspace(1) inreg %ptr) { 3408; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_3_2: 3409; GFX900: ; %bb.0: 3410; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3411; GFX900-NEXT: ;;#ASMSTART 3412; GFX900-NEXT: ; def v[0:3] 3413; GFX900-NEXT: ;;#ASMEND 3414; GFX900-NEXT: v_mov_b32_e32 v8, 0 3415; GFX900-NEXT: ;;#ASMSTART 3416; GFX900-NEXT: ; def v[4:7] 3417; GFX900-NEXT: ;;#ASMEND 3418; GFX900-NEXT: v_mov_b32_e32 v0, v7 3419; GFX900-NEXT: v_mov_b32_e32 v1, v3 3420; GFX900-NEXT: global_store_dwordx3 v8, v[0:2], s[16:17] 3421; GFX900-NEXT: s_waitcnt vmcnt(0) 3422; GFX900-NEXT: s_setpc_b64 s[30:31] 3423; 3424; GFX90A-LABEL: v_shuffle_v3i32_v4i32__7_3_2: 3425; GFX90A: ; %bb.0: 3426; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3427; 
GFX90A-NEXT: ;;#ASMSTART 3428; GFX90A-NEXT: ; def v[0:3] 3429; GFX90A-NEXT: ;;#ASMEND 3430; GFX90A-NEXT: v_mov_b32_e32 v8, 0 3431; GFX90A-NEXT: ;;#ASMSTART 3432; GFX90A-NEXT: ; def v[4:7] 3433; GFX90A-NEXT: ;;#ASMEND 3434; GFX90A-NEXT: v_mov_b32_e32 v0, v7 3435; GFX90A-NEXT: v_mov_b32_e32 v1, v3 3436; GFX90A-NEXT: global_store_dwordx3 v8, v[0:2], s[16:17] 3437; GFX90A-NEXT: s_waitcnt vmcnt(0) 3438; GFX90A-NEXT: s_setpc_b64 s[30:31] 3439; 3440; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_3_2: 3441; GFX940: ; %bb.0: 3442; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3443; GFX940-NEXT: ;;#ASMSTART 3444; GFX940-NEXT: ; def v[0:3] 3445; GFX940-NEXT: ;;#ASMEND 3446; GFX940-NEXT: v_mov_b32_e32 v8, 0 3447; GFX940-NEXT: ;;#ASMSTART 3448; GFX940-NEXT: ; def v[4:7] 3449; GFX940-NEXT: ;;#ASMEND 3450; GFX940-NEXT: v_mov_b32_e32 v1, v3 3451; GFX940-NEXT: v_mov_b32_e32 v0, v7 3452; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 3453; GFX940-NEXT: s_waitcnt vmcnt(0) 3454; GFX940-NEXT: s_setpc_b64 s[30:31] 3455 %vec0 = call <4 x i32> asm "; def $0", "=v"() 3456 %vec1 = call <4 x i32> asm "; def $0", "=v"() 3457 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 3, i32 2> 3458 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 3459 ret void 3460} 3461 3462define void @v_shuffle_v3i32_v4i32__7_4_2(ptr addrspace(1) inreg %ptr) { 3463; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_4_2: 3464; GFX900: ; %bb.0: 3465; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3466; GFX900-NEXT: ;;#ASMSTART 3467; GFX900-NEXT: ; def v[0:3] 3468; GFX900-NEXT: ;;#ASMEND 3469; GFX900-NEXT: v_mov_b32_e32 v7, 0 3470; GFX900-NEXT: ;;#ASMSTART 3471; GFX900-NEXT: ; def v[3:6] 3472; GFX900-NEXT: ;;#ASMEND 3473; GFX900-NEXT: v_mov_b32_e32 v0, v6 3474; GFX900-NEXT: v_mov_b32_e32 v1, v3 3475; GFX900-NEXT: global_store_dwordx3 v7, v[0:2], s[16:17] 3476; GFX900-NEXT: s_waitcnt vmcnt(0) 3477; GFX900-NEXT: s_setpc_b64 s[30:31] 3478; 3479; GFX90A-LABEL: 
v_shuffle_v3i32_v4i32__7_4_2: 3480; GFX90A: ; %bb.0: 3481; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3482; GFX90A-NEXT: ;;#ASMSTART 3483; GFX90A-NEXT: ; def v[0:3] 3484; GFX90A-NEXT: ;;#ASMEND 3485; GFX90A-NEXT: v_mov_b32_e32 v8, 0 3486; GFX90A-NEXT: ;;#ASMSTART 3487; GFX90A-NEXT: ; def v[4:7] 3488; GFX90A-NEXT: ;;#ASMEND 3489; GFX90A-NEXT: v_mov_b32_e32 v0, v7 3490; GFX90A-NEXT: v_mov_b32_e32 v1, v4 3491; GFX90A-NEXT: global_store_dwordx3 v8, v[0:2], s[16:17] 3492; GFX90A-NEXT: s_waitcnt vmcnt(0) 3493; GFX90A-NEXT: s_setpc_b64 s[30:31] 3494; 3495; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_4_2: 3496; GFX940: ; %bb.0: 3497; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3498; GFX940-NEXT: ;;#ASMSTART 3499; GFX940-NEXT: ; def v[0:3] 3500; GFX940-NEXT: ;;#ASMEND 3501; GFX940-NEXT: v_mov_b32_e32 v8, 0 3502; GFX940-NEXT: ;;#ASMSTART 3503; GFX940-NEXT: ; def v[4:7] 3504; GFX940-NEXT: ;;#ASMEND 3505; GFX940-NEXT: s_nop 0 3506; GFX940-NEXT: v_mov_b32_e32 v0, v7 3507; GFX940-NEXT: v_mov_b32_e32 v1, v4 3508; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 3509; GFX940-NEXT: s_waitcnt vmcnt(0) 3510; GFX940-NEXT: s_setpc_b64 s[30:31] 3511 %vec0 = call <4 x i32> asm "; def $0", "=v"() 3512 %vec1 = call <4 x i32> asm "; def $0", "=v"() 3513 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 4, i32 2> 3514 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 3515 ret void 3516} 3517 3518define void @v_shuffle_v3i32_v4i32__7_5_2(ptr addrspace(1) inreg %ptr) { 3519; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_5_2: 3520; GFX900: ; %bb.0: 3521; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3522; GFX900-NEXT: ;;#ASMSTART 3523; GFX900-NEXT: ; def v[0:3] 3524; GFX900-NEXT: ;;#ASMEND 3525; GFX900-NEXT: ;;#ASMSTART 3526; GFX900-NEXT: ; def v[3:6] 3527; GFX900-NEXT: ;;#ASMEND 3528; GFX900-NEXT: v_mov_b32_e32 v7, 0 3529; GFX900-NEXT: v_mov_b32_e32 v3, v6 3530; GFX900-NEXT: v_mov_b32_e32 v5, v2 3531; GFX900-NEXT: global_store_dwordx3 
v7, v[3:5], s[16:17] 3532; GFX900-NEXT: s_waitcnt vmcnt(0) 3533; GFX900-NEXT: s_setpc_b64 s[30:31] 3534; 3535; GFX90A-LABEL: v_shuffle_v3i32_v4i32__7_5_2: 3536; GFX90A: ; %bb.0: 3537; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3538; GFX90A-NEXT: ;;#ASMSTART 3539; GFX90A-NEXT: ; def v[4:7] 3540; GFX90A-NEXT: ;;#ASMEND 3541; GFX90A-NEXT: v_mov_b32_e32 v8, 0 3542; GFX90A-NEXT: ;;#ASMSTART 3543; GFX90A-NEXT: ; def v[0:3] 3544; GFX90A-NEXT: ;;#ASMEND 3545; GFX90A-NEXT: v_mov_b32_e32 v4, v7 3546; GFX90A-NEXT: v_mov_b32_e32 v6, v2 3547; GFX90A-NEXT: global_store_dwordx3 v8, v[4:6], s[16:17] 3548; GFX90A-NEXT: s_waitcnt vmcnt(0) 3549; GFX90A-NEXT: s_setpc_b64 s[30:31] 3550; 3551; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_5_2: 3552; GFX940: ; %bb.0: 3553; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3554; GFX940-NEXT: ;;#ASMSTART 3555; GFX940-NEXT: ; def v[4:7] 3556; GFX940-NEXT: ;;#ASMEND 3557; GFX940-NEXT: v_mov_b32_e32 v8, 0 3558; GFX940-NEXT: ;;#ASMSTART 3559; GFX940-NEXT: ; def v[0:3] 3560; GFX940-NEXT: ;;#ASMEND 3561; GFX940-NEXT: v_mov_b32_e32 v4, v7 3562; GFX940-NEXT: v_mov_b32_e32 v6, v2 3563; GFX940-NEXT: global_store_dwordx3 v8, v[4:6], s[0:1] sc0 sc1 3564; GFX940-NEXT: s_waitcnt vmcnt(0) 3565; GFX940-NEXT: s_setpc_b64 s[30:31] 3566 %vec0 = call <4 x i32> asm "; def $0", "=v"() 3567 %vec1 = call <4 x i32> asm "; def $0", "=v"() 3568 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 5, i32 2> 3569 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 3570 ret void 3571} 3572 3573define void @v_shuffle_v3i32_v4i32__7_6_2(ptr addrspace(1) inreg %ptr) { 3574; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_6_2: 3575; GFX900: ; %bb.0: 3576; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3577; GFX900-NEXT: ;;#ASMSTART 3578; GFX900-NEXT: ; def v[0:3] 3579; GFX900-NEXT: ;;#ASMEND 3580; GFX900-NEXT: ;;#ASMSTART 3581; GFX900-NEXT: ; def v[3:6] 3582; GFX900-NEXT: ;;#ASMEND 3583; GFX900-NEXT: v_mov_b32_e32 v7, 0 3584; GFX900-NEXT: 
v_mov_b32_e32 v4, v6 3585; GFX900-NEXT: v_mov_b32_e32 v6, v2 3586; GFX900-NEXT: global_store_dwordx3 v7, v[4:6], s[16:17] 3587; GFX900-NEXT: s_waitcnt vmcnt(0) 3588; GFX900-NEXT: s_setpc_b64 s[30:31] 3589; 3590; GFX90A-LABEL: v_shuffle_v3i32_v4i32__7_6_2: 3591; GFX90A: ; %bb.0: 3592; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3593; GFX90A-NEXT: ;;#ASMSTART 3594; GFX90A-NEXT: ; def v[0:3] 3595; GFX90A-NEXT: ;;#ASMEND 3596; GFX90A-NEXT: v_mov_b32_e32 v8, 0 3597; GFX90A-NEXT: ;;#ASMSTART 3598; GFX90A-NEXT: ; def v[4:7] 3599; GFX90A-NEXT: ;;#ASMEND 3600; GFX90A-NEXT: v_mov_b32_e32 v0, v7 3601; GFX90A-NEXT: v_mov_b32_e32 v1, v6 3602; GFX90A-NEXT: global_store_dwordx3 v8, v[0:2], s[16:17] 3603; GFX90A-NEXT: s_waitcnt vmcnt(0) 3604; GFX90A-NEXT: s_setpc_b64 s[30:31] 3605; 3606; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_6_2: 3607; GFX940: ; %bb.0: 3608; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3609; GFX940-NEXT: ;;#ASMSTART 3610; GFX940-NEXT: ; def v[0:3] 3611; GFX940-NEXT: ;;#ASMEND 3612; GFX940-NEXT: v_mov_b32_e32 v8, 0 3613; GFX940-NEXT: ;;#ASMSTART 3614; GFX940-NEXT: ; def v[4:7] 3615; GFX940-NEXT: ;;#ASMEND 3616; GFX940-NEXT: s_nop 0 3617; GFX940-NEXT: v_mov_b32_e32 v0, v7 3618; GFX940-NEXT: v_mov_b32_e32 v1, v6 3619; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 3620; GFX940-NEXT: s_waitcnt vmcnt(0) 3621; GFX940-NEXT: s_setpc_b64 s[30:31] 3622 %vec0 = call <4 x i32> asm "; def $0", "=v"() 3623 %vec1 = call <4 x i32> asm "; def $0", "=v"() 3624 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 6, i32 2> 3625 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 3626 ret void 3627} 3628 3629define void @v_shuffle_v3i32_v4i32__u_3_3(ptr addrspace(1) inreg %ptr) { 3630; GFX900-LABEL: v_shuffle_v3i32_v4i32__u_3_3: 3631; GFX900: ; %bb.0: 3632; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3633; GFX900-NEXT: ;;#ASMSTART 3634; GFX900-NEXT: ; def v[0:3] 3635; GFX900-NEXT: ;;#ASMEND 3636; GFX900-NEXT: 
v_mov_b32_e32 v4, 0 3637; GFX900-NEXT: v_mov_b32_e32 v2, v3 3638; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17] 3639; GFX900-NEXT: s_waitcnt vmcnt(0) 3640; GFX900-NEXT: s_setpc_b64 s[30:31] 3641; 3642; GFX90A-LABEL: v_shuffle_v3i32_v4i32__u_3_3: 3643; GFX90A: ; %bb.0: 3644; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3645; GFX90A-NEXT: ;;#ASMSTART 3646; GFX90A-NEXT: ; def v[0:3] 3647; GFX90A-NEXT: ;;#ASMEND 3648; GFX90A-NEXT: v_mov_b32_e32 v4, 0 3649; GFX90A-NEXT: v_mov_b32_e32 v1, v3 3650; GFX90A-NEXT: v_mov_b32_e32 v2, v3 3651; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17] 3652; GFX90A-NEXT: s_waitcnt vmcnt(0) 3653; GFX90A-NEXT: s_setpc_b64 s[30:31] 3654; 3655; GFX940-LABEL: v_shuffle_v3i32_v4i32__u_3_3: 3656; GFX940: ; %bb.0: 3657; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3658; GFX940-NEXT: ;;#ASMSTART 3659; GFX940-NEXT: ; def v[0:3] 3660; GFX940-NEXT: ;;#ASMEND 3661; GFX940-NEXT: v_mov_b32_e32 v4, 0 3662; GFX940-NEXT: v_mov_b32_e32 v1, v3 3663; GFX940-NEXT: v_mov_b32_e32 v2, v3 3664; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 3665; GFX940-NEXT: s_waitcnt vmcnt(0) 3666; GFX940-NEXT: s_setpc_b64 s[30:31] 3667 %vec0 = call <4 x i32> asm "; def $0", "=v"() 3668 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> <i32 poison, i32 3, i32 3> 3669 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 3670 ret void 3671} 3672 3673define void @v_shuffle_v3i32_v4i32__0_3_3(ptr addrspace(1) inreg %ptr) { 3674; GFX900-LABEL: v_shuffle_v3i32_v4i32__0_3_3: 3675; GFX900: ; %bb.0: 3676; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3677; GFX900-NEXT: ;;#ASMSTART 3678; GFX900-NEXT: ; def v[0:3] 3679; GFX900-NEXT: ;;#ASMEND 3680; GFX900-NEXT: v_mov_b32_e32 v4, 0 3681; GFX900-NEXT: v_mov_b32_e32 v1, v3 3682; GFX900-NEXT: v_mov_b32_e32 v2, v3 3683; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17] 3684; GFX900-NEXT: s_waitcnt vmcnt(0) 3685; GFX900-NEXT: s_setpc_b64 s[30:31] 3686; 3687; 
GFX90A-LABEL: v_shuffle_v3i32_v4i32__0_3_3: 3688; GFX90A: ; %bb.0: 3689; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3690; GFX90A-NEXT: ;;#ASMSTART 3691; GFX90A-NEXT: ; def v[0:3] 3692; GFX90A-NEXT: ;;#ASMEND 3693; GFX90A-NEXT: v_mov_b32_e32 v4, 0 3694; GFX90A-NEXT: v_mov_b32_e32 v1, v3 3695; GFX90A-NEXT: v_mov_b32_e32 v2, v3 3696; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17] 3697; GFX90A-NEXT: s_waitcnt vmcnt(0) 3698; GFX90A-NEXT: s_setpc_b64 s[30:31] 3699; 3700; GFX940-LABEL: v_shuffle_v3i32_v4i32__0_3_3: 3701; GFX940: ; %bb.0: 3702; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3703; GFX940-NEXT: ;;#ASMSTART 3704; GFX940-NEXT: ; def v[0:3] 3705; GFX940-NEXT: ;;#ASMEND 3706; GFX940-NEXT: v_mov_b32_e32 v4, 0 3707; GFX940-NEXT: v_mov_b32_e32 v1, v3 3708; GFX940-NEXT: v_mov_b32_e32 v2, v3 3709; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 3710; GFX940-NEXT: s_waitcnt vmcnt(0) 3711; GFX940-NEXT: s_setpc_b64 s[30:31] 3712 %vec0 = call <4 x i32> asm "; def $0", "=v"() 3713 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> <i32 0, i32 3, i32 3> 3714 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 3715 ret void 3716} 3717 3718define void @v_shuffle_v3i32_v4i32__1_3_3(ptr addrspace(1) inreg %ptr) { 3719; GFX900-LABEL: v_shuffle_v3i32_v4i32__1_3_3: 3720; GFX900: ; %bb.0: 3721; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3722; GFX900-NEXT: ;;#ASMSTART 3723; GFX900-NEXT: ; def v[0:3] 3724; GFX900-NEXT: ;;#ASMEND 3725; GFX900-NEXT: v_mov_b32_e32 v4, 0 3726; GFX900-NEXT: v_mov_b32_e32 v2, v3 3727; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17] 3728; GFX900-NEXT: s_waitcnt vmcnt(0) 3729; GFX900-NEXT: s_setpc_b64 s[30:31] 3730; 3731; GFX90A-LABEL: v_shuffle_v3i32_v4i32__1_3_3: 3732; GFX90A: ; %bb.0: 3733; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3734; GFX90A-NEXT: ;;#ASMSTART 3735; GFX90A-NEXT: ; def v[0:3] 3736; GFX90A-NEXT: ;;#ASMEND 3737; GFX90A-NEXT: v_mov_b32_e32 v4, 0 3738; 
GFX90A-NEXT: v_mov_b32_e32 v0, v1 3739; GFX90A-NEXT: v_mov_b32_e32 v1, v3 3740; GFX90A-NEXT: v_mov_b32_e32 v2, v3 3741; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17] 3742; GFX90A-NEXT: s_waitcnt vmcnt(0) 3743; GFX90A-NEXT: s_setpc_b64 s[30:31] 3744; 3745; GFX940-LABEL: v_shuffle_v3i32_v4i32__1_3_3: 3746; GFX940: ; %bb.0: 3747; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3748; GFX940-NEXT: ;;#ASMSTART 3749; GFX940-NEXT: ; def v[0:3] 3750; GFX940-NEXT: ;;#ASMEND 3751; GFX940-NEXT: v_mov_b32_e32 v4, 0 3752; GFX940-NEXT: v_mov_b32_e32 v0, v1 3753; GFX940-NEXT: v_mov_b32_e32 v1, v3 3754; GFX940-NEXT: v_mov_b32_e32 v2, v3 3755; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 3756; GFX940-NEXT: s_waitcnt vmcnt(0) 3757; GFX940-NEXT: s_setpc_b64 s[30:31] 3758 %vec0 = call <4 x i32> asm "; def $0", "=v"() 3759 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> <i32 1, i32 3, i32 3> 3760 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 3761 ret void 3762} 3763 3764define void @v_shuffle_v3i32_v4i32__2_3_3(ptr addrspace(1) inreg %ptr) { 3765; GFX900-LABEL: v_shuffle_v3i32_v4i32__2_3_3: 3766; GFX900: ; %bb.0: 3767; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3768; GFX900-NEXT: ;;#ASMSTART 3769; GFX900-NEXT: ; def v[0:3] 3770; GFX900-NEXT: ;;#ASMEND 3771; GFX900-NEXT: v_mov_b32_e32 v4, 0 3772; GFX900-NEXT: v_mov_b32_e32 v1, v2 3773; GFX900-NEXT: v_mov_b32_e32 v2, v3 3774; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17] 3775; GFX900-NEXT: s_waitcnt vmcnt(0) 3776; GFX900-NEXT: s_setpc_b64 s[30:31] 3777; 3778; GFX90A-LABEL: v_shuffle_v3i32_v4i32__2_3_3: 3779; GFX90A: ; %bb.0: 3780; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3781; GFX90A-NEXT: ;;#ASMSTART 3782; GFX90A-NEXT: ; def v[0:3] 3783; GFX90A-NEXT: ;;#ASMEND 3784; GFX90A-NEXT: v_mov_b32_e32 v4, 0 3785; GFX90A-NEXT: v_mov_b32_e32 v0, v2 3786; GFX90A-NEXT: v_mov_b32_e32 v1, v3 3787; GFX90A-NEXT: v_mov_b32_e32 v2, v3 3788; GFX90A-NEXT: 
global_store_dwordx3 v4, v[0:2], s[16:17] 3789; GFX90A-NEXT: s_waitcnt vmcnt(0) 3790; GFX90A-NEXT: s_setpc_b64 s[30:31] 3791; 3792; GFX940-LABEL: v_shuffle_v3i32_v4i32__2_3_3: 3793; GFX940: ; %bb.0: 3794; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3795; GFX940-NEXT: ;;#ASMSTART 3796; GFX940-NEXT: ; def v[0:3] 3797; GFX940-NEXT: ;;#ASMEND 3798; GFX940-NEXT: v_mov_b32_e32 v4, 0 3799; GFX940-NEXT: v_mov_b32_e32 v0, v2 3800; GFX940-NEXT: v_mov_b32_e32 v1, v3 3801; GFX940-NEXT: v_mov_b32_e32 v2, v3 3802; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 3803; GFX940-NEXT: s_waitcnt vmcnt(0) 3804; GFX940-NEXT: s_setpc_b64 s[30:31] 3805 %vec0 = call <4 x i32> asm "; def $0", "=v"() 3806 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> <i32 2, i32 3, i32 3> 3807 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 3808 ret void 3809} 3810 3811define void @v_shuffle_v3i32_v4i32__3_3_3(ptr addrspace(1) inreg %ptr) { 3812; GFX900-LABEL: v_shuffle_v3i32_v4i32__3_3_3: 3813; GFX900: ; %bb.0: 3814; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3815; GFX900-NEXT: ;;#ASMSTART 3816; GFX900-NEXT: ; def v[0:3] 3817; GFX900-NEXT: ;;#ASMEND 3818; GFX900-NEXT: v_mov_b32_e32 v4, 0 3819; GFX900-NEXT: v_mov_b32_e32 v1, v3 3820; GFX900-NEXT: v_mov_b32_e32 v2, v3 3821; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17] 3822; GFX900-NEXT: s_waitcnt vmcnt(0) 3823; GFX900-NEXT: s_setpc_b64 s[30:31] 3824; 3825; GFX90A-LABEL: v_shuffle_v3i32_v4i32__3_3_3: 3826; GFX90A: ; %bb.0: 3827; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3828; GFX90A-NEXT: ;;#ASMSTART 3829; GFX90A-NEXT: ; def v[0:3] 3830; GFX90A-NEXT: ;;#ASMEND 3831; GFX90A-NEXT: v_mov_b32_e32 v4, 0 3832; GFX90A-NEXT: v_mov_b32_e32 v0, v3 3833; GFX90A-NEXT: v_mov_b32_e32 v1, v3 3834; GFX90A-NEXT: v_mov_b32_e32 v2, v3 3835; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17] 3836; GFX90A-NEXT: s_waitcnt vmcnt(0) 3837; GFX90A-NEXT: s_setpc_b64 s[30:31] 3838; 3839; GFX940-LABEL: 
v_shuffle_v3i32_v4i32__3_3_3: 3840; GFX940: ; %bb.0: 3841; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3842; GFX940-NEXT: ;;#ASMSTART 3843; GFX940-NEXT: ; def v[0:3] 3844; GFX940-NEXT: ;;#ASMEND 3845; GFX940-NEXT: v_mov_b32_e32 v4, 0 3846; GFX940-NEXT: v_mov_b32_e32 v0, v3 3847; GFX940-NEXT: v_mov_b32_e32 v1, v3 3848; GFX940-NEXT: v_mov_b32_e32 v2, v3 3849; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 3850; GFX940-NEXT: s_waitcnt vmcnt(0) 3851; GFX940-NEXT: s_setpc_b64 s[30:31] 3852 %vec0 = call <4 x i32> asm "; def $0", "=v"() 3853 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> <i32 3, i32 3, i32 3> 3854 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 3855 ret void 3856} 3857 3858define void @v_shuffle_v3i32_v4i32__4_3_3(ptr addrspace(1) inreg %ptr) { 3859; GFX900-LABEL: v_shuffle_v3i32_v4i32__4_3_3: 3860; GFX900: ; %bb.0: 3861; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3862; GFX900-NEXT: ;;#ASMSTART 3863; GFX900-NEXT: ; def v[0:3] 3864; GFX900-NEXT: ;;#ASMEND 3865; GFX900-NEXT: v_mov_b32_e32 v4, 0 3866; GFX900-NEXT: v_mov_b32_e32 v2, v3 3867; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17] 3868; GFX900-NEXT: s_waitcnt vmcnt(0) 3869; GFX900-NEXT: s_setpc_b64 s[30:31] 3870; 3871; GFX90A-LABEL: v_shuffle_v3i32_v4i32__4_3_3: 3872; GFX90A: ; %bb.0: 3873; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3874; GFX90A-NEXT: ;;#ASMSTART 3875; GFX90A-NEXT: ; def v[0:3] 3876; GFX90A-NEXT: ;;#ASMEND 3877; GFX90A-NEXT: v_mov_b32_e32 v4, 0 3878; GFX90A-NEXT: v_mov_b32_e32 v1, v3 3879; GFX90A-NEXT: v_mov_b32_e32 v2, v3 3880; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17] 3881; GFX90A-NEXT: s_waitcnt vmcnt(0) 3882; GFX90A-NEXT: s_setpc_b64 s[30:31] 3883; 3884; GFX940-LABEL: v_shuffle_v3i32_v4i32__4_3_3: 3885; GFX940: ; %bb.0: 3886; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3887; GFX940-NEXT: ;;#ASMSTART 3888; GFX940-NEXT: ; def v[0:3] 3889; GFX940-NEXT: ;;#ASMEND 3890; GFX940-NEXT: 
v_mov_b32_e32 v4, 0 3891; GFX940-NEXT: v_mov_b32_e32 v1, v3 3892; GFX940-NEXT: v_mov_b32_e32 v2, v3 3893; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 3894; GFX940-NEXT: s_waitcnt vmcnt(0) 3895; GFX940-NEXT: s_setpc_b64 s[30:31] 3896 %vec0 = call <4 x i32> asm "; def $0", "=v"() 3897 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> <i32 4, i32 3, i32 3> 3898 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 3899 ret void 3900} 3901 3902define void @v_shuffle_v3i32_v4i32__5_3_3(ptr addrspace(1) inreg %ptr) { 3903; GFX900-LABEL: v_shuffle_v3i32_v4i32__5_3_3: 3904; GFX900: ; %bb.0: 3905; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3906; GFX900-NEXT: ;;#ASMSTART 3907; GFX900-NEXT: ; def v[4:7] 3908; GFX900-NEXT: ;;#ASMEND 3909; GFX900-NEXT: v_mov_b32_e32 v8, 0 3910; GFX900-NEXT: ;;#ASMSTART 3911; GFX900-NEXT: ; def v[0:3] 3912; GFX900-NEXT: ;;#ASMEND 3913; GFX900-NEXT: v_mov_b32_e32 v6, v3 3914; GFX900-NEXT: v_mov_b32_e32 v7, v3 3915; GFX900-NEXT: global_store_dwordx3 v8, v[5:7], s[16:17] 3916; GFX900-NEXT: s_waitcnt vmcnt(0) 3917; GFX900-NEXT: s_setpc_b64 s[30:31] 3918; 3919; GFX90A-LABEL: v_shuffle_v3i32_v4i32__5_3_3: 3920; GFX90A: ; %bb.0: 3921; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3922; GFX90A-NEXT: ;;#ASMSTART 3923; GFX90A-NEXT: ; def v[0:3] 3924; GFX90A-NEXT: ;;#ASMEND 3925; GFX90A-NEXT: v_mov_b32_e32 v8, 0 3926; GFX90A-NEXT: ;;#ASMSTART 3927; GFX90A-NEXT: ; def v[4:7] 3928; GFX90A-NEXT: ;;#ASMEND 3929; GFX90A-NEXT: v_mov_b32_e32 v0, v5 3930; GFX90A-NEXT: v_mov_b32_e32 v1, v3 3931; GFX90A-NEXT: v_mov_b32_e32 v2, v3 3932; GFX90A-NEXT: global_store_dwordx3 v8, v[0:2], s[16:17] 3933; GFX90A-NEXT: s_waitcnt vmcnt(0) 3934; GFX90A-NEXT: s_setpc_b64 s[30:31] 3935; 3936; GFX940-LABEL: v_shuffle_v3i32_v4i32__5_3_3: 3937; GFX940: ; %bb.0: 3938; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3939; GFX940-NEXT: ;;#ASMSTART 3940; GFX940-NEXT: ; def v[0:3] 3941; GFX940-NEXT: ;;#ASMEND 3942; GFX940-NEXT: 
v_mov_b32_e32 v8, 0 3943; GFX940-NEXT: ;;#ASMSTART 3944; GFX940-NEXT: ; def v[4:7] 3945; GFX940-NEXT: ;;#ASMEND 3946; GFX940-NEXT: v_mov_b32_e32 v1, v3 3947; GFX940-NEXT: v_mov_b32_e32 v0, v5 3948; GFX940-NEXT: v_mov_b32_e32 v2, v3 3949; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 3950; GFX940-NEXT: s_waitcnt vmcnt(0) 3951; GFX940-NEXT: s_setpc_b64 s[30:31] 3952 %vec0 = call <4 x i32> asm "; def $0", "=v"() 3953 %vec1 = call <4 x i32> asm "; def $0", "=v"() 3954 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 5, i32 3, i32 3> 3955 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 3956 ret void 3957} 3958 3959define void @v_shuffle_v3i32_v4i32__6_3_3(ptr addrspace(1) inreg %ptr) { 3960; GFX900-LABEL: v_shuffle_v3i32_v4i32__6_3_3: 3961; GFX900: ; %bb.0: 3962; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3963; GFX900-NEXT: ;;#ASMSTART 3964; GFX900-NEXT: ; def v[0:3] 3965; GFX900-NEXT: ;;#ASMEND 3966; GFX900-NEXT: v_mov_b32_e32 v8, 0 3967; GFX900-NEXT: ;;#ASMSTART 3968; GFX900-NEXT: ; def v[4:7] 3969; GFX900-NEXT: ;;#ASMEND 3970; GFX900-NEXT: v_mov_b32_e32 v1, v6 3971; GFX900-NEXT: v_mov_b32_e32 v2, v3 3972; GFX900-NEXT: global_store_dwordx3 v8, v[1:3], s[16:17] 3973; GFX900-NEXT: s_waitcnt vmcnt(0) 3974; GFX900-NEXT: s_setpc_b64 s[30:31] 3975; 3976; GFX90A-LABEL: v_shuffle_v3i32_v4i32__6_3_3: 3977; GFX90A: ; %bb.0: 3978; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3979; GFX90A-NEXT: ;;#ASMSTART 3980; GFX90A-NEXT: ; def v[0:3] 3981; GFX90A-NEXT: ;;#ASMEND 3982; GFX90A-NEXT: v_mov_b32_e32 v8, 0 3983; GFX90A-NEXT: ;;#ASMSTART 3984; GFX90A-NEXT: ; def v[4:7] 3985; GFX90A-NEXT: ;;#ASMEND 3986; GFX90A-NEXT: v_mov_b32_e32 v0, v6 3987; GFX90A-NEXT: v_mov_b32_e32 v1, v3 3988; GFX90A-NEXT: v_mov_b32_e32 v2, v3 3989; GFX90A-NEXT: global_store_dwordx3 v8, v[0:2], s[16:17] 3990; GFX90A-NEXT: s_waitcnt vmcnt(0) 3991; GFX90A-NEXT: s_setpc_b64 s[30:31] 3992; 3993; GFX940-LABEL: v_shuffle_v3i32_v4i32__6_3_3: 3994; GFX940: ; 
%bb.0: 3995; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3996; GFX940-NEXT: ;;#ASMSTART 3997; GFX940-NEXT: ; def v[0:3] 3998; GFX940-NEXT: ;;#ASMEND 3999; GFX940-NEXT: v_mov_b32_e32 v8, 0 4000; GFX940-NEXT: ;;#ASMSTART 4001; GFX940-NEXT: ; def v[4:7] 4002; GFX940-NEXT: ;;#ASMEND 4003; GFX940-NEXT: v_mov_b32_e32 v1, v3 4004; GFX940-NEXT: v_mov_b32_e32 v0, v6 4005; GFX940-NEXT: v_mov_b32_e32 v2, v3 4006; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 4007; GFX940-NEXT: s_waitcnt vmcnt(0) 4008; GFX940-NEXT: s_setpc_b64 s[30:31] 4009 %vec0 = call <4 x i32> asm "; def $0", "=v"() 4010 %vec1 = call <4 x i32> asm "; def $0", "=v"() 4011 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 6, i32 3, i32 3> 4012 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 4013 ret void 4014} 4015 4016define void @v_shuffle_v3i32_v4i32__7_3_3(ptr addrspace(1) inreg %ptr) { 4017; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_3_3: 4018; GFX900: ; %bb.0: 4019; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4020; GFX900-NEXT: ;;#ASMSTART 4021; GFX900-NEXT: ; def v[0:3] 4022; GFX900-NEXT: ;;#ASMEND 4023; GFX900-NEXT: v_mov_b32_e32 v8, 0 4024; GFX900-NEXT: ;;#ASMSTART 4025; GFX900-NEXT: ; def v[4:7] 4026; GFX900-NEXT: ;;#ASMEND 4027; GFX900-NEXT: v_mov_b32_e32 v1, v7 4028; GFX900-NEXT: v_mov_b32_e32 v2, v3 4029; GFX900-NEXT: global_store_dwordx3 v8, v[1:3], s[16:17] 4030; GFX900-NEXT: s_waitcnt vmcnt(0) 4031; GFX900-NEXT: s_setpc_b64 s[30:31] 4032; 4033; GFX90A-LABEL: v_shuffle_v3i32_v4i32__7_3_3: 4034; GFX90A: ; %bb.0: 4035; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4036; GFX90A-NEXT: ;;#ASMSTART 4037; GFX90A-NEXT: ; def v[0:3] 4038; GFX90A-NEXT: ;;#ASMEND 4039; GFX90A-NEXT: v_mov_b32_e32 v8, 0 4040; GFX90A-NEXT: ;;#ASMSTART 4041; GFX90A-NEXT: ; def v[4:7] 4042; GFX90A-NEXT: ;;#ASMEND 4043; GFX90A-NEXT: v_mov_b32_e32 v0, v7 4044; GFX90A-NEXT: v_mov_b32_e32 v1, v3 4045; GFX90A-NEXT: v_mov_b32_e32 v2, v3 4046; GFX90A-NEXT: 
global_store_dwordx3 v8, v[0:2], s[16:17] 4047; GFX90A-NEXT: s_waitcnt vmcnt(0) 4048; GFX90A-NEXT: s_setpc_b64 s[30:31] 4049; 4050; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_3_3: 4051; GFX940: ; %bb.0: 4052; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4053; GFX940-NEXT: ;;#ASMSTART 4054; GFX940-NEXT: ; def v[0:3] 4055; GFX940-NEXT: ;;#ASMEND 4056; GFX940-NEXT: v_mov_b32_e32 v8, 0 4057; GFX940-NEXT: ;;#ASMSTART 4058; GFX940-NEXT: ; def v[4:7] 4059; GFX940-NEXT: ;;#ASMEND 4060; GFX940-NEXT: v_mov_b32_e32 v1, v3 4061; GFX940-NEXT: v_mov_b32_e32 v0, v7 4062; GFX940-NEXT: v_mov_b32_e32 v2, v3 4063; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 4064; GFX940-NEXT: s_waitcnt vmcnt(0) 4065; GFX940-NEXT: s_setpc_b64 s[30:31] 4066 %vec0 = call <4 x i32> asm "; def $0", "=v"() 4067 %vec1 = call <4 x i32> asm "; def $0", "=v"() 4068 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 3, i32 3> 4069 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 4070 ret void 4071} 4072 4073define void @v_shuffle_v3i32_v4i32__7_u_3(ptr addrspace(1) inreg %ptr) { 4074; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_u_3: 4075; GFX900: ; %bb.0: 4076; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4077; GFX900-NEXT: ;;#ASMSTART 4078; GFX900-NEXT: ; def v[0:3] 4079; GFX900-NEXT: ;;#ASMEND 4080; GFX900-NEXT: v_mov_b32_e32 v8, 0 4081; GFX900-NEXT: ;;#ASMSTART 4082; GFX900-NEXT: ; def v[4:7] 4083; GFX900-NEXT: ;;#ASMEND 4084; GFX900-NEXT: v_mov_b32_e32 v1, v7 4085; GFX900-NEXT: global_store_dwordx3 v8, v[1:3], s[16:17] 4086; GFX900-NEXT: s_waitcnt vmcnt(0) 4087; GFX900-NEXT: s_setpc_b64 s[30:31] 4088; 4089; GFX90A-LABEL: v_shuffle_v3i32_v4i32__7_u_3: 4090; GFX90A: ; %bb.0: 4091; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4092; GFX90A-NEXT: ;;#ASMSTART 4093; GFX90A-NEXT: ; def v[0:3] 4094; GFX90A-NEXT: ;;#ASMEND 4095; GFX90A-NEXT: v_mov_b32_e32 v8, 0 4096; GFX90A-NEXT: ;;#ASMSTART 4097; GFX90A-NEXT: ; def v[4:7] 4098; GFX90A-NEXT: ;;#ASMEND 
4099; GFX90A-NEXT: v_mov_b32_e32 v0, v7 4100; GFX90A-NEXT: v_mov_b32_e32 v2, v3 4101; GFX90A-NEXT: global_store_dwordx3 v8, v[0:2], s[16:17] 4102; GFX90A-NEXT: s_waitcnt vmcnt(0) 4103; GFX90A-NEXT: s_setpc_b64 s[30:31] 4104; 4105; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_u_3: 4106; GFX940: ; %bb.0: 4107; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4108; GFX940-NEXT: ;;#ASMSTART 4109; GFX940-NEXT: ; def v[0:3] 4110; GFX940-NEXT: ;;#ASMEND 4111; GFX940-NEXT: v_mov_b32_e32 v8, 0 4112; GFX940-NEXT: ;;#ASMSTART 4113; GFX940-NEXT: ; def v[4:7] 4114; GFX940-NEXT: ;;#ASMEND 4115; GFX940-NEXT: v_mov_b32_e32 v2, v3 4116; GFX940-NEXT: v_mov_b32_e32 v0, v7 4117; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 4118; GFX940-NEXT: s_waitcnt vmcnt(0) 4119; GFX940-NEXT: s_setpc_b64 s[30:31] 4120 %vec0 = call <4 x i32> asm "; def $0", "=v"() 4121 %vec1 = call <4 x i32> asm "; def $0", "=v"() 4122 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 poison, i32 3> 4123 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 4124 ret void 4125} 4126 4127define void @v_shuffle_v3i32_v4i32__7_0_3(ptr addrspace(1) inreg %ptr) { 4128; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_0_3: 4129; GFX900: ; %bb.0: 4130; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4131; GFX900-NEXT: ;;#ASMSTART 4132; GFX900-NEXT: ; def v[0:3] 4133; GFX900-NEXT: ;;#ASMEND 4134; GFX900-NEXT: v_mov_b32_e32 v8, 0 4135; GFX900-NEXT: ;;#ASMSTART 4136; GFX900-NEXT: ; def v[4:7] 4137; GFX900-NEXT: ;;#ASMEND 4138; GFX900-NEXT: v_mov_b32_e32 v1, v7 4139; GFX900-NEXT: v_mov_b32_e32 v2, v0 4140; GFX900-NEXT: global_store_dwordx3 v8, v[1:3], s[16:17] 4141; GFX900-NEXT: s_waitcnt vmcnt(0) 4142; GFX900-NEXT: s_setpc_b64 s[30:31] 4143; 4144; GFX90A-LABEL: v_shuffle_v3i32_v4i32__7_0_3: 4145; GFX90A: ; %bb.0: 4146; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4147; GFX90A-NEXT: ;;#ASMSTART 4148; GFX90A-NEXT: ; def v[4:7] 4149; GFX90A-NEXT: ;;#ASMEND 4150; GFX90A-NEXT: 
v_mov_b32_e32 v8, 0 4151; GFX90A-NEXT: ;;#ASMSTART 4152; GFX90A-NEXT: ; def v[0:3] 4153; GFX90A-NEXT: ;;#ASMEND 4154; GFX90A-NEXT: v_mov_b32_e32 v4, v7 4155; GFX90A-NEXT: v_mov_b32_e32 v5, v0 4156; GFX90A-NEXT: v_mov_b32_e32 v6, v3 4157; GFX90A-NEXT: global_store_dwordx3 v8, v[4:6], s[16:17] 4158; GFX90A-NEXT: s_waitcnt vmcnt(0) 4159; GFX90A-NEXT: s_setpc_b64 s[30:31] 4160; 4161; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_0_3: 4162; GFX940: ; %bb.0: 4163; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4164; GFX940-NEXT: ;;#ASMSTART 4165; GFX940-NEXT: ; def v[4:7] 4166; GFX940-NEXT: ;;#ASMEND 4167; GFX940-NEXT: v_mov_b32_e32 v8, 0 4168; GFX940-NEXT: ;;#ASMSTART 4169; GFX940-NEXT: ; def v[0:3] 4170; GFX940-NEXT: ;;#ASMEND 4171; GFX940-NEXT: v_mov_b32_e32 v4, v7 4172; GFX940-NEXT: v_mov_b32_e32 v5, v0 4173; GFX940-NEXT: v_mov_b32_e32 v6, v3 4174; GFX940-NEXT: global_store_dwordx3 v8, v[4:6], s[0:1] sc0 sc1 4175; GFX940-NEXT: s_waitcnt vmcnt(0) 4176; GFX940-NEXT: s_setpc_b64 s[30:31] 4177 %vec0 = call <4 x i32> asm "; def $0", "=v"() 4178 %vec1 = call <4 x i32> asm "; def $0", "=v"() 4179 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 0, i32 3> 4180 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 4181 ret void 4182} 4183 4184define void @v_shuffle_v3i32_v4i32__7_1_3(ptr addrspace(1) inreg %ptr) { 4185; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_1_3: 4186; GFX900: ; %bb.0: 4187; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4188; GFX900-NEXT: ;;#ASMSTART 4189; GFX900-NEXT: ; def v[0:3] 4190; GFX900-NEXT: ;;#ASMEND 4191; GFX900-NEXT: v_mov_b32_e32 v8, 0 4192; GFX900-NEXT: ;;#ASMSTART 4193; GFX900-NEXT: ; def v[4:7] 4194; GFX900-NEXT: ;;#ASMEND 4195; GFX900-NEXT: v_mov_b32_e32 v0, v7 4196; GFX900-NEXT: v_mov_b32_e32 v2, v3 4197; GFX900-NEXT: global_store_dwordx3 v8, v[0:2], s[16:17] 4198; GFX900-NEXT: s_waitcnt vmcnt(0) 4199; GFX900-NEXT: s_setpc_b64 s[30:31] 4200; 4201; GFX90A-LABEL: v_shuffle_v3i32_v4i32__7_1_3: 4202; GFX90A: ; 
%bb.0: 4203; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4204; GFX90A-NEXT: ;;#ASMSTART 4205; GFX90A-NEXT: ; def v[0:3] 4206; GFX90A-NEXT: ;;#ASMEND 4207; GFX90A-NEXT: v_mov_b32_e32 v8, 0 4208; GFX90A-NEXT: ;;#ASMSTART 4209; GFX90A-NEXT: ; def v[4:7] 4210; GFX90A-NEXT: ;;#ASMEND 4211; GFX90A-NEXT: v_mov_b32_e32 v0, v7 4212; GFX90A-NEXT: v_mov_b32_e32 v2, v3 4213; GFX90A-NEXT: global_store_dwordx3 v8, v[0:2], s[16:17] 4214; GFX90A-NEXT: s_waitcnt vmcnt(0) 4215; GFX90A-NEXT: s_setpc_b64 s[30:31] 4216; 4217; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_1_3: 4218; GFX940: ; %bb.0: 4219; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4220; GFX940-NEXT: ;;#ASMSTART 4221; GFX940-NEXT: ; def v[0:3] 4222; GFX940-NEXT: ;;#ASMEND 4223; GFX940-NEXT: v_mov_b32_e32 v8, 0 4224; GFX940-NEXT: ;;#ASMSTART 4225; GFX940-NEXT: ; def v[4:7] 4226; GFX940-NEXT: ;;#ASMEND 4227; GFX940-NEXT: v_mov_b32_e32 v2, v3 4228; GFX940-NEXT: v_mov_b32_e32 v0, v7 4229; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 4230; GFX940-NEXT: s_waitcnt vmcnt(0) 4231; GFX940-NEXT: s_setpc_b64 s[30:31] 4232 %vec0 = call <4 x i32> asm "; def $0", "=v"() 4233 %vec1 = call <4 x i32> asm "; def $0", "=v"() 4234 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 1, i32 3> 4235 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 4236 ret void 4237} 4238 4239define void @v_shuffle_v3i32_v4i32__7_2_3(ptr addrspace(1) inreg %ptr) { 4240; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_2_3: 4241; GFX900: ; %bb.0: 4242; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4243; GFX900-NEXT: ;;#ASMSTART 4244; GFX900-NEXT: ; def v[0:3] 4245; GFX900-NEXT: ;;#ASMEND 4246; GFX900-NEXT: v_mov_b32_e32 v8, 0 4247; GFX900-NEXT: ;;#ASMSTART 4248; GFX900-NEXT: ; def v[4:7] 4249; GFX900-NEXT: ;;#ASMEND 4250; GFX900-NEXT: v_mov_b32_e32 v1, v7 4251; GFX900-NEXT: global_store_dwordx3 v8, v[1:3], s[16:17] 4252; GFX900-NEXT: s_waitcnt vmcnt(0) 4253; GFX900-NEXT: s_setpc_b64 s[30:31] 4254; 4255; 
GFX90A-LABEL: v_shuffle_v3i32_v4i32__7_2_3: 4256; GFX90A: ; %bb.0: 4257; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4258; GFX90A-NEXT: ;;#ASMSTART 4259; GFX90A-NEXT: ; def v[0:3] 4260; GFX90A-NEXT: ;;#ASMEND 4261; GFX90A-NEXT: v_mov_b32_e32 v8, 0 4262; GFX90A-NEXT: ;;#ASMSTART 4263; GFX90A-NEXT: ; def v[4:7] 4264; GFX90A-NEXT: ;;#ASMEND 4265; GFX90A-NEXT: v_mov_b32_e32 v0, v7 4266; GFX90A-NEXT: v_mov_b32_e32 v1, v2 4267; GFX90A-NEXT: v_mov_b32_e32 v2, v3 4268; GFX90A-NEXT: global_store_dwordx3 v8, v[0:2], s[16:17] 4269; GFX90A-NEXT: s_waitcnt vmcnt(0) 4270; GFX90A-NEXT: s_setpc_b64 s[30:31] 4271; 4272; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_2_3: 4273; GFX940: ; %bb.0: 4274; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4275; GFX940-NEXT: ;;#ASMSTART 4276; GFX940-NEXT: ; def v[0:3] 4277; GFX940-NEXT: ;;#ASMEND 4278; GFX940-NEXT: v_mov_b32_e32 v8, 0 4279; GFX940-NEXT: ;;#ASMSTART 4280; GFX940-NEXT: ; def v[4:7] 4281; GFX940-NEXT: ;;#ASMEND 4282; GFX940-NEXT: v_mov_b32_e32 v1, v2 4283; GFX940-NEXT: v_mov_b32_e32 v0, v7 4284; GFX940-NEXT: v_mov_b32_e32 v2, v3 4285; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 4286; GFX940-NEXT: s_waitcnt vmcnt(0) 4287; GFX940-NEXT: s_setpc_b64 s[30:31] 4288 %vec0 = call <4 x i32> asm "; def $0", "=v"() 4289 %vec1 = call <4 x i32> asm "; def $0", "=v"() 4290 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 2, i32 3> 4291 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 4292 ret void 4293} 4294 4295define void @v_shuffle_v3i32_v4i32__7_4_3(ptr addrspace(1) inreg %ptr) { 4296; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_4_3: 4297; GFX900: ; %bb.0: 4298; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4299; GFX900-NEXT: ;;#ASMSTART 4300; GFX900-NEXT: ; def v[0:3] 4301; GFX900-NEXT: ;;#ASMEND 4302; GFX900-NEXT: v_mov_b32_e32 v8, 0 4303; GFX900-NEXT: ;;#ASMSTART 4304; GFX900-NEXT: ; def v[4:7] 4305; GFX900-NEXT: ;;#ASMEND 4306; GFX900-NEXT: v_mov_b32_e32 v1, v7 4307; 
GFX900-NEXT: v_mov_b32_e32 v2, v4 4308; GFX900-NEXT: global_store_dwordx3 v8, v[1:3], s[16:17] 4309; GFX900-NEXT: s_waitcnt vmcnt(0) 4310; GFX900-NEXT: s_setpc_b64 s[30:31] 4311; 4312; GFX90A-LABEL: v_shuffle_v3i32_v4i32__7_4_3: 4313; GFX90A: ; %bb.0: 4314; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4315; GFX90A-NEXT: ;;#ASMSTART 4316; GFX90A-NEXT: ; def v[0:3] 4317; GFX90A-NEXT: ;;#ASMEND 4318; GFX90A-NEXT: v_mov_b32_e32 v8, 0 4319; GFX90A-NEXT: ;;#ASMSTART 4320; GFX90A-NEXT: ; def v[4:7] 4321; GFX90A-NEXT: ;;#ASMEND 4322; GFX90A-NEXT: v_mov_b32_e32 v0, v7 4323; GFX90A-NEXT: v_mov_b32_e32 v1, v4 4324; GFX90A-NEXT: v_mov_b32_e32 v2, v3 4325; GFX90A-NEXT: global_store_dwordx3 v8, v[0:2], s[16:17] 4326; GFX90A-NEXT: s_waitcnt vmcnt(0) 4327; GFX90A-NEXT: s_setpc_b64 s[30:31] 4328; 4329; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_4_3: 4330; GFX940: ; %bb.0: 4331; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4332; GFX940-NEXT: ;;#ASMSTART 4333; GFX940-NEXT: ; def v[0:3] 4334; GFX940-NEXT: ;;#ASMEND 4335; GFX940-NEXT: v_mov_b32_e32 v8, 0 4336; GFX940-NEXT: ;;#ASMSTART 4337; GFX940-NEXT: ; def v[4:7] 4338; GFX940-NEXT: ;;#ASMEND 4339; GFX940-NEXT: v_mov_b32_e32 v2, v3 4340; GFX940-NEXT: v_mov_b32_e32 v0, v7 4341; GFX940-NEXT: v_mov_b32_e32 v1, v4 4342; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 4343; GFX940-NEXT: s_waitcnt vmcnt(0) 4344; GFX940-NEXT: s_setpc_b64 s[30:31] 4345 %vec0 = call <4 x i32> asm "; def $0", "=v"() 4346 %vec1 = call <4 x i32> asm "; def $0", "=v"() 4347 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 4, i32 3> 4348 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 4349 ret void 4350} 4351 4352define void @v_shuffle_v3i32_v4i32__7_5_3(ptr addrspace(1) inreg %ptr) { 4353; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_5_3: 4354; GFX900: ; %bb.0: 4355; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4356; GFX900-NEXT: ;;#ASMSTART 4357; GFX900-NEXT: ; def v[4:7] 4358; GFX900-NEXT: ;;#ASMEND 
4359; GFX900-NEXT: v_mov_b32_e32 v8, 0 4360; GFX900-NEXT: ;;#ASMSTART 4361; GFX900-NEXT: ; def v[0:3] 4362; GFX900-NEXT: ;;#ASMEND 4363; GFX900-NEXT: v_mov_b32_e32 v4, v7 4364; GFX900-NEXT: v_mov_b32_e32 v6, v3 4365; GFX900-NEXT: global_store_dwordx3 v8, v[4:6], s[16:17] 4366; GFX900-NEXT: s_waitcnt vmcnt(0) 4367; GFX900-NEXT: s_setpc_b64 s[30:31] 4368; 4369; GFX90A-LABEL: v_shuffle_v3i32_v4i32__7_5_3: 4370; GFX90A: ; %bb.0: 4371; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4372; GFX90A-NEXT: ;;#ASMSTART 4373; GFX90A-NEXT: ; def v[4:7] 4374; GFX90A-NEXT: ;;#ASMEND 4375; GFX90A-NEXT: v_mov_b32_e32 v8, 0 4376; GFX90A-NEXT: ;;#ASMSTART 4377; GFX90A-NEXT: ; def v[0:3] 4378; GFX90A-NEXT: ;;#ASMEND 4379; GFX90A-NEXT: v_mov_b32_e32 v4, v7 4380; GFX90A-NEXT: v_mov_b32_e32 v6, v3 4381; GFX90A-NEXT: global_store_dwordx3 v8, v[4:6], s[16:17] 4382; GFX90A-NEXT: s_waitcnt vmcnt(0) 4383; GFX90A-NEXT: s_setpc_b64 s[30:31] 4384; 4385; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_5_3: 4386; GFX940: ; %bb.0: 4387; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4388; GFX940-NEXT: ;;#ASMSTART 4389; GFX940-NEXT: ; def v[4:7] 4390; GFX940-NEXT: ;;#ASMEND 4391; GFX940-NEXT: v_mov_b32_e32 v8, 0 4392; GFX940-NEXT: ;;#ASMSTART 4393; GFX940-NEXT: ; def v[0:3] 4394; GFX940-NEXT: ;;#ASMEND 4395; GFX940-NEXT: v_mov_b32_e32 v4, v7 4396; GFX940-NEXT: v_mov_b32_e32 v6, v3 4397; GFX940-NEXT: global_store_dwordx3 v8, v[4:6], s[0:1] sc0 sc1 4398; GFX940-NEXT: s_waitcnt vmcnt(0) 4399; GFX940-NEXT: s_setpc_b64 s[30:31] 4400 %vec0 = call <4 x i32> asm "; def $0", "=v"() 4401 %vec1 = call <4 x i32> asm "; def $0", "=v"() 4402 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 5, i32 3> 4403 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 4404 ret void 4405} 4406 4407define void @v_shuffle_v3i32_v4i32__7_6_3(ptr addrspace(1) inreg %ptr) { 4408; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_6_3: 4409; GFX900: ; %bb.0: 4410; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) 4411; GFX900-NEXT: ;;#ASMSTART 4412; GFX900-NEXT: ; def v[4:7] 4413; GFX900-NEXT: ;;#ASMEND 4414; GFX900-NEXT: v_mov_b32_e32 v8, 0 4415; GFX900-NEXT: ;;#ASMSTART 4416; GFX900-NEXT: ; def v[0:3] 4417; GFX900-NEXT: ;;#ASMEND 4418; GFX900-NEXT: v_mov_b32_e32 v5, v7 4419; GFX900-NEXT: v_mov_b32_e32 v7, v3 4420; GFX900-NEXT: global_store_dwordx3 v8, v[5:7], s[16:17] 4421; GFX900-NEXT: s_waitcnt vmcnt(0) 4422; GFX900-NEXT: s_setpc_b64 s[30:31] 4423; 4424; GFX90A-LABEL: v_shuffle_v3i32_v4i32__7_6_3: 4425; GFX90A: ; %bb.0: 4426; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4427; GFX90A-NEXT: ;;#ASMSTART 4428; GFX90A-NEXT: ; def v[0:3] 4429; GFX90A-NEXT: ;;#ASMEND 4430; GFX90A-NEXT: v_mov_b32_e32 v8, 0 4431; GFX90A-NEXT: ;;#ASMSTART 4432; GFX90A-NEXT: ; def v[4:7] 4433; GFX90A-NEXT: ;;#ASMEND 4434; GFX90A-NEXT: v_mov_b32_e32 v0, v7 4435; GFX90A-NEXT: v_mov_b32_e32 v1, v6 4436; GFX90A-NEXT: v_mov_b32_e32 v2, v3 4437; GFX90A-NEXT: global_store_dwordx3 v8, v[0:2], s[16:17] 4438; GFX90A-NEXT: s_waitcnt vmcnt(0) 4439; GFX90A-NEXT: s_setpc_b64 s[30:31] 4440; 4441; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_6_3: 4442; GFX940: ; %bb.0: 4443; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4444; GFX940-NEXT: ;;#ASMSTART 4445; GFX940-NEXT: ; def v[0:3] 4446; GFX940-NEXT: ;;#ASMEND 4447; GFX940-NEXT: v_mov_b32_e32 v8, 0 4448; GFX940-NEXT: ;;#ASMSTART 4449; GFX940-NEXT: ; def v[4:7] 4450; GFX940-NEXT: ;;#ASMEND 4451; GFX940-NEXT: v_mov_b32_e32 v2, v3 4452; GFX940-NEXT: v_mov_b32_e32 v0, v7 4453; GFX940-NEXT: v_mov_b32_e32 v1, v6 4454; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 4455; GFX940-NEXT: s_waitcnt vmcnt(0) 4456; GFX940-NEXT: s_setpc_b64 s[30:31] 4457 %vec0 = call <4 x i32> asm "; def $0", "=v"() 4458 %vec1 = call <4 x i32> asm "; def $0", "=v"() 4459 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 6, i32 3> 4460 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 4461 ret void 4462} 4463 4464define void 
@v_shuffle_v3i32_v4i32__u_4_4(ptr addrspace(1) inreg %ptr) { 4465; GFX9-LABEL: v_shuffle_v3i32_v4i32__u_4_4: 4466; GFX9: ; %bb.0: 4467; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4468; GFX9-NEXT: s_setpc_b64 s[30:31] 4469 %vec0 = call <4 x i32> asm "; def $0", "=v"() 4470 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> <i32 poison, i32 4, i32 4> 4471 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 4472 ret void 4473} 4474 4475define void @v_shuffle_v3i32_v4i32__0_4_4(ptr addrspace(1) inreg %ptr) { 4476; GFX900-LABEL: v_shuffle_v3i32_v4i32__0_4_4: 4477; GFX900: ; %bb.0: 4478; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4479; GFX900-NEXT: v_mov_b32_e32 v4, 0 4480; GFX900-NEXT: ;;#ASMSTART 4481; GFX900-NEXT: ; def v[0:3] 4482; GFX900-NEXT: ;;#ASMEND 4483; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17] 4484; GFX900-NEXT: s_waitcnt vmcnt(0) 4485; GFX900-NEXT: s_setpc_b64 s[30:31] 4486; 4487; GFX90A-LABEL: v_shuffle_v3i32_v4i32__0_4_4: 4488; GFX90A: ; %bb.0: 4489; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4490; GFX90A-NEXT: v_mov_b32_e32 v4, 0 4491; GFX90A-NEXT: ;;#ASMSTART 4492; GFX90A-NEXT: ; def v[0:3] 4493; GFX90A-NEXT: ;;#ASMEND 4494; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17] 4495; GFX90A-NEXT: s_waitcnt vmcnt(0) 4496; GFX90A-NEXT: s_setpc_b64 s[30:31] 4497; 4498; GFX940-LABEL: v_shuffle_v3i32_v4i32__0_4_4: 4499; GFX940: ; %bb.0: 4500; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4501; GFX940-NEXT: v_mov_b32_e32 v4, 0 4502; GFX940-NEXT: ;;#ASMSTART 4503; GFX940-NEXT: ; def v[0:3] 4504; GFX940-NEXT: ;;#ASMEND 4505; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 4506; GFX940-NEXT: s_waitcnt vmcnt(0) 4507; GFX940-NEXT: s_setpc_b64 s[30:31] 4508 %vec0 = call <4 x i32> asm "; def $0", "=v"() 4509 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> <i32 0, i32 4, i32 4> 4510 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 4511 ret void 4512} 4513 
4514define void @v_shuffle_v3i32_v4i32__1_4_4(ptr addrspace(1) inreg %ptr) { 4515; GFX900-LABEL: v_shuffle_v3i32_v4i32__1_4_4: 4516; GFX900: ; %bb.0: 4517; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4518; GFX900-NEXT: v_mov_b32_e32 v4, 0 4519; GFX900-NEXT: ;;#ASMSTART 4520; GFX900-NEXT: ; def v[0:3] 4521; GFX900-NEXT: ;;#ASMEND 4522; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17] 4523; GFX900-NEXT: s_waitcnt vmcnt(0) 4524; GFX900-NEXT: s_setpc_b64 s[30:31] 4525; 4526; GFX90A-LABEL: v_shuffle_v3i32_v4i32__1_4_4: 4527; GFX90A: ; %bb.0: 4528; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4529; GFX90A-NEXT: ;;#ASMSTART 4530; GFX90A-NEXT: ; def v[0:3] 4531; GFX90A-NEXT: ;;#ASMEND 4532; GFX90A-NEXT: v_mov_b32_e32 v4, 0 4533; GFX90A-NEXT: v_mov_b32_e32 v0, v1 4534; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17] 4535; GFX90A-NEXT: s_waitcnt vmcnt(0) 4536; GFX90A-NEXT: s_setpc_b64 s[30:31] 4537; 4538; GFX940-LABEL: v_shuffle_v3i32_v4i32__1_4_4: 4539; GFX940: ; %bb.0: 4540; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4541; GFX940-NEXT: ;;#ASMSTART 4542; GFX940-NEXT: ; def v[0:3] 4543; GFX940-NEXT: ;;#ASMEND 4544; GFX940-NEXT: v_mov_b32_e32 v4, 0 4545; GFX940-NEXT: v_mov_b32_e32 v0, v1 4546; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 4547; GFX940-NEXT: s_waitcnt vmcnt(0) 4548; GFX940-NEXT: s_setpc_b64 s[30:31] 4549 %vec0 = call <4 x i32> asm "; def $0", "=v"() 4550 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> <i32 1, i32 4, i32 4> 4551 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 4552 ret void 4553} 4554 4555define void @v_shuffle_v3i32_v4i32__2_4_4(ptr addrspace(1) inreg %ptr) { 4556; GFX900-LABEL: v_shuffle_v3i32_v4i32__2_4_4: 4557; GFX900: ; %bb.0: 4558; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4559; GFX900-NEXT: ;;#ASMSTART 4560; GFX900-NEXT: ; def v[0:3] 4561; GFX900-NEXT: ;;#ASMEND 4562; GFX900-NEXT: v_mov_b32_e32 v4, 0 4563; GFX900-NEXT: v_mov_b32_e32 v0, v2 4564; 
GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17] 4565; GFX900-NEXT: s_waitcnt vmcnt(0) 4566; GFX900-NEXT: s_setpc_b64 s[30:31] 4567; 4568; GFX90A-LABEL: v_shuffle_v3i32_v4i32__2_4_4: 4569; GFX90A: ; %bb.0: 4570; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4571; GFX90A-NEXT: ;;#ASMSTART 4572; GFX90A-NEXT: ; def v[0:3] 4573; GFX90A-NEXT: ;;#ASMEND 4574; GFX90A-NEXT: v_mov_b32_e32 v4, 0 4575; GFX90A-NEXT: v_mov_b32_e32 v0, v2 4576; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17] 4577; GFX90A-NEXT: s_waitcnt vmcnt(0) 4578; GFX90A-NEXT: s_setpc_b64 s[30:31] 4579; 4580; GFX940-LABEL: v_shuffle_v3i32_v4i32__2_4_4: 4581; GFX940: ; %bb.0: 4582; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4583; GFX940-NEXT: ;;#ASMSTART 4584; GFX940-NEXT: ; def v[0:3] 4585; GFX940-NEXT: ;;#ASMEND 4586; GFX940-NEXT: v_mov_b32_e32 v4, 0 4587; GFX940-NEXT: v_mov_b32_e32 v0, v2 4588; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 4589; GFX940-NEXT: s_waitcnt vmcnt(0) 4590; GFX940-NEXT: s_setpc_b64 s[30:31] 4591 %vec0 = call <4 x i32> asm "; def $0", "=v"() 4592 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> <i32 2, i32 4, i32 4> 4593 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 4594 ret void 4595} 4596 4597define void @v_shuffle_v3i32_v4i32__3_4_4(ptr addrspace(1) inreg %ptr) { 4598; GFX900-LABEL: v_shuffle_v3i32_v4i32__3_4_4: 4599; GFX900: ; %bb.0: 4600; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4601; GFX900-NEXT: ;;#ASMSTART 4602; GFX900-NEXT: ; def v[0:3] 4603; GFX900-NEXT: ;;#ASMEND 4604; GFX900-NEXT: v_mov_b32_e32 v4, 0 4605; GFX900-NEXT: v_mov_b32_e32 v0, v3 4606; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17] 4607; GFX900-NEXT: s_waitcnt vmcnt(0) 4608; GFX900-NEXT: s_setpc_b64 s[30:31] 4609; 4610; GFX90A-LABEL: v_shuffle_v3i32_v4i32__3_4_4: 4611; GFX90A: ; %bb.0: 4612; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4613; GFX90A-NEXT: ;;#ASMSTART 4614; GFX90A-NEXT: ; def v[0:3] 4615; 
GFX90A-NEXT: ;;#ASMEND 4616; GFX90A-NEXT: v_mov_b32_e32 v4, 0 4617; GFX90A-NEXT: v_mov_b32_e32 v0, v3 4618; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17] 4619; GFX90A-NEXT: s_waitcnt vmcnt(0) 4620; GFX90A-NEXT: s_setpc_b64 s[30:31] 4621; 4622; GFX940-LABEL: v_shuffle_v3i32_v4i32__3_4_4: 4623; GFX940: ; %bb.0: 4624; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4625; GFX940-NEXT: ;;#ASMSTART 4626; GFX940-NEXT: ; def v[0:3] 4627; GFX940-NEXT: ;;#ASMEND 4628; GFX940-NEXT: v_mov_b32_e32 v4, 0 4629; GFX940-NEXT: v_mov_b32_e32 v0, v3 4630; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 4631; GFX940-NEXT: s_waitcnt vmcnt(0) 4632; GFX940-NEXT: s_setpc_b64 s[30:31] 4633 %vec0 = call <4 x i32> asm "; def $0", "=v"() 4634 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> <i32 3, i32 4, i32 4> 4635 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 4636 ret void 4637} 4638 4639define void @v_shuffle_v3i32_v4i32__4_4_4(ptr addrspace(1) inreg %ptr) { 4640; GFX9-LABEL: v_shuffle_v3i32_v4i32__4_4_4: 4641; GFX9: ; %bb.0: 4642; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4643; GFX9-NEXT: s_setpc_b64 s[30:31] 4644 %vec0 = call <4 x i32> asm "; def $0", "=v"() 4645 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> <i32 4, i32 4, i32 4> 4646 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 4647 ret void 4648} 4649 4650define void @v_shuffle_v3i32_v4i32__5_4_4(ptr addrspace(1) inreg %ptr) { 4651; GFX900-LABEL: v_shuffle_v3i32_v4i32__5_4_4: 4652; GFX900: ; %bb.0: 4653; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4654; GFX900-NEXT: ;;#ASMSTART 4655; GFX900-NEXT: ; def v[0:3] 4656; GFX900-NEXT: ;;#ASMEND 4657; GFX900-NEXT: v_mov_b32_e32 v4, 0 4658; GFX900-NEXT: v_mov_b32_e32 v2, v0 4659; GFX900-NEXT: v_mov_b32_e32 v3, v0 4660; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17] 4661; GFX900-NEXT: s_waitcnt vmcnt(0) 4662; GFX900-NEXT: s_setpc_b64 s[30:31] 4663; 4664; GFX90A-LABEL: 
v_shuffle_v3i32_v4i32__5_4_4: 4665; GFX90A: ; %bb.0: 4666; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4667; GFX90A-NEXT: ;;#ASMSTART 4668; GFX90A-NEXT: ; def v[0:3] 4669; GFX90A-NEXT: ;;#ASMEND 4670; GFX90A-NEXT: v_mov_b32_e32 v5, 0 4671; GFX90A-NEXT: v_mov_b32_e32 v2, v1 4672; GFX90A-NEXT: v_mov_b32_e32 v3, v0 4673; GFX90A-NEXT: v_mov_b32_e32 v4, v0 4674; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17] 4675; GFX90A-NEXT: s_waitcnt vmcnt(0) 4676; GFX90A-NEXT: s_setpc_b64 s[30:31] 4677; 4678; GFX940-LABEL: v_shuffle_v3i32_v4i32__5_4_4: 4679; GFX940: ; %bb.0: 4680; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4681; GFX940-NEXT: ;;#ASMSTART 4682; GFX940-NEXT: ; def v[0:3] 4683; GFX940-NEXT: ;;#ASMEND 4684; GFX940-NEXT: v_mov_b32_e32 v5, 0 4685; GFX940-NEXT: v_mov_b32_e32 v2, v1 4686; GFX940-NEXT: v_mov_b32_e32 v3, v0 4687; GFX940-NEXT: v_mov_b32_e32 v4, v0 4688; GFX940-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] sc0 sc1 4689; GFX940-NEXT: s_waitcnt vmcnt(0) 4690; GFX940-NEXT: s_setpc_b64 s[30:31] 4691 %vec0 = call <4 x i32> asm "; def $0", "=v"() 4692 %vec1 = call <4 x i32> asm "; def $0", "=v"() 4693 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 5, i32 4, i32 4> 4694 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 4695 ret void 4696} 4697 4698define void @v_shuffle_v3i32_v4i32__6_4_4(ptr addrspace(1) inreg %ptr) { 4699; GFX900-LABEL: v_shuffle_v3i32_v4i32__6_4_4: 4700; GFX900: ; %bb.0: 4701; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4702; GFX900-NEXT: ;;#ASMSTART 4703; GFX900-NEXT: ; def v[0:3] 4704; GFX900-NEXT: ;;#ASMEND 4705; GFX900-NEXT: v_mov_b32_e32 v4, 0 4706; GFX900-NEXT: v_mov_b32_e32 v1, v2 4707; GFX900-NEXT: v_mov_b32_e32 v2, v0 4708; GFX900-NEXT: v_mov_b32_e32 v3, v0 4709; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17] 4710; GFX900-NEXT: s_waitcnt vmcnt(0) 4711; GFX900-NEXT: s_setpc_b64 s[30:31] 4712; 4713; GFX90A-LABEL: v_shuffle_v3i32_v4i32__6_4_4: 4714; GFX90A: ; %bb.0: 4715; 
GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4716; GFX90A-NEXT: ;;#ASMSTART 4717; GFX90A-NEXT: ; def v[0:3] 4718; GFX90A-NEXT: ;;#ASMEND 4719; GFX90A-NEXT: v_mov_b32_e32 v5, 0 4720; GFX90A-NEXT: v_mov_b32_e32 v3, v0 4721; GFX90A-NEXT: v_mov_b32_e32 v4, v0 4722; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17] 4723; GFX90A-NEXT: s_waitcnt vmcnt(0) 4724; GFX90A-NEXT: s_setpc_b64 s[30:31] 4725; 4726; GFX940-LABEL: v_shuffle_v3i32_v4i32__6_4_4: 4727; GFX940: ; %bb.0: 4728; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4729; GFX940-NEXT: ;;#ASMSTART 4730; GFX940-NEXT: ; def v[0:3] 4731; GFX940-NEXT: ;;#ASMEND 4732; GFX940-NEXT: v_mov_b32_e32 v5, 0 4733; GFX940-NEXT: v_mov_b32_e32 v3, v0 4734; GFX940-NEXT: v_mov_b32_e32 v4, v0 4735; GFX940-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] sc0 sc1 4736; GFX940-NEXT: s_waitcnt vmcnt(0) 4737; GFX940-NEXT: s_setpc_b64 s[30:31] 4738 %vec0 = call <4 x i32> asm "; def $0", "=v"() 4739 %vec1 = call <4 x i32> asm "; def $0", "=v"() 4740 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 6, i32 4, i32 4> 4741 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 4742 ret void 4743} 4744 4745define void @v_shuffle_v3i32_v4i32__7_4_4(ptr addrspace(1) inreg %ptr) { 4746; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_4_4: 4747; GFX900: ; %bb.0: 4748; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4749; GFX900-NEXT: ;;#ASMSTART 4750; GFX900-NEXT: ; def v[0:3] 4751; GFX900-NEXT: ;;#ASMEND 4752; GFX900-NEXT: v_mov_b32_e32 v4, 0 4753; GFX900-NEXT: v_mov_b32_e32 v1, v3 4754; GFX900-NEXT: v_mov_b32_e32 v2, v0 4755; GFX900-NEXT: v_mov_b32_e32 v3, v0 4756; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17] 4757; GFX900-NEXT: s_waitcnt vmcnt(0) 4758; GFX900-NEXT: s_setpc_b64 s[30:31] 4759; 4760; GFX90A-LABEL: v_shuffle_v3i32_v4i32__7_4_4: 4761; GFX90A: ; %bb.0: 4762; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4763; GFX90A-NEXT: ;;#ASMSTART 4764; GFX90A-NEXT: ; def v[0:3] 4765; GFX90A-NEXT: 
;;#ASMEND 4766; GFX90A-NEXT: v_mov_b32_e32 v5, 0 4767; GFX90A-NEXT: v_mov_b32_e32 v2, v3 4768; GFX90A-NEXT: v_mov_b32_e32 v3, v0 4769; GFX90A-NEXT: v_mov_b32_e32 v4, v0 4770; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17] 4771; GFX90A-NEXT: s_waitcnt vmcnt(0) 4772; GFX90A-NEXT: s_setpc_b64 s[30:31] 4773; 4774; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_4_4: 4775; GFX940: ; %bb.0: 4776; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4777; GFX940-NEXT: ;;#ASMSTART 4778; GFX940-NEXT: ; def v[0:3] 4779; GFX940-NEXT: ;;#ASMEND 4780; GFX940-NEXT: v_mov_b32_e32 v5, 0 4781; GFX940-NEXT: v_mov_b32_e32 v2, v3 4782; GFX940-NEXT: v_mov_b32_e32 v3, v0 4783; GFX940-NEXT: v_mov_b32_e32 v4, v0 4784; GFX940-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] sc0 sc1 4785; GFX940-NEXT: s_waitcnt vmcnt(0) 4786; GFX940-NEXT: s_setpc_b64 s[30:31] 4787 %vec0 = call <4 x i32> asm "; def $0", "=v"() 4788 %vec1 = call <4 x i32> asm "; def $0", "=v"() 4789 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 4, i32 4> 4790 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 4791 ret void 4792} 4793 4794define void @v_shuffle_v3i32_v4i32__7_u_4(ptr addrspace(1) inreg %ptr) { 4795; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_u_4: 4796; GFX900: ; %bb.0: 4797; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4798; GFX900-NEXT: ;;#ASMSTART 4799; GFX900-NEXT: ; def v[0:3] 4800; GFX900-NEXT: ;;#ASMEND 4801; GFX900-NEXT: v_mov_b32_e32 v4, 0 4802; GFX900-NEXT: v_mov_b32_e32 v1, v3 4803; GFX900-NEXT: v_mov_b32_e32 v3, v0 4804; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17] 4805; GFX900-NEXT: s_waitcnt vmcnt(0) 4806; GFX900-NEXT: s_setpc_b64 s[30:31] 4807; 4808; GFX90A-LABEL: v_shuffle_v3i32_v4i32__7_u_4: 4809; GFX90A: ; %bb.0: 4810; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4811; GFX90A-NEXT: ;;#ASMSTART 4812; GFX90A-NEXT: ; def v[0:3] 4813; GFX90A-NEXT: ;;#ASMEND 4814; GFX90A-NEXT: v_mov_b32_e32 v5, 0 4815; GFX90A-NEXT: v_mov_b32_e32 v2, v3 4816; 
GFX90A-NEXT: v_mov_b32_e32 v4, v0 4817; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17] 4818; GFX90A-NEXT: s_waitcnt vmcnt(0) 4819; GFX90A-NEXT: s_setpc_b64 s[30:31] 4820; 4821; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_u_4: 4822; GFX940: ; %bb.0: 4823; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4824; GFX940-NEXT: ;;#ASMSTART 4825; GFX940-NEXT: ; def v[0:3] 4826; GFX940-NEXT: ;;#ASMEND 4827; GFX940-NEXT: v_mov_b32_e32 v5, 0 4828; GFX940-NEXT: v_mov_b32_e32 v2, v3 4829; GFX940-NEXT: v_mov_b32_e32 v4, v0 4830; GFX940-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] sc0 sc1 4831; GFX940-NEXT: s_waitcnt vmcnt(0) 4832; GFX940-NEXT: s_setpc_b64 s[30:31] 4833 %vec0 = call <4 x i32> asm "; def $0", "=v"() 4834 %vec1 = call <4 x i32> asm "; def $0", "=v"() 4835 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 poison, i32 4> 4836 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 4837 ret void 4838} 4839 4840define void @v_shuffle_v3i32_v4i32__7_0_4(ptr addrspace(1) inreg %ptr) { 4841; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_0_4: 4842; GFX900: ; %bb.0: 4843; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4844; GFX900-NEXT: ;;#ASMSTART 4845; GFX900-NEXT: ; def v[0:3] 4846; GFX900-NEXT: ;;#ASMEND 4847; GFX900-NEXT: ;;#ASMSTART 4848; GFX900-NEXT: ; def v[1:4] 4849; GFX900-NEXT: ;;#ASMEND 4850; GFX900-NEXT: v_mov_b32_e32 v5, 0 4851; GFX900-NEXT: v_mov_b32_e32 v2, v4 4852; GFX900-NEXT: v_mov_b32_e32 v3, v0 4853; GFX900-NEXT: v_mov_b32_e32 v4, v1 4854; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17] 4855; GFX900-NEXT: s_waitcnt vmcnt(0) 4856; GFX900-NEXT: s_setpc_b64 s[30:31] 4857; 4858; GFX90A-LABEL: v_shuffle_v3i32_v4i32__7_0_4: 4859; GFX90A: ; %bb.0: 4860; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4861; GFX90A-NEXT: ;;#ASMSTART 4862; GFX90A-NEXT: ; def v[0:3] 4863; GFX90A-NEXT: ;;#ASMEND 4864; GFX90A-NEXT: ;;#ASMSTART 4865; GFX90A-NEXT: ; def v[2:5] 4866; GFX90A-NEXT: ;;#ASMEND 4867; GFX90A-NEXT: v_mov_b32_e32 
v7, 0 4868; GFX90A-NEXT: v_mov_b32_e32 v4, v5 4869; GFX90A-NEXT: v_mov_b32_e32 v5, v0 4870; GFX90A-NEXT: v_mov_b32_e32 v6, v2 4871; GFX90A-NEXT: global_store_dwordx3 v7, v[4:6], s[16:17] 4872; GFX90A-NEXT: s_waitcnt vmcnt(0) 4873; GFX90A-NEXT: s_setpc_b64 s[30:31] 4874; 4875; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_0_4: 4876; GFX940: ; %bb.0: 4877; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4878; GFX940-NEXT: ;;#ASMSTART 4879; GFX940-NEXT: ; def v[0:3] 4880; GFX940-NEXT: ;;#ASMEND 4881; GFX940-NEXT: v_mov_b32_e32 v7, 0 4882; GFX940-NEXT: ;;#ASMSTART 4883; GFX940-NEXT: ; def v[2:5] 4884; GFX940-NEXT: ;;#ASMEND 4885; GFX940-NEXT: s_nop 0 4886; GFX940-NEXT: v_mov_b32_e32 v4, v5 4887; GFX940-NEXT: v_mov_b32_e32 v5, v0 4888; GFX940-NEXT: v_mov_b32_e32 v6, v2 4889; GFX940-NEXT: global_store_dwordx3 v7, v[4:6], s[0:1] sc0 sc1 4890; GFX940-NEXT: s_waitcnt vmcnt(0) 4891; GFX940-NEXT: s_setpc_b64 s[30:31] 4892 %vec0 = call <4 x i32> asm "; def $0", "=v"() 4893 %vec1 = call <4 x i32> asm "; def $0", "=v"() 4894 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 0, i32 4> 4895 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 4896 ret void 4897} 4898 4899define void @v_shuffle_v3i32_v4i32__7_1_4(ptr addrspace(1) inreg %ptr) { 4900; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_1_4: 4901; GFX900: ; %bb.0: 4902; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4903; GFX900-NEXT: ;;#ASMSTART 4904; GFX900-NEXT: ; def v[0:3] 4905; GFX900-NEXT: ;;#ASMEND 4906; GFX900-NEXT: v_mov_b32_e32 v6, 0 4907; GFX900-NEXT: ;;#ASMSTART 4908; GFX900-NEXT: ; def v[2:5] 4909; GFX900-NEXT: ;;#ASMEND 4910; GFX900-NEXT: v_mov_b32_e32 v0, v5 4911; GFX900-NEXT: global_store_dwordx3 v6, v[0:2], s[16:17] 4912; GFX900-NEXT: s_waitcnt vmcnt(0) 4913; GFX900-NEXT: s_setpc_b64 s[30:31] 4914; 4915; GFX90A-LABEL: v_shuffle_v3i32_v4i32__7_1_4: 4916; GFX90A: ; %bb.0: 4917; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4918; GFX90A-NEXT: ;;#ASMSTART 4919; GFX90A-NEXT: ; 
def v[0:3] 4920; GFX90A-NEXT: ;;#ASMEND 4921; GFX90A-NEXT: v_mov_b32_e32 v6, 0 4922; GFX90A-NEXT: ;;#ASMSTART 4923; GFX90A-NEXT: ; def v[2:5] 4924; GFX90A-NEXT: ;;#ASMEND 4925; GFX90A-NEXT: v_mov_b32_e32 v0, v5 4926; GFX90A-NEXT: global_store_dwordx3 v6, v[0:2], s[16:17] 4927; GFX90A-NEXT: s_waitcnt vmcnt(0) 4928; GFX90A-NEXT: s_setpc_b64 s[30:31] 4929; 4930; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_1_4: 4931; GFX940: ; %bb.0: 4932; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4933; GFX940-NEXT: ;;#ASMSTART 4934; GFX940-NEXT: ; def v[0:3] 4935; GFX940-NEXT: ;;#ASMEND 4936; GFX940-NEXT: v_mov_b32_e32 v6, 0 4937; GFX940-NEXT: ;;#ASMSTART 4938; GFX940-NEXT: ; def v[2:5] 4939; GFX940-NEXT: ;;#ASMEND 4940; GFX940-NEXT: s_nop 0 4941; GFX940-NEXT: v_mov_b32_e32 v0, v5 4942; GFX940-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] sc0 sc1 4943; GFX940-NEXT: s_waitcnt vmcnt(0) 4944; GFX940-NEXT: s_setpc_b64 s[30:31] 4945 %vec0 = call <4 x i32> asm "; def $0", "=v"() 4946 %vec1 = call <4 x i32> asm "; def $0", "=v"() 4947 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 1, i32 4> 4948 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 4949 ret void 4950} 4951 4952define void @v_shuffle_v3i32_v4i32__7_2_4(ptr addrspace(1) inreg %ptr) { 4953; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_2_4: 4954; GFX900: ; %bb.0: 4955; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4956; GFX900-NEXT: ;;#ASMSTART 4957; GFX900-NEXT: ; def v[0:3] 4958; GFX900-NEXT: ;;#ASMEND 4959; GFX900-NEXT: v_mov_b32_e32 v7, 0 4960; GFX900-NEXT: ;;#ASMSTART 4961; GFX900-NEXT: ; def v[3:6] 4962; GFX900-NEXT: ;;#ASMEND 4963; GFX900-NEXT: v_mov_b32_e32 v1, v6 4964; GFX900-NEXT: global_store_dwordx3 v7, v[1:3], s[16:17] 4965; GFX900-NEXT: s_waitcnt vmcnt(0) 4966; GFX900-NEXT: s_setpc_b64 s[30:31] 4967; 4968; GFX90A-LABEL: v_shuffle_v3i32_v4i32__7_2_4: 4969; GFX90A: ; %bb.0: 4970; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4971; GFX90A-NEXT: ;;#ASMSTART 4972; 
GFX90A-NEXT: ; def v[0:3] 4973; GFX90A-NEXT: ;;#ASMEND 4974; GFX90A-NEXT: v_mov_b32_e32 v8, 0 4975; GFX90A-NEXT: ;;#ASMSTART 4976; GFX90A-NEXT: ; def v[4:7] 4977; GFX90A-NEXT: ;;#ASMEND 4978; GFX90A-NEXT: v_mov_b32_e32 v0, v7 4979; GFX90A-NEXT: v_mov_b32_e32 v1, v2 4980; GFX90A-NEXT: v_mov_b32_e32 v2, v4 4981; GFX90A-NEXT: global_store_dwordx3 v8, v[0:2], s[16:17] 4982; GFX90A-NEXT: s_waitcnt vmcnt(0) 4983; GFX90A-NEXT: s_setpc_b64 s[30:31] 4984; 4985; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_2_4: 4986; GFX940: ; %bb.0: 4987; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4988; GFX940-NEXT: ;;#ASMSTART 4989; GFX940-NEXT: ; def v[0:3] 4990; GFX940-NEXT: ;;#ASMEND 4991; GFX940-NEXT: v_mov_b32_e32 v8, 0 4992; GFX940-NEXT: ;;#ASMSTART 4993; GFX940-NEXT: ; def v[4:7] 4994; GFX940-NEXT: ;;#ASMEND 4995; GFX940-NEXT: v_mov_b32_e32 v1, v2 4996; GFX940-NEXT: v_mov_b32_e32 v0, v7 4997; GFX940-NEXT: v_mov_b32_e32 v2, v4 4998; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 4999; GFX940-NEXT: s_waitcnt vmcnt(0) 5000; GFX940-NEXT: s_setpc_b64 s[30:31] 5001 %vec0 = call <4 x i32> asm "; def $0", "=v"() 5002 %vec1 = call <4 x i32> asm "; def $0", "=v"() 5003 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 2, i32 4> 5004 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 5005 ret void 5006} 5007 5008define void @v_shuffle_v3i32_v4i32__7_3_4(ptr addrspace(1) inreg %ptr) { 5009; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_3_4: 5010; GFX900: ; %bb.0: 5011; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5012; GFX900-NEXT: ;;#ASMSTART 5013; GFX900-NEXT: ; def v[0:3] 5014; GFX900-NEXT: ;;#ASMEND 5015; GFX900-NEXT: v_mov_b32_e32 v8, 0 5016; GFX900-NEXT: ;;#ASMSTART 5017; GFX900-NEXT: ; def v[4:7] 5018; GFX900-NEXT: ;;#ASMEND 5019; GFX900-NEXT: v_mov_b32_e32 v0, v7 5020; GFX900-NEXT: v_mov_b32_e32 v1, v3 5021; GFX900-NEXT: v_mov_b32_e32 v2, v4 5022; GFX900-NEXT: global_store_dwordx3 v8, v[0:2], s[16:17] 5023; GFX900-NEXT: s_waitcnt 
vmcnt(0) 5024; GFX900-NEXT: s_setpc_b64 s[30:31] 5025; 5026; GFX90A-LABEL: v_shuffle_v3i32_v4i32__7_3_4: 5027; GFX90A: ; %bb.0: 5028; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5029; GFX90A-NEXT: ;;#ASMSTART 5030; GFX90A-NEXT: ; def v[0:3] 5031; GFX90A-NEXT: ;;#ASMEND 5032; GFX90A-NEXT: v_mov_b32_e32 v8, 0 5033; GFX90A-NEXT: ;;#ASMSTART 5034; GFX90A-NEXT: ; def v[4:7] 5035; GFX90A-NEXT: ;;#ASMEND 5036; GFX90A-NEXT: v_mov_b32_e32 v0, v7 5037; GFX90A-NEXT: v_mov_b32_e32 v1, v3 5038; GFX90A-NEXT: v_mov_b32_e32 v2, v4 5039; GFX90A-NEXT: global_store_dwordx3 v8, v[0:2], s[16:17] 5040; GFX90A-NEXT: s_waitcnt vmcnt(0) 5041; GFX90A-NEXT: s_setpc_b64 s[30:31] 5042; 5043; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_3_4: 5044; GFX940: ; %bb.0: 5045; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5046; GFX940-NEXT: ;;#ASMSTART 5047; GFX940-NEXT: ; def v[0:3] 5048; GFX940-NEXT: ;;#ASMEND 5049; GFX940-NEXT: v_mov_b32_e32 v8, 0 5050; GFX940-NEXT: ;;#ASMSTART 5051; GFX940-NEXT: ; def v[4:7] 5052; GFX940-NEXT: ;;#ASMEND 5053; GFX940-NEXT: v_mov_b32_e32 v1, v3 5054; GFX940-NEXT: v_mov_b32_e32 v0, v7 5055; GFX940-NEXT: v_mov_b32_e32 v2, v4 5056; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 5057; GFX940-NEXT: s_waitcnt vmcnt(0) 5058; GFX940-NEXT: s_setpc_b64 s[30:31] 5059 %vec0 = call <4 x i32> asm "; def $0", "=v"() 5060 %vec1 = call <4 x i32> asm "; def $0", "=v"() 5061 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 3, i32 4> 5062 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 5063 ret void 5064} 5065 5066define void @v_shuffle_v3i32_v4i32__7_5_4(ptr addrspace(1) inreg %ptr) { 5067; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_5_4: 5068; GFX900: ; %bb.0: 5069; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5070; GFX900-NEXT: ;;#ASMSTART 5071; GFX900-NEXT: ; def v[0:3] 5072; GFX900-NEXT: ;;#ASMEND 5073; GFX900-NEXT: v_mov_b32_e32 v5, 0 5074; GFX900-NEXT: v_mov_b32_e32 v2, v3 5075; GFX900-NEXT: v_mov_b32_e32 v3, v1 
5076; GFX900-NEXT: v_mov_b32_e32 v4, v0 5077; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17] 5078; GFX900-NEXT: s_waitcnt vmcnt(0) 5079; GFX900-NEXT: s_setpc_b64 s[30:31] 5080; 5081; GFX90A-LABEL: v_shuffle_v3i32_v4i32__7_5_4: 5082; GFX90A: ; %bb.0: 5083; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5084; GFX90A-NEXT: ;;#ASMSTART 5085; GFX90A-NEXT: ; def v[0:3] 5086; GFX90A-NEXT: ;;#ASMEND 5087; GFX90A-NEXT: v_mov_b32_e32 v5, 0 5088; GFX90A-NEXT: v_mov_b32_e32 v2, v3 5089; GFX90A-NEXT: v_mov_b32_e32 v3, v1 5090; GFX90A-NEXT: v_mov_b32_e32 v4, v0 5091; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17] 5092; GFX90A-NEXT: s_waitcnt vmcnt(0) 5093; GFX90A-NEXT: s_setpc_b64 s[30:31] 5094; 5095; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_5_4: 5096; GFX940: ; %bb.0: 5097; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5098; GFX940-NEXT: ;;#ASMSTART 5099; GFX940-NEXT: ; def v[0:3] 5100; GFX940-NEXT: ;;#ASMEND 5101; GFX940-NEXT: v_mov_b32_e32 v5, 0 5102; GFX940-NEXT: v_mov_b32_e32 v2, v3 5103; GFX940-NEXT: v_mov_b32_e32 v3, v1 5104; GFX940-NEXT: v_mov_b32_e32 v4, v0 5105; GFX940-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] sc0 sc1 5106; GFX940-NEXT: s_waitcnt vmcnt(0) 5107; GFX940-NEXT: s_setpc_b64 s[30:31] 5108 %vec0 = call <4 x i32> asm "; def $0", "=v"() 5109 %vec1 = call <4 x i32> asm "; def $0", "=v"() 5110 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 5, i32 4> 5111 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 5112 ret void 5113} 5114 5115define void @v_shuffle_v3i32_v4i32__7_6_4(ptr addrspace(1) inreg %ptr) { 5116; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_6_4: 5117; GFX900: ; %bb.0: 5118; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5119; GFX900-NEXT: ;;#ASMSTART 5120; GFX900-NEXT: ; def v[0:3] 5121; GFX900-NEXT: ;;#ASMEND 5122; GFX900-NEXT: v_mov_b32_e32 v4, 0 5123; GFX900-NEXT: v_mov_b32_e32 v1, v3 5124; GFX900-NEXT: v_mov_b32_e32 v3, v0 5125; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], 
s[16:17] 5126; GFX900-NEXT: s_waitcnt vmcnt(0) 5127; GFX900-NEXT: s_setpc_b64 s[30:31] 5128; 5129; GFX90A-LABEL: v_shuffle_v3i32_v4i32__7_6_4: 5130; GFX90A: ; %bb.0: 5131; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5132; GFX90A-NEXT: v_mov_b32_e32 v7, 0 5133; GFX90A-NEXT: ;;#ASMSTART 5134; GFX90A-NEXT: ; def v[0:3] 5135; GFX90A-NEXT: ;;#ASMEND 5136; GFX90A-NEXT: v_mov_b32_e32 v4, v3 5137; GFX90A-NEXT: v_mov_b32_e32 v5, v2 5138; GFX90A-NEXT: v_mov_b32_e32 v6, v0 5139; GFX90A-NEXT: global_store_dwordx3 v7, v[4:6], s[16:17] 5140; GFX90A-NEXT: s_waitcnt vmcnt(0) 5141; GFX90A-NEXT: s_setpc_b64 s[30:31] 5142; 5143; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_6_4: 5144; GFX940: ; %bb.0: 5145; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5146; GFX940-NEXT: v_mov_b32_e32 v7, 0 5147; GFX940-NEXT: ;;#ASMSTART 5148; GFX940-NEXT: ; def v[0:3] 5149; GFX940-NEXT: ;;#ASMEND 5150; GFX940-NEXT: s_nop 0 5151; GFX940-NEXT: v_mov_b32_e32 v4, v3 5152; GFX940-NEXT: v_mov_b32_e32 v5, v2 5153; GFX940-NEXT: v_mov_b32_e32 v6, v0 5154; GFX940-NEXT: global_store_dwordx3 v7, v[4:6], s[0:1] sc0 sc1 5155; GFX940-NEXT: s_waitcnt vmcnt(0) 5156; GFX940-NEXT: s_setpc_b64 s[30:31] 5157 %vec0 = call <4 x i32> asm "; def $0", "=v"() 5158 %vec1 = call <4 x i32> asm "; def $0", "=v"() 5159 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 6, i32 4> 5160 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 5161 ret void 5162} 5163 5164define void @v_shuffle_v3i32_v4i32__u_5_5(ptr addrspace(1) inreg %ptr) { 5165; GFX900-LABEL: v_shuffle_v3i32_v4i32__u_5_5: 5166; GFX900: ; %bb.0: 5167; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5168; GFX900-NEXT: ;;#ASMSTART 5169; GFX900-NEXT: ; def v[0:3] 5170; GFX900-NEXT: ;;#ASMEND 5171; GFX900-NEXT: v_mov_b32_e32 v4, 0 5172; GFX900-NEXT: v_mov_b32_e32 v2, v1 5173; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17] 5174; GFX900-NEXT: s_waitcnt vmcnt(0) 5175; GFX900-NEXT: s_setpc_b64 s[30:31] 5176; 5177; 
GFX90A-LABEL: v_shuffle_v3i32_v4i32__u_5_5: 5178; GFX90A: ; %bb.0: 5179; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5180; GFX90A-NEXT: ;;#ASMSTART 5181; GFX90A-NEXT: ; def v[0:3] 5182; GFX90A-NEXT: ;;#ASMEND 5183; GFX90A-NEXT: v_mov_b32_e32 v4, 0 5184; GFX90A-NEXT: v_mov_b32_e32 v2, v1 5185; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17] 5186; GFX90A-NEXT: s_waitcnt vmcnt(0) 5187; GFX90A-NEXT: s_setpc_b64 s[30:31] 5188; 5189; GFX940-LABEL: v_shuffle_v3i32_v4i32__u_5_5: 5190; GFX940: ; %bb.0: 5191; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5192; GFX940-NEXT: ;;#ASMSTART 5193; GFX940-NEXT: ; def v[0:3] 5194; GFX940-NEXT: ;;#ASMEND 5195; GFX940-NEXT: v_mov_b32_e32 v4, 0 5196; GFX940-NEXT: v_mov_b32_e32 v2, v1 5197; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 5198; GFX940-NEXT: s_waitcnt vmcnt(0) 5199; GFX940-NEXT: s_setpc_b64 s[30:31] 5200 %vec0 = call <4 x i32> asm "; def $0", "=v"() 5201 %vec1 = call <4 x i32> asm "; def $0", "=v"() 5202 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 poison, i32 5, i32 5> 5203 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 5204 ret void 5205} 5206 5207define void @v_shuffle_v3i32_v4i32__0_5_5(ptr addrspace(1) inreg %ptr) { 5208; GFX900-LABEL: v_shuffle_v3i32_v4i32__0_5_5: 5209; GFX900: ; %bb.0: 5210; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5211; GFX900-NEXT: ;;#ASMSTART 5212; GFX900-NEXT: ; def v[0:3] 5213; GFX900-NEXT: ;;#ASMEND 5214; GFX900-NEXT: ;;#ASMSTART 5215; GFX900-NEXT: ; def v[1:4] 5216; GFX900-NEXT: ;;#ASMEND 5217; GFX900-NEXT: v_mov_b32_e32 v5, 0 5218; GFX900-NEXT: v_mov_b32_e32 v1, v2 5219; GFX900-NEXT: global_store_dwordx3 v5, v[0:2], s[16:17] 5220; GFX900-NEXT: s_waitcnt vmcnt(0) 5221; GFX900-NEXT: s_setpc_b64 s[30:31] 5222; 5223; GFX90A-LABEL: v_shuffle_v3i32_v4i32__0_5_5: 5224; GFX90A: ; %bb.0: 5225; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5226; GFX90A-NEXT: ;;#ASMSTART 5227; GFX90A-NEXT: ; def v[0:3] 5228; 
GFX90A-NEXT: ;;#ASMEND 5229; GFX90A-NEXT: ;;#ASMSTART 5230; GFX90A-NEXT: ; def v[2:5] 5231; GFX90A-NEXT: ;;#ASMEND 5232; GFX90A-NEXT: v_mov_b32_e32 v6, 0 5233; GFX90A-NEXT: v_mov_b32_e32 v1, v3 5234; GFX90A-NEXT: v_mov_b32_e32 v2, v3 5235; GFX90A-NEXT: global_store_dwordx3 v6, v[0:2], s[16:17] 5236; GFX90A-NEXT: s_waitcnt vmcnt(0) 5237; GFX90A-NEXT: s_setpc_b64 s[30:31] 5238; 5239; GFX940-LABEL: v_shuffle_v3i32_v4i32__0_5_5: 5240; GFX940: ; %bb.0: 5241; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5242; GFX940-NEXT: ;;#ASMSTART 5243; GFX940-NEXT: ; def v[0:3] 5244; GFX940-NEXT: ;;#ASMEND 5245; GFX940-NEXT: v_mov_b32_e32 v6, 0 5246; GFX940-NEXT: ;;#ASMSTART 5247; GFX940-NEXT: ; def v[2:5] 5248; GFX940-NEXT: ;;#ASMEND 5249; GFX940-NEXT: s_nop 0 5250; GFX940-NEXT: v_mov_b32_e32 v1, v3 5251; GFX940-NEXT: v_mov_b32_e32 v2, v3 5252; GFX940-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] sc0 sc1 5253; GFX940-NEXT: s_waitcnt vmcnt(0) 5254; GFX940-NEXT: s_setpc_b64 s[30:31] 5255 %vec0 = call <4 x i32> asm "; def $0", "=v"() 5256 %vec1 = call <4 x i32> asm "; def $0", "=v"() 5257 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 0, i32 5, i32 5> 5258 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 5259 ret void 5260} 5261 5262define void @v_shuffle_v3i32_v4i32__1_5_5(ptr addrspace(1) inreg %ptr) { 5263; GFX900-LABEL: v_shuffle_v3i32_v4i32__1_5_5: 5264; GFX900: ; %bb.0: 5265; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5266; GFX900-NEXT: ;;#ASMSTART 5267; GFX900-NEXT: ; def v[0:3] 5268; GFX900-NEXT: ;;#ASMEND 5269; GFX900-NEXT: ;;#ASMSTART 5270; GFX900-NEXT: ; def v[2:5] 5271; GFX900-NEXT: ;;#ASMEND 5272; GFX900-NEXT: v_mov_b32_e32 v6, 0 5273; GFX900-NEXT: v_mov_b32_e32 v2, v3 5274; GFX900-NEXT: global_store_dwordx3 v6, v[1:3], s[16:17] 5275; GFX900-NEXT: s_waitcnt vmcnt(0) 5276; GFX900-NEXT: s_setpc_b64 s[30:31] 5277; 5278; GFX90A-LABEL: v_shuffle_v3i32_v4i32__1_5_5: 5279; GFX90A: ; %bb.0: 5280; GFX90A-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) 5281; GFX90A-NEXT: ;;#ASMSTART 5282; GFX90A-NEXT: ; def v[0:3] 5283; GFX90A-NEXT: ;;#ASMEND 5284; GFX90A-NEXT: ;;#ASMSTART 5285; GFX90A-NEXT: ; def v[2:5] 5286; GFX90A-NEXT: ;;#ASMEND 5287; GFX90A-NEXT: v_mov_b32_e32 v6, 0 5288; GFX90A-NEXT: v_mov_b32_e32 v2, v1 5289; GFX90A-NEXT: v_mov_b32_e32 v4, v3 5290; GFX90A-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17] 5291; GFX90A-NEXT: s_waitcnt vmcnt(0) 5292; GFX90A-NEXT: s_setpc_b64 s[30:31] 5293; 5294; GFX940-LABEL: v_shuffle_v3i32_v4i32__1_5_5: 5295; GFX940: ; %bb.0: 5296; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5297; GFX940-NEXT: ;;#ASMSTART 5298; GFX940-NEXT: ; def v[0:3] 5299; GFX940-NEXT: ;;#ASMEND 5300; GFX940-NEXT: v_mov_b32_e32 v6, 0 5301; GFX940-NEXT: ;;#ASMSTART 5302; GFX940-NEXT: ; def v[2:5] 5303; GFX940-NEXT: ;;#ASMEND 5304; GFX940-NEXT: s_nop 0 5305; GFX940-NEXT: v_mov_b32_e32 v2, v1 5306; GFX940-NEXT: v_mov_b32_e32 v4, v3 5307; GFX940-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] sc0 sc1 5308; GFX940-NEXT: s_waitcnt vmcnt(0) 5309; GFX940-NEXT: s_setpc_b64 s[30:31] 5310 %vec0 = call <4 x i32> asm "; def $0", "=v"() 5311 %vec1 = call <4 x i32> asm "; def $0", "=v"() 5312 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 1, i32 5, i32 5> 5313 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 5314 ret void 5315} 5316 5317define void @v_shuffle_v3i32_v4i32__2_5_5(ptr addrspace(1) inreg %ptr) { 5318; GFX900-LABEL: v_shuffle_v3i32_v4i32__2_5_5: 5319; GFX900: ; %bb.0: 5320; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5321; GFX900-NEXT: ;;#ASMSTART 5322; GFX900-NEXT: ; def v[0:3] 5323; GFX900-NEXT: ;;#ASMEND 5324; GFX900-NEXT: ;;#ASMSTART 5325; GFX900-NEXT: ; def v[3:6] 5326; GFX900-NEXT: ;;#ASMEND 5327; GFX900-NEXT: v_mov_b32_e32 v7, 0 5328; GFX900-NEXT: v_mov_b32_e32 v3, v2 5329; GFX900-NEXT: v_mov_b32_e32 v5, v4 5330; GFX900-NEXT: global_store_dwordx3 v7, v[3:5], s[16:17] 5331; GFX900-NEXT: s_waitcnt vmcnt(0) 5332; GFX900-NEXT: s_setpc_b64 
s[30:31] 5333; 5334; GFX90A-LABEL: v_shuffle_v3i32_v4i32__2_5_5: 5335; GFX90A: ; %bb.0: 5336; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5337; GFX90A-NEXT: ;;#ASMSTART 5338; GFX90A-NEXT: ; def v[4:7] 5339; GFX90A-NEXT: ;;#ASMEND 5340; GFX90A-NEXT: v_mov_b32_e32 v8, 0 5341; GFX90A-NEXT: ;;#ASMSTART 5342; GFX90A-NEXT: ; def v[0:3] 5343; GFX90A-NEXT: ;;#ASMEND 5344; GFX90A-NEXT: v_mov_b32_e32 v4, v2 5345; GFX90A-NEXT: v_mov_b32_e32 v6, v5 5346; GFX90A-NEXT: global_store_dwordx3 v8, v[4:6], s[16:17] 5347; GFX90A-NEXT: s_waitcnt vmcnt(0) 5348; GFX90A-NEXT: s_setpc_b64 s[30:31] 5349; 5350; GFX940-LABEL: v_shuffle_v3i32_v4i32__2_5_5: 5351; GFX940: ; %bb.0: 5352; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5353; GFX940-NEXT: ;;#ASMSTART 5354; GFX940-NEXT: ; def v[4:7] 5355; GFX940-NEXT: ;;#ASMEND 5356; GFX940-NEXT: v_mov_b32_e32 v8, 0 5357; GFX940-NEXT: ;;#ASMSTART 5358; GFX940-NEXT: ; def v[0:3] 5359; GFX940-NEXT: ;;#ASMEND 5360; GFX940-NEXT: v_mov_b32_e32 v6, v5 5361; GFX940-NEXT: v_mov_b32_e32 v4, v2 5362; GFX940-NEXT: global_store_dwordx3 v8, v[4:6], s[0:1] sc0 sc1 5363; GFX940-NEXT: s_waitcnt vmcnt(0) 5364; GFX940-NEXT: s_setpc_b64 s[30:31] 5365 %vec0 = call <4 x i32> asm "; def $0", "=v"() 5366 %vec1 = call <4 x i32> asm "; def $0", "=v"() 5367 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 2, i32 5, i32 5> 5368 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 5369 ret void 5370} 5371 5372define void @v_shuffle_v3i32_v4i32__3_5_5(ptr addrspace(1) inreg %ptr) { 5373; GFX900-LABEL: v_shuffle_v3i32_v4i32__3_5_5: 5374; GFX900: ; %bb.0: 5375; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5376; GFX900-NEXT: ;;#ASMSTART 5377; GFX900-NEXT: ; def v[4:7] 5378; GFX900-NEXT: ;;#ASMEND 5379; GFX900-NEXT: v_mov_b32_e32 v8, 0 5380; GFX900-NEXT: ;;#ASMSTART 5381; GFX900-NEXT: ; def v[0:3] 5382; GFX900-NEXT: ;;#ASMEND 5383; GFX900-NEXT: v_mov_b32_e32 v4, v3 5384; GFX900-NEXT: v_mov_b32_e32 v6, v5 5385; GFX900-NEXT: 
global_store_dwordx3 v8, v[4:6], s[16:17] 5386; GFX900-NEXT: s_waitcnt vmcnt(0) 5387; GFX900-NEXT: s_setpc_b64 s[30:31] 5388; 5389; GFX90A-LABEL: v_shuffle_v3i32_v4i32__3_5_5: 5390; GFX90A: ; %bb.0: 5391; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5392; GFX90A-NEXT: ;;#ASMSTART 5393; GFX90A-NEXT: ; def v[4:7] 5394; GFX90A-NEXT: ;;#ASMEND 5395; GFX90A-NEXT: v_mov_b32_e32 v8, 0 5396; GFX90A-NEXT: ;;#ASMSTART 5397; GFX90A-NEXT: ; def v[0:3] 5398; GFX90A-NEXT: ;;#ASMEND 5399; GFX90A-NEXT: v_mov_b32_e32 v4, v3 5400; GFX90A-NEXT: v_mov_b32_e32 v6, v5 5401; GFX90A-NEXT: global_store_dwordx3 v8, v[4:6], s[16:17] 5402; GFX90A-NEXT: s_waitcnt vmcnt(0) 5403; GFX90A-NEXT: s_setpc_b64 s[30:31] 5404; 5405; GFX940-LABEL: v_shuffle_v3i32_v4i32__3_5_5: 5406; GFX940: ; %bb.0: 5407; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5408; GFX940-NEXT: ;;#ASMSTART 5409; GFX940-NEXT: ; def v[4:7] 5410; GFX940-NEXT: ;;#ASMEND 5411; GFX940-NEXT: v_mov_b32_e32 v8, 0 5412; GFX940-NEXT: ;;#ASMSTART 5413; GFX940-NEXT: ; def v[0:3] 5414; GFX940-NEXT: ;;#ASMEND 5415; GFX940-NEXT: v_mov_b32_e32 v6, v5 5416; GFX940-NEXT: v_mov_b32_e32 v4, v3 5417; GFX940-NEXT: global_store_dwordx3 v8, v[4:6], s[0:1] sc0 sc1 5418; GFX940-NEXT: s_waitcnt vmcnt(0) 5419; GFX940-NEXT: s_setpc_b64 s[30:31] 5420 %vec0 = call <4 x i32> asm "; def $0", "=v"() 5421 %vec1 = call <4 x i32> asm "; def $0", "=v"() 5422 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 3, i32 5, i32 5> 5423 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 5424 ret void 5425} 5426 5427define void @v_shuffle_v3i32_v4i32__4_5_5(ptr addrspace(1) inreg %ptr) { 5428; GFX900-LABEL: v_shuffle_v3i32_v4i32__4_5_5: 5429; GFX900: ; %bb.0: 5430; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5431; GFX900-NEXT: ;;#ASMSTART 5432; GFX900-NEXT: ; def v[0:3] 5433; GFX900-NEXT: ;;#ASMEND 5434; GFX900-NEXT: v_mov_b32_e32 v4, 0 5435; GFX900-NEXT: v_mov_b32_e32 v2, v1 5436; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], 
s[16:17] 5437; GFX900-NEXT: s_waitcnt vmcnt(0) 5438; GFX900-NEXT: s_setpc_b64 s[30:31] 5439; 5440; GFX90A-LABEL: v_shuffle_v3i32_v4i32__4_5_5: 5441; GFX90A: ; %bb.0: 5442; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5443; GFX90A-NEXT: ;;#ASMSTART 5444; GFX90A-NEXT: ; def v[0:3] 5445; GFX90A-NEXT: ;;#ASMEND 5446; GFX90A-NEXT: v_mov_b32_e32 v4, 0 5447; GFX90A-NEXT: v_mov_b32_e32 v2, v1 5448; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17] 5449; GFX90A-NEXT: s_waitcnt vmcnt(0) 5450; GFX90A-NEXT: s_setpc_b64 s[30:31] 5451; 5452; GFX940-LABEL: v_shuffle_v3i32_v4i32__4_5_5: 5453; GFX940: ; %bb.0: 5454; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5455; GFX940-NEXT: ;;#ASMSTART 5456; GFX940-NEXT: ; def v[0:3] 5457; GFX940-NEXT: ;;#ASMEND 5458; GFX940-NEXT: v_mov_b32_e32 v4, 0 5459; GFX940-NEXT: v_mov_b32_e32 v2, v1 5460; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 5461; GFX940-NEXT: s_waitcnt vmcnt(0) 5462; GFX940-NEXT: s_setpc_b64 s[30:31] 5463 %vec0 = call <4 x i32> asm "; def $0", "=v"() 5464 %vec1 = call <4 x i32> asm "; def $0", "=v"() 5465 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 4, i32 5, i32 5> 5466 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 5467 ret void 5468} 5469 5470define void @v_shuffle_v3i32_v4i32__5_5_5(ptr addrspace(1) inreg %ptr) { 5471; GFX900-LABEL: v_shuffle_v3i32_v4i32__5_5_5: 5472; GFX900: ; %bb.0: 5473; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5474; GFX900-NEXT: ;;#ASMSTART 5475; GFX900-NEXT: ; def v[0:3] 5476; GFX900-NEXT: ;;#ASMEND 5477; GFX900-NEXT: v_mov_b32_e32 v4, 0 5478; GFX900-NEXT: v_mov_b32_e32 v2, v1 5479; GFX900-NEXT: v_mov_b32_e32 v3, v1 5480; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17] 5481; GFX900-NEXT: s_waitcnt vmcnt(0) 5482; GFX900-NEXT: s_setpc_b64 s[30:31] 5483; 5484; GFX90A-LABEL: v_shuffle_v3i32_v4i32__5_5_5: 5485; GFX90A: ; %bb.0: 5486; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5487; GFX90A-NEXT: ;;#ASMSTART 
5488; GFX90A-NEXT: ; def v[0:3] 5489; GFX90A-NEXT: ;;#ASMEND 5490; GFX90A-NEXT: v_mov_b32_e32 v4, 0 5491; GFX90A-NEXT: v_mov_b32_e32 v0, v1 5492; GFX90A-NEXT: v_mov_b32_e32 v2, v1 5493; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17] 5494; GFX90A-NEXT: s_waitcnt vmcnt(0) 5495; GFX90A-NEXT: s_setpc_b64 s[30:31] 5496; 5497; GFX940-LABEL: v_shuffle_v3i32_v4i32__5_5_5: 5498; GFX940: ; %bb.0: 5499; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5500; GFX940-NEXT: ;;#ASMSTART 5501; GFX940-NEXT: ; def v[0:3] 5502; GFX940-NEXT: ;;#ASMEND 5503; GFX940-NEXT: v_mov_b32_e32 v4, 0 5504; GFX940-NEXT: v_mov_b32_e32 v0, v1 5505; GFX940-NEXT: v_mov_b32_e32 v2, v1 5506; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 5507; GFX940-NEXT: s_waitcnt vmcnt(0) 5508; GFX940-NEXT: s_setpc_b64 s[30:31] 5509 %vec0 = call <4 x i32> asm "; def $0", "=v"() 5510 %vec1 = call <4 x i32> asm "; def $0", "=v"() 5511 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 5, i32 5, i32 5> 5512 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 5513 ret void 5514} 5515 5516define void @v_shuffle_v3i32_v4i32__6_5_5(ptr addrspace(1) inreg %ptr) { 5517; GFX900-LABEL: v_shuffle_v3i32_v4i32__6_5_5: 5518; GFX900: ; %bb.0: 5519; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5520; GFX900-NEXT: ;;#ASMSTART 5521; GFX900-NEXT: ; def v[0:3] 5522; GFX900-NEXT: ;;#ASMEND 5523; GFX900-NEXT: v_mov_b32_e32 v4, 0 5524; GFX900-NEXT: v_mov_b32_e32 v0, v2 5525; GFX900-NEXT: v_mov_b32_e32 v2, v1 5526; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17] 5527; GFX900-NEXT: s_waitcnt vmcnt(0) 5528; GFX900-NEXT: s_setpc_b64 s[30:31] 5529; 5530; GFX90A-LABEL: v_shuffle_v3i32_v4i32__6_5_5: 5531; GFX90A: ; %bb.0: 5532; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5533; GFX90A-NEXT: ;;#ASMSTART 5534; GFX90A-NEXT: ; def v[0:3] 5535; GFX90A-NEXT: ;;#ASMEND 5536; GFX90A-NEXT: v_mov_b32_e32 v4, 0 5537; GFX90A-NEXT: v_mov_b32_e32 v0, v2 5538; GFX90A-NEXT: v_mov_b32_e32 v2, 
v1 5539; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17] 5540; GFX90A-NEXT: s_waitcnt vmcnt(0) 5541; GFX90A-NEXT: s_setpc_b64 s[30:31] 5542; 5543; GFX940-LABEL: v_shuffle_v3i32_v4i32__6_5_5: 5544; GFX940: ; %bb.0: 5545; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5546; GFX940-NEXT: ;;#ASMSTART 5547; GFX940-NEXT: ; def v[0:3] 5548; GFX940-NEXT: ;;#ASMEND 5549; GFX940-NEXT: v_mov_b32_e32 v4, 0 5550; GFX940-NEXT: v_mov_b32_e32 v0, v2 5551; GFX940-NEXT: v_mov_b32_e32 v2, v1 5552; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 5553; GFX940-NEXT: s_waitcnt vmcnt(0) 5554; GFX940-NEXT: s_setpc_b64 s[30:31] 5555 %vec0 = call <4 x i32> asm "; def $0", "=v"() 5556 %vec1 = call <4 x i32> asm "; def $0", "=v"() 5557 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 6, i32 5, i32 5> 5558 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 5559 ret void 5560} 5561 5562define void @v_shuffle_v3i32_v4i32__7_5_5(ptr addrspace(1) inreg %ptr) { 5563; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_5_5: 5564; GFX900: ; %bb.0: 5565; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5566; GFX900-NEXT: ;;#ASMSTART 5567; GFX900-NEXT: ; def v[0:3] 5568; GFX900-NEXT: ;;#ASMEND 5569; GFX900-NEXT: v_mov_b32_e32 v4, 0 5570; GFX900-NEXT: v_mov_b32_e32 v0, v3 5571; GFX900-NEXT: v_mov_b32_e32 v2, v1 5572; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17] 5573; GFX900-NEXT: s_waitcnt vmcnt(0) 5574; GFX900-NEXT: s_setpc_b64 s[30:31] 5575; 5576; GFX90A-LABEL: v_shuffle_v3i32_v4i32__7_5_5: 5577; GFX90A: ; %bb.0: 5578; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5579; GFX90A-NEXT: ;;#ASMSTART 5580; GFX90A-NEXT: ; def v[0:3] 5581; GFX90A-NEXT: ;;#ASMEND 5582; GFX90A-NEXT: v_mov_b32_e32 v4, 0 5583; GFX90A-NEXT: v_mov_b32_e32 v0, v3 5584; GFX90A-NEXT: v_mov_b32_e32 v2, v1 5585; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17] 5586; GFX90A-NEXT: s_waitcnt vmcnt(0) 5587; GFX90A-NEXT: s_setpc_b64 s[30:31] 5588; 5589; GFX940-LABEL: 
v_shuffle_v3i32_v4i32__7_5_5: 5590; GFX940: ; %bb.0: 5591; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5592; GFX940-NEXT: ;;#ASMSTART 5593; GFX940-NEXT: ; def v[0:3] 5594; GFX940-NEXT: ;;#ASMEND 5595; GFX940-NEXT: v_mov_b32_e32 v4, 0 5596; GFX940-NEXT: v_mov_b32_e32 v0, v3 5597; GFX940-NEXT: v_mov_b32_e32 v2, v1 5598; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 5599; GFX940-NEXT: s_waitcnt vmcnt(0) 5600; GFX940-NEXT: s_setpc_b64 s[30:31] 5601 %vec0 = call <4 x i32> asm "; def $0", "=v"() 5602 %vec1 = call <4 x i32> asm "; def $0", "=v"() 5603 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 5, i32 5> 5604 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 5605 ret void 5606} 5607 5608define void @v_shuffle_v3i32_v4i32__7_u_5(ptr addrspace(1) inreg %ptr) { 5609; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_u_5: 5610; GFX900: ; %bb.0: 5611; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5612; GFX900-NEXT: ;;#ASMSTART 5613; GFX900-NEXT: ; def v[0:3] 5614; GFX900-NEXT: ;;#ASMEND 5615; GFX900-NEXT: v_mov_b32_e32 v4, 0 5616; GFX900-NEXT: v_mov_b32_e32 v0, v3 5617; GFX900-NEXT: v_mov_b32_e32 v2, v1 5618; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17] 5619; GFX900-NEXT: s_waitcnt vmcnt(0) 5620; GFX900-NEXT: s_setpc_b64 s[30:31] 5621; 5622; GFX90A-LABEL: v_shuffle_v3i32_v4i32__7_u_5: 5623; GFX90A: ; %bb.0: 5624; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5625; GFX90A-NEXT: ;;#ASMSTART 5626; GFX90A-NEXT: ; def v[0:3] 5627; GFX90A-NEXT: ;;#ASMEND 5628; GFX90A-NEXT: v_mov_b32_e32 v4, 0 5629; GFX90A-NEXT: v_mov_b32_e32 v0, v3 5630; GFX90A-NEXT: v_mov_b32_e32 v2, v1 5631; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17] 5632; GFX90A-NEXT: s_waitcnt vmcnt(0) 5633; GFX90A-NEXT: s_setpc_b64 s[30:31] 5634; 5635; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_u_5: 5636; GFX940: ; %bb.0: 5637; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5638; GFX940-NEXT: ;;#ASMSTART 5639; GFX940-NEXT: ; def v[0:3] 
5640; GFX940-NEXT: ;;#ASMEND 5641; GFX940-NEXT: v_mov_b32_e32 v4, 0 5642; GFX940-NEXT: v_mov_b32_e32 v0, v3 5643; GFX940-NEXT: v_mov_b32_e32 v2, v1 5644; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 5645; GFX940-NEXT: s_waitcnt vmcnt(0) 5646; GFX940-NEXT: s_setpc_b64 s[30:31] 5647 %vec0 = call <4 x i32> asm "; def $0", "=v"() 5648 %vec1 = call <4 x i32> asm "; def $0", "=v"() 5649 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 poison, i32 5> 5650 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 5651 ret void 5652} 5653 5654define void @v_shuffle_v3i32_v4i32__7_0_5(ptr addrspace(1) inreg %ptr) { 5655; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_0_5: 5656; GFX900: ; %bb.0: 5657; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5658; GFX900-NEXT: ;;#ASMSTART 5659; GFX900-NEXT: ; def v[0:3] 5660; GFX900-NEXT: ;;#ASMEND 5661; GFX900-NEXT: ;;#ASMSTART 5662; GFX900-NEXT: ; def v[1:4] 5663; GFX900-NEXT: ;;#ASMEND 5664; GFX900-NEXT: v_mov_b32_e32 v6, 0 5665; GFX900-NEXT: v_mov_b32_e32 v3, v4 5666; GFX900-NEXT: v_mov_b32_e32 v4, v0 5667; GFX900-NEXT: v_mov_b32_e32 v5, v2 5668; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17] 5669; GFX900-NEXT: s_waitcnt vmcnt(0) 5670; GFX900-NEXT: s_setpc_b64 s[30:31] 5671; 5672; GFX90A-LABEL: v_shuffle_v3i32_v4i32__7_0_5: 5673; GFX90A: ; %bb.0: 5674; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5675; GFX90A-NEXT: ;;#ASMSTART 5676; GFX90A-NEXT: ; def v[0:3] 5677; GFX90A-NEXT: ;;#ASMEND 5678; GFX90A-NEXT: ;;#ASMSTART 5679; GFX90A-NEXT: ; def v[2:5] 5680; GFX90A-NEXT: ;;#ASMEND 5681; GFX90A-NEXT: v_mov_b32_e32 v7, 0 5682; GFX90A-NEXT: v_mov_b32_e32 v4, v5 5683; GFX90A-NEXT: v_mov_b32_e32 v5, v0 5684; GFX90A-NEXT: v_mov_b32_e32 v6, v3 5685; GFX90A-NEXT: global_store_dwordx3 v7, v[4:6], s[16:17] 5686; GFX90A-NEXT: s_waitcnt vmcnt(0) 5687; GFX90A-NEXT: s_setpc_b64 s[30:31] 5688; 5689; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_0_5: 5690; GFX940: ; %bb.0: 5691; GFX940-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) 5692; GFX940-NEXT: ;;#ASMSTART 5693; GFX940-NEXT: ; def v[0:3] 5694; GFX940-NEXT: ;;#ASMEND 5695; GFX940-NEXT: v_mov_b32_e32 v7, 0 5696; GFX940-NEXT: ;;#ASMSTART 5697; GFX940-NEXT: ; def v[2:5] 5698; GFX940-NEXT: ;;#ASMEND 5699; GFX940-NEXT: s_nop 0 5700; GFX940-NEXT: v_mov_b32_e32 v4, v5 5701; GFX940-NEXT: v_mov_b32_e32 v5, v0 5702; GFX940-NEXT: v_mov_b32_e32 v6, v3 5703; GFX940-NEXT: global_store_dwordx3 v7, v[4:6], s[0:1] sc0 sc1 5704; GFX940-NEXT: s_waitcnt vmcnt(0) 5705; GFX940-NEXT: s_setpc_b64 s[30:31] 5706 %vec0 = call <4 x i32> asm "; def $0", "=v"() 5707 %vec1 = call <4 x i32> asm "; def $0", "=v"() 5708 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 0, i32 5> 5709 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 5710 ret void 5711} 5712 5713define void @v_shuffle_v3i32_v4i32__7_1_5(ptr addrspace(1) inreg %ptr) { 5714; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_1_5: 5715; GFX900: ; %bb.0: 5716; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5717; GFX900-NEXT: ;;#ASMSTART 5718; GFX900-NEXT: ; def v[0:3] 5719; GFX900-NEXT: ;;#ASMEND 5720; GFX900-NEXT: ;;#ASMSTART 5721; GFX900-NEXT: ; def v[2:5] 5722; GFX900-NEXT: ;;#ASMEND 5723; GFX900-NEXT: v_mov_b32_e32 v6, 0 5724; GFX900-NEXT: v_mov_b32_e32 v0, v5 5725; GFX900-NEXT: v_mov_b32_e32 v2, v3 5726; GFX900-NEXT: global_store_dwordx3 v6, v[0:2], s[16:17] 5727; GFX900-NEXT: s_waitcnt vmcnt(0) 5728; GFX900-NEXT: s_setpc_b64 s[30:31] 5729; 5730; GFX90A-LABEL: v_shuffle_v3i32_v4i32__7_1_5: 5731; GFX90A: ; %bb.0: 5732; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5733; GFX90A-NEXT: ;;#ASMSTART 5734; GFX90A-NEXT: ; def v[0:3] 5735; GFX90A-NEXT: ;;#ASMEND 5736; GFX90A-NEXT: ;;#ASMSTART 5737; GFX90A-NEXT: ; def v[2:5] 5738; GFX90A-NEXT: ;;#ASMEND 5739; GFX90A-NEXT: v_mov_b32_e32 v6, 0 5740; GFX90A-NEXT: v_mov_b32_e32 v0, v5 5741; GFX90A-NEXT: v_mov_b32_e32 v2, v3 5742; GFX90A-NEXT: global_store_dwordx3 v6, v[0:2], s[16:17] 5743; GFX90A-NEXT: 
s_waitcnt vmcnt(0) 5744; GFX90A-NEXT: s_setpc_b64 s[30:31] 5745; 5746; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_1_5: 5747; GFX940: ; %bb.0: 5748; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5749; GFX940-NEXT: ;;#ASMSTART 5750; GFX940-NEXT: ; def v[0:3] 5751; GFX940-NEXT: ;;#ASMEND 5752; GFX940-NEXT: v_mov_b32_e32 v6, 0 5753; GFX940-NEXT: ;;#ASMSTART 5754; GFX940-NEXT: ; def v[2:5] 5755; GFX940-NEXT: ;;#ASMEND 5756; GFX940-NEXT: s_nop 0 5757; GFX940-NEXT: v_mov_b32_e32 v0, v5 5758; GFX940-NEXT: v_mov_b32_e32 v2, v3 5759; GFX940-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] sc0 sc1 5760; GFX940-NEXT: s_waitcnt vmcnt(0) 5761; GFX940-NEXT: s_setpc_b64 s[30:31] 5762 %vec0 = call <4 x i32> asm "; def $0", "=v"() 5763 %vec1 = call <4 x i32> asm "; def $0", "=v"() 5764 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 1, i32 5> 5765 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 5766 ret void 5767} 5768 5769define void @v_shuffle_v3i32_v4i32__7_2_5(ptr addrspace(1) inreg %ptr) { 5770; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_2_5: 5771; GFX900: ; %bb.0: 5772; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5773; GFX900-NEXT: ;;#ASMSTART 5774; GFX900-NEXT: ; def v[0:3] 5775; GFX900-NEXT: ;;#ASMEND 5776; GFX900-NEXT: ;;#ASMSTART 5777; GFX900-NEXT: ; def v[3:6] 5778; GFX900-NEXT: ;;#ASMEND 5779; GFX900-NEXT: v_mov_b32_e32 v7, 0 5780; GFX900-NEXT: v_mov_b32_e32 v1, v6 5781; GFX900-NEXT: v_mov_b32_e32 v3, v4 5782; GFX900-NEXT: global_store_dwordx3 v7, v[1:3], s[16:17] 5783; GFX900-NEXT: s_waitcnt vmcnt(0) 5784; GFX900-NEXT: s_setpc_b64 s[30:31] 5785; 5786; GFX90A-LABEL: v_shuffle_v3i32_v4i32__7_2_5: 5787; GFX90A: ; %bb.0: 5788; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5789; GFX90A-NEXT: ;;#ASMSTART 5790; GFX90A-NEXT: ; def v[0:3] 5791; GFX90A-NEXT: ;;#ASMEND 5792; GFX90A-NEXT: v_mov_b32_e32 v8, 0 5793; GFX90A-NEXT: ;;#ASMSTART 5794; GFX90A-NEXT: ; def v[4:7] 5795; GFX90A-NEXT: ;;#ASMEND 5796; GFX90A-NEXT: v_mov_b32_e32 
v0, v7 5797; GFX90A-NEXT: v_mov_b32_e32 v1, v2 5798; GFX90A-NEXT: v_mov_b32_e32 v2, v5 5799; GFX90A-NEXT: global_store_dwordx3 v8, v[0:2], s[16:17] 5800; GFX90A-NEXT: s_waitcnt vmcnt(0) 5801; GFX90A-NEXT: s_setpc_b64 s[30:31] 5802; 5803; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_2_5: 5804; GFX940: ; %bb.0: 5805; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5806; GFX940-NEXT: ;;#ASMSTART 5807; GFX940-NEXT: ; def v[0:3] 5808; GFX940-NEXT: ;;#ASMEND 5809; GFX940-NEXT: v_mov_b32_e32 v8, 0 5810; GFX940-NEXT: ;;#ASMSTART 5811; GFX940-NEXT: ; def v[4:7] 5812; GFX940-NEXT: ;;#ASMEND 5813; GFX940-NEXT: v_mov_b32_e32 v1, v2 5814; GFX940-NEXT: v_mov_b32_e32 v0, v7 5815; GFX940-NEXT: v_mov_b32_e32 v2, v5 5816; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 5817; GFX940-NEXT: s_waitcnt vmcnt(0) 5818; GFX940-NEXT: s_setpc_b64 s[30:31] 5819 %vec0 = call <4 x i32> asm "; def $0", "=v"() 5820 %vec1 = call <4 x i32> asm "; def $0", "=v"() 5821 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 2, i32 5> 5822 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 5823 ret void 5824} 5825 5826define void @v_shuffle_v3i32_v4i32__7_3_5(ptr addrspace(1) inreg %ptr) { 5827; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_3_5: 5828; GFX900: ; %bb.0: 5829; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5830; GFX900-NEXT: ;;#ASMSTART 5831; GFX900-NEXT: ; def v[0:3] 5832; GFX900-NEXT: ;;#ASMEND 5833; GFX900-NEXT: v_mov_b32_e32 v8, 0 5834; GFX900-NEXT: ;;#ASMSTART 5835; GFX900-NEXT: ; def v[4:7] 5836; GFX900-NEXT: ;;#ASMEND 5837; GFX900-NEXT: v_mov_b32_e32 v0, v7 5838; GFX900-NEXT: v_mov_b32_e32 v1, v3 5839; GFX900-NEXT: v_mov_b32_e32 v2, v5 5840; GFX900-NEXT: global_store_dwordx3 v8, v[0:2], s[16:17] 5841; GFX900-NEXT: s_waitcnt vmcnt(0) 5842; GFX900-NEXT: s_setpc_b64 s[30:31] 5843; 5844; GFX90A-LABEL: v_shuffle_v3i32_v4i32__7_3_5: 5845; GFX90A: ; %bb.0: 5846; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5847; GFX90A-NEXT: ;;#ASMSTART 5848; 
GFX90A-NEXT: ; def v[0:3] 5849; GFX90A-NEXT: ;;#ASMEND 5850; GFX90A-NEXT: v_mov_b32_e32 v8, 0 5851; GFX90A-NEXT: ;;#ASMSTART 5852; GFX90A-NEXT: ; def v[4:7] 5853; GFX90A-NEXT: ;;#ASMEND 5854; GFX90A-NEXT: v_mov_b32_e32 v0, v7 5855; GFX90A-NEXT: v_mov_b32_e32 v1, v3 5856; GFX90A-NEXT: v_mov_b32_e32 v2, v5 5857; GFX90A-NEXT: global_store_dwordx3 v8, v[0:2], s[16:17] 5858; GFX90A-NEXT: s_waitcnt vmcnt(0) 5859; GFX90A-NEXT: s_setpc_b64 s[30:31] 5860; 5861; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_3_5: 5862; GFX940: ; %bb.0: 5863; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5864; GFX940-NEXT: ;;#ASMSTART 5865; GFX940-NEXT: ; def v[0:3] 5866; GFX940-NEXT: ;;#ASMEND 5867; GFX940-NEXT: v_mov_b32_e32 v8, 0 5868; GFX940-NEXT: ;;#ASMSTART 5869; GFX940-NEXT: ; def v[4:7] 5870; GFX940-NEXT: ;;#ASMEND 5871; GFX940-NEXT: v_mov_b32_e32 v1, v3 5872; GFX940-NEXT: v_mov_b32_e32 v0, v7 5873; GFX940-NEXT: v_mov_b32_e32 v2, v5 5874; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 5875; GFX940-NEXT: s_waitcnt vmcnt(0) 5876; GFX940-NEXT: s_setpc_b64 s[30:31] 5877 %vec0 = call <4 x i32> asm "; def $0", "=v"() 5878 %vec1 = call <4 x i32> asm "; def $0", "=v"() 5879 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 3, i32 5> 5880 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 5881 ret void 5882} 5883 5884define void @v_shuffle_v3i32_v4i32__7_4_5(ptr addrspace(1) inreg %ptr) { 5885; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_4_5: 5886; GFX900: ; %bb.0: 5887; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5888; GFX900-NEXT: ;;#ASMSTART 5889; GFX900-NEXT: ; def v[0:3] 5890; GFX900-NEXT: ;;#ASMEND 5891; GFX900-NEXT: v_mov_b32_e32 v5, 0 5892; GFX900-NEXT: v_mov_b32_e32 v2, v3 5893; GFX900-NEXT: v_mov_b32_e32 v3, v0 5894; GFX900-NEXT: v_mov_b32_e32 v4, v1 5895; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17] 5896; GFX900-NEXT: s_waitcnt vmcnt(0) 5897; GFX900-NEXT: s_setpc_b64 s[30:31] 5898; 5899; GFX90A-LABEL: 
v_shuffle_v3i32_v4i32__7_4_5: 5900; GFX90A: ; %bb.0: 5901; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5902; GFX90A-NEXT: ;;#ASMSTART 5903; GFX90A-NEXT: ; def v[0:3] 5904; GFX90A-NEXT: ;;#ASMEND 5905; GFX90A-NEXT: v_mov_b32_e32 v5, 0 5906; GFX90A-NEXT: v_mov_b32_e32 v2, v3 5907; GFX90A-NEXT: v_mov_b32_e32 v3, v0 5908; GFX90A-NEXT: v_mov_b32_e32 v4, v1 5909; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17] 5910; GFX90A-NEXT: s_waitcnt vmcnt(0) 5911; GFX90A-NEXT: s_setpc_b64 s[30:31] 5912; 5913; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_4_5: 5914; GFX940: ; %bb.0: 5915; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5916; GFX940-NEXT: ;;#ASMSTART 5917; GFX940-NEXT: ; def v[0:3] 5918; GFX940-NEXT: ;;#ASMEND 5919; GFX940-NEXT: v_mov_b32_e32 v5, 0 5920; GFX940-NEXT: v_mov_b32_e32 v2, v3 5921; GFX940-NEXT: v_mov_b32_e32 v3, v0 5922; GFX940-NEXT: v_mov_b32_e32 v4, v1 5923; GFX940-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1] sc0 sc1 5924; GFX940-NEXT: s_waitcnt vmcnt(0) 5925; GFX940-NEXT: s_setpc_b64 s[30:31] 5926 %vec0 = call <4 x i32> asm "; def $0", "=v"() 5927 %vec1 = call <4 x i32> asm "; def $0", "=v"() 5928 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 4, i32 5> 5929 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 5930 ret void 5931} 5932 5933define void @v_shuffle_v3i32_v4i32__7_6_5(ptr addrspace(1) inreg %ptr) { 5934; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_6_5: 5935; GFX900: ; %bb.0: 5936; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5937; GFX900-NEXT: v_mov_b32_e32 v6, 0 5938; GFX900-NEXT: ;;#ASMSTART 5939; GFX900-NEXT: ; def v[0:3] 5940; GFX900-NEXT: ;;#ASMEND 5941; GFX900-NEXT: v_mov_b32_e32 v4, v2 5942; GFX900-NEXT: v_mov_b32_e32 v5, v1 5943; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17] 5944; GFX900-NEXT: s_waitcnt vmcnt(0) 5945; GFX900-NEXT: s_setpc_b64 s[30:31] 5946; 5947; GFX90A-LABEL: v_shuffle_v3i32_v4i32__7_6_5: 5948; GFX90A: ; %bb.0: 5949; GFX90A-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) 5950; GFX90A-NEXT: v_mov_b32_e32 v7, 0 5951; GFX90A-NEXT: ;;#ASMSTART 5952; GFX90A-NEXT: ; def v[0:3] 5953; GFX90A-NEXT: ;;#ASMEND 5954; GFX90A-NEXT: v_mov_b32_e32 v4, v3 5955; GFX90A-NEXT: v_mov_b32_e32 v5, v2 5956; GFX90A-NEXT: v_mov_b32_e32 v6, v1 5957; GFX90A-NEXT: global_store_dwordx3 v7, v[4:6], s[16:17] 5958; GFX90A-NEXT: s_waitcnt vmcnt(0) 5959; GFX90A-NEXT: s_setpc_b64 s[30:31] 5960; 5961; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_6_5: 5962; GFX940: ; %bb.0: 5963; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5964; GFX940-NEXT: v_mov_b32_e32 v7, 0 5965; GFX940-NEXT: ;;#ASMSTART 5966; GFX940-NEXT: ; def v[0:3] 5967; GFX940-NEXT: ;;#ASMEND 5968; GFX940-NEXT: s_nop 0 5969; GFX940-NEXT: v_mov_b32_e32 v4, v3 5970; GFX940-NEXT: v_mov_b32_e32 v5, v2 5971; GFX940-NEXT: v_mov_b32_e32 v6, v1 5972; GFX940-NEXT: global_store_dwordx3 v7, v[4:6], s[0:1] sc0 sc1 5973; GFX940-NEXT: s_waitcnt vmcnt(0) 5974; GFX940-NEXT: s_setpc_b64 s[30:31] 5975 %vec0 = call <4 x i32> asm "; def $0", "=v"() 5976 %vec1 = call <4 x i32> asm "; def $0", "=v"() 5977 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 6, i32 5> 5978 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 5979 ret void 5980} 5981 5982define void @v_shuffle_v3i32_v4i32__u_6_6(ptr addrspace(1) inreg %ptr) { 5983; GFX900-LABEL: v_shuffle_v3i32_v4i32__u_6_6: 5984; GFX900: ; %bb.0: 5985; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5986; GFX900-NEXT: ;;#ASMSTART 5987; GFX900-NEXT: ; def v[0:3] 5988; GFX900-NEXT: ;;#ASMEND 5989; GFX900-NEXT: v_mov_b32_e32 v4, 0 5990; GFX900-NEXT: v_mov_b32_e32 v3, v2 5991; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17] 5992; GFX900-NEXT: s_waitcnt vmcnt(0) 5993; GFX900-NEXT: s_setpc_b64 s[30:31] 5994; 5995; GFX90A-LABEL: v_shuffle_v3i32_v4i32__u_6_6: 5996; GFX90A: ; %bb.0: 5997; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5998; GFX90A-NEXT: ;;#ASMSTART 5999; GFX90A-NEXT: ; def v[0:3] 6000; GFX90A-NEXT: 
;;#ASMEND 6001; GFX90A-NEXT: v_mov_b32_e32 v4, 0 6002; GFX90A-NEXT: v_mov_b32_e32 v1, v2 6003; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17] 6004; GFX90A-NEXT: s_waitcnt vmcnt(0) 6005; GFX90A-NEXT: s_setpc_b64 s[30:31] 6006; 6007; GFX940-LABEL: v_shuffle_v3i32_v4i32__u_6_6: 6008; GFX940: ; %bb.0: 6009; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6010; GFX940-NEXT: ;;#ASMSTART 6011; GFX940-NEXT: ; def v[0:3] 6012; GFX940-NEXT: ;;#ASMEND 6013; GFX940-NEXT: v_mov_b32_e32 v4, 0 6014; GFX940-NEXT: v_mov_b32_e32 v1, v2 6015; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 6016; GFX940-NEXT: s_waitcnt vmcnt(0) 6017; GFX940-NEXT: s_setpc_b64 s[30:31] 6018 %vec0 = call <4 x i32> asm "; def $0", "=v"() 6019 %vec1 = call <4 x i32> asm "; def $0", "=v"() 6020 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 poison, i32 6, i32 6> 6021 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 6022 ret void 6023} 6024 6025define void @v_shuffle_v3i32_v4i32__0_6_6(ptr addrspace(1) inreg %ptr) { 6026; GFX900-LABEL: v_shuffle_v3i32_v4i32__0_6_6: 6027; GFX900: ; %bb.0: 6028; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6029; GFX900-NEXT: ;;#ASMSTART 6030; GFX900-NEXT: ; def v[0:3] 6031; GFX900-NEXT: ;;#ASMEND 6032; GFX900-NEXT: ;;#ASMSTART 6033; GFX900-NEXT: ; def v[1:4] 6034; GFX900-NEXT: ;;#ASMEND 6035; GFX900-NEXT: v_mov_b32_e32 v5, 0 6036; GFX900-NEXT: v_mov_b32_e32 v1, v3 6037; GFX900-NEXT: v_mov_b32_e32 v2, v3 6038; GFX900-NEXT: global_store_dwordx3 v5, v[0:2], s[16:17] 6039; GFX900-NEXT: s_waitcnt vmcnt(0) 6040; GFX900-NEXT: s_setpc_b64 s[30:31] 6041; 6042; GFX90A-LABEL: v_shuffle_v3i32_v4i32__0_6_6: 6043; GFX90A: ; %bb.0: 6044; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6045; GFX90A-NEXT: ;;#ASMSTART 6046; GFX90A-NEXT: ; def v[0:3] 6047; GFX90A-NEXT: ;;#ASMEND 6048; GFX90A-NEXT: ;;#ASMSTART 6049; GFX90A-NEXT: ; def v[2:5] 6050; GFX90A-NEXT: ;;#ASMEND 6051; GFX90A-NEXT: v_mov_b32_e32 v6, 0 6052; GFX90A-NEXT: 
v_mov_b32_e32 v1, v4 6053; GFX90A-NEXT: v_mov_b32_e32 v2, v4 6054; GFX90A-NEXT: global_store_dwordx3 v6, v[0:2], s[16:17] 6055; GFX90A-NEXT: s_waitcnt vmcnt(0) 6056; GFX90A-NEXT: s_setpc_b64 s[30:31] 6057; 6058; GFX940-LABEL: v_shuffle_v3i32_v4i32__0_6_6: 6059; GFX940: ; %bb.0: 6060; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6061; GFX940-NEXT: ;;#ASMSTART 6062; GFX940-NEXT: ; def v[0:3] 6063; GFX940-NEXT: ;;#ASMEND 6064; GFX940-NEXT: v_mov_b32_e32 v6, 0 6065; GFX940-NEXT: ;;#ASMSTART 6066; GFX940-NEXT: ; def v[2:5] 6067; GFX940-NEXT: ;;#ASMEND 6068; GFX940-NEXT: s_nop 0 6069; GFX940-NEXT: v_mov_b32_e32 v1, v4 6070; GFX940-NEXT: v_mov_b32_e32 v2, v4 6071; GFX940-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] sc0 sc1 6072; GFX940-NEXT: s_waitcnt vmcnt(0) 6073; GFX940-NEXT: s_setpc_b64 s[30:31] 6074 %vec0 = call <4 x i32> asm "; def $0", "=v"() 6075 %vec1 = call <4 x i32> asm "; def $0", "=v"() 6076 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 0, i32 6, i32 6> 6077 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 6078 ret void 6079} 6080 6081define void @v_shuffle_v3i32_v4i32__1_6_6(ptr addrspace(1) inreg %ptr) { 6082; GFX900-LABEL: v_shuffle_v3i32_v4i32__1_6_6: 6083; GFX900: ; %bb.0: 6084; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6085; GFX900-NEXT: ;;#ASMSTART 6086; GFX900-NEXT: ; def v[0:3] 6087; GFX900-NEXT: ;;#ASMEND 6088; GFX900-NEXT: ;;#ASMSTART 6089; GFX900-NEXT: ; def v[2:5] 6090; GFX900-NEXT: ;;#ASMEND 6091; GFX900-NEXT: v_mov_b32_e32 v6, 0 6092; GFX900-NEXT: v_mov_b32_e32 v2, v4 6093; GFX900-NEXT: v_mov_b32_e32 v3, v4 6094; GFX900-NEXT: global_store_dwordx3 v6, v[1:3], s[16:17] 6095; GFX900-NEXT: s_waitcnt vmcnt(0) 6096; GFX900-NEXT: s_setpc_b64 s[30:31] 6097; 6098; GFX90A-LABEL: v_shuffle_v3i32_v4i32__1_6_6: 6099; GFX90A: ; %bb.0: 6100; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6101; GFX90A-NEXT: ;;#ASMSTART 6102; GFX90A-NEXT: ; def v[0:3] 6103; GFX90A-NEXT: ;;#ASMEND 6104; GFX90A-NEXT: 
;;#ASMSTART 6105; GFX90A-NEXT: ; def v[2:5] 6106; GFX90A-NEXT: ;;#ASMEND 6107; GFX90A-NEXT: v_mov_b32_e32 v6, 0 6108; GFX90A-NEXT: v_mov_b32_e32 v2, v1 6109; GFX90A-NEXT: v_mov_b32_e32 v3, v4 6110; GFX90A-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17] 6111; GFX90A-NEXT: s_waitcnt vmcnt(0) 6112; GFX90A-NEXT: s_setpc_b64 s[30:31] 6113; 6114; GFX940-LABEL: v_shuffle_v3i32_v4i32__1_6_6: 6115; GFX940: ; %bb.0: 6116; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6117; GFX940-NEXT: ;;#ASMSTART 6118; GFX940-NEXT: ; def v[0:3] 6119; GFX940-NEXT: ;;#ASMEND 6120; GFX940-NEXT: v_mov_b32_e32 v6, 0 6121; GFX940-NEXT: ;;#ASMSTART 6122; GFX940-NEXT: ; def v[2:5] 6123; GFX940-NEXT: ;;#ASMEND 6124; GFX940-NEXT: s_nop 0 6125; GFX940-NEXT: v_mov_b32_e32 v2, v1 6126; GFX940-NEXT: v_mov_b32_e32 v3, v4 6127; GFX940-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] sc0 sc1 6128; GFX940-NEXT: s_waitcnt vmcnt(0) 6129; GFX940-NEXT: s_setpc_b64 s[30:31] 6130 %vec0 = call <4 x i32> asm "; def $0", "=v"() 6131 %vec1 = call <4 x i32> asm "; def $0", "=v"() 6132 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 1, i32 6, i32 6> 6133 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 6134 ret void 6135} 6136 6137define void @v_shuffle_v3i32_v4i32__2_6_6(ptr addrspace(1) inreg %ptr) { 6138; GFX900-LABEL: v_shuffle_v3i32_v4i32__2_6_6: 6139; GFX900: ; %bb.0: 6140; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6141; GFX900-NEXT: ;;#ASMSTART 6142; GFX900-NEXT: ; def v[0:3] 6143; GFX900-NEXT: ;;#ASMEND 6144; GFX900-NEXT: ;;#ASMSTART 6145; GFX900-NEXT: ; def v[3:6] 6146; GFX900-NEXT: ;;#ASMEND 6147; GFX900-NEXT: v_mov_b32_e32 v7, 0 6148; GFX900-NEXT: v_mov_b32_e32 v4, v2 6149; GFX900-NEXT: v_mov_b32_e32 v6, v5 6150; GFX900-NEXT: global_store_dwordx3 v7, v[4:6], s[16:17] 6151; GFX900-NEXT: s_waitcnt vmcnt(0) 6152; GFX900-NEXT: s_setpc_b64 s[30:31] 6153; 6154; GFX90A-LABEL: v_shuffle_v3i32_v4i32__2_6_6: 6155; GFX90A: ; %bb.0: 6156; GFX90A-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) 6157; GFX90A-NEXT: ;;#ASMSTART 6158; GFX90A-NEXT: ; def v[4:7] 6159; GFX90A-NEXT: ;;#ASMEND 6160; GFX90A-NEXT: v_mov_b32_e32 v8, 0 6161; GFX90A-NEXT: ;;#ASMSTART 6162; GFX90A-NEXT: ; def v[0:3] 6163; GFX90A-NEXT: ;;#ASMEND 6164; GFX90A-NEXT: v_mov_b32_e32 v4, v2 6165; GFX90A-NEXT: v_mov_b32_e32 v5, v6 6166; GFX90A-NEXT: global_store_dwordx3 v8, v[4:6], s[16:17] 6167; GFX90A-NEXT: s_waitcnt vmcnt(0) 6168; GFX90A-NEXT: s_setpc_b64 s[30:31] 6169; 6170; GFX940-LABEL: v_shuffle_v3i32_v4i32__2_6_6: 6171; GFX940: ; %bb.0: 6172; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6173; GFX940-NEXT: ;;#ASMSTART 6174; GFX940-NEXT: ; def v[4:7] 6175; GFX940-NEXT: ;;#ASMEND 6176; GFX940-NEXT: v_mov_b32_e32 v8, 0 6177; GFX940-NEXT: ;;#ASMSTART 6178; GFX940-NEXT: ; def v[0:3] 6179; GFX940-NEXT: ;;#ASMEND 6180; GFX940-NEXT: v_mov_b32_e32 v5, v6 6181; GFX940-NEXT: v_mov_b32_e32 v4, v2 6182; GFX940-NEXT: global_store_dwordx3 v8, v[4:6], s[0:1] sc0 sc1 6183; GFX940-NEXT: s_waitcnt vmcnt(0) 6184; GFX940-NEXT: s_setpc_b64 s[30:31] 6185 %vec0 = call <4 x i32> asm "; def $0", "=v"() 6186 %vec1 = call <4 x i32> asm "; def $0", "=v"() 6187 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 2, i32 6, i32 6> 6188 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 6189 ret void 6190} 6191 6192define void @v_shuffle_v3i32_v4i32__3_6_6(ptr addrspace(1) inreg %ptr) { 6193; GFX900-LABEL: v_shuffle_v3i32_v4i32__3_6_6: 6194; GFX900: ; %bb.0: 6195; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6196; GFX900-NEXT: ;;#ASMSTART 6197; GFX900-NEXT: ; def v[4:7] 6198; GFX900-NEXT: ;;#ASMEND 6199; GFX900-NEXT: v_mov_b32_e32 v8, 0 6200; GFX900-NEXT: ;;#ASMSTART 6201; GFX900-NEXT: ; def v[0:3] 6202; GFX900-NEXT: ;;#ASMEND 6203; GFX900-NEXT: v_mov_b32_e32 v5, v3 6204; GFX900-NEXT: v_mov_b32_e32 v7, v6 6205; GFX900-NEXT: global_store_dwordx3 v8, v[5:7], s[16:17] 6206; GFX900-NEXT: s_waitcnt vmcnt(0) 6207; GFX900-NEXT: s_setpc_b64 s[30:31] 6208; 6209; 
GFX90A-LABEL: v_shuffle_v3i32_v4i32__3_6_6: 6210; GFX90A: ; %bb.0: 6211; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6212; GFX90A-NEXT: ;;#ASMSTART 6213; GFX90A-NEXT: ; def v[4:7] 6214; GFX90A-NEXT: ;;#ASMEND 6215; GFX90A-NEXT: v_mov_b32_e32 v8, 0 6216; GFX90A-NEXT: ;;#ASMSTART 6217; GFX90A-NEXT: ; def v[0:3] 6218; GFX90A-NEXT: ;;#ASMEND 6219; GFX90A-NEXT: v_mov_b32_e32 v4, v3 6220; GFX90A-NEXT: v_mov_b32_e32 v5, v6 6221; GFX90A-NEXT: global_store_dwordx3 v8, v[4:6], s[16:17] 6222; GFX90A-NEXT: s_waitcnt vmcnt(0) 6223; GFX90A-NEXT: s_setpc_b64 s[30:31] 6224; 6225; GFX940-LABEL: v_shuffle_v3i32_v4i32__3_6_6: 6226; GFX940: ; %bb.0: 6227; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6228; GFX940-NEXT: ;;#ASMSTART 6229; GFX940-NEXT: ; def v[4:7] 6230; GFX940-NEXT: ;;#ASMEND 6231; GFX940-NEXT: v_mov_b32_e32 v8, 0 6232; GFX940-NEXT: ;;#ASMSTART 6233; GFX940-NEXT: ; def v[0:3] 6234; GFX940-NEXT: ;;#ASMEND 6235; GFX940-NEXT: v_mov_b32_e32 v5, v6 6236; GFX940-NEXT: v_mov_b32_e32 v4, v3 6237; GFX940-NEXT: global_store_dwordx3 v8, v[4:6], s[0:1] sc0 sc1 6238; GFX940-NEXT: s_waitcnt vmcnt(0) 6239; GFX940-NEXT: s_setpc_b64 s[30:31] 6240 %vec0 = call <4 x i32> asm "; def $0", "=v"() 6241 %vec1 = call <4 x i32> asm "; def $0", "=v"() 6242 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 3, i32 6, i32 6> 6243 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 6244 ret void 6245} 6246 6247define void @v_shuffle_v3i32_v4i32__4_6_6(ptr addrspace(1) inreg %ptr) { 6248; GFX900-LABEL: v_shuffle_v3i32_v4i32__4_6_6: 6249; GFX900: ; %bb.0: 6250; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6251; GFX900-NEXT: ;;#ASMSTART 6252; GFX900-NEXT: ; def v[0:3] 6253; GFX900-NEXT: ;;#ASMEND 6254; GFX900-NEXT: v_mov_b32_e32 v4, 0 6255; GFX900-NEXT: v_mov_b32_e32 v1, v2 6256; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17] 6257; GFX900-NEXT: s_waitcnt vmcnt(0) 6258; GFX900-NEXT: s_setpc_b64 s[30:31] 6259; 6260; GFX90A-LABEL: 
v_shuffle_v3i32_v4i32__4_6_6: 6261; GFX90A: ; %bb.0: 6262; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6263; GFX90A-NEXT: ;;#ASMSTART 6264; GFX90A-NEXT: ; def v[0:3] 6265; GFX90A-NEXT: ;;#ASMEND 6266; GFX90A-NEXT: v_mov_b32_e32 v4, 0 6267; GFX90A-NEXT: v_mov_b32_e32 v1, v2 6268; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17] 6269; GFX90A-NEXT: s_waitcnt vmcnt(0) 6270; GFX90A-NEXT: s_setpc_b64 s[30:31] 6271; 6272; GFX940-LABEL: v_shuffle_v3i32_v4i32__4_6_6: 6273; GFX940: ; %bb.0: 6274; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6275; GFX940-NEXT: ;;#ASMSTART 6276; GFX940-NEXT: ; def v[0:3] 6277; GFX940-NEXT: ;;#ASMEND 6278; GFX940-NEXT: v_mov_b32_e32 v4, 0 6279; GFX940-NEXT: v_mov_b32_e32 v1, v2 6280; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 6281; GFX940-NEXT: s_waitcnt vmcnt(0) 6282; GFX940-NEXT: s_setpc_b64 s[30:31] 6283 %vec0 = call <4 x i32> asm "; def $0", "=v"() 6284 %vec1 = call <4 x i32> asm "; def $0", "=v"() 6285 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 4, i32 6, i32 6> 6286 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 6287 ret void 6288} 6289 6290define void @v_shuffle_v3i32_v4i32__5_6_6(ptr addrspace(1) inreg %ptr) { 6291; GFX900-LABEL: v_shuffle_v3i32_v4i32__5_6_6: 6292; GFX900: ; %bb.0: 6293; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6294; GFX900-NEXT: ;;#ASMSTART 6295; GFX900-NEXT: ; def v[0:3] 6296; GFX900-NEXT: ;;#ASMEND 6297; GFX900-NEXT: v_mov_b32_e32 v4, 0 6298; GFX900-NEXT: v_mov_b32_e32 v3, v2 6299; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17] 6300; GFX900-NEXT: s_waitcnt vmcnt(0) 6301; GFX900-NEXT: s_setpc_b64 s[30:31] 6302; 6303; GFX90A-LABEL: v_shuffle_v3i32_v4i32__5_6_6: 6304; GFX90A: ; %bb.0: 6305; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6306; GFX90A-NEXT: ;;#ASMSTART 6307; GFX90A-NEXT: ; def v[0:3] 6308; GFX90A-NEXT: ;;#ASMEND 6309; GFX90A-NEXT: v_mov_b32_e32 v4, 0 6310; GFX90A-NEXT: v_mov_b32_e32 v0, v1 6311; 
GFX90A-NEXT: v_mov_b32_e32 v1, v2 6312; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17] 6313; GFX90A-NEXT: s_waitcnt vmcnt(0) 6314; GFX90A-NEXT: s_setpc_b64 s[30:31] 6315; 6316; GFX940-LABEL: v_shuffle_v3i32_v4i32__5_6_6: 6317; GFX940: ; %bb.0: 6318; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6319; GFX940-NEXT: ;;#ASMSTART 6320; GFX940-NEXT: ; def v[0:3] 6321; GFX940-NEXT: ;;#ASMEND 6322; GFX940-NEXT: v_mov_b32_e32 v4, 0 6323; GFX940-NEXT: v_mov_b32_e32 v0, v1 6324; GFX940-NEXT: v_mov_b32_e32 v1, v2 6325; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 6326; GFX940-NEXT: s_waitcnt vmcnt(0) 6327; GFX940-NEXT: s_setpc_b64 s[30:31] 6328 %vec0 = call <4 x i32> asm "; def $0", "=v"() 6329 %vec1 = call <4 x i32> asm "; def $0", "=v"() 6330 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 5, i32 6, i32 6> 6331 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 6332 ret void 6333} 6334 6335define void @v_shuffle_v3i32_v4i32__6_6_6(ptr addrspace(1) inreg %ptr) { 6336; GFX900-LABEL: v_shuffle_v3i32_v4i32__6_6_6: 6337; GFX900: ; %bb.0: 6338; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6339; GFX900-NEXT: ;;#ASMSTART 6340; GFX900-NEXT: ; def v[0:3] 6341; GFX900-NEXT: ;;#ASMEND 6342; GFX900-NEXT: v_mov_b32_e32 v4, 0 6343; GFX900-NEXT: v_mov_b32_e32 v1, v2 6344; GFX900-NEXT: v_mov_b32_e32 v3, v2 6345; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17] 6346; GFX900-NEXT: s_waitcnt vmcnt(0) 6347; GFX900-NEXT: s_setpc_b64 s[30:31] 6348; 6349; GFX90A-LABEL: v_shuffle_v3i32_v4i32__6_6_6: 6350; GFX90A: ; %bb.0: 6351; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6352; GFX90A-NEXT: ;;#ASMSTART 6353; GFX90A-NEXT: ; def v[0:3] 6354; GFX90A-NEXT: ;;#ASMEND 6355; GFX90A-NEXT: v_mov_b32_e32 v4, 0 6356; GFX90A-NEXT: v_mov_b32_e32 v0, v2 6357; GFX90A-NEXT: v_mov_b32_e32 v1, v2 6358; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17] 6359; GFX90A-NEXT: s_waitcnt vmcnt(0) 6360; GFX90A-NEXT: s_setpc_b64 s[30:31] 6361; 
6362; GFX940-LABEL: v_shuffle_v3i32_v4i32__6_6_6: 6363; GFX940: ; %bb.0: 6364; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6365; GFX940-NEXT: ;;#ASMSTART 6366; GFX940-NEXT: ; def v[0:3] 6367; GFX940-NEXT: ;;#ASMEND 6368; GFX940-NEXT: v_mov_b32_e32 v4, 0 6369; GFX940-NEXT: v_mov_b32_e32 v0, v2 6370; GFX940-NEXT: v_mov_b32_e32 v1, v2 6371; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 6372; GFX940-NEXT: s_waitcnt vmcnt(0) 6373; GFX940-NEXT: s_setpc_b64 s[30:31] 6374 %vec0 = call <4 x i32> asm "; def $0", "=v"() 6375 %vec1 = call <4 x i32> asm "; def $0", "=v"() 6376 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 6, i32 6, i32 6> 6377 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 6378 ret void 6379} 6380 6381define void @v_shuffle_v3i32_v4i32__7_6_6(ptr addrspace(1) inreg %ptr) { 6382; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_6_6: 6383; GFX900: ; %bb.0: 6384; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6385; GFX900-NEXT: ;;#ASMSTART 6386; GFX900-NEXT: ; def v[0:3] 6387; GFX900-NEXT: ;;#ASMEND 6388; GFX900-NEXT: v_mov_b32_e32 v4, 0 6389; GFX900-NEXT: v_mov_b32_e32 v1, v3 6390; GFX900-NEXT: v_mov_b32_e32 v3, v2 6391; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17] 6392; GFX900-NEXT: s_waitcnt vmcnt(0) 6393; GFX900-NEXT: s_setpc_b64 s[30:31] 6394; 6395; GFX90A-LABEL: v_shuffle_v3i32_v4i32__7_6_6: 6396; GFX90A: ; %bb.0: 6397; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6398; GFX90A-NEXT: ;;#ASMSTART 6399; GFX90A-NEXT: ; def v[0:3] 6400; GFX90A-NEXT: ;;#ASMEND 6401; GFX90A-NEXT: v_mov_b32_e32 v4, 0 6402; GFX90A-NEXT: v_mov_b32_e32 v0, v3 6403; GFX90A-NEXT: v_mov_b32_e32 v1, v2 6404; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17] 6405; GFX90A-NEXT: s_waitcnt vmcnt(0) 6406; GFX90A-NEXT: s_setpc_b64 s[30:31] 6407; 6408; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_6_6: 6409; GFX940: ; %bb.0: 6410; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6411; GFX940-NEXT: ;;#ASMSTART 6412; 
GFX940-NEXT: ; def v[0:3] 6413; GFX940-NEXT: ;;#ASMEND 6414; GFX940-NEXT: v_mov_b32_e32 v4, 0 6415; GFX940-NEXT: v_mov_b32_e32 v0, v3 6416; GFX940-NEXT: v_mov_b32_e32 v1, v2 6417; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 6418; GFX940-NEXT: s_waitcnt vmcnt(0) 6419; GFX940-NEXT: s_setpc_b64 s[30:31] 6420 %vec0 = call <4 x i32> asm "; def $0", "=v"() 6421 %vec1 = call <4 x i32> asm "; def $0", "=v"() 6422 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 6, i32 6> 6423 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 6424 ret void 6425} 6426 6427define void @v_shuffle_v3i32_v4i32__7_u_6(ptr addrspace(1) inreg %ptr) { 6428; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_u_6: 6429; GFX900: ; %bb.0: 6430; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6431; GFX900-NEXT: ;;#ASMSTART 6432; GFX900-NEXT: ; def v[0:3] 6433; GFX900-NEXT: ;;#ASMEND 6434; GFX900-NEXT: v_mov_b32_e32 v4, 0 6435; GFX900-NEXT: v_mov_b32_e32 v0, v3 6436; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17] 6437; GFX900-NEXT: s_waitcnt vmcnt(0) 6438; GFX900-NEXT: s_setpc_b64 s[30:31] 6439; 6440; GFX90A-LABEL: v_shuffle_v3i32_v4i32__7_u_6: 6441; GFX90A: ; %bb.0: 6442; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6443; GFX90A-NEXT: ;;#ASMSTART 6444; GFX90A-NEXT: ; def v[0:3] 6445; GFX90A-NEXT: ;;#ASMEND 6446; GFX90A-NEXT: v_mov_b32_e32 v4, 0 6447; GFX90A-NEXT: v_mov_b32_e32 v0, v3 6448; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17] 6449; GFX90A-NEXT: s_waitcnt vmcnt(0) 6450; GFX90A-NEXT: s_setpc_b64 s[30:31] 6451; 6452; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_u_6: 6453; GFX940: ; %bb.0: 6454; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6455; GFX940-NEXT: ;;#ASMSTART 6456; GFX940-NEXT: ; def v[0:3] 6457; GFX940-NEXT: ;;#ASMEND 6458; GFX940-NEXT: v_mov_b32_e32 v4, 0 6459; GFX940-NEXT: v_mov_b32_e32 v0, v3 6460; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 6461; GFX940-NEXT: s_waitcnt vmcnt(0) 6462; GFX940-NEXT: 
s_setpc_b64 s[30:31] 6463 %vec0 = call <4 x i32> asm "; def $0", "=v"() 6464 %vec1 = call <4 x i32> asm "; def $0", "=v"() 6465 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 poison, i32 6> 6466 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 6467 ret void 6468} 6469 6470define void @v_shuffle_v3i32_v4i32__7_0_6(ptr addrspace(1) inreg %ptr) { 6471; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_0_6: 6472; GFX900: ; %bb.0: 6473; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6474; GFX900-NEXT: ;;#ASMSTART 6475; GFX900-NEXT: ; def v[0:3] 6476; GFX900-NEXT: ;;#ASMEND 6477; GFX900-NEXT: ;;#ASMSTART 6478; GFX900-NEXT: ; def v[1:4] 6479; GFX900-NEXT: ;;#ASMEND 6480; GFX900-NEXT: v_mov_b32_e32 v5, 0 6481; GFX900-NEXT: v_mov_b32_e32 v1, v4 6482; GFX900-NEXT: v_mov_b32_e32 v2, v0 6483; GFX900-NEXT: global_store_dwordx3 v5, v[1:3], s[16:17] 6484; GFX900-NEXT: s_waitcnt vmcnt(0) 6485; GFX900-NEXT: s_setpc_b64 s[30:31] 6486; 6487; GFX90A-LABEL: v_shuffle_v3i32_v4i32__7_0_6: 6488; GFX90A: ; %bb.0: 6489; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6490; GFX90A-NEXT: ;;#ASMSTART 6491; GFX90A-NEXT: ; def v[0:3] 6492; GFX90A-NEXT: ;;#ASMEND 6493; GFX90A-NEXT: ;;#ASMSTART 6494; GFX90A-NEXT: ; def v[2:5] 6495; GFX90A-NEXT: ;;#ASMEND 6496; GFX90A-NEXT: v_mov_b32_e32 v6, 0 6497; GFX90A-NEXT: v_mov_b32_e32 v2, v5 6498; GFX90A-NEXT: v_mov_b32_e32 v3, v0 6499; GFX90A-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17] 6500; GFX90A-NEXT: s_waitcnt vmcnt(0) 6501; GFX90A-NEXT: s_setpc_b64 s[30:31] 6502; 6503; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_0_6: 6504; GFX940: ; %bb.0: 6505; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6506; GFX940-NEXT: ;;#ASMSTART 6507; GFX940-NEXT: ; def v[0:3] 6508; GFX940-NEXT: ;;#ASMEND 6509; GFX940-NEXT: v_mov_b32_e32 v6, 0 6510; GFX940-NEXT: ;;#ASMSTART 6511; GFX940-NEXT: ; def v[2:5] 6512; GFX940-NEXT: ;;#ASMEND 6513; GFX940-NEXT: s_nop 0 6514; GFX940-NEXT: v_mov_b32_e32 v2, v5 6515; GFX940-NEXT: v_mov_b32_e32 
v3, v0 6516; GFX940-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] sc0 sc1 6517; GFX940-NEXT: s_waitcnt vmcnt(0) 6518; GFX940-NEXT: s_setpc_b64 s[30:31] 6519 %vec0 = call <4 x i32> asm "; def $0", "=v"() 6520 %vec1 = call <4 x i32> asm "; def $0", "=v"() 6521 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 0, i32 6> 6522 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 6523 ret void 6524} 6525 6526define void @v_shuffle_v3i32_v4i32__7_1_6(ptr addrspace(1) inreg %ptr) { 6527; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_1_6: 6528; GFX900: ; %bb.0: 6529; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6530; GFX900-NEXT: ;;#ASMSTART 6531; GFX900-NEXT: ; def v[0:3] 6532; GFX900-NEXT: ;;#ASMEND 6533; GFX900-NEXT: ;;#ASMSTART 6534; GFX900-NEXT: ; def v[2:5] 6535; GFX900-NEXT: ;;#ASMEND 6536; GFX900-NEXT: v_mov_b32_e32 v6, 0 6537; GFX900-NEXT: v_mov_b32_e32 v0, v5 6538; GFX900-NEXT: v_mov_b32_e32 v2, v4 6539; GFX900-NEXT: global_store_dwordx3 v6, v[0:2], s[16:17] 6540; GFX900-NEXT: s_waitcnt vmcnt(0) 6541; GFX900-NEXT: s_setpc_b64 s[30:31] 6542; 6543; GFX90A-LABEL: v_shuffle_v3i32_v4i32__7_1_6: 6544; GFX90A: ; %bb.0: 6545; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6546; GFX90A-NEXT: ;;#ASMSTART 6547; GFX90A-NEXT: ; def v[0:3] 6548; GFX90A-NEXT: ;;#ASMEND 6549; GFX90A-NEXT: ;;#ASMSTART 6550; GFX90A-NEXT: ; def v[2:5] 6551; GFX90A-NEXT: ;;#ASMEND 6552; GFX90A-NEXT: v_mov_b32_e32 v6, 0 6553; GFX90A-NEXT: v_mov_b32_e32 v0, v5 6554; GFX90A-NEXT: v_mov_b32_e32 v2, v4 6555; GFX90A-NEXT: global_store_dwordx3 v6, v[0:2], s[16:17] 6556; GFX90A-NEXT: s_waitcnt vmcnt(0) 6557; GFX90A-NEXT: s_setpc_b64 s[30:31] 6558; 6559; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_1_6: 6560; GFX940: ; %bb.0: 6561; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6562; GFX940-NEXT: ;;#ASMSTART 6563; GFX940-NEXT: ; def v[0:3] 6564; GFX940-NEXT: ;;#ASMEND 6565; GFX940-NEXT: v_mov_b32_e32 v6, 0 6566; GFX940-NEXT: ;;#ASMSTART 6567; GFX940-NEXT: ; def v[2:5] 
6568; GFX940-NEXT: ;;#ASMEND 6569; GFX940-NEXT: s_nop 0 6570; GFX940-NEXT: v_mov_b32_e32 v0, v5 6571; GFX940-NEXT: v_mov_b32_e32 v2, v4 6572; GFX940-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] sc0 sc1 6573; GFX940-NEXT: s_waitcnt vmcnt(0) 6574; GFX940-NEXT: s_setpc_b64 s[30:31] 6575 %vec0 = call <4 x i32> asm "; def $0", "=v"() 6576 %vec1 = call <4 x i32> asm "; def $0", "=v"() 6577 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 1, i32 6> 6578 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 6579 ret void 6580} 6581 6582define void @v_shuffle_v3i32_v4i32__7_2_6(ptr addrspace(1) inreg %ptr) { 6583; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_2_6: 6584; GFX900: ; %bb.0: 6585; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6586; GFX900-NEXT: ;;#ASMSTART 6587; GFX900-NEXT: ; def v[0:3] 6588; GFX900-NEXT: ;;#ASMEND 6589; GFX900-NEXT: ;;#ASMSTART 6590; GFX900-NEXT: ; def v[3:6] 6591; GFX900-NEXT: ;;#ASMEND 6592; GFX900-NEXT: v_mov_b32_e32 v7, 0 6593; GFX900-NEXT: v_mov_b32_e32 v1, v6 6594; GFX900-NEXT: v_mov_b32_e32 v3, v5 6595; GFX900-NEXT: global_store_dwordx3 v7, v[1:3], s[16:17] 6596; GFX900-NEXT: s_waitcnt vmcnt(0) 6597; GFX900-NEXT: s_setpc_b64 s[30:31] 6598; 6599; GFX90A-LABEL: v_shuffle_v3i32_v4i32__7_2_6: 6600; GFX90A: ; %bb.0: 6601; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6602; GFX90A-NEXT: ;;#ASMSTART 6603; GFX90A-NEXT: ; def v[4:7] 6604; GFX90A-NEXT: ;;#ASMEND 6605; GFX90A-NEXT: v_mov_b32_e32 v8, 0 6606; GFX90A-NEXT: ;;#ASMSTART 6607; GFX90A-NEXT: ; def v[0:3] 6608; GFX90A-NEXT: ;;#ASMEND 6609; GFX90A-NEXT: v_mov_b32_e32 v4, v7 6610; GFX90A-NEXT: v_mov_b32_e32 v5, v2 6611; GFX90A-NEXT: global_store_dwordx3 v8, v[4:6], s[16:17] 6612; GFX90A-NEXT: s_waitcnt vmcnt(0) 6613; GFX90A-NEXT: s_setpc_b64 s[30:31] 6614; 6615; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_2_6: 6616; GFX940: ; %bb.0: 6617; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6618; GFX940-NEXT: ;;#ASMSTART 6619; GFX940-NEXT: ; def v[4:7] 6620; 
GFX940-NEXT: ;;#ASMEND 6621; GFX940-NEXT: v_mov_b32_e32 v8, 0 6622; GFX940-NEXT: ;;#ASMSTART 6623; GFX940-NEXT: ; def v[0:3] 6624; GFX940-NEXT: ;;#ASMEND 6625; GFX940-NEXT: v_mov_b32_e32 v4, v7 6626; GFX940-NEXT: v_mov_b32_e32 v5, v2 6627; GFX940-NEXT: global_store_dwordx3 v8, v[4:6], s[0:1] sc0 sc1 6628; GFX940-NEXT: s_waitcnt vmcnt(0) 6629; GFX940-NEXT: s_setpc_b64 s[30:31] 6630 %vec0 = call <4 x i32> asm "; def $0", "=v"() 6631 %vec1 = call <4 x i32> asm "; def $0", "=v"() 6632 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 2, i32 6> 6633 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 6634 ret void 6635} 6636 6637define void @v_shuffle_v3i32_v4i32__7_3_6(ptr addrspace(1) inreg %ptr) { 6638; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_3_6: 6639; GFX900: ; %bb.0: 6640; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6641; GFX900-NEXT: ;;#ASMSTART 6642; GFX900-NEXT: ; def v[4:7] 6643; GFX900-NEXT: ;;#ASMEND 6644; GFX900-NEXT: v_mov_b32_e32 v8, 0 6645; GFX900-NEXT: ;;#ASMSTART 6646; GFX900-NEXT: ; def v[0:3] 6647; GFX900-NEXT: ;;#ASMEND 6648; GFX900-NEXT: v_mov_b32_e32 v4, v7 6649; GFX900-NEXT: v_mov_b32_e32 v5, v3 6650; GFX900-NEXT: global_store_dwordx3 v8, v[4:6], s[16:17] 6651; GFX900-NEXT: s_waitcnt vmcnt(0) 6652; GFX900-NEXT: s_setpc_b64 s[30:31] 6653; 6654; GFX90A-LABEL: v_shuffle_v3i32_v4i32__7_3_6: 6655; GFX90A: ; %bb.0: 6656; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6657; GFX90A-NEXT: ;;#ASMSTART 6658; GFX90A-NEXT: ; def v[4:7] 6659; GFX90A-NEXT: ;;#ASMEND 6660; GFX90A-NEXT: v_mov_b32_e32 v8, 0 6661; GFX90A-NEXT: ;;#ASMSTART 6662; GFX90A-NEXT: ; def v[0:3] 6663; GFX90A-NEXT: ;;#ASMEND 6664; GFX90A-NEXT: v_mov_b32_e32 v4, v7 6665; GFX90A-NEXT: v_mov_b32_e32 v5, v3 6666; GFX90A-NEXT: global_store_dwordx3 v8, v[4:6], s[16:17] 6667; GFX90A-NEXT: s_waitcnt vmcnt(0) 6668; GFX90A-NEXT: s_setpc_b64 s[30:31] 6669; 6670; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_3_6: 6671; GFX940: ; %bb.0: 6672; GFX940-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) 6673; GFX940-NEXT: ;;#ASMSTART 6674; GFX940-NEXT: ; def v[4:7] 6675; GFX940-NEXT: ;;#ASMEND 6676; GFX940-NEXT: v_mov_b32_e32 v8, 0 6677; GFX940-NEXT: ;;#ASMSTART 6678; GFX940-NEXT: ; def v[0:3] 6679; GFX940-NEXT: ;;#ASMEND 6680; GFX940-NEXT: v_mov_b32_e32 v4, v7 6681; GFX940-NEXT: v_mov_b32_e32 v5, v3 6682; GFX940-NEXT: global_store_dwordx3 v8, v[4:6], s[0:1] sc0 sc1 6683; GFX940-NEXT: s_waitcnt vmcnt(0) 6684; GFX940-NEXT: s_setpc_b64 s[30:31] 6685 %vec0 = call <4 x i32> asm "; def $0", "=v"() 6686 %vec1 = call <4 x i32> asm "; def $0", "=v"() 6687 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 3, i32 6> 6688 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 6689 ret void 6690} 6691 6692define void @v_shuffle_v3i32_v4i32__7_4_6(ptr addrspace(1) inreg %ptr) { 6693; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_4_6: 6694; GFX900: ; %bb.0: 6695; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6696; GFX900-NEXT: v_mov_b32_e32 v6, 0 6697; GFX900-NEXT: ;;#ASMSTART 6698; GFX900-NEXT: ; def v[0:3] 6699; GFX900-NEXT: ;;#ASMEND 6700; GFX900-NEXT: v_mov_b32_e32 v4, v0 6701; GFX900-NEXT: v_mov_b32_e32 v5, v2 6702; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17] 6703; GFX900-NEXT: s_waitcnt vmcnt(0) 6704; GFX900-NEXT: s_setpc_b64 s[30:31] 6705; 6706; GFX90A-LABEL: v_shuffle_v3i32_v4i32__7_4_6: 6707; GFX90A: ; %bb.0: 6708; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6709; GFX90A-NEXT: v_mov_b32_e32 v7, 0 6710; GFX90A-NEXT: ;;#ASMSTART 6711; GFX90A-NEXT: ; def v[0:3] 6712; GFX90A-NEXT: ;;#ASMEND 6713; GFX90A-NEXT: v_mov_b32_e32 v4, v3 6714; GFX90A-NEXT: v_mov_b32_e32 v5, v0 6715; GFX90A-NEXT: v_mov_b32_e32 v6, v2 6716; GFX90A-NEXT: global_store_dwordx3 v7, v[4:6], s[16:17] 6717; GFX90A-NEXT: s_waitcnt vmcnt(0) 6718; GFX90A-NEXT: s_setpc_b64 s[30:31] 6719; 6720; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_4_6: 6721; GFX940: ; %bb.0: 6722; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6723; 
GFX940-NEXT: v_mov_b32_e32 v7, 0 6724; GFX940-NEXT: ;;#ASMSTART 6725; GFX940-NEXT: ; def v[0:3] 6726; GFX940-NEXT: ;;#ASMEND 6727; GFX940-NEXT: s_nop 0 6728; GFX940-NEXT: v_mov_b32_e32 v4, v3 6729; GFX940-NEXT: v_mov_b32_e32 v5, v0 6730; GFX940-NEXT: v_mov_b32_e32 v6, v2 6731; GFX940-NEXT: global_store_dwordx3 v7, v[4:6], s[0:1] sc0 sc1 6732; GFX940-NEXT: s_waitcnt vmcnt(0) 6733; GFX940-NEXT: s_setpc_b64 s[30:31] 6734 %vec0 = call <4 x i32> asm "; def $0", "=v"() 6735 %vec1 = call <4 x i32> asm "; def $0", "=v"() 6736 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 4, i32 6> 6737 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 6738 ret void 6739} 6740 6741define void @v_shuffle_v3i32_v4i32__7_5_6(ptr addrspace(1) inreg %ptr) { 6742; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_5_6: 6743; GFX900: ; %bb.0: 6744; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6745; GFX900-NEXT: ;;#ASMSTART 6746; GFX900-NEXT: ; def v[0:3] 6747; GFX900-NEXT: ;;#ASMEND 6748; GFX900-NEXT: v_mov_b32_e32 v4, 0 6749; GFX900-NEXT: v_mov_b32_e32 v0, v3 6750; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17] 6751; GFX900-NEXT: s_waitcnt vmcnt(0) 6752; GFX900-NEXT: s_setpc_b64 s[30:31] 6753; 6754; GFX90A-LABEL: v_shuffle_v3i32_v4i32__7_5_6: 6755; GFX90A: ; %bb.0: 6756; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6757; GFX90A-NEXT: ;;#ASMSTART 6758; GFX90A-NEXT: ; def v[0:3] 6759; GFX90A-NEXT: ;;#ASMEND 6760; GFX90A-NEXT: v_mov_b32_e32 v4, 0 6761; GFX90A-NEXT: v_mov_b32_e32 v0, v3 6762; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17] 6763; GFX90A-NEXT: s_waitcnt vmcnt(0) 6764; GFX90A-NEXT: s_setpc_b64 s[30:31] 6765; 6766; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_5_6: 6767; GFX940: ; %bb.0: 6768; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6769; GFX940-NEXT: ;;#ASMSTART 6770; GFX940-NEXT: ; def v[0:3] 6771; GFX940-NEXT: ;;#ASMEND 6772; GFX940-NEXT: v_mov_b32_e32 v4, 0 6773; GFX940-NEXT: v_mov_b32_e32 v0, v3 6774; GFX940-NEXT: 
global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 6775; GFX940-NEXT: s_waitcnt vmcnt(0) 6776; GFX940-NEXT: s_setpc_b64 s[30:31] 6777 %vec0 = call <4 x i32> asm "; def $0", "=v"() 6778 %vec1 = call <4 x i32> asm "; def $0", "=v"() 6779 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 5, i32 6> 6780 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 6781 ret void 6782} 6783 6784define void @v_shuffle_v3i32_v4i32__u_7_7(ptr addrspace(1) inreg %ptr) { 6785; GFX900-LABEL: v_shuffle_v3i32_v4i32__u_7_7: 6786; GFX900: ; %bb.0: 6787; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6788; GFX900-NEXT: ;;#ASMSTART 6789; GFX900-NEXT: ; def v[0:3] 6790; GFX900-NEXT: ;;#ASMEND 6791; GFX900-NEXT: v_mov_b32_e32 v4, 0 6792; GFX900-NEXT: v_mov_b32_e32 v2, v3 6793; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17] 6794; GFX900-NEXT: s_waitcnt vmcnt(0) 6795; GFX900-NEXT: s_setpc_b64 s[30:31] 6796; 6797; GFX90A-LABEL: v_shuffle_v3i32_v4i32__u_7_7: 6798; GFX90A: ; %bb.0: 6799; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6800; GFX90A-NEXT: ;;#ASMSTART 6801; GFX90A-NEXT: ; def v[0:3] 6802; GFX90A-NEXT: ;;#ASMEND 6803; GFX90A-NEXT: v_mov_b32_e32 v4, 0 6804; GFX90A-NEXT: v_mov_b32_e32 v1, v3 6805; GFX90A-NEXT: v_mov_b32_e32 v2, v3 6806; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17] 6807; GFX90A-NEXT: s_waitcnt vmcnt(0) 6808; GFX90A-NEXT: s_setpc_b64 s[30:31] 6809; 6810; GFX940-LABEL: v_shuffle_v3i32_v4i32__u_7_7: 6811; GFX940: ; %bb.0: 6812; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6813; GFX940-NEXT: ;;#ASMSTART 6814; GFX940-NEXT: ; def v[0:3] 6815; GFX940-NEXT: ;;#ASMEND 6816; GFX940-NEXT: v_mov_b32_e32 v4, 0 6817; GFX940-NEXT: v_mov_b32_e32 v1, v3 6818; GFX940-NEXT: v_mov_b32_e32 v2, v3 6819; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 6820; GFX940-NEXT: s_waitcnt vmcnt(0) 6821; GFX940-NEXT: s_setpc_b64 s[30:31] 6822 %vec0 = call <4 x i32> asm "; def $0", "=v"() 6823 %vec1 = call <4 x i32> asm "; def 
$0", "=v"() 6824 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 poison, i32 7, i32 7> 6825 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 6826 ret void 6827} 6828 6829define void @v_shuffle_v3i32_v4i32__0_7_7(ptr addrspace(1) inreg %ptr) { 6830; GFX900-LABEL: v_shuffle_v3i32_v4i32__0_7_7: 6831; GFX900: ; %bb.0: 6832; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6833; GFX900-NEXT: ;;#ASMSTART 6834; GFX900-NEXT: ; def v[0:3] 6835; GFX900-NEXT: ;;#ASMEND 6836; GFX900-NEXT: ;;#ASMSTART 6837; GFX900-NEXT: ; def v[1:4] 6838; GFX900-NEXT: ;;#ASMEND 6839; GFX900-NEXT: v_mov_b32_e32 v5, 0 6840; GFX900-NEXT: v_mov_b32_e32 v1, v4 6841; GFX900-NEXT: v_mov_b32_e32 v2, v4 6842; GFX900-NEXT: global_store_dwordx3 v5, v[0:2], s[16:17] 6843; GFX900-NEXT: s_waitcnt vmcnt(0) 6844; GFX900-NEXT: s_setpc_b64 s[30:31] 6845; 6846; GFX90A-LABEL: v_shuffle_v3i32_v4i32__0_7_7: 6847; GFX90A: ; %bb.0: 6848; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6849; GFX90A-NEXT: ;;#ASMSTART 6850; GFX90A-NEXT: ; def v[0:3] 6851; GFX90A-NEXT: ;;#ASMEND 6852; GFX90A-NEXT: ;;#ASMSTART 6853; GFX90A-NEXT: ; def v[2:5] 6854; GFX90A-NEXT: ;;#ASMEND 6855; GFX90A-NEXT: v_mov_b32_e32 v6, 0 6856; GFX90A-NEXT: v_mov_b32_e32 v1, v5 6857; GFX90A-NEXT: v_mov_b32_e32 v2, v5 6858; GFX90A-NEXT: global_store_dwordx3 v6, v[0:2], s[16:17] 6859; GFX90A-NEXT: s_waitcnt vmcnt(0) 6860; GFX90A-NEXT: s_setpc_b64 s[30:31] 6861; 6862; GFX940-LABEL: v_shuffle_v3i32_v4i32__0_7_7: 6863; GFX940: ; %bb.0: 6864; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6865; GFX940-NEXT: ;;#ASMSTART 6866; GFX940-NEXT: ; def v[0:3] 6867; GFX940-NEXT: ;;#ASMEND 6868; GFX940-NEXT: v_mov_b32_e32 v6, 0 6869; GFX940-NEXT: ;;#ASMSTART 6870; GFX940-NEXT: ; def v[2:5] 6871; GFX940-NEXT: ;;#ASMEND 6872; GFX940-NEXT: s_nop 0 6873; GFX940-NEXT: v_mov_b32_e32 v1, v5 6874; GFX940-NEXT: v_mov_b32_e32 v2, v5 6875; GFX940-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] sc0 sc1 6876; GFX940-NEXT: s_waitcnt vmcnt(0) 
6877; GFX940-NEXT: s_setpc_b64 s[30:31] 6878 %vec0 = call <4 x i32> asm "; def $0", "=v"() 6879 %vec1 = call <4 x i32> asm "; def $0", "=v"() 6880 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 0, i32 7, i32 7> 6881 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 6882 ret void 6883} 6884 6885define void @v_shuffle_v3i32_v4i32__1_7_7(ptr addrspace(1) inreg %ptr) { 6886; GFX900-LABEL: v_shuffle_v3i32_v4i32__1_7_7: 6887; GFX900: ; %bb.0: 6888; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6889; GFX900-NEXT: ;;#ASMSTART 6890; GFX900-NEXT: ; def v[0:3] 6891; GFX900-NEXT: ;;#ASMEND 6892; GFX900-NEXT: ;;#ASMSTART 6893; GFX900-NEXT: ; def v[2:5] 6894; GFX900-NEXT: ;;#ASMEND 6895; GFX900-NEXT: v_mov_b32_e32 v6, 0 6896; GFX900-NEXT: v_mov_b32_e32 v2, v5 6897; GFX900-NEXT: v_mov_b32_e32 v3, v5 6898; GFX900-NEXT: global_store_dwordx3 v6, v[1:3], s[16:17] 6899; GFX900-NEXT: s_waitcnt vmcnt(0) 6900; GFX900-NEXT: s_setpc_b64 s[30:31] 6901; 6902; GFX90A-LABEL: v_shuffle_v3i32_v4i32__1_7_7: 6903; GFX90A: ; %bb.0: 6904; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6905; GFX90A-NEXT: ;;#ASMSTART 6906; GFX90A-NEXT: ; def v[0:3] 6907; GFX90A-NEXT: ;;#ASMEND 6908; GFX90A-NEXT: ;;#ASMSTART 6909; GFX90A-NEXT: ; def v[2:5] 6910; GFX90A-NEXT: ;;#ASMEND 6911; GFX90A-NEXT: v_mov_b32_e32 v6, 0 6912; GFX90A-NEXT: v_mov_b32_e32 v0, v1 6913; GFX90A-NEXT: v_mov_b32_e32 v1, v5 6914; GFX90A-NEXT: v_mov_b32_e32 v2, v5 6915; GFX90A-NEXT: global_store_dwordx3 v6, v[0:2], s[16:17] 6916; GFX90A-NEXT: s_waitcnt vmcnt(0) 6917; GFX90A-NEXT: s_setpc_b64 s[30:31] 6918; 6919; GFX940-LABEL: v_shuffle_v3i32_v4i32__1_7_7: 6920; GFX940: ; %bb.0: 6921; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6922; GFX940-NEXT: ;;#ASMSTART 6923; GFX940-NEXT: ; def v[0:3] 6924; GFX940-NEXT: ;;#ASMEND 6925; GFX940-NEXT: v_mov_b32_e32 v6, 0 6926; GFX940-NEXT: ;;#ASMSTART 6927; GFX940-NEXT: ; def v[2:5] 6928; GFX940-NEXT: ;;#ASMEND 6929; GFX940-NEXT: v_mov_b32_e32 v0, v1 6930; 
GFX940-NEXT: v_mov_b32_e32 v1, v5 6931; GFX940-NEXT: v_mov_b32_e32 v2, v5 6932; GFX940-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] sc0 sc1 6933; GFX940-NEXT: s_waitcnt vmcnt(0) 6934; GFX940-NEXT: s_setpc_b64 s[30:31] 6935 %vec0 = call <4 x i32> asm "; def $0", "=v"() 6936 %vec1 = call <4 x i32> asm "; def $0", "=v"() 6937 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 1, i32 7, i32 7> 6938 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 6939 ret void 6940} 6941 6942define void @v_shuffle_v3i32_v4i32__2_7_7(ptr addrspace(1) inreg %ptr) { 6943; GFX900-LABEL: v_shuffle_v3i32_v4i32__2_7_7: 6944; GFX900: ; %bb.0: 6945; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6946; GFX900-NEXT: ;;#ASMSTART 6947; GFX900-NEXT: ; def v[0:3] 6948; GFX900-NEXT: ;;#ASMEND 6949; GFX900-NEXT: ;;#ASMSTART 6950; GFX900-NEXT: ; def v[3:6] 6951; GFX900-NEXT: ;;#ASMEND 6952; GFX900-NEXT: v_mov_b32_e32 v7, 0 6953; GFX900-NEXT: v_mov_b32_e32 v4, v2 6954; GFX900-NEXT: v_mov_b32_e32 v5, v6 6955; GFX900-NEXT: global_store_dwordx3 v7, v[4:6], s[16:17] 6956; GFX900-NEXT: s_waitcnt vmcnt(0) 6957; GFX900-NEXT: s_setpc_b64 s[30:31] 6958; 6959; GFX90A-LABEL: v_shuffle_v3i32_v4i32__2_7_7: 6960; GFX90A: ; %bb.0: 6961; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6962; GFX90A-NEXT: ;;#ASMSTART 6963; GFX90A-NEXT: ; def v[0:3] 6964; GFX90A-NEXT: ;;#ASMEND 6965; GFX90A-NEXT: v_mov_b32_e32 v8, 0 6966; GFX90A-NEXT: ;;#ASMSTART 6967; GFX90A-NEXT: ; def v[4:7] 6968; GFX90A-NEXT: ;;#ASMEND 6969; GFX90A-NEXT: v_mov_b32_e32 v0, v2 6970; GFX90A-NEXT: v_mov_b32_e32 v1, v7 6971; GFX90A-NEXT: v_mov_b32_e32 v2, v7 6972; GFX90A-NEXT: global_store_dwordx3 v8, v[0:2], s[16:17] 6973; GFX90A-NEXT: s_waitcnt vmcnt(0) 6974; GFX90A-NEXT: s_setpc_b64 s[30:31] 6975; 6976; GFX940-LABEL: v_shuffle_v3i32_v4i32__2_7_7: 6977; GFX940: ; %bb.0: 6978; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6979; GFX940-NEXT: ;;#ASMSTART 6980; GFX940-NEXT: ; def v[0:3] 6981; GFX940-NEXT: ;;#ASMEND 
6982; GFX940-NEXT: v_mov_b32_e32 v8, 0 6983; GFX940-NEXT: ;;#ASMSTART 6984; GFX940-NEXT: ; def v[4:7] 6985; GFX940-NEXT: ;;#ASMEND 6986; GFX940-NEXT: v_mov_b32_e32 v0, v2 6987; GFX940-NEXT: v_mov_b32_e32 v1, v7 6988; GFX940-NEXT: v_mov_b32_e32 v2, v7 6989; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 6990; GFX940-NEXT: s_waitcnt vmcnt(0) 6991; GFX940-NEXT: s_setpc_b64 s[30:31] 6992 %vec0 = call <4 x i32> asm "; def $0", "=v"() 6993 %vec1 = call <4 x i32> asm "; def $0", "=v"() 6994 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 2, i32 7, i32 7> 6995 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 6996 ret void 6997} 6998 6999define void @v_shuffle_v3i32_v4i32__3_7_7(ptr addrspace(1) inreg %ptr) { 7000; GFX900-LABEL: v_shuffle_v3i32_v4i32__3_7_7: 7001; GFX900: ; %bb.0: 7002; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7003; GFX900-NEXT: ;;#ASMSTART 7004; GFX900-NEXT: ; def v[4:7] 7005; GFX900-NEXT: ;;#ASMEND 7006; GFX900-NEXT: v_mov_b32_e32 v8, 0 7007; GFX900-NEXT: ;;#ASMSTART 7008; GFX900-NEXT: ; def v[0:3] 7009; GFX900-NEXT: ;;#ASMEND 7010; GFX900-NEXT: v_mov_b32_e32 v5, v3 7011; GFX900-NEXT: v_mov_b32_e32 v6, v7 7012; GFX900-NEXT: global_store_dwordx3 v8, v[5:7], s[16:17] 7013; GFX900-NEXT: s_waitcnt vmcnt(0) 7014; GFX900-NEXT: s_setpc_b64 s[30:31] 7015; 7016; GFX90A-LABEL: v_shuffle_v3i32_v4i32__3_7_7: 7017; GFX90A: ; %bb.0: 7018; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7019; GFX90A-NEXT: ;;#ASMSTART 7020; GFX90A-NEXT: ; def v[0:3] 7021; GFX90A-NEXT: ;;#ASMEND 7022; GFX90A-NEXT: v_mov_b32_e32 v8, 0 7023; GFX90A-NEXT: ;;#ASMSTART 7024; GFX90A-NEXT: ; def v[4:7] 7025; GFX90A-NEXT: ;;#ASMEND 7026; GFX90A-NEXT: v_mov_b32_e32 v0, v3 7027; GFX90A-NEXT: v_mov_b32_e32 v1, v7 7028; GFX90A-NEXT: v_mov_b32_e32 v2, v7 7029; GFX90A-NEXT: global_store_dwordx3 v8, v[0:2], s[16:17] 7030; GFX90A-NEXT: s_waitcnt vmcnt(0) 7031; GFX90A-NEXT: s_setpc_b64 s[30:31] 7032; 7033; GFX940-LABEL: 
v_shuffle_v3i32_v4i32__3_7_7: 7034; GFX940: ; %bb.0: 7035; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7036; GFX940-NEXT: ;;#ASMSTART 7037; GFX940-NEXT: ; def v[0:3] 7038; GFX940-NEXT: ;;#ASMEND 7039; GFX940-NEXT: v_mov_b32_e32 v8, 0 7040; GFX940-NEXT: ;;#ASMSTART 7041; GFX940-NEXT: ; def v[4:7] 7042; GFX940-NEXT: ;;#ASMEND 7043; GFX940-NEXT: v_mov_b32_e32 v0, v3 7044; GFX940-NEXT: v_mov_b32_e32 v1, v7 7045; GFX940-NEXT: v_mov_b32_e32 v2, v7 7046; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 7047; GFX940-NEXT: s_waitcnt vmcnt(0) 7048; GFX940-NEXT: s_setpc_b64 s[30:31] 7049 %vec0 = call <4 x i32> asm "; def $0", "=v"() 7050 %vec1 = call <4 x i32> asm "; def $0", "=v"() 7051 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 3, i32 7, i32 7> 7052 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 7053 ret void 7054} 7055 7056define void @v_shuffle_v3i32_v4i32__4_7_7(ptr addrspace(1) inreg %ptr) { 7057; GFX900-LABEL: v_shuffle_v3i32_v4i32__4_7_7: 7058; GFX900: ; %bb.0: 7059; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7060; GFX900-NEXT: ;;#ASMSTART 7061; GFX900-NEXT: ; def v[0:3] 7062; GFX900-NEXT: ;;#ASMEND 7063; GFX900-NEXT: v_mov_b32_e32 v4, 0 7064; GFX900-NEXT: v_mov_b32_e32 v1, v3 7065; GFX900-NEXT: v_mov_b32_e32 v2, v3 7066; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17] 7067; GFX900-NEXT: s_waitcnt vmcnt(0) 7068; GFX900-NEXT: s_setpc_b64 s[30:31] 7069; 7070; GFX90A-LABEL: v_shuffle_v3i32_v4i32__4_7_7: 7071; GFX90A: ; %bb.0: 7072; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7073; GFX90A-NEXT: ;;#ASMSTART 7074; GFX90A-NEXT: ; def v[0:3] 7075; GFX90A-NEXT: ;;#ASMEND 7076; GFX90A-NEXT: v_mov_b32_e32 v4, 0 7077; GFX90A-NEXT: v_mov_b32_e32 v1, v3 7078; GFX90A-NEXT: v_mov_b32_e32 v2, v3 7079; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17] 7080; GFX90A-NEXT: s_waitcnt vmcnt(0) 7081; GFX90A-NEXT: s_setpc_b64 s[30:31] 7082; 7083; GFX940-LABEL: v_shuffle_v3i32_v4i32__4_7_7: 7084; GFX940: ; 
%bb.0: 7085; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7086; GFX940-NEXT: ;;#ASMSTART 7087; GFX940-NEXT: ; def v[0:3] 7088; GFX940-NEXT: ;;#ASMEND 7089; GFX940-NEXT: v_mov_b32_e32 v4, 0 7090; GFX940-NEXT: v_mov_b32_e32 v1, v3 7091; GFX940-NEXT: v_mov_b32_e32 v2, v3 7092; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 7093; GFX940-NEXT: s_waitcnt vmcnt(0) 7094; GFX940-NEXT: s_setpc_b64 s[30:31] 7095 %vec0 = call <4 x i32> asm "; def $0", "=v"() 7096 %vec1 = call <4 x i32> asm "; def $0", "=v"() 7097 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 4, i32 7, i32 7> 7098 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 7099 ret void 7100} 7101 7102define void @v_shuffle_v3i32_v4i32__5_7_7(ptr addrspace(1) inreg %ptr) { 7103; GFX900-LABEL: v_shuffle_v3i32_v4i32__5_7_7: 7104; GFX900: ; %bb.0: 7105; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7106; GFX900-NEXT: ;;#ASMSTART 7107; GFX900-NEXT: ; def v[0:3] 7108; GFX900-NEXT: ;;#ASMEND 7109; GFX900-NEXT: v_mov_b32_e32 v4, 0 7110; GFX900-NEXT: v_mov_b32_e32 v2, v3 7111; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17] 7112; GFX900-NEXT: s_waitcnt vmcnt(0) 7113; GFX900-NEXT: s_setpc_b64 s[30:31] 7114; 7115; GFX90A-LABEL: v_shuffle_v3i32_v4i32__5_7_7: 7116; GFX90A: ; %bb.0: 7117; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7118; GFX90A-NEXT: ;;#ASMSTART 7119; GFX90A-NEXT: ; def v[0:3] 7120; GFX90A-NEXT: ;;#ASMEND 7121; GFX90A-NEXT: v_mov_b32_e32 v4, 0 7122; GFX90A-NEXT: v_mov_b32_e32 v0, v1 7123; GFX90A-NEXT: v_mov_b32_e32 v1, v3 7124; GFX90A-NEXT: v_mov_b32_e32 v2, v3 7125; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17] 7126; GFX90A-NEXT: s_waitcnt vmcnt(0) 7127; GFX90A-NEXT: s_setpc_b64 s[30:31] 7128; 7129; GFX940-LABEL: v_shuffle_v3i32_v4i32__5_7_7: 7130; GFX940: ; %bb.0: 7131; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7132; GFX940-NEXT: ;;#ASMSTART 7133; GFX940-NEXT: ; def v[0:3] 7134; GFX940-NEXT: ;;#ASMEND 7135; GFX940-NEXT: 
v_mov_b32_e32 v4, 0 7136; GFX940-NEXT: v_mov_b32_e32 v0, v1 7137; GFX940-NEXT: v_mov_b32_e32 v1, v3 7138; GFX940-NEXT: v_mov_b32_e32 v2, v3 7139; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 7140; GFX940-NEXT: s_waitcnt vmcnt(0) 7141; GFX940-NEXT: s_setpc_b64 s[30:31] 7142 %vec0 = call <4 x i32> asm "; def $0", "=v"() 7143 %vec1 = call <4 x i32> asm "; def $0", "=v"() 7144 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 5, i32 7, i32 7> 7145 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 7146 ret void 7147} 7148 7149define void @v_shuffle_v3i32_v4i32__6_7_7(ptr addrspace(1) inreg %ptr) { 7150; GFX900-LABEL: v_shuffle_v3i32_v4i32__6_7_7: 7151; GFX900: ; %bb.0: 7152; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7153; GFX900-NEXT: ;;#ASMSTART 7154; GFX900-NEXT: ; def v[0:3] 7155; GFX900-NEXT: ;;#ASMEND 7156; GFX900-NEXT: v_mov_b32_e32 v4, 0 7157; GFX900-NEXT: v_mov_b32_e32 v1, v2 7158; GFX900-NEXT: v_mov_b32_e32 v2, v3 7159; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17] 7160; GFX900-NEXT: s_waitcnt vmcnt(0) 7161; GFX900-NEXT: s_setpc_b64 s[30:31] 7162; 7163; GFX90A-LABEL: v_shuffle_v3i32_v4i32__6_7_7: 7164; GFX90A: ; %bb.0: 7165; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7166; GFX90A-NEXT: ;;#ASMSTART 7167; GFX90A-NEXT: ; def v[0:3] 7168; GFX90A-NEXT: ;;#ASMEND 7169; GFX90A-NEXT: v_mov_b32_e32 v4, 0 7170; GFX90A-NEXT: v_mov_b32_e32 v0, v2 7171; GFX90A-NEXT: v_mov_b32_e32 v1, v3 7172; GFX90A-NEXT: v_mov_b32_e32 v2, v3 7173; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17] 7174; GFX90A-NEXT: s_waitcnt vmcnt(0) 7175; GFX90A-NEXT: s_setpc_b64 s[30:31] 7176; 7177; GFX940-LABEL: v_shuffle_v3i32_v4i32__6_7_7: 7178; GFX940: ; %bb.0: 7179; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7180; GFX940-NEXT: ;;#ASMSTART 7181; GFX940-NEXT: ; def v[0:3] 7182; GFX940-NEXT: ;;#ASMEND 7183; GFX940-NEXT: v_mov_b32_e32 v4, 0 7184; GFX940-NEXT: v_mov_b32_e32 v0, v2 7185; GFX940-NEXT: v_mov_b32_e32 v1, 
v3 7186; GFX940-NEXT: v_mov_b32_e32 v2, v3 7187; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 7188; GFX940-NEXT: s_waitcnt vmcnt(0) 7189; GFX940-NEXT: s_setpc_b64 s[30:31] 7190 %vec0 = call <4 x i32> asm "; def $0", "=v"() 7191 %vec1 = call <4 x i32> asm "; def $0", "=v"() 7192 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 6, i32 7, i32 7> 7193 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 7194 ret void 7195} 7196 7197define void @v_shuffle_v3i32_v4i32__7_u_7(ptr addrspace(1) inreg %ptr) { 7198; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_u_7: 7199; GFX900: ; %bb.0: 7200; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7201; GFX900-NEXT: ;;#ASMSTART 7202; GFX900-NEXT: ; def v[0:3] 7203; GFX900-NEXT: ;;#ASMEND 7204; GFX900-NEXT: v_mov_b32_e32 v4, 0 7205; GFX900-NEXT: v_mov_b32_e32 v1, v3 7206; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17] 7207; GFX900-NEXT: s_waitcnt vmcnt(0) 7208; GFX900-NEXT: s_setpc_b64 s[30:31] 7209; 7210; GFX90A-LABEL: v_shuffle_v3i32_v4i32__7_u_7: 7211; GFX90A: ; %bb.0: 7212; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7213; GFX90A-NEXT: ;;#ASMSTART 7214; GFX90A-NEXT: ; def v[0:3] 7215; GFX90A-NEXT: ;;#ASMEND 7216; GFX90A-NEXT: v_mov_b32_e32 v4, 0 7217; GFX90A-NEXT: v_mov_b32_e32 v0, v3 7218; GFX90A-NEXT: v_mov_b32_e32 v2, v3 7219; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17] 7220; GFX90A-NEXT: s_waitcnt vmcnt(0) 7221; GFX90A-NEXT: s_setpc_b64 s[30:31] 7222; 7223; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_u_7: 7224; GFX940: ; %bb.0: 7225; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7226; GFX940-NEXT: ;;#ASMSTART 7227; GFX940-NEXT: ; def v[0:3] 7228; GFX940-NEXT: ;;#ASMEND 7229; GFX940-NEXT: v_mov_b32_e32 v4, 0 7230; GFX940-NEXT: v_mov_b32_e32 v0, v3 7231; GFX940-NEXT: v_mov_b32_e32 v2, v3 7232; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 7233; GFX940-NEXT: s_waitcnt vmcnt(0) 7234; GFX940-NEXT: s_setpc_b64 s[30:31] 7235 %vec0 = call <4 x i32> 
asm "; def $0", "=v"() 7236 %vec1 = call <4 x i32> asm "; def $0", "=v"() 7237 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 poison, i32 7> 7238 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 7239 ret void 7240} 7241 7242define void @v_shuffle_v3i32_v4i32__7_0_7(ptr addrspace(1) inreg %ptr) { 7243; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_0_7: 7244; GFX900: ; %bb.0: 7245; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7246; GFX900-NEXT: ;;#ASMSTART 7247; GFX900-NEXT: ; def v[0:3] 7248; GFX900-NEXT: ;;#ASMEND 7249; GFX900-NEXT: ;;#ASMSTART 7250; GFX900-NEXT: ; def v[1:4] 7251; GFX900-NEXT: ;;#ASMEND 7252; GFX900-NEXT: v_mov_b32_e32 v5, 0 7253; GFX900-NEXT: v_mov_b32_e32 v2, v4 7254; GFX900-NEXT: v_mov_b32_e32 v3, v0 7255; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17] 7256; GFX900-NEXT: s_waitcnt vmcnt(0) 7257; GFX900-NEXT: s_setpc_b64 s[30:31] 7258; 7259; GFX90A-LABEL: v_shuffle_v3i32_v4i32__7_0_7: 7260; GFX90A: ; %bb.0: 7261; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7262; GFX90A-NEXT: ;;#ASMSTART 7263; GFX90A-NEXT: ; def v[0:3] 7264; GFX90A-NEXT: ;;#ASMEND 7265; GFX90A-NEXT: ;;#ASMSTART 7266; GFX90A-NEXT: ; def v[2:5] 7267; GFX90A-NEXT: ;;#ASMEND 7268; GFX90A-NEXT: v_mov_b32_e32 v6, 0 7269; GFX90A-NEXT: v_mov_b32_e32 v2, v5 7270; GFX90A-NEXT: v_mov_b32_e32 v3, v0 7271; GFX90A-NEXT: v_mov_b32_e32 v4, v5 7272; GFX90A-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17] 7273; GFX90A-NEXT: s_waitcnt vmcnt(0) 7274; GFX90A-NEXT: s_setpc_b64 s[30:31] 7275; 7276; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_0_7: 7277; GFX940: ; %bb.0: 7278; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7279; GFX940-NEXT: ;;#ASMSTART 7280; GFX940-NEXT: ; def v[0:3] 7281; GFX940-NEXT: ;;#ASMEND 7282; GFX940-NEXT: v_mov_b32_e32 v6, 0 7283; GFX940-NEXT: ;;#ASMSTART 7284; GFX940-NEXT: ; def v[2:5] 7285; GFX940-NEXT: ;;#ASMEND 7286; GFX940-NEXT: s_nop 0 7287; GFX940-NEXT: v_mov_b32_e32 v2, v5 7288; GFX940-NEXT: v_mov_b32_e32 v3, v0 
7289; GFX940-NEXT: v_mov_b32_e32 v4, v5 7290; GFX940-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1] sc0 sc1 7291; GFX940-NEXT: s_waitcnt vmcnt(0) 7292; GFX940-NEXT: s_setpc_b64 s[30:31] 7293 %vec0 = call <4 x i32> asm "; def $0", "=v"() 7294 %vec1 = call <4 x i32> asm "; def $0", "=v"() 7295 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 0, i32 7> 7296 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 7297 ret void 7298} 7299 7300define void @v_shuffle_v3i32_v4i32__7_1_7(ptr addrspace(1) inreg %ptr) { 7301; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_1_7: 7302; GFX900: ; %bb.0: 7303; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7304; GFX900-NEXT: ;;#ASMSTART 7305; GFX900-NEXT: ; def v[0:3] 7306; GFX900-NEXT: ;;#ASMEND 7307; GFX900-NEXT: ;;#ASMSTART 7308; GFX900-NEXT: ; def v[2:5] 7309; GFX900-NEXT: ;;#ASMEND 7310; GFX900-NEXT: v_mov_b32_e32 v6, 0 7311; GFX900-NEXT: v_mov_b32_e32 v0, v5 7312; GFX900-NEXT: v_mov_b32_e32 v2, v5 7313; GFX900-NEXT: global_store_dwordx3 v6, v[0:2], s[16:17] 7314; GFX900-NEXT: s_waitcnt vmcnt(0) 7315; GFX900-NEXT: s_setpc_b64 s[30:31] 7316; 7317; GFX90A-LABEL: v_shuffle_v3i32_v4i32__7_1_7: 7318; GFX90A: ; %bb.0: 7319; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7320; GFX90A-NEXT: ;;#ASMSTART 7321; GFX90A-NEXT: ; def v[0:3] 7322; GFX90A-NEXT: ;;#ASMEND 7323; GFX90A-NEXT: ;;#ASMSTART 7324; GFX90A-NEXT: ; def v[2:5] 7325; GFX90A-NEXT: ;;#ASMEND 7326; GFX90A-NEXT: v_mov_b32_e32 v6, 0 7327; GFX90A-NEXT: v_mov_b32_e32 v0, v5 7328; GFX90A-NEXT: v_mov_b32_e32 v2, v5 7329; GFX90A-NEXT: global_store_dwordx3 v6, v[0:2], s[16:17] 7330; GFX90A-NEXT: s_waitcnt vmcnt(0) 7331; GFX90A-NEXT: s_setpc_b64 s[30:31] 7332; 7333; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_1_7: 7334; GFX940: ; %bb.0: 7335; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7336; GFX940-NEXT: ;;#ASMSTART 7337; GFX940-NEXT: ; def v[0:3] 7338; GFX940-NEXT: ;;#ASMEND 7339; GFX940-NEXT: v_mov_b32_e32 v6, 0 7340; GFX940-NEXT: ;;#ASMSTART 
7341; GFX940-NEXT: ; def v[2:5] 7342; GFX940-NEXT: ;;#ASMEND 7343; GFX940-NEXT: s_nop 0 7344; GFX940-NEXT: v_mov_b32_e32 v0, v5 7345; GFX940-NEXT: v_mov_b32_e32 v2, v5 7346; GFX940-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1] sc0 sc1 7347; GFX940-NEXT: s_waitcnt vmcnt(0) 7348; GFX940-NEXT: s_setpc_b64 s[30:31] 7349 %vec0 = call <4 x i32> asm "; def $0", "=v"() 7350 %vec1 = call <4 x i32> asm "; def $0", "=v"() 7351 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 1, i32 7> 7352 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 7353 ret void 7354} 7355 7356define void @v_shuffle_v3i32_v4i32__7_2_7(ptr addrspace(1) inreg %ptr) { 7357; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_2_7: 7358; GFX900: ; %bb.0: 7359; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7360; GFX900-NEXT: ;;#ASMSTART 7361; GFX900-NEXT: ; def v[0:3] 7362; GFX900-NEXT: ;;#ASMEND 7363; GFX900-NEXT: ;;#ASMSTART 7364; GFX900-NEXT: ; def v[3:6] 7365; GFX900-NEXT: ;;#ASMEND 7366; GFX900-NEXT: v_mov_b32_e32 v7, 0 7367; GFX900-NEXT: v_mov_b32_e32 v1, v6 7368; GFX900-NEXT: v_mov_b32_e32 v3, v6 7369; GFX900-NEXT: global_store_dwordx3 v7, v[1:3], s[16:17] 7370; GFX900-NEXT: s_waitcnt vmcnt(0) 7371; GFX900-NEXT: s_setpc_b64 s[30:31] 7372; 7373; GFX90A-LABEL: v_shuffle_v3i32_v4i32__7_2_7: 7374; GFX90A: ; %bb.0: 7375; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7376; GFX90A-NEXT: ;;#ASMSTART 7377; GFX90A-NEXT: ; def v[0:3] 7378; GFX90A-NEXT: ;;#ASMEND 7379; GFX90A-NEXT: v_mov_b32_e32 v8, 0 7380; GFX90A-NEXT: ;;#ASMSTART 7381; GFX90A-NEXT: ; def v[4:7] 7382; GFX90A-NEXT: ;;#ASMEND 7383; GFX90A-NEXT: v_mov_b32_e32 v0, v7 7384; GFX90A-NEXT: v_mov_b32_e32 v1, v2 7385; GFX90A-NEXT: v_mov_b32_e32 v2, v7 7386; GFX90A-NEXT: global_store_dwordx3 v8, v[0:2], s[16:17] 7387; GFX90A-NEXT: s_waitcnt vmcnt(0) 7388; GFX90A-NEXT: s_setpc_b64 s[30:31] 7389; 7390; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_2_7: 7391; GFX940: ; %bb.0: 7392; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) 7393; GFX940-NEXT: ;;#ASMSTART 7394; GFX940-NEXT: ; def v[0:3] 7395; GFX940-NEXT: ;;#ASMEND 7396; GFX940-NEXT: v_mov_b32_e32 v8, 0 7397; GFX940-NEXT: ;;#ASMSTART 7398; GFX940-NEXT: ; def v[4:7] 7399; GFX940-NEXT: ;;#ASMEND 7400; GFX940-NEXT: v_mov_b32_e32 v1, v2 7401; GFX940-NEXT: v_mov_b32_e32 v0, v7 7402; GFX940-NEXT: v_mov_b32_e32 v2, v7 7403; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 7404; GFX940-NEXT: s_waitcnt vmcnt(0) 7405; GFX940-NEXT: s_setpc_b64 s[30:31] 7406 %vec0 = call <4 x i32> asm "; def $0", "=v"() 7407 %vec1 = call <4 x i32> asm "; def $0", "=v"() 7408 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 2, i32 7> 7409 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 7410 ret void 7411} 7412 7413define void @v_shuffle_v3i32_v4i32__7_3_7(ptr addrspace(1) inreg %ptr) { 7414; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_3_7: 7415; GFX900: ; %bb.0: 7416; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7417; GFX900-NEXT: ;;#ASMSTART 7418; GFX900-NEXT: ; def v[4:7] 7419; GFX900-NEXT: ;;#ASMEND 7420; GFX900-NEXT: v_mov_b32_e32 v8, 0 7421; GFX900-NEXT: ;;#ASMSTART 7422; GFX900-NEXT: ; def v[0:3] 7423; GFX900-NEXT: ;;#ASMEND 7424; GFX900-NEXT: v_mov_b32_e32 v5, v7 7425; GFX900-NEXT: v_mov_b32_e32 v6, v3 7426; GFX900-NEXT: global_store_dwordx3 v8, v[5:7], s[16:17] 7427; GFX900-NEXT: s_waitcnt vmcnt(0) 7428; GFX900-NEXT: s_setpc_b64 s[30:31] 7429; 7430; GFX90A-LABEL: v_shuffle_v3i32_v4i32__7_3_7: 7431; GFX90A: ; %bb.0: 7432; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7433; GFX90A-NEXT: ;;#ASMSTART 7434; GFX90A-NEXT: ; def v[0:3] 7435; GFX90A-NEXT: ;;#ASMEND 7436; GFX90A-NEXT: v_mov_b32_e32 v8, 0 7437; GFX90A-NEXT: ;;#ASMSTART 7438; GFX90A-NEXT: ; def v[4:7] 7439; GFX90A-NEXT: ;;#ASMEND 7440; GFX90A-NEXT: v_mov_b32_e32 v0, v7 7441; GFX90A-NEXT: v_mov_b32_e32 v1, v3 7442; GFX90A-NEXT: v_mov_b32_e32 v2, v7 7443; GFX90A-NEXT: global_store_dwordx3 v8, v[0:2], s[16:17] 7444; GFX90A-NEXT: 
s_waitcnt vmcnt(0) 7445; GFX90A-NEXT: s_setpc_b64 s[30:31] 7446; 7447; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_3_7: 7448; GFX940: ; %bb.0: 7449; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7450; GFX940-NEXT: ;;#ASMSTART 7451; GFX940-NEXT: ; def v[0:3] 7452; GFX940-NEXT: ;;#ASMEND 7453; GFX940-NEXT: v_mov_b32_e32 v8, 0 7454; GFX940-NEXT: ;;#ASMSTART 7455; GFX940-NEXT: ; def v[4:7] 7456; GFX940-NEXT: ;;#ASMEND 7457; GFX940-NEXT: v_mov_b32_e32 v1, v3 7458; GFX940-NEXT: v_mov_b32_e32 v0, v7 7459; GFX940-NEXT: v_mov_b32_e32 v2, v7 7460; GFX940-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1] sc0 sc1 7461; GFX940-NEXT: s_waitcnt vmcnt(0) 7462; GFX940-NEXT: s_setpc_b64 s[30:31] 7463 %vec0 = call <4 x i32> asm "; def $0", "=v"() 7464 %vec1 = call <4 x i32> asm "; def $0", "=v"() 7465 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 3, i32 7> 7466 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 7467 ret void 7468} 7469 7470define void @v_shuffle_v3i32_v4i32__7_4_7(ptr addrspace(1) inreg %ptr) { 7471; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_4_7: 7472; GFX900: ; %bb.0: 7473; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7474; GFX900-NEXT: ;;#ASMSTART 7475; GFX900-NEXT: ; def v[0:3] 7476; GFX900-NEXT: ;;#ASMEND 7477; GFX900-NEXT: v_mov_b32_e32 v4, 0 7478; GFX900-NEXT: v_mov_b32_e32 v1, v3 7479; GFX900-NEXT: v_mov_b32_e32 v2, v0 7480; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17] 7481; GFX900-NEXT: s_waitcnt vmcnt(0) 7482; GFX900-NEXT: s_setpc_b64 s[30:31] 7483; 7484; GFX90A-LABEL: v_shuffle_v3i32_v4i32__7_4_7: 7485; GFX90A: ; %bb.0: 7486; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7487; GFX90A-NEXT: v_mov_b32_e32 v7, 0 7488; GFX90A-NEXT: ;;#ASMSTART 7489; GFX90A-NEXT: ; def v[0:3] 7490; GFX90A-NEXT: ;;#ASMEND 7491; GFX90A-NEXT: v_mov_b32_e32 v4, v3 7492; GFX90A-NEXT: v_mov_b32_e32 v5, v0 7493; GFX90A-NEXT: v_mov_b32_e32 v6, v3 7494; GFX90A-NEXT: global_store_dwordx3 v7, v[4:6], s[16:17] 7495; GFX90A-NEXT: 
s_waitcnt vmcnt(0) 7496; GFX90A-NEXT: s_setpc_b64 s[30:31] 7497; 7498; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_4_7: 7499; GFX940: ; %bb.0: 7500; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7501; GFX940-NEXT: v_mov_b32_e32 v7, 0 7502; GFX940-NEXT: ;;#ASMSTART 7503; GFX940-NEXT: ; def v[0:3] 7504; GFX940-NEXT: ;;#ASMEND 7505; GFX940-NEXT: s_nop 0 7506; GFX940-NEXT: v_mov_b32_e32 v4, v3 7507; GFX940-NEXT: v_mov_b32_e32 v5, v0 7508; GFX940-NEXT: v_mov_b32_e32 v6, v3 7509; GFX940-NEXT: global_store_dwordx3 v7, v[4:6], s[0:1] sc0 sc1 7510; GFX940-NEXT: s_waitcnt vmcnt(0) 7511; GFX940-NEXT: s_setpc_b64 s[30:31] 7512 %vec0 = call <4 x i32> asm "; def $0", "=v"() 7513 %vec1 = call <4 x i32> asm "; def $0", "=v"() 7514 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 4, i32 7> 7515 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 7516 ret void 7517} 7518 7519define void @v_shuffle_v3i32_v4i32__7_5_7(ptr addrspace(1) inreg %ptr) { 7520; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_5_7: 7521; GFX900: ; %bb.0: 7522; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7523; GFX900-NEXT: ;;#ASMSTART 7524; GFX900-NEXT: ; def v[0:3] 7525; GFX900-NEXT: ;;#ASMEND 7526; GFX900-NEXT: v_mov_b32_e32 v4, 0 7527; GFX900-NEXT: v_mov_b32_e32 v0, v3 7528; GFX900-NEXT: v_mov_b32_e32 v2, v3 7529; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17] 7530; GFX900-NEXT: s_waitcnt vmcnt(0) 7531; GFX900-NEXT: s_setpc_b64 s[30:31] 7532; 7533; GFX90A-LABEL: v_shuffle_v3i32_v4i32__7_5_7: 7534; GFX90A: ; %bb.0: 7535; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7536; GFX90A-NEXT: ;;#ASMSTART 7537; GFX90A-NEXT: ; def v[0:3] 7538; GFX90A-NEXT: ;;#ASMEND 7539; GFX90A-NEXT: v_mov_b32_e32 v4, 0 7540; GFX90A-NEXT: v_mov_b32_e32 v0, v3 7541; GFX90A-NEXT: v_mov_b32_e32 v2, v3 7542; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17] 7543; GFX90A-NEXT: s_waitcnt vmcnt(0) 7544; GFX90A-NEXT: s_setpc_b64 s[30:31] 7545; 7546; GFX940-LABEL: 
v_shuffle_v3i32_v4i32__7_5_7: 7547; GFX940: ; %bb.0: 7548; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7549; GFX940-NEXT: ;;#ASMSTART 7550; GFX940-NEXT: ; def v[0:3] 7551; GFX940-NEXT: ;;#ASMEND 7552; GFX940-NEXT: v_mov_b32_e32 v4, 0 7553; GFX940-NEXT: v_mov_b32_e32 v0, v3 7554; GFX940-NEXT: v_mov_b32_e32 v2, v3 7555; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 7556; GFX940-NEXT: s_waitcnt vmcnt(0) 7557; GFX940-NEXT: s_setpc_b64 s[30:31] 7558 %vec0 = call <4 x i32> asm "; def $0", "=v"() 7559 %vec1 = call <4 x i32> asm "; def $0", "=v"() 7560 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 5, i32 7> 7561 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 7562 ret void 7563} 7564 7565define void @v_shuffle_v3i32_v4i32__7_6_7(ptr addrspace(1) inreg %ptr) { 7566; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_6_7: 7567; GFX900: ; %bb.0: 7568; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7569; GFX900-NEXT: ;;#ASMSTART 7570; GFX900-NEXT: ; def v[0:3] 7571; GFX900-NEXT: ;;#ASMEND 7572; GFX900-NEXT: v_mov_b32_e32 v4, 0 7573; GFX900-NEXT: v_mov_b32_e32 v1, v3 7574; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17] 7575; GFX900-NEXT: s_waitcnt vmcnt(0) 7576; GFX900-NEXT: s_setpc_b64 s[30:31] 7577; 7578; GFX90A-LABEL: v_shuffle_v3i32_v4i32__7_6_7: 7579; GFX90A: ; %bb.0: 7580; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7581; GFX90A-NEXT: ;;#ASMSTART 7582; GFX90A-NEXT: ; def v[0:3] 7583; GFX90A-NEXT: ;;#ASMEND 7584; GFX90A-NEXT: v_mov_b32_e32 v4, 0 7585; GFX90A-NEXT: v_mov_b32_e32 v0, v3 7586; GFX90A-NEXT: v_mov_b32_e32 v1, v2 7587; GFX90A-NEXT: v_mov_b32_e32 v2, v3 7588; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17] 7589; GFX90A-NEXT: s_waitcnt vmcnt(0) 7590; GFX90A-NEXT: s_setpc_b64 s[30:31] 7591; 7592; GFX940-LABEL: v_shuffle_v3i32_v4i32__7_6_7: 7593; GFX940: ; %bb.0: 7594; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7595; GFX940-NEXT: ;;#ASMSTART 7596; GFX940-NEXT: ; def v[0:3] 
7597; GFX940-NEXT: ;;#ASMEND 7598; GFX940-NEXT: v_mov_b32_e32 v4, 0 7599; GFX940-NEXT: v_mov_b32_e32 v0, v3 7600; GFX940-NEXT: v_mov_b32_e32 v1, v2 7601; GFX940-NEXT: v_mov_b32_e32 v2, v3 7602; GFX940-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1] sc0 sc1 7603; GFX940-NEXT: s_waitcnt vmcnt(0) 7604; GFX940-NEXT: s_setpc_b64 s[30:31] 7605 %vec0 = call <4 x i32> asm "; def $0", "=v"() 7606 %vec1 = call <4 x i32> asm "; def $0", "=v"() 7607 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 6, i32 7> 7608 store <3 x i32> %shuf, ptr addrspace(1) %ptr, align 16 7609 ret void 7610} 7611; Scalar-register ("=s") variant with a fully poison mask - the shuffle result carries no data, so the expected output contains no "def" block and only the "use s[8:10]" consumer. 7612define void @s_shuffle_v3i32_v4i32__u_u_u() { 7613; GFX9-LABEL: s_shuffle_v3i32_v4i32__u_u_u: 7614; GFX9: ; %bb.0: 7615; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7616; GFX9-NEXT: ;;#ASMSTART 7617; GFX9-NEXT: ; use s[8:10] 7618; GFX9-NEXT: ;;#ASMEND 7619; GFX9-NEXT: s_setpc_b64 s[30:31] 7620 %vec0 = call <4 x i32> asm "; def $0", "=s"() 7621 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> poison 7622 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 7623 ret void 7624} 7625 7626define void @s_shuffle_v3i32_v4i32__0_u_u() { 7627; GFX900-LABEL: s_shuffle_v3i32_v4i32__0_u_u: 7628; GFX900: ; %bb.0: 7629; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7630; GFX900-NEXT: ;;#ASMSTART 7631; GFX900-NEXT: ; def s[8:11] 7632; GFX900-NEXT: ;;#ASMEND 7633; GFX900-NEXT: ;;#ASMSTART 7634; GFX900-NEXT: ; use s[8:10] 7635; GFX900-NEXT: ;;#ASMEND 7636; GFX900-NEXT: s_setpc_b64 s[30:31] 7637; 7638; GFX90A-LABEL: s_shuffle_v3i32_v4i32__0_u_u: 7639; GFX90A: ; %bb.0: 7640; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7641; GFX90A-NEXT: ;;#ASMSTART 7642; GFX90A-NEXT: ; def s[8:11] 7643; GFX90A-NEXT: ;;#ASMEND 7644; GFX90A-NEXT: ;;#ASMSTART 7645; GFX90A-NEXT: ; use s[8:10] 7646; GFX90A-NEXT: ;;#ASMEND 7647; GFX90A-NEXT: s_setpc_b64 s[30:31] 7648; 7649; GFX940-LABEL: s_shuffle_v3i32_v4i32__0_u_u: 7650; GFX940: ; %bb.0: 7651;
GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7652; GFX940-NEXT: ;;#ASMSTART 7653; GFX940-NEXT: ; def s[8:11] 7654; GFX940-NEXT: ;;#ASMEND 7655; GFX940-NEXT: s_nop 0 7656; GFX940-NEXT: ;;#ASMSTART 7657; GFX940-NEXT: ; use s[8:10] 7658; GFX940-NEXT: ;;#ASMEND 7659; GFX940-NEXT: s_setpc_b64 s[30:31] 7660 %vec0 = call <4 x i32> asm "; def $0", "=s"() 7661 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> <i32 0, i32 poison, i32 poison> 7662 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 7663 ret void 7664} 7665 7666define void @s_shuffle_v3i32_v4i32__1_u_u() { 7667; GFX900-LABEL: s_shuffle_v3i32_v4i32__1_u_u: 7668; GFX900: ; %bb.0: 7669; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7670; GFX900-NEXT: ;;#ASMSTART 7671; GFX900-NEXT: ; def s[4:7] 7672; GFX900-NEXT: ;;#ASMEND 7673; GFX900-NEXT: s_mov_b32 s8, s5 7674; GFX900-NEXT: ;;#ASMSTART 7675; GFX900-NEXT: ; use s[8:10] 7676; GFX900-NEXT: ;;#ASMEND 7677; GFX900-NEXT: s_setpc_b64 s[30:31] 7678; 7679; GFX90A-LABEL: s_shuffle_v3i32_v4i32__1_u_u: 7680; GFX90A: ; %bb.0: 7681; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7682; GFX90A-NEXT: ;;#ASMSTART 7683; GFX90A-NEXT: ; def s[4:7] 7684; GFX90A-NEXT: ;;#ASMEND 7685; GFX90A-NEXT: s_mov_b32 s8, s5 7686; GFX90A-NEXT: ;;#ASMSTART 7687; GFX90A-NEXT: ; use s[8:10] 7688; GFX90A-NEXT: ;;#ASMEND 7689; GFX90A-NEXT: s_setpc_b64 s[30:31] 7690; 7691; GFX940-LABEL: s_shuffle_v3i32_v4i32__1_u_u: 7692; GFX940: ; %bb.0: 7693; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7694; GFX940-NEXT: ;;#ASMSTART 7695; GFX940-NEXT: ; def s[0:3] 7696; GFX940-NEXT: ;;#ASMEND 7697; GFX940-NEXT: s_mov_b32 s8, s1 7698; GFX940-NEXT: ;;#ASMSTART 7699; GFX940-NEXT: ; use s[8:10] 7700; GFX940-NEXT: ;;#ASMEND 7701; GFX940-NEXT: s_setpc_b64 s[30:31] 7702 %vec0 = call <4 x i32> asm "; def $0", "=s"() 7703 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> <i32 1, i32 poison, i32 poison> 7704 call void asm sideeffect "; use $0", 
"{s[8:10]}"(<3 x i32> %shuf) 7705 ret void 7706} 7707 7708define void @s_shuffle_v3i32_v4i32__2_u_u() { 7709; GFX900-LABEL: s_shuffle_v3i32_v4i32__2_u_u: 7710; GFX900: ; %bb.0: 7711; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7712; GFX900-NEXT: ;;#ASMSTART 7713; GFX900-NEXT: ; def s[4:7] 7714; GFX900-NEXT: ;;#ASMEND 7715; GFX900-NEXT: s_mov_b32 s8, s6 7716; GFX900-NEXT: ;;#ASMSTART 7717; GFX900-NEXT: ; use s[8:10] 7718; GFX900-NEXT: ;;#ASMEND 7719; GFX900-NEXT: s_setpc_b64 s[30:31] 7720; 7721; GFX90A-LABEL: s_shuffle_v3i32_v4i32__2_u_u: 7722; GFX90A: ; %bb.0: 7723; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7724; GFX90A-NEXT: ;;#ASMSTART 7725; GFX90A-NEXT: ; def s[4:7] 7726; GFX90A-NEXT: ;;#ASMEND 7727; GFX90A-NEXT: s_mov_b32 s8, s6 7728; GFX90A-NEXT: ;;#ASMSTART 7729; GFX90A-NEXT: ; use s[8:10] 7730; GFX90A-NEXT: ;;#ASMEND 7731; GFX90A-NEXT: s_setpc_b64 s[30:31] 7732; 7733; GFX940-LABEL: s_shuffle_v3i32_v4i32__2_u_u: 7734; GFX940: ; %bb.0: 7735; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7736; GFX940-NEXT: ;;#ASMSTART 7737; GFX940-NEXT: ; def s[0:3] 7738; GFX940-NEXT: ;;#ASMEND 7739; GFX940-NEXT: s_mov_b32 s8, s2 7740; GFX940-NEXT: ;;#ASMSTART 7741; GFX940-NEXT: ; use s[8:10] 7742; GFX940-NEXT: ;;#ASMEND 7743; GFX940-NEXT: s_setpc_b64 s[30:31] 7744 %vec0 = call <4 x i32> asm "; def $0", "=s"() 7745 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> <i32 2, i32 poison, i32 poison> 7746 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 7747 ret void 7748} 7749 7750define void @s_shuffle_v3i32_v4i32__3_u_u() { 7751; GFX900-LABEL: s_shuffle_v3i32_v4i32__3_u_u: 7752; GFX900: ; %bb.0: 7753; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7754; GFX900-NEXT: ;;#ASMSTART 7755; GFX900-NEXT: ; def s[4:7] 7756; GFX900-NEXT: ;;#ASMEND 7757; GFX900-NEXT: s_mov_b32 s8, s7 7758; GFX900-NEXT: ;;#ASMSTART 7759; GFX900-NEXT: ; use s[8:10] 7760; GFX900-NEXT: ;;#ASMEND 7761; GFX900-NEXT: s_setpc_b64 s[30:31] 7762; 
7763; GFX90A-LABEL: s_shuffle_v3i32_v4i32__3_u_u: 7764; GFX90A: ; %bb.0: 7765; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7766; GFX90A-NEXT: ;;#ASMSTART 7767; GFX90A-NEXT: ; def s[4:7] 7768; GFX90A-NEXT: ;;#ASMEND 7769; GFX90A-NEXT: s_mov_b32 s8, s7 7770; GFX90A-NEXT: ;;#ASMSTART 7771; GFX90A-NEXT: ; use s[8:10] 7772; GFX90A-NEXT: ;;#ASMEND 7773; GFX90A-NEXT: s_setpc_b64 s[30:31] 7774; 7775; GFX940-LABEL: s_shuffle_v3i32_v4i32__3_u_u: 7776; GFX940: ; %bb.0: 7777; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7778; GFX940-NEXT: ;;#ASMSTART 7779; GFX940-NEXT: ; def s[0:3] 7780; GFX940-NEXT: ;;#ASMEND 7781; GFX940-NEXT: s_mov_b32 s8, s3 7782; GFX940-NEXT: ;;#ASMSTART 7783; GFX940-NEXT: ; use s[8:10] 7784; GFX940-NEXT: ;;#ASMEND 7785; GFX940-NEXT: s_setpc_b64 s[30:31] 7786 %vec0 = call <4 x i32> asm "; def $0", "=s"() 7787 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> <i32 3, i32 poison, i32 poison> 7788 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 7789 ret void 7790} 7791; NOTE(review) - mask index 4 addresses element 0 of the second shufflevector operand, which is poison here (no %vec1 is defined, unlike the __5_u_u, __6_u_u and __7_u_u tests), so the whole result is poison and the expected output is the same as for __u_u_u. If element 0 of a second defined vector was intended, pass %vec1 as the second operand and regenerate the assertions with utils/update_llc_test_checks.py - TODO confirm. 7792define void @s_shuffle_v3i32_v4i32__4_u_u() { 7793; GFX9-LABEL: s_shuffle_v3i32_v4i32__4_u_u: 7794; GFX9: ; %bb.0: 7795; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7796; GFX9-NEXT: ;;#ASMSTART 7797; GFX9-NEXT: ; use s[8:10] 7798; GFX9-NEXT: ;;#ASMEND 7799; GFX9-NEXT: s_setpc_b64 s[30:31] 7800 %vec0 = call <4 x i32> asm "; def $0", "=s"() 7801 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> <i32 4, i32 poison, i32 poison> 7802 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 7803 ret void 7804} 7805 7806define void @s_shuffle_v3i32_v4i32__5_u_u() { 7807; GFX900-LABEL: s_shuffle_v3i32_v4i32__5_u_u: 7808; GFX900: ; %bb.0: 7809; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7810; GFX900-NEXT: ;;#ASMSTART 7811; GFX900-NEXT: ; def s[4:7] 7812; GFX900-NEXT: ;;#ASMEND 7813; GFX900-NEXT: s_mov_b32 s8, s5 7814; GFX900-NEXT: ;;#ASMSTART 7815; GFX900-NEXT: ; use s[8:10] 7816; GFX900-NEXT: ;;#ASMEND
7817; GFX900-NEXT: s_setpc_b64 s[30:31] 7818; 7819; GFX90A-LABEL: s_shuffle_v3i32_v4i32__5_u_u: 7820; GFX90A: ; %bb.0: 7821; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7822; GFX90A-NEXT: ;;#ASMSTART 7823; GFX90A-NEXT: ; def s[4:7] 7824; GFX90A-NEXT: ;;#ASMEND 7825; GFX90A-NEXT: s_mov_b32 s8, s5 7826; GFX90A-NEXT: ;;#ASMSTART 7827; GFX90A-NEXT: ; use s[8:10] 7828; GFX90A-NEXT: ;;#ASMEND 7829; GFX90A-NEXT: s_setpc_b64 s[30:31] 7830; 7831; GFX940-LABEL: s_shuffle_v3i32_v4i32__5_u_u: 7832; GFX940: ; %bb.0: 7833; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7834; GFX940-NEXT: ;;#ASMSTART 7835; GFX940-NEXT: ; def s[0:3] 7836; GFX940-NEXT: ;;#ASMEND 7837; GFX940-NEXT: s_mov_b32 s8, s1 7838; GFX940-NEXT: ;;#ASMSTART 7839; GFX940-NEXT: ; use s[8:10] 7840; GFX940-NEXT: ;;#ASMEND 7841; GFX940-NEXT: s_setpc_b64 s[30:31] 7842 %vec0 = call <4 x i32> asm "; def $0", "=s"() 7843 %vec1 = call <4 x i32> asm "; def $0", "=s"() 7844 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 5, i32 poison, i32 poison> 7845 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 7846 ret void 7847} 7848 7849define void @s_shuffle_v3i32_v4i32__6_u_u() { 7850; GFX900-LABEL: s_shuffle_v3i32_v4i32__6_u_u: 7851; GFX900: ; %bb.0: 7852; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7853; GFX900-NEXT: ;;#ASMSTART 7854; GFX900-NEXT: ; def s[4:7] 7855; GFX900-NEXT: ;;#ASMEND 7856; GFX900-NEXT: s_mov_b32 s8, s6 7857; GFX900-NEXT: ;;#ASMSTART 7858; GFX900-NEXT: ; use s[8:10] 7859; GFX900-NEXT: ;;#ASMEND 7860; GFX900-NEXT: s_setpc_b64 s[30:31] 7861; 7862; GFX90A-LABEL: s_shuffle_v3i32_v4i32__6_u_u: 7863; GFX90A: ; %bb.0: 7864; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7865; GFX90A-NEXT: ;;#ASMSTART 7866; GFX90A-NEXT: ; def s[4:7] 7867; GFX90A-NEXT: ;;#ASMEND 7868; GFX90A-NEXT: s_mov_b32 s8, s6 7869; GFX90A-NEXT: ;;#ASMSTART 7870; GFX90A-NEXT: ; use s[8:10] 7871; GFX90A-NEXT: ;;#ASMEND 7872; GFX90A-NEXT: s_setpc_b64 s[30:31] 7873; 7874; 
GFX940-LABEL: s_shuffle_v3i32_v4i32__6_u_u: 7875; GFX940: ; %bb.0: 7876; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7877; GFX940-NEXT: ;;#ASMSTART 7878; GFX940-NEXT: ; def s[0:3] 7879; GFX940-NEXT: ;;#ASMEND 7880; GFX940-NEXT: s_mov_b32 s8, s2 7881; GFX940-NEXT: ;;#ASMSTART 7882; GFX940-NEXT: ; use s[8:10] 7883; GFX940-NEXT: ;;#ASMEND 7884; GFX940-NEXT: s_setpc_b64 s[30:31] 7885 %vec0 = call <4 x i32> asm "; def $0", "=s"() 7886 %vec1 = call <4 x i32> asm "; def $0", "=s"() 7887 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 6, i32 poison, i32 poison> 7888 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 7889 ret void 7890} 7891 7892define void @s_shuffle_v3i32_v4i32__7_u_u() { 7893; GFX900-LABEL: s_shuffle_v3i32_v4i32__7_u_u: 7894; GFX900: ; %bb.0: 7895; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7896; GFX900-NEXT: ;;#ASMSTART 7897; GFX900-NEXT: ; def s[4:7] 7898; GFX900-NEXT: ;;#ASMEND 7899; GFX900-NEXT: s_mov_b32 s8, s7 7900; GFX900-NEXT: ;;#ASMSTART 7901; GFX900-NEXT: ; use s[8:10] 7902; GFX900-NEXT: ;;#ASMEND 7903; GFX900-NEXT: s_setpc_b64 s[30:31] 7904; 7905; GFX90A-LABEL: s_shuffle_v3i32_v4i32__7_u_u: 7906; GFX90A: ; %bb.0: 7907; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7908; GFX90A-NEXT: ;;#ASMSTART 7909; GFX90A-NEXT: ; def s[4:7] 7910; GFX90A-NEXT: ;;#ASMEND 7911; GFX90A-NEXT: s_mov_b32 s8, s7 7912; GFX90A-NEXT: ;;#ASMSTART 7913; GFX90A-NEXT: ; use s[8:10] 7914; GFX90A-NEXT: ;;#ASMEND 7915; GFX90A-NEXT: s_setpc_b64 s[30:31] 7916; 7917; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_u_u: 7918; GFX940: ; %bb.0: 7919; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7920; GFX940-NEXT: ;;#ASMSTART 7921; GFX940-NEXT: ; def s[0:3] 7922; GFX940-NEXT: ;;#ASMEND 7923; GFX940-NEXT: s_mov_b32 s8, s3 7924; GFX940-NEXT: ;;#ASMSTART 7925; GFX940-NEXT: ; use s[8:10] 7926; GFX940-NEXT: ;;#ASMEND 7927; GFX940-NEXT: s_setpc_b64 s[30:31] 7928 %vec0 = call <4 x i32> asm "; def $0", "=s"() 7929 %vec1 = call 
<4 x i32> asm "; def $0", "=s"() 7930 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 poison, i32 poison> 7931 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 7932 ret void 7933} 7934 7935define void @s_shuffle_v3i32_v4i32__7_0_u() { 7936; GFX900-LABEL: s_shuffle_v3i32_v4i32__7_0_u: 7937; GFX900: ; %bb.0: 7938; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7939; GFX900-NEXT: ;;#ASMSTART 7940; GFX900-NEXT: ; def s[8:11] 7941; GFX900-NEXT: ;;#ASMEND 7942; GFX900-NEXT: ;;#ASMSTART 7943; GFX900-NEXT: ; def s[4:7] 7944; GFX900-NEXT: ;;#ASMEND 7945; GFX900-NEXT: s_mov_b32 s8, s11 7946; GFX900-NEXT: s_mov_b32 s9, s4 7947; GFX900-NEXT: ;;#ASMSTART 7948; GFX900-NEXT: ; use s[8:10] 7949; GFX900-NEXT: ;;#ASMEND 7950; GFX900-NEXT: s_setpc_b64 s[30:31] 7951; 7952; GFX90A-LABEL: s_shuffle_v3i32_v4i32__7_0_u: 7953; GFX90A: ; %bb.0: 7954; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7955; GFX90A-NEXT: ;;#ASMSTART 7956; GFX90A-NEXT: ; def s[8:11] 7957; GFX90A-NEXT: ;;#ASMEND 7958; GFX90A-NEXT: ;;#ASMSTART 7959; GFX90A-NEXT: ; def s[4:7] 7960; GFX90A-NEXT: ;;#ASMEND 7961; GFX90A-NEXT: s_mov_b32 s8, s11 7962; GFX90A-NEXT: s_mov_b32 s9, s4 7963; GFX90A-NEXT: ;;#ASMSTART 7964; GFX90A-NEXT: ; use s[8:10] 7965; GFX90A-NEXT: ;;#ASMEND 7966; GFX90A-NEXT: s_setpc_b64 s[30:31] 7967; 7968; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_0_u: 7969; GFX940: ; %bb.0: 7970; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7971; GFX940-NEXT: ;;#ASMSTART 7972; GFX940-NEXT: ; def s[0:3] 7973; GFX940-NEXT: ;;#ASMEND 7974; GFX940-NEXT: ;;#ASMSTART 7975; GFX940-NEXT: ; def s[4:7] 7976; GFX940-NEXT: ;;#ASMEND 7977; GFX940-NEXT: s_mov_b32 s8, s7 7978; GFX940-NEXT: s_mov_b32 s9, s0 7979; GFX940-NEXT: ;;#ASMSTART 7980; GFX940-NEXT: ; use s[8:10] 7981; GFX940-NEXT: ;;#ASMEND 7982; GFX940-NEXT: s_setpc_b64 s[30:31] 7983 %vec0 = call <4 x i32> asm "; def $0", "=s"() 7984 %vec1 = call <4 x i32> asm "; def $0", "=s"() 7985 %shuf = shufflevector <4 x i32> 
%vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 0, i32 poison> 7986 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 7987 ret void 7988} 7989 7990define void @s_shuffle_v3i32_v4i32__7_1_u() { 7991; GFX900-LABEL: s_shuffle_v3i32_v4i32__7_1_u: 7992; GFX900: ; %bb.0: 7993; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7994; GFX900-NEXT: ;;#ASMSTART 7995; GFX900-NEXT: ; def s[8:11] 7996; GFX900-NEXT: ;;#ASMEND 7997; GFX900-NEXT: ;;#ASMSTART 7998; GFX900-NEXT: ; def s[4:7] 7999; GFX900-NEXT: ;;#ASMEND 8000; GFX900-NEXT: s_mov_b32 s8, s7 8001; GFX900-NEXT: ;;#ASMSTART 8002; GFX900-NEXT: ; use s[8:10] 8003; GFX900-NEXT: ;;#ASMEND 8004; GFX900-NEXT: s_setpc_b64 s[30:31] 8005; 8006; GFX90A-LABEL: s_shuffle_v3i32_v4i32__7_1_u: 8007; GFX90A: ; %bb.0: 8008; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8009; GFX90A-NEXT: ;;#ASMSTART 8010; GFX90A-NEXT: ; def s[8:11] 8011; GFX90A-NEXT: ;;#ASMEND 8012; GFX90A-NEXT: ;;#ASMSTART 8013; GFX90A-NEXT: ; def s[4:7] 8014; GFX90A-NEXT: ;;#ASMEND 8015; GFX90A-NEXT: s_mov_b32 s8, s7 8016; GFX90A-NEXT: ;;#ASMSTART 8017; GFX90A-NEXT: ; use s[8:10] 8018; GFX90A-NEXT: ;;#ASMEND 8019; GFX90A-NEXT: s_setpc_b64 s[30:31] 8020; 8021; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_1_u: 8022; GFX940: ; %bb.0: 8023; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8024; GFX940-NEXT: ;;#ASMSTART 8025; GFX940-NEXT: ; def s[8:11] 8026; GFX940-NEXT: ;;#ASMEND 8027; GFX940-NEXT: ;;#ASMSTART 8028; GFX940-NEXT: ; def s[0:3] 8029; GFX940-NEXT: ;;#ASMEND 8030; GFX940-NEXT: s_mov_b32 s8, s3 8031; GFX940-NEXT: ;;#ASMSTART 8032; GFX940-NEXT: ; use s[8:10] 8033; GFX940-NEXT: ;;#ASMEND 8034; GFX940-NEXT: s_setpc_b64 s[30:31] 8035 %vec0 = call <4 x i32> asm "; def $0", "=s"() 8036 %vec1 = call <4 x i32> asm "; def $0", "=s"() 8037 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 1, i32 poison> 8038 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 8039 ret void 8040} 8041 8042define void 
@s_shuffle_v3i32_v4i32__7_2_u() { 8043; GFX900-LABEL: s_shuffle_v3i32_v4i32__7_2_u: 8044; GFX900: ; %bb.0: 8045; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8046; GFX900-NEXT: ;;#ASMSTART 8047; GFX900-NEXT: ; def s[8:11] 8048; GFX900-NEXT: ;;#ASMEND 8049; GFX900-NEXT: ;;#ASMSTART 8050; GFX900-NEXT: ; def s[4:7] 8051; GFX900-NEXT: ;;#ASMEND 8052; GFX900-NEXT: s_mov_b32 s8, s11 8053; GFX900-NEXT: s_mov_b32 s9, s6 8054; GFX900-NEXT: ;;#ASMSTART 8055; GFX900-NEXT: ; use s[8:10] 8056; GFX900-NEXT: ;;#ASMEND 8057; GFX900-NEXT: s_setpc_b64 s[30:31] 8058; 8059; GFX90A-LABEL: s_shuffle_v3i32_v4i32__7_2_u: 8060; GFX90A: ; %bb.0: 8061; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8062; GFX90A-NEXT: ;;#ASMSTART 8063; GFX90A-NEXT: ; def s[8:11] 8064; GFX90A-NEXT: ;;#ASMEND 8065; GFX90A-NEXT: ;;#ASMSTART 8066; GFX90A-NEXT: ; def s[4:7] 8067; GFX90A-NEXT: ;;#ASMEND 8068; GFX90A-NEXT: s_mov_b32 s8, s11 8069; GFX90A-NEXT: s_mov_b32 s9, s6 8070; GFX90A-NEXT: ;;#ASMSTART 8071; GFX90A-NEXT: ; use s[8:10] 8072; GFX90A-NEXT: ;;#ASMEND 8073; GFX90A-NEXT: s_setpc_b64 s[30:31] 8074; 8075; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_2_u: 8076; GFX940: ; %bb.0: 8077; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8078; GFX940-NEXT: ;;#ASMSTART 8079; GFX940-NEXT: ; def s[0:3] 8080; GFX940-NEXT: ;;#ASMEND 8081; GFX940-NEXT: ;;#ASMSTART 8082; GFX940-NEXT: ; def s[4:7] 8083; GFX940-NEXT: ;;#ASMEND 8084; GFX940-NEXT: s_mov_b32 s8, s7 8085; GFX940-NEXT: s_mov_b32 s9, s2 8086; GFX940-NEXT: ;;#ASMSTART 8087; GFX940-NEXT: ; use s[8:10] 8088; GFX940-NEXT: ;;#ASMEND 8089; GFX940-NEXT: s_setpc_b64 s[30:31] 8090 %vec0 = call <4 x i32> asm "; def $0", "=s"() 8091 %vec1 = call <4 x i32> asm "; def $0", "=s"() 8092 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 2, i32 poison> 8093 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 8094 ret void 8095} 8096 8097define void @s_shuffle_v3i32_v4i32__7_3_u() { 8098; GFX900-LABEL: 
s_shuffle_v3i32_v4i32__7_3_u: 8099; GFX900: ; %bb.0: 8100; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8101; GFX900-NEXT: ;;#ASMSTART 8102; GFX900-NEXT: ; def s[8:11] 8103; GFX900-NEXT: ;;#ASMEND 8104; GFX900-NEXT: ;;#ASMSTART 8105; GFX900-NEXT: ; def s[4:7] 8106; GFX900-NEXT: ;;#ASMEND 8107; GFX900-NEXT: s_mov_b32 s8, s11 8108; GFX900-NEXT: s_mov_b32 s9, s7 8109; GFX900-NEXT: ;;#ASMSTART 8110; GFX900-NEXT: ; use s[8:10] 8111; GFX900-NEXT: ;;#ASMEND 8112; GFX900-NEXT: s_setpc_b64 s[30:31] 8113; 8114; GFX90A-LABEL: s_shuffle_v3i32_v4i32__7_3_u: 8115; GFX90A: ; %bb.0: 8116; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8117; GFX90A-NEXT: ;;#ASMSTART 8118; GFX90A-NEXT: ; def s[8:11] 8119; GFX90A-NEXT: ;;#ASMEND 8120; GFX90A-NEXT: ;;#ASMSTART 8121; GFX90A-NEXT: ; def s[4:7] 8122; GFX90A-NEXT: ;;#ASMEND 8123; GFX90A-NEXT: s_mov_b32 s8, s11 8124; GFX90A-NEXT: s_mov_b32 s9, s7 8125; GFX90A-NEXT: ;;#ASMSTART 8126; GFX90A-NEXT: ; use s[8:10] 8127; GFX90A-NEXT: ;;#ASMEND 8128; GFX90A-NEXT: s_setpc_b64 s[30:31] 8129; 8130; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_3_u: 8131; GFX940: ; %bb.0: 8132; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8133; GFX940-NEXT: ;;#ASMSTART 8134; GFX940-NEXT: ; def s[0:3] 8135; GFX940-NEXT: ;;#ASMEND 8136; GFX940-NEXT: ;;#ASMSTART 8137; GFX940-NEXT: ; def s[4:7] 8138; GFX940-NEXT: ;;#ASMEND 8139; GFX940-NEXT: s_mov_b32 s8, s7 8140; GFX940-NEXT: s_mov_b32 s9, s3 8141; GFX940-NEXT: ;;#ASMSTART 8142; GFX940-NEXT: ; use s[8:10] 8143; GFX940-NEXT: ;;#ASMEND 8144; GFX940-NEXT: s_setpc_b64 s[30:31] 8145 %vec0 = call <4 x i32> asm "; def $0", "=s"() 8146 %vec1 = call <4 x i32> asm "; def $0", "=s"() 8147 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 3, i32 poison> 8148 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 8149 ret void 8150} 8151 8152define void @s_shuffle_v3i32_v4i32__7_4_u() { 8153; GFX900-LABEL: s_shuffle_v3i32_v4i32__7_4_u: 8154; GFX900: ; %bb.0: 8155; GFX900-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8156; GFX900-NEXT: ;;#ASMSTART 8157; GFX900-NEXT: ; def s[4:7] 8158; GFX900-NEXT: ;;#ASMEND 8159; GFX900-NEXT: s_mov_b32 s8, s7 8160; GFX900-NEXT: s_mov_b32 s9, s4 8161; GFX900-NEXT: ;;#ASMSTART 8162; GFX900-NEXT: ; use s[8:10] 8163; GFX900-NEXT: ;;#ASMEND 8164; GFX900-NEXT: s_setpc_b64 s[30:31] 8165; 8166; GFX90A-LABEL: s_shuffle_v3i32_v4i32__7_4_u: 8167; GFX90A: ; %bb.0: 8168; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8169; GFX90A-NEXT: ;;#ASMSTART 8170; GFX90A-NEXT: ; def s[4:7] 8171; GFX90A-NEXT: ;;#ASMEND 8172; GFX90A-NEXT: s_mov_b32 s8, s7 8173; GFX90A-NEXT: s_mov_b32 s9, s4 8174; GFX90A-NEXT: ;;#ASMSTART 8175; GFX90A-NEXT: ; use s[8:10] 8176; GFX90A-NEXT: ;;#ASMEND 8177; GFX90A-NEXT: s_setpc_b64 s[30:31] 8178; 8179; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_4_u: 8180; GFX940: ; %bb.0: 8181; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8182; GFX940-NEXT: ;;#ASMSTART 8183; GFX940-NEXT: ; def s[0:3] 8184; GFX940-NEXT: ;;#ASMEND 8185; GFX940-NEXT: s_mov_b32 s8, s3 8186; GFX940-NEXT: s_mov_b32 s9, s0 8187; GFX940-NEXT: ;;#ASMSTART 8188; GFX940-NEXT: ; use s[8:10] 8189; GFX940-NEXT: ;;#ASMEND 8190; GFX940-NEXT: s_setpc_b64 s[30:31] 8191 %vec0 = call <4 x i32> asm "; def $0", "=s"() 8192 %vec1 = call <4 x i32> asm "; def $0", "=s"() 8193 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 4, i32 poison> 8194 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 8195 ret void 8196} 8197 8198define void @s_shuffle_v3i32_v4i32__7_5_u() { 8199; GFX9-LABEL: s_shuffle_v3i32_v4i32__7_5_u: 8200; GFX9: ; %bb.0: 8201; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8202; GFX9-NEXT: ;;#ASMSTART 8203; GFX9-NEXT: ; def s[8:11] 8204; GFX9-NEXT: ;;#ASMEND 8205; GFX9-NEXT: s_mov_b32 s8, s11 8206; GFX9-NEXT: ;;#ASMSTART 8207; GFX9-NEXT: ; use s[8:10] 8208; GFX9-NEXT: ;;#ASMEND 8209; GFX9-NEXT: s_setpc_b64 s[30:31] 8210 %vec0 = call <4 x i32> asm "; def $0", "=s"() 8211 %vec1 = call <4 x 
i32> asm "; def $0", "=s"() 8212 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 5, i32 poison> 8213 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 8214 ret void 8215} 8216 8217define void @s_shuffle_v3i32_v4i32__7_6_u() { 8218; GFX900-LABEL: s_shuffle_v3i32_v4i32__7_6_u: 8219; GFX900: ; %bb.0: 8220; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8221; GFX900-NEXT: ;;#ASMSTART 8222; GFX900-NEXT: ; def s[4:7] 8223; GFX900-NEXT: ;;#ASMEND 8224; GFX900-NEXT: s_mov_b32 s8, s7 8225; GFX900-NEXT: s_mov_b32 s9, s6 8226; GFX900-NEXT: ;;#ASMSTART 8227; GFX900-NEXT: ; use s[8:10] 8228; GFX900-NEXT: ;;#ASMEND 8229; GFX900-NEXT: s_setpc_b64 s[30:31] 8230; 8231; GFX90A-LABEL: s_shuffle_v3i32_v4i32__7_6_u: 8232; GFX90A: ; %bb.0: 8233; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8234; GFX90A-NEXT: ;;#ASMSTART 8235; GFX90A-NEXT: ; def s[4:7] 8236; GFX90A-NEXT: ;;#ASMEND 8237; GFX90A-NEXT: s_mov_b32 s8, s7 8238; GFX90A-NEXT: s_mov_b32 s9, s6 8239; GFX90A-NEXT: ;;#ASMSTART 8240; GFX90A-NEXT: ; use s[8:10] 8241; GFX90A-NEXT: ;;#ASMEND 8242; GFX90A-NEXT: s_setpc_b64 s[30:31] 8243; 8244; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_6_u: 8245; GFX940: ; %bb.0: 8246; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8247; GFX940-NEXT: ;;#ASMSTART 8248; GFX940-NEXT: ; def s[0:3] 8249; GFX940-NEXT: ;;#ASMEND 8250; GFX940-NEXT: s_mov_b32 s8, s3 8251; GFX940-NEXT: s_mov_b32 s9, s2 8252; GFX940-NEXT: ;;#ASMSTART 8253; GFX940-NEXT: ; use s[8:10] 8254; GFX940-NEXT: ;;#ASMEND 8255; GFX940-NEXT: s_setpc_b64 s[30:31] 8256 %vec0 = call <4 x i32> asm "; def $0", "=s"() 8257 %vec1 = call <4 x i32> asm "; def $0", "=s"() 8258 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 6, i32 poison> 8259 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 8260 ret void 8261} 8262 8263define void @s_shuffle_v3i32_v4i32__7_7_u() { 8264; GFX900-LABEL: s_shuffle_v3i32_v4i32__7_7_u: 8265; GFX900: ; %bb.0: 8266; 
GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8267; GFX900-NEXT: ;;#ASMSTART 8268; GFX900-NEXT: ; def s[4:7] 8269; GFX900-NEXT: ;;#ASMEND 8270; GFX900-NEXT: s_mov_b32 s8, s7 8271; GFX900-NEXT: s_mov_b32 s9, s7 8272; GFX900-NEXT: ;;#ASMSTART 8273; GFX900-NEXT: ; use s[8:10] 8274; GFX900-NEXT: ;;#ASMEND 8275; GFX900-NEXT: s_setpc_b64 s[30:31] 8276; 8277; GFX90A-LABEL: s_shuffle_v3i32_v4i32__7_7_u: 8278; GFX90A: ; %bb.0: 8279; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8280; GFX90A-NEXT: ;;#ASMSTART 8281; GFX90A-NEXT: ; def s[4:7] 8282; GFX90A-NEXT: ;;#ASMEND 8283; GFX90A-NEXT: s_mov_b32 s8, s7 8284; GFX90A-NEXT: s_mov_b32 s9, s7 8285; GFX90A-NEXT: ;;#ASMSTART 8286; GFX90A-NEXT: ; use s[8:10] 8287; GFX90A-NEXT: ;;#ASMEND 8288; GFX90A-NEXT: s_setpc_b64 s[30:31] 8289; 8290; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_7_u: 8291; GFX940: ; %bb.0: 8292; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8293; GFX940-NEXT: ;;#ASMSTART 8294; GFX940-NEXT: ; def s[0:3] 8295; GFX940-NEXT: ;;#ASMEND 8296; GFX940-NEXT: s_mov_b32 s8, s3 8297; GFX940-NEXT: s_mov_b32 s9, s3 8298; GFX940-NEXT: ;;#ASMSTART 8299; GFX940-NEXT: ; use s[8:10] 8300; GFX940-NEXT: ;;#ASMEND 8301; GFX940-NEXT: s_setpc_b64 s[30:31] 8302 %vec0 = call <4 x i32> asm "; def $0", "=s"() 8303 %vec1 = call <4 x i32> asm "; def $0", "=s"() 8304 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 7, i32 poison> 8305 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 8306 ret void 8307} 8308 8309define void @s_shuffle_v3i32_v4i32__7_7_0() { 8310; GFX900-LABEL: s_shuffle_v3i32_v4i32__7_7_0: 8311; GFX900: ; %bb.0: 8312; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8313; GFX900-NEXT: ;;#ASMSTART 8314; GFX900-NEXT: ; def s[8:11] 8315; GFX900-NEXT: ;;#ASMEND 8316; GFX900-NEXT: ;;#ASMSTART 8317; GFX900-NEXT: ; def s[4:7] 8318; GFX900-NEXT: ;;#ASMEND 8319; GFX900-NEXT: s_mov_b32 s8, s11 8320; GFX900-NEXT: s_mov_b32 s9, s11 8321; GFX900-NEXT: s_mov_b32 s10, s4 8322; 
GFX900-NEXT: ;;#ASMSTART 8323; GFX900-NEXT: ; use s[8:10] 8324; GFX900-NEXT: ;;#ASMEND 8325; GFX900-NEXT: s_setpc_b64 s[30:31] 8326; 8327; GFX90A-LABEL: s_shuffle_v3i32_v4i32__7_7_0: 8328; GFX90A: ; %bb.0: 8329; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8330; GFX90A-NEXT: ;;#ASMSTART 8331; GFX90A-NEXT: ; def s[8:11] 8332; GFX90A-NEXT: ;;#ASMEND 8333; GFX90A-NEXT: ;;#ASMSTART 8334; GFX90A-NEXT: ; def s[4:7] 8335; GFX90A-NEXT: ;;#ASMEND 8336; GFX90A-NEXT: s_mov_b32 s8, s11 8337; GFX90A-NEXT: s_mov_b32 s9, s11 8338; GFX90A-NEXT: s_mov_b32 s10, s4 8339; GFX90A-NEXT: ;;#ASMSTART 8340; GFX90A-NEXT: ; use s[8:10] 8341; GFX90A-NEXT: ;;#ASMEND 8342; GFX90A-NEXT: s_setpc_b64 s[30:31] 8343; 8344; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_7_0: 8345; GFX940: ; %bb.0: 8346; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8347; GFX940-NEXT: ;;#ASMSTART 8348; GFX940-NEXT: ; def s[0:3] 8349; GFX940-NEXT: ;;#ASMEND 8350; GFX940-NEXT: ;;#ASMSTART 8351; GFX940-NEXT: ; def s[4:7] 8352; GFX940-NEXT: ;;#ASMEND 8353; GFX940-NEXT: s_mov_b32 s8, s7 8354; GFX940-NEXT: s_mov_b32 s9, s7 8355; GFX940-NEXT: s_mov_b32 s10, s0 8356; GFX940-NEXT: ;;#ASMSTART 8357; GFX940-NEXT: ; use s[8:10] 8358; GFX940-NEXT: ;;#ASMEND 8359; GFX940-NEXT: s_setpc_b64 s[30:31] 8360 %vec0 = call <4 x i32> asm "; def $0", "=s"() 8361 %vec1 = call <4 x i32> asm "; def $0", "=s"() 8362 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 7, i32 0> 8363 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 8364 ret void 8365} 8366 8367define void @s_shuffle_v3i32_v4i32__7_7_1() { 8368; GFX900-LABEL: s_shuffle_v3i32_v4i32__7_7_1: 8369; GFX900: ; %bb.0: 8370; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8371; GFX900-NEXT: ;;#ASMSTART 8372; GFX900-NEXT: ; def s[8:11] 8373; GFX900-NEXT: ;;#ASMEND 8374; GFX900-NEXT: ;;#ASMSTART 8375; GFX900-NEXT: ; def s[4:7] 8376; GFX900-NEXT: ;;#ASMEND 8377; GFX900-NEXT: s_mov_b32 s8, s11 8378; GFX900-NEXT: s_mov_b32 s9, s11 8379; 
GFX900-NEXT: s_mov_b32 s10, s5 8380; GFX900-NEXT: ;;#ASMSTART 8381; GFX900-NEXT: ; use s[8:10] 8382; GFX900-NEXT: ;;#ASMEND 8383; GFX900-NEXT: s_setpc_b64 s[30:31] 8384; 8385; GFX90A-LABEL: s_shuffle_v3i32_v4i32__7_7_1: 8386; GFX90A: ; %bb.0: 8387; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8388; GFX90A-NEXT: ;;#ASMSTART 8389; GFX90A-NEXT: ; def s[8:11] 8390; GFX90A-NEXT: ;;#ASMEND 8391; GFX90A-NEXT: ;;#ASMSTART 8392; GFX90A-NEXT: ; def s[4:7] 8393; GFX90A-NEXT: ;;#ASMEND 8394; GFX90A-NEXT: s_mov_b32 s8, s11 8395; GFX90A-NEXT: s_mov_b32 s9, s11 8396; GFX90A-NEXT: s_mov_b32 s10, s5 8397; GFX90A-NEXT: ;;#ASMSTART 8398; GFX90A-NEXT: ; use s[8:10] 8399; GFX90A-NEXT: ;;#ASMEND 8400; GFX90A-NEXT: s_setpc_b64 s[30:31] 8401; 8402; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_7_1: 8403; GFX940: ; %bb.0: 8404; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8405; GFX940-NEXT: ;;#ASMSTART 8406; GFX940-NEXT: ; def s[0:3] 8407; GFX940-NEXT: ;;#ASMEND 8408; GFX940-NEXT: ;;#ASMSTART 8409; GFX940-NEXT: ; def s[4:7] 8410; GFX940-NEXT: ;;#ASMEND 8411; GFX940-NEXT: s_mov_b32 s8, s7 8412; GFX940-NEXT: s_mov_b32 s9, s7 8413; GFX940-NEXT: s_mov_b32 s10, s1 8414; GFX940-NEXT: ;;#ASMSTART 8415; GFX940-NEXT: ; use s[8:10] 8416; GFX940-NEXT: ;;#ASMEND 8417; GFX940-NEXT: s_setpc_b64 s[30:31] 8418 %vec0 = call <4 x i32> asm "; def $0", "=s"() 8419 %vec1 = call <4 x i32> asm "; def $0", "=s"() 8420 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 7, i32 1> 8421 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 8422 ret void 8423} 8424 8425define void @s_shuffle_v3i32_v4i32__7_7_2() { 8426; GFX900-LABEL: s_shuffle_v3i32_v4i32__7_7_2: 8427; GFX900: ; %bb.0: 8428; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8429; GFX900-NEXT: ;;#ASMSTART 8430; GFX900-NEXT: ; def s[8:11] 8431; GFX900-NEXT: ;;#ASMEND 8432; GFX900-NEXT: ;;#ASMSTART 8433; GFX900-NEXT: ; def s[4:7] 8434; GFX900-NEXT: ;;#ASMEND 8435; GFX900-NEXT: s_mov_b32 s8, s7 8436; 
GFX900-NEXT: s_mov_b32 s9, s7 8437; GFX900-NEXT: ;;#ASMSTART 8438; GFX900-NEXT: ; use s[8:10] 8439; GFX900-NEXT: ;;#ASMEND 8440; GFX900-NEXT: s_setpc_b64 s[30:31] 8441; 8442; GFX90A-LABEL: s_shuffle_v3i32_v4i32__7_7_2: 8443; GFX90A: ; %bb.0: 8444; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8445; GFX90A-NEXT: ;;#ASMSTART 8446; GFX90A-NEXT: ; def s[8:11] 8447; GFX90A-NEXT: ;;#ASMEND 8448; GFX90A-NEXT: ;;#ASMSTART 8449; GFX90A-NEXT: ; def s[4:7] 8450; GFX90A-NEXT: ;;#ASMEND 8451; GFX90A-NEXT: s_mov_b32 s8, s7 8452; GFX90A-NEXT: s_mov_b32 s9, s7 8453; GFX90A-NEXT: ;;#ASMSTART 8454; GFX90A-NEXT: ; use s[8:10] 8455; GFX90A-NEXT: ;;#ASMEND 8456; GFX90A-NEXT: s_setpc_b64 s[30:31] 8457; 8458; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_7_2: 8459; GFX940: ; %bb.0: 8460; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8461; GFX940-NEXT: ;;#ASMSTART 8462; GFX940-NEXT: ; def s[8:11] 8463; GFX940-NEXT: ;;#ASMEND 8464; GFX940-NEXT: ;;#ASMSTART 8465; GFX940-NEXT: ; def s[0:3] 8466; GFX940-NEXT: ;;#ASMEND 8467; GFX940-NEXT: s_mov_b32 s8, s3 8468; GFX940-NEXT: s_mov_b32 s9, s3 8469; GFX940-NEXT: ;;#ASMSTART 8470; GFX940-NEXT: ; use s[8:10] 8471; GFX940-NEXT: ;;#ASMEND 8472; GFX940-NEXT: s_setpc_b64 s[30:31] 8473 %vec0 = call <4 x i32> asm "; def $0", "=s"() 8474 %vec1 = call <4 x i32> asm "; def $0", "=s"() 8475 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 7, i32 2> 8476 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 8477 ret void 8478} 8479 8480define void @s_shuffle_v3i32_v4i32__7_7_3() { 8481; GFX900-LABEL: s_shuffle_v3i32_v4i32__7_7_3: 8482; GFX900: ; %bb.0: 8483; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8484; GFX900-NEXT: ;;#ASMSTART 8485; GFX900-NEXT: ; def s[8:11] 8486; GFX900-NEXT: ;;#ASMEND 8487; GFX900-NEXT: ;;#ASMSTART 8488; GFX900-NEXT: ; def s[4:7] 8489; GFX900-NEXT: ;;#ASMEND 8490; GFX900-NEXT: s_mov_b32 s8, s11 8491; GFX900-NEXT: s_mov_b32 s9, s11 8492; GFX900-NEXT: s_mov_b32 s10, s7 8493; 
GFX900-NEXT: ;;#ASMSTART 8494; GFX900-NEXT: ; use s[8:10] 8495; GFX900-NEXT: ;;#ASMEND 8496; GFX900-NEXT: s_setpc_b64 s[30:31] 8497; 8498; GFX90A-LABEL: s_shuffle_v3i32_v4i32__7_7_3: 8499; GFX90A: ; %bb.0: 8500; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8501; GFX90A-NEXT: ;;#ASMSTART 8502; GFX90A-NEXT: ; def s[8:11] 8503; GFX90A-NEXT: ;;#ASMEND 8504; GFX90A-NEXT: ;;#ASMSTART 8505; GFX90A-NEXT: ; def s[4:7] 8506; GFX90A-NEXT: ;;#ASMEND 8507; GFX90A-NEXT: s_mov_b32 s8, s11 8508; GFX90A-NEXT: s_mov_b32 s9, s11 8509; GFX90A-NEXT: s_mov_b32 s10, s7 8510; GFX90A-NEXT: ;;#ASMSTART 8511; GFX90A-NEXT: ; use s[8:10] 8512; GFX90A-NEXT: ;;#ASMEND 8513; GFX90A-NEXT: s_setpc_b64 s[30:31] 8514; 8515; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_7_3: 8516; GFX940: ; %bb.0: 8517; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8518; GFX940-NEXT: ;;#ASMSTART 8519; GFX940-NEXT: ; def s[0:3] 8520; GFX940-NEXT: ;;#ASMEND 8521; GFX940-NEXT: ;;#ASMSTART 8522; GFX940-NEXT: ; def s[4:7] 8523; GFX940-NEXT: ;;#ASMEND 8524; GFX940-NEXT: s_mov_b32 s8, s7 8525; GFX940-NEXT: s_mov_b32 s9, s7 8526; GFX940-NEXT: s_mov_b32 s10, s3 8527; GFX940-NEXT: ;;#ASMSTART 8528; GFX940-NEXT: ; use s[8:10] 8529; GFX940-NEXT: ;;#ASMEND 8530; GFX940-NEXT: s_setpc_b64 s[30:31] 8531 %vec0 = call <4 x i32> asm "; def $0", "=s"() 8532 %vec1 = call <4 x i32> asm "; def $0", "=s"() 8533 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 7, i32 3> 8534 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 8535 ret void 8536} 8537 8538define void @s_shuffle_v3i32_v4i32__7_7_4() { 8539; GFX900-LABEL: s_shuffle_v3i32_v4i32__7_7_4: 8540; GFX900: ; %bb.0: 8541; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8542; GFX900-NEXT: ;;#ASMSTART 8543; GFX900-NEXT: ; def s[4:7] 8544; GFX900-NEXT: ;;#ASMEND 8545; GFX900-NEXT: s_mov_b32 s8, s7 8546; GFX900-NEXT: s_mov_b32 s9, s7 8547; GFX900-NEXT: s_mov_b32 s10, s4 8548; GFX900-NEXT: ;;#ASMSTART 8549; GFX900-NEXT: ; use s[8:10] 
8550; GFX900-NEXT: ;;#ASMEND 8551; GFX900-NEXT: s_setpc_b64 s[30:31] 8552; 8553; GFX90A-LABEL: s_shuffle_v3i32_v4i32__7_7_4: 8554; GFX90A: ; %bb.0: 8555; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8556; GFX90A-NEXT: ;;#ASMSTART 8557; GFX90A-NEXT: ; def s[4:7] 8558; GFX90A-NEXT: ;;#ASMEND 8559; GFX90A-NEXT: s_mov_b32 s8, s7 8560; GFX90A-NEXT: s_mov_b32 s9, s7 8561; GFX90A-NEXT: s_mov_b32 s10, s4 8562; GFX90A-NEXT: ;;#ASMSTART 8563; GFX90A-NEXT: ; use s[8:10] 8564; GFX90A-NEXT: ;;#ASMEND 8565; GFX90A-NEXT: s_setpc_b64 s[30:31] 8566; 8567; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_7_4: 8568; GFX940: ; %bb.0: 8569; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8570; GFX940-NEXT: ;;#ASMSTART 8571; GFX940-NEXT: ; def s[0:3] 8572; GFX940-NEXT: ;;#ASMEND 8573; GFX940-NEXT: s_mov_b32 s8, s3 8574; GFX940-NEXT: s_mov_b32 s9, s3 8575; GFX940-NEXT: s_mov_b32 s10, s0 8576; GFX940-NEXT: ;;#ASMSTART 8577; GFX940-NEXT: ; use s[8:10] 8578; GFX940-NEXT: ;;#ASMEND 8579; GFX940-NEXT: s_setpc_b64 s[30:31] 8580 %vec0 = call <4 x i32> asm "; def $0", "=s"() 8581 %vec1 = call <4 x i32> asm "; def $0", "=s"() 8582 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 7, i32 4> 8583 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 8584 ret void 8585} 8586 8587define void @s_shuffle_v3i32_v4i32__7_7_5() { 8588; GFX900-LABEL: s_shuffle_v3i32_v4i32__7_7_5: 8589; GFX900: ; %bb.0: 8590; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8591; GFX900-NEXT: ;;#ASMSTART 8592; GFX900-NEXT: ; def s[4:7] 8593; GFX900-NEXT: ;;#ASMEND 8594; GFX900-NEXT: s_mov_b32 s8, s7 8595; GFX900-NEXT: s_mov_b32 s9, s7 8596; GFX900-NEXT: s_mov_b32 s10, s5 8597; GFX900-NEXT: ;;#ASMSTART 8598; GFX900-NEXT: ; use s[8:10] 8599; GFX900-NEXT: ;;#ASMEND 8600; GFX900-NEXT: s_setpc_b64 s[30:31] 8601; 8602; GFX90A-LABEL: s_shuffle_v3i32_v4i32__7_7_5: 8603; GFX90A: ; %bb.0: 8604; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8605; GFX90A-NEXT: ;;#ASMSTART 8606; 
GFX90A-NEXT: ; def s[4:7] 8607; GFX90A-NEXT: ;;#ASMEND 8608; GFX90A-NEXT: s_mov_b32 s8, s7 8609; GFX90A-NEXT: s_mov_b32 s9, s7 8610; GFX90A-NEXT: s_mov_b32 s10, s5 8611; GFX90A-NEXT: ;;#ASMSTART 8612; GFX90A-NEXT: ; use s[8:10] 8613; GFX90A-NEXT: ;;#ASMEND 8614; GFX90A-NEXT: s_setpc_b64 s[30:31] 8615; 8616; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_7_5: 8617; GFX940: ; %bb.0: 8618; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8619; GFX940-NEXT: ;;#ASMSTART 8620; GFX940-NEXT: ; def s[0:3] 8621; GFX940-NEXT: ;;#ASMEND 8622; GFX940-NEXT: s_mov_b32 s8, s3 8623; GFX940-NEXT: s_mov_b32 s9, s3 8624; GFX940-NEXT: s_mov_b32 s10, s1 8625; GFX940-NEXT: ;;#ASMSTART 8626; GFX940-NEXT: ; use s[8:10] 8627; GFX940-NEXT: ;;#ASMEND 8628; GFX940-NEXT: s_setpc_b64 s[30:31] 8629 %vec0 = call <4 x i32> asm "; def $0", "=s"() 8630 %vec1 = call <4 x i32> asm "; def $0", "=s"() 8631 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 7, i32 5> 8632 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 8633 ret void 8634} 8635 8636define void @s_shuffle_v3i32_v4i32__7_7_6() { 8637; GFX9-LABEL: s_shuffle_v3i32_v4i32__7_7_6: 8638; GFX9: ; %bb.0: 8639; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8640; GFX9-NEXT: ;;#ASMSTART 8641; GFX9-NEXT: ; def s[8:11] 8642; GFX9-NEXT: ;;#ASMEND 8643; GFX9-NEXT: s_mov_b32 s8, s11 8644; GFX9-NEXT: s_mov_b32 s9, s11 8645; GFX9-NEXT: ;;#ASMSTART 8646; GFX9-NEXT: ; use s[8:10] 8647; GFX9-NEXT: ;;#ASMEND 8648; GFX9-NEXT: s_setpc_b64 s[30:31] 8649 %vec0 = call <4 x i32> asm "; def $0", "=s"() 8650 %vec1 = call <4 x i32> asm "; def $0", "=s"() 8651 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 7, i32 6> 8652 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 8653 ret void 8654} 8655 8656define void @s_shuffle_v3i32_v4i32__7_7_7() { 8657; GFX900-LABEL: s_shuffle_v3i32_v4i32__7_7_7: 8658; GFX900: ; %bb.0: 8659; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8660; 
GFX900-NEXT: ;;#ASMSTART 8661; GFX900-NEXT: ; def s[4:7] 8662; GFX900-NEXT: ;;#ASMEND 8663; GFX900-NEXT: s_mov_b32 s8, s7 8664; GFX900-NEXT: s_mov_b32 s9, s7 8665; GFX900-NEXT: s_mov_b32 s10, s7 8666; GFX900-NEXT: ;;#ASMSTART 8667; GFX900-NEXT: ; use s[8:10] 8668; GFX900-NEXT: ;;#ASMEND 8669; GFX900-NEXT: s_setpc_b64 s[30:31] 8670; 8671; GFX90A-LABEL: s_shuffle_v3i32_v4i32__7_7_7: 8672; GFX90A: ; %bb.0: 8673; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8674; GFX90A-NEXT: ;;#ASMSTART 8675; GFX90A-NEXT: ; def s[4:7] 8676; GFX90A-NEXT: ;;#ASMEND 8677; GFX90A-NEXT: s_mov_b32 s8, s7 8678; GFX90A-NEXT: s_mov_b32 s9, s7 8679; GFX90A-NEXT: s_mov_b32 s10, s7 8680; GFX90A-NEXT: ;;#ASMSTART 8681; GFX90A-NEXT: ; use s[8:10] 8682; GFX90A-NEXT: ;;#ASMEND 8683; GFX90A-NEXT: s_setpc_b64 s[30:31] 8684; 8685; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_7_7: 8686; GFX940: ; %bb.0: 8687; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8688; GFX940-NEXT: ;;#ASMSTART 8689; GFX940-NEXT: ; def s[0:3] 8690; GFX940-NEXT: ;;#ASMEND 8691; GFX940-NEXT: s_mov_b32 s8, s3 8692; GFX940-NEXT: s_mov_b32 s9, s3 8693; GFX940-NEXT: s_mov_b32 s10, s3 8694; GFX940-NEXT: ;;#ASMSTART 8695; GFX940-NEXT: ; use s[8:10] 8696; GFX940-NEXT: ;;#ASMEND 8697; GFX940-NEXT: s_setpc_b64 s[30:31] 8698 %vec0 = call <4 x i32> asm "; def $0", "=s"() 8699 %vec1 = call <4 x i32> asm "; def $0", "=s"() 8700 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 7, i32 7> 8701 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 8702 ret void 8703} 8704 8705define void @s_shuffle_v3i32_v4i32__u_0_0() { 8706; GFX900-LABEL: s_shuffle_v3i32_v4i32__u_0_0: 8707; GFX900: ; %bb.0: 8708; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8709; GFX900-NEXT: ;;#ASMSTART 8710; GFX900-NEXT: ; def s[4:7] 8711; GFX900-NEXT: ;;#ASMEND 8712; GFX900-NEXT: s_mov_b32 s9, s4 8713; GFX900-NEXT: s_mov_b32 s10, s4 8714; GFX900-NEXT: ;;#ASMSTART 8715; GFX900-NEXT: ; use s[8:10] 8716; GFX900-NEXT: 
;;#ASMEND 8717; GFX900-NEXT: s_setpc_b64 s[30:31] 8718; 8719; GFX90A-LABEL: s_shuffle_v3i32_v4i32__u_0_0: 8720; GFX90A: ; %bb.0: 8721; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8722; GFX90A-NEXT: ;;#ASMSTART 8723; GFX90A-NEXT: ; def s[4:7] 8724; GFX90A-NEXT: ;;#ASMEND 8725; GFX90A-NEXT: s_mov_b32 s9, s4 8726; GFX90A-NEXT: s_mov_b32 s10, s4 8727; GFX90A-NEXT: ;;#ASMSTART 8728; GFX90A-NEXT: ; use s[8:10] 8729; GFX90A-NEXT: ;;#ASMEND 8730; GFX90A-NEXT: s_setpc_b64 s[30:31] 8731; 8732; GFX940-LABEL: s_shuffle_v3i32_v4i32__u_0_0: 8733; GFX940: ; %bb.0: 8734; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8735; GFX940-NEXT: ;;#ASMSTART 8736; GFX940-NEXT: ; def s[0:3] 8737; GFX940-NEXT: ;;#ASMEND 8738; GFX940-NEXT: s_mov_b32 s9, s0 8739; GFX940-NEXT: s_mov_b32 s10, s0 8740; GFX940-NEXT: ;;#ASMSTART 8741; GFX940-NEXT: ; use s[8:10] 8742; GFX940-NEXT: ;;#ASMEND 8743; GFX940-NEXT: s_setpc_b64 s[30:31] 8744 %vec0 = call <4 x i32> asm "; def $0", "=s"() 8745 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> <i32 poison, i32 0, i32 0> 8746 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 8747 ret void 8748} 8749 8750define void @s_shuffle_v3i32_v4i32__0_0_0() { 8751; GFX9-LABEL: s_shuffle_v3i32_v4i32__0_0_0: 8752; GFX9: ; %bb.0: 8753; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8754; GFX9-NEXT: ;;#ASMSTART 8755; GFX9-NEXT: ; def s[8:11] 8756; GFX9-NEXT: ;;#ASMEND 8757; GFX9-NEXT: s_mov_b32 s9, s8 8758; GFX9-NEXT: s_mov_b32 s10, s8 8759; GFX9-NEXT: ;;#ASMSTART 8760; GFX9-NEXT: ; use s[8:10] 8761; GFX9-NEXT: ;;#ASMEND 8762; GFX9-NEXT: s_setpc_b64 s[30:31] 8763 %vec0 = call <4 x i32> asm "; def $0", "=s"() 8764 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> zeroinitializer 8765 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 8766 ret void 8767} 8768 8769define void @s_shuffle_v3i32_v4i32__1_0_0() { 8770; GFX900-LABEL: s_shuffle_v3i32_v4i32__1_0_0: 8771; GFX900: ; %bb.0: 8772; 
GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8773; GFX900-NEXT: ;;#ASMSTART 8774; GFX900-NEXT: ; def s[4:7] 8775; GFX900-NEXT: ;;#ASMEND 8776; GFX900-NEXT: s_mov_b32 s8, s5 8777; GFX900-NEXT: s_mov_b32 s9, s4 8778; GFX900-NEXT: s_mov_b32 s10, s4 8779; GFX900-NEXT: ;;#ASMSTART 8780; GFX900-NEXT: ; use s[8:10] 8781; GFX900-NEXT: ;;#ASMEND 8782; GFX900-NEXT: s_setpc_b64 s[30:31] 8783; 8784; GFX90A-LABEL: s_shuffle_v3i32_v4i32__1_0_0: 8785; GFX90A: ; %bb.0: 8786; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8787; GFX90A-NEXT: ;;#ASMSTART 8788; GFX90A-NEXT: ; def s[4:7] 8789; GFX90A-NEXT: ;;#ASMEND 8790; GFX90A-NEXT: s_mov_b32 s8, s5 8791; GFX90A-NEXT: s_mov_b32 s9, s4 8792; GFX90A-NEXT: s_mov_b32 s10, s4 8793; GFX90A-NEXT: ;;#ASMSTART 8794; GFX90A-NEXT: ; use s[8:10] 8795; GFX90A-NEXT: ;;#ASMEND 8796; GFX90A-NEXT: s_setpc_b64 s[30:31] 8797; 8798; GFX940-LABEL: s_shuffle_v3i32_v4i32__1_0_0: 8799; GFX940: ; %bb.0: 8800; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8801; GFX940-NEXT: ;;#ASMSTART 8802; GFX940-NEXT: ; def s[0:3] 8803; GFX940-NEXT: ;;#ASMEND 8804; GFX940-NEXT: s_mov_b32 s8, s1 8805; GFX940-NEXT: s_mov_b32 s9, s0 8806; GFX940-NEXT: s_mov_b32 s10, s0 8807; GFX940-NEXT: ;;#ASMSTART 8808; GFX940-NEXT: ; use s[8:10] 8809; GFX940-NEXT: ;;#ASMEND 8810; GFX940-NEXT: s_setpc_b64 s[30:31] 8811 %vec0 = call <4 x i32> asm "; def $0", "=s"() 8812 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> <i32 1, i32 0, i32 0> 8813 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 8814 ret void 8815} 8816 8817define void @s_shuffle_v3i32_v4i32__2_0_0() { 8818; GFX900-LABEL: s_shuffle_v3i32_v4i32__2_0_0: 8819; GFX900: ; %bb.0: 8820; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8821; GFX900-NEXT: ;;#ASMSTART 8822; GFX900-NEXT: ; def s[4:7] 8823; GFX900-NEXT: ;;#ASMEND 8824; GFX900-NEXT: s_mov_b32 s8, s6 8825; GFX900-NEXT: s_mov_b32 s9, s4 8826; GFX900-NEXT: s_mov_b32 s10, s4 8827; GFX900-NEXT: ;;#ASMSTART 8828; 
GFX900-NEXT: ; use s[8:10] 8829; GFX900-NEXT: ;;#ASMEND 8830; GFX900-NEXT: s_setpc_b64 s[30:31] 8831; 8832; GFX90A-LABEL: s_shuffle_v3i32_v4i32__2_0_0: 8833; GFX90A: ; %bb.0: 8834; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8835; GFX90A-NEXT: ;;#ASMSTART 8836; GFX90A-NEXT: ; def s[4:7] 8837; GFX90A-NEXT: ;;#ASMEND 8838; GFX90A-NEXT: s_mov_b32 s8, s6 8839; GFX90A-NEXT: s_mov_b32 s9, s4 8840; GFX90A-NEXT: s_mov_b32 s10, s4 8841; GFX90A-NEXT: ;;#ASMSTART 8842; GFX90A-NEXT: ; use s[8:10] 8843; GFX90A-NEXT: ;;#ASMEND 8844; GFX90A-NEXT: s_setpc_b64 s[30:31] 8845; 8846; GFX940-LABEL: s_shuffle_v3i32_v4i32__2_0_0: 8847; GFX940: ; %bb.0: 8848; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8849; GFX940-NEXT: ;;#ASMSTART 8850; GFX940-NEXT: ; def s[0:3] 8851; GFX940-NEXT: ;;#ASMEND 8852; GFX940-NEXT: s_mov_b32 s8, s2 8853; GFX940-NEXT: s_mov_b32 s9, s0 8854; GFX940-NEXT: s_mov_b32 s10, s0 8855; GFX940-NEXT: ;;#ASMSTART 8856; GFX940-NEXT: ; use s[8:10] 8857; GFX940-NEXT: ;;#ASMEND 8858; GFX940-NEXT: s_setpc_b64 s[30:31] 8859 %vec0 = call <4 x i32> asm "; def $0", "=s"() 8860 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> <i32 2, i32 0, i32 0> 8861 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 8862 ret void 8863} 8864 8865define void @s_shuffle_v3i32_v4i32__3_0_0() { 8866; GFX900-LABEL: s_shuffle_v3i32_v4i32__3_0_0: 8867; GFX900: ; %bb.0: 8868; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8869; GFX900-NEXT: ;;#ASMSTART 8870; GFX900-NEXT: ; def s[4:7] 8871; GFX900-NEXT: ;;#ASMEND 8872; GFX900-NEXT: s_mov_b32 s8, s7 8873; GFX900-NEXT: s_mov_b32 s9, s4 8874; GFX900-NEXT: s_mov_b32 s10, s4 8875; GFX900-NEXT: ;;#ASMSTART 8876; GFX900-NEXT: ; use s[8:10] 8877; GFX900-NEXT: ;;#ASMEND 8878; GFX900-NEXT: s_setpc_b64 s[30:31] 8879; 8880; GFX90A-LABEL: s_shuffle_v3i32_v4i32__3_0_0: 8881; GFX90A: ; %bb.0: 8882; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8883; GFX90A-NEXT: ;;#ASMSTART 8884; GFX90A-NEXT: ; def s[4:7] 
8885; GFX90A-NEXT: ;;#ASMEND 8886; GFX90A-NEXT: s_mov_b32 s8, s7 8887; GFX90A-NEXT: s_mov_b32 s9, s4 8888; GFX90A-NEXT: s_mov_b32 s10, s4 8889; GFX90A-NEXT: ;;#ASMSTART 8890; GFX90A-NEXT: ; use s[8:10] 8891; GFX90A-NEXT: ;;#ASMEND 8892; GFX90A-NEXT: s_setpc_b64 s[30:31] 8893; 8894; GFX940-LABEL: s_shuffle_v3i32_v4i32__3_0_0: 8895; GFX940: ; %bb.0: 8896; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8897; GFX940-NEXT: ;;#ASMSTART 8898; GFX940-NEXT: ; def s[0:3] 8899; GFX940-NEXT: ;;#ASMEND 8900; GFX940-NEXT: s_mov_b32 s8, s3 8901; GFX940-NEXT: s_mov_b32 s9, s0 8902; GFX940-NEXT: s_mov_b32 s10, s0 8903; GFX940-NEXT: ;;#ASMSTART 8904; GFX940-NEXT: ; use s[8:10] 8905; GFX940-NEXT: ;;#ASMEND 8906; GFX940-NEXT: s_setpc_b64 s[30:31] 8907 %vec0 = call <4 x i32> asm "; def $0", "=s"() 8908 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> <i32 3, i32 0, i32 0> 8909 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 8910 ret void 8911} 8912 8913define void @s_shuffle_v3i32_v4i32__4_0_0() { 8914; GFX900-LABEL: s_shuffle_v3i32_v4i32__4_0_0: 8915; GFX900: ; %bb.0: 8916; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8917; GFX900-NEXT: ;;#ASMSTART 8918; GFX900-NEXT: ; def s[4:7] 8919; GFX900-NEXT: ;;#ASMEND 8920; GFX900-NEXT: s_mov_b32 s9, s4 8921; GFX900-NEXT: s_mov_b32 s10, s4 8922; GFX900-NEXT: ;;#ASMSTART 8923; GFX900-NEXT: ; use s[8:10] 8924; GFX900-NEXT: ;;#ASMEND 8925; GFX900-NEXT: s_setpc_b64 s[30:31] 8926; 8927; GFX90A-LABEL: s_shuffle_v3i32_v4i32__4_0_0: 8928; GFX90A: ; %bb.0: 8929; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8930; GFX90A-NEXT: ;;#ASMSTART 8931; GFX90A-NEXT: ; def s[4:7] 8932; GFX90A-NEXT: ;;#ASMEND 8933; GFX90A-NEXT: s_mov_b32 s9, s4 8934; GFX90A-NEXT: s_mov_b32 s10, s4 8935; GFX90A-NEXT: ;;#ASMSTART 8936; GFX90A-NEXT: ; use s[8:10] 8937; GFX90A-NEXT: ;;#ASMEND 8938; GFX90A-NEXT: s_setpc_b64 s[30:31] 8939; 8940; GFX940-LABEL: s_shuffle_v3i32_v4i32__4_0_0: 8941; GFX940: ; %bb.0: 8942; GFX940-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8943; GFX940-NEXT: ;;#ASMSTART 8944; GFX940-NEXT: ; def s[0:3] 8945; GFX940-NEXT: ;;#ASMEND 8946; GFX940-NEXT: s_mov_b32 s9, s0 8947; GFX940-NEXT: s_mov_b32 s10, s0 8948; GFX940-NEXT: ;;#ASMSTART 8949; GFX940-NEXT: ; use s[8:10] 8950; GFX940-NEXT: ;;#ASMEND 8951; GFX940-NEXT: s_setpc_b64 s[30:31] 8952 %vec0 = call <4 x i32> asm "; def $0", "=s"() 8953 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> <i32 4, i32 0, i32 0> 8954 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 8955 ret void 8956} 8957 8958define void @s_shuffle_v3i32_v4i32__5_0_0() { 8959; GFX900-LABEL: s_shuffle_v3i32_v4i32__5_0_0: 8960; GFX900: ; %bb.0: 8961; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8962; GFX900-NEXT: ;;#ASMSTART 8963; GFX900-NEXT: ; def s[8:11] 8964; GFX900-NEXT: ;;#ASMEND 8965; GFX900-NEXT: ;;#ASMSTART 8966; GFX900-NEXT: ; def s[4:7] 8967; GFX900-NEXT: ;;#ASMEND 8968; GFX900-NEXT: s_mov_b32 s8, s9 8969; GFX900-NEXT: s_mov_b32 s9, s4 8970; GFX900-NEXT: s_mov_b32 s10, s4 8971; GFX900-NEXT: ;;#ASMSTART 8972; GFX900-NEXT: ; use s[8:10] 8973; GFX900-NEXT: ;;#ASMEND 8974; GFX900-NEXT: s_setpc_b64 s[30:31] 8975; 8976; GFX90A-LABEL: s_shuffle_v3i32_v4i32__5_0_0: 8977; GFX90A: ; %bb.0: 8978; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8979; GFX90A-NEXT: ;;#ASMSTART 8980; GFX90A-NEXT: ; def s[8:11] 8981; GFX90A-NEXT: ;;#ASMEND 8982; GFX90A-NEXT: ;;#ASMSTART 8983; GFX90A-NEXT: ; def s[4:7] 8984; GFX90A-NEXT: ;;#ASMEND 8985; GFX90A-NEXT: s_mov_b32 s8, s9 8986; GFX90A-NEXT: s_mov_b32 s9, s4 8987; GFX90A-NEXT: s_mov_b32 s10, s4 8988; GFX90A-NEXT: ;;#ASMSTART 8989; GFX90A-NEXT: ; use s[8:10] 8990; GFX90A-NEXT: ;;#ASMEND 8991; GFX90A-NEXT: s_setpc_b64 s[30:31] 8992; 8993; GFX940-LABEL: s_shuffle_v3i32_v4i32__5_0_0: 8994; GFX940: ; %bb.0: 8995; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8996; GFX940-NEXT: ;;#ASMSTART 8997; GFX940-NEXT: ; def s[0:3] 8998; GFX940-NEXT: ;;#ASMEND 8999; 
GFX940-NEXT: ;;#ASMSTART 9000; GFX940-NEXT: ; def s[4:7] 9001; GFX940-NEXT: ;;#ASMEND 9002; GFX940-NEXT: s_mov_b32 s8, s5 9003; GFX940-NEXT: s_mov_b32 s9, s0 9004; GFX940-NEXT: s_mov_b32 s10, s0 9005; GFX940-NEXT: ;;#ASMSTART 9006; GFX940-NEXT: ; use s[8:10] 9007; GFX940-NEXT: ;;#ASMEND 9008; GFX940-NEXT: s_setpc_b64 s[30:31] 9009 %vec0 = call <4 x i32> asm "; def $0", "=s"() 9010 %vec1 = call <4 x i32> asm "; def $0", "=s"() 9011 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 5, i32 0, i32 0> 9012 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 9013 ret void 9014} 9015 9016define void @s_shuffle_v3i32_v4i32__6_0_0() { 9017; GFX900-LABEL: s_shuffle_v3i32_v4i32__6_0_0: 9018; GFX900: ; %bb.0: 9019; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9020; GFX900-NEXT: ;;#ASMSTART 9021; GFX900-NEXT: ; def s[8:11] 9022; GFX900-NEXT: ;;#ASMEND 9023; GFX900-NEXT: ;;#ASMSTART 9024; GFX900-NEXT: ; def s[4:7] 9025; GFX900-NEXT: ;;#ASMEND 9026; GFX900-NEXT: s_mov_b32 s8, s10 9027; GFX900-NEXT: s_mov_b32 s9, s4 9028; GFX900-NEXT: s_mov_b32 s10, s4 9029; GFX900-NEXT: ;;#ASMSTART 9030; GFX900-NEXT: ; use s[8:10] 9031; GFX900-NEXT: ;;#ASMEND 9032; GFX900-NEXT: s_setpc_b64 s[30:31] 9033; 9034; GFX90A-LABEL: s_shuffle_v3i32_v4i32__6_0_0: 9035; GFX90A: ; %bb.0: 9036; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9037; GFX90A-NEXT: ;;#ASMSTART 9038; GFX90A-NEXT: ; def s[8:11] 9039; GFX90A-NEXT: ;;#ASMEND 9040; GFX90A-NEXT: ;;#ASMSTART 9041; GFX90A-NEXT: ; def s[4:7] 9042; GFX90A-NEXT: ;;#ASMEND 9043; GFX90A-NEXT: s_mov_b32 s8, s10 9044; GFX90A-NEXT: s_mov_b32 s9, s4 9045; GFX90A-NEXT: s_mov_b32 s10, s4 9046; GFX90A-NEXT: ;;#ASMSTART 9047; GFX90A-NEXT: ; use s[8:10] 9048; GFX90A-NEXT: ;;#ASMEND 9049; GFX90A-NEXT: s_setpc_b64 s[30:31] 9050; 9051; GFX940-LABEL: s_shuffle_v3i32_v4i32__6_0_0: 9052; GFX940: ; %bb.0: 9053; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9054; GFX940-NEXT: ;;#ASMSTART 9055; GFX940-NEXT: ; def s[0:3] 
9056; GFX940-NEXT: ;;#ASMEND 9057; GFX940-NEXT: ;;#ASMSTART 9058; GFX940-NEXT: ; def s[4:7] 9059; GFX940-NEXT: ;;#ASMEND 9060; GFX940-NEXT: s_mov_b32 s8, s6 9061; GFX940-NEXT: s_mov_b32 s9, s0 9062; GFX940-NEXT: s_mov_b32 s10, s0 9063; GFX940-NEXT: ;;#ASMSTART 9064; GFX940-NEXT: ; use s[8:10] 9065; GFX940-NEXT: ;;#ASMEND 9066; GFX940-NEXT: s_setpc_b64 s[30:31] 9067 %vec0 = call <4 x i32> asm "; def $0", "=s"() 9068 %vec1 = call <4 x i32> asm "; def $0", "=s"() 9069 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 6, i32 0, i32 0> 9070 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 9071 ret void 9072} 9073 9074define void @s_shuffle_v3i32_v4i32__7_0_0() { 9075; GFX900-LABEL: s_shuffle_v3i32_v4i32__7_0_0: 9076; GFX900: ; %bb.0: 9077; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9078; GFX900-NEXT: ;;#ASMSTART 9079; GFX900-NEXT: ; def s[8:11] 9080; GFX900-NEXT: ;;#ASMEND 9081; GFX900-NEXT: ;;#ASMSTART 9082; GFX900-NEXT: ; def s[4:7] 9083; GFX900-NEXT: ;;#ASMEND 9084; GFX900-NEXT: s_mov_b32 s8, s11 9085; GFX900-NEXT: s_mov_b32 s9, s4 9086; GFX900-NEXT: s_mov_b32 s10, s4 9087; GFX900-NEXT: ;;#ASMSTART 9088; GFX900-NEXT: ; use s[8:10] 9089; GFX900-NEXT: ;;#ASMEND 9090; GFX900-NEXT: s_setpc_b64 s[30:31] 9091; 9092; GFX90A-LABEL: s_shuffle_v3i32_v4i32__7_0_0: 9093; GFX90A: ; %bb.0: 9094; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9095; GFX90A-NEXT: ;;#ASMSTART 9096; GFX90A-NEXT: ; def s[8:11] 9097; GFX90A-NEXT: ;;#ASMEND 9098; GFX90A-NEXT: ;;#ASMSTART 9099; GFX90A-NEXT: ; def s[4:7] 9100; GFX90A-NEXT: ;;#ASMEND 9101; GFX90A-NEXT: s_mov_b32 s8, s11 9102; GFX90A-NEXT: s_mov_b32 s9, s4 9103; GFX90A-NEXT: s_mov_b32 s10, s4 9104; GFX90A-NEXT: ;;#ASMSTART 9105; GFX90A-NEXT: ; use s[8:10] 9106; GFX90A-NEXT: ;;#ASMEND 9107; GFX90A-NEXT: s_setpc_b64 s[30:31] 9108; 9109; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_0_0: 9110; GFX940: ; %bb.0: 9111; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9112; GFX940-NEXT: ;;#ASMSTART 
9113; GFX940-NEXT: ; def s[0:3] 9114; GFX940-NEXT: ;;#ASMEND 9115; GFX940-NEXT: ;;#ASMSTART 9116; GFX940-NEXT: ; def s[4:7] 9117; GFX940-NEXT: ;;#ASMEND 9118; GFX940-NEXT: s_mov_b32 s8, s7 9119; GFX940-NEXT: s_mov_b32 s9, s0 9120; GFX940-NEXT: s_mov_b32 s10, s0 9121; GFX940-NEXT: ;;#ASMSTART 9122; GFX940-NEXT: ; use s[8:10] 9123; GFX940-NEXT: ;;#ASMEND 9124; GFX940-NEXT: s_setpc_b64 s[30:31] 9125 %vec0 = call <4 x i32> asm "; def $0", "=s"() 9126 %vec1 = call <4 x i32> asm "; def $0", "=s"() 9127 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 0, i32 0> 9128 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 9129 ret void 9130} 9131 9132define void @s_shuffle_v3i32_v4i32__7_u_0() { 9133; GFX900-LABEL: s_shuffle_v3i32_v4i32__7_u_0: 9134; GFX900: ; %bb.0: 9135; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9136; GFX900-NEXT: ;;#ASMSTART 9137; GFX900-NEXT: ; def s[8:11] 9138; GFX900-NEXT: ;;#ASMEND 9139; GFX900-NEXT: ;;#ASMSTART 9140; GFX900-NEXT: ; def s[4:7] 9141; GFX900-NEXT: ;;#ASMEND 9142; GFX900-NEXT: s_mov_b32 s8, s11 9143; GFX900-NEXT: s_mov_b32 s10, s4 9144; GFX900-NEXT: ;;#ASMSTART 9145; GFX900-NEXT: ; use s[8:10] 9146; GFX900-NEXT: ;;#ASMEND 9147; GFX900-NEXT: s_setpc_b64 s[30:31] 9148; 9149; GFX90A-LABEL: s_shuffle_v3i32_v4i32__7_u_0: 9150; GFX90A: ; %bb.0: 9151; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9152; GFX90A-NEXT: ;;#ASMSTART 9153; GFX90A-NEXT: ; def s[8:11] 9154; GFX90A-NEXT: ;;#ASMEND 9155; GFX90A-NEXT: ;;#ASMSTART 9156; GFX90A-NEXT: ; def s[4:7] 9157; GFX90A-NEXT: ;;#ASMEND 9158; GFX90A-NEXT: s_mov_b32 s8, s11 9159; GFX90A-NEXT: s_mov_b32 s10, s4 9160; GFX90A-NEXT: ;;#ASMSTART 9161; GFX90A-NEXT: ; use s[8:10] 9162; GFX90A-NEXT: ;;#ASMEND 9163; GFX90A-NEXT: s_setpc_b64 s[30:31] 9164; 9165; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_u_0: 9166; GFX940: ; %bb.0: 9167; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9168; GFX940-NEXT: ;;#ASMSTART 9169; GFX940-NEXT: ; def s[0:3] 9170; 
GFX940-NEXT: ;;#ASMEND 9171; GFX940-NEXT: ;;#ASMSTART 9172; GFX940-NEXT: ; def s[4:7] 9173; GFX940-NEXT: ;;#ASMEND 9174; GFX940-NEXT: s_mov_b32 s8, s7 9175; GFX940-NEXT: s_mov_b32 s10, s0 9176; GFX940-NEXT: ;;#ASMSTART 9177; GFX940-NEXT: ; use s[8:10] 9178; GFX940-NEXT: ;;#ASMEND 9179; GFX940-NEXT: s_setpc_b64 s[30:31] 9180 %vec0 = call <4 x i32> asm "; def $0", "=s"() 9181 %vec1 = call <4 x i32> asm "; def $0", "=s"() 9182 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 poison, i32 0> 9183 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 9184 ret void 9185} 9186 9187define void @s_shuffle_v3i32_v4i32__7_1_0() { 9188; GFX900-LABEL: s_shuffle_v3i32_v4i32__7_1_0: 9189; GFX900: ; %bb.0: 9190; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9191; GFX900-NEXT: ;;#ASMSTART 9192; GFX900-NEXT: ; def s[8:11] 9193; GFX900-NEXT: ;;#ASMEND 9194; GFX900-NEXT: ;;#ASMSTART 9195; GFX900-NEXT: ; def s[4:7] 9196; GFX900-NEXT: ;;#ASMEND 9197; GFX900-NEXT: s_mov_b32 s8, s11 9198; GFX900-NEXT: s_mov_b32 s9, s5 9199; GFX900-NEXT: s_mov_b32 s10, s4 9200; GFX900-NEXT: ;;#ASMSTART 9201; GFX900-NEXT: ; use s[8:10] 9202; GFX900-NEXT: ;;#ASMEND 9203; GFX900-NEXT: s_setpc_b64 s[30:31] 9204; 9205; GFX90A-LABEL: s_shuffle_v3i32_v4i32__7_1_0: 9206; GFX90A: ; %bb.0: 9207; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9208; GFX90A-NEXT: ;;#ASMSTART 9209; GFX90A-NEXT: ; def s[8:11] 9210; GFX90A-NEXT: ;;#ASMEND 9211; GFX90A-NEXT: ;;#ASMSTART 9212; GFX90A-NEXT: ; def s[4:7] 9213; GFX90A-NEXT: ;;#ASMEND 9214; GFX90A-NEXT: s_mov_b32 s8, s11 9215; GFX90A-NEXT: s_mov_b32 s9, s5 9216; GFX90A-NEXT: s_mov_b32 s10, s4 9217; GFX90A-NEXT: ;;#ASMSTART 9218; GFX90A-NEXT: ; use s[8:10] 9219; GFX90A-NEXT: ;;#ASMEND 9220; GFX90A-NEXT: s_setpc_b64 s[30:31] 9221; 9222; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_1_0: 9223; GFX940: ; %bb.0: 9224; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9225; GFX940-NEXT: ;;#ASMSTART 9226; GFX940-NEXT: ; def s[0:3] 9227; 
GFX940-NEXT: ;;#ASMEND 9228; GFX940-NEXT: ;;#ASMSTART 9229; GFX940-NEXT: ; def s[4:7] 9230; GFX940-NEXT: ;;#ASMEND 9231; GFX940-NEXT: s_mov_b32 s8, s7 9232; GFX940-NEXT: s_mov_b32 s9, s1 9233; GFX940-NEXT: s_mov_b32 s10, s0 9234; GFX940-NEXT: ;;#ASMSTART 9235; GFX940-NEXT: ; use s[8:10] 9236; GFX940-NEXT: ;;#ASMEND 9237; GFX940-NEXT: s_setpc_b64 s[30:31] 9238 %vec0 = call <4 x i32> asm "; def $0", "=s"() 9239 %vec1 = call <4 x i32> asm "; def $0", "=s"() 9240 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 1, i32 0> 9241 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 9242 ret void 9243} 9244 9245define void @s_shuffle_v3i32_v4i32__7_2_0() { 9246; GFX900-LABEL: s_shuffle_v3i32_v4i32__7_2_0: 9247; GFX900: ; %bb.0: 9248; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9249; GFX900-NEXT: ;;#ASMSTART 9250; GFX900-NEXT: ; def s[8:11] 9251; GFX900-NEXT: ;;#ASMEND 9252; GFX900-NEXT: ;;#ASMSTART 9253; GFX900-NEXT: ; def s[4:7] 9254; GFX900-NEXT: ;;#ASMEND 9255; GFX900-NEXT: s_mov_b32 s8, s11 9256; GFX900-NEXT: s_mov_b32 s9, s6 9257; GFX900-NEXT: s_mov_b32 s10, s4 9258; GFX900-NEXT: ;;#ASMSTART 9259; GFX900-NEXT: ; use s[8:10] 9260; GFX900-NEXT: ;;#ASMEND 9261; GFX900-NEXT: s_setpc_b64 s[30:31] 9262; 9263; GFX90A-LABEL: s_shuffle_v3i32_v4i32__7_2_0: 9264; GFX90A: ; %bb.0: 9265; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9266; GFX90A-NEXT: ;;#ASMSTART 9267; GFX90A-NEXT: ; def s[8:11] 9268; GFX90A-NEXT: ;;#ASMEND 9269; GFX90A-NEXT: ;;#ASMSTART 9270; GFX90A-NEXT: ; def s[4:7] 9271; GFX90A-NEXT: ;;#ASMEND 9272; GFX90A-NEXT: s_mov_b32 s8, s11 9273; GFX90A-NEXT: s_mov_b32 s9, s6 9274; GFX90A-NEXT: s_mov_b32 s10, s4 9275; GFX90A-NEXT: ;;#ASMSTART 9276; GFX90A-NEXT: ; use s[8:10] 9277; GFX90A-NEXT: ;;#ASMEND 9278; GFX90A-NEXT: s_setpc_b64 s[30:31] 9279; 9280; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_2_0: 9281; GFX940: ; %bb.0: 9282; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9283; GFX940-NEXT: ;;#ASMSTART 9284; 
GFX940-NEXT: ; def s[0:3] 9285; GFX940-NEXT: ;;#ASMEND 9286; GFX940-NEXT: ;;#ASMSTART 9287; GFX940-NEXT: ; def s[4:7] 9288; GFX940-NEXT: ;;#ASMEND 9289; GFX940-NEXT: s_mov_b32 s8, s7 9290; GFX940-NEXT: s_mov_b32 s9, s2 9291; GFX940-NEXT: s_mov_b32 s10, s0 9292; GFX940-NEXT: ;;#ASMSTART 9293; GFX940-NEXT: ; use s[8:10] 9294; GFX940-NEXT: ;;#ASMEND 9295; GFX940-NEXT: s_setpc_b64 s[30:31] 9296 %vec0 = call <4 x i32> asm "; def $0", "=s"() 9297 %vec1 = call <4 x i32> asm "; def $0", "=s"() 9298 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 2, i32 0> 9299 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 9300 ret void 9301} 9302 9303define void @s_shuffle_v3i32_v4i32__7_3_0() { 9304; GFX900-LABEL: s_shuffle_v3i32_v4i32__7_3_0: 9305; GFX900: ; %bb.0: 9306; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9307; GFX900-NEXT: ;;#ASMSTART 9308; GFX900-NEXT: ; def s[8:11] 9309; GFX900-NEXT: ;;#ASMEND 9310; GFX900-NEXT: ;;#ASMSTART 9311; GFX900-NEXT: ; def s[4:7] 9312; GFX900-NEXT: ;;#ASMEND 9313; GFX900-NEXT: s_mov_b32 s8, s11 9314; GFX900-NEXT: s_mov_b32 s9, s7 9315; GFX900-NEXT: s_mov_b32 s10, s4 9316; GFX900-NEXT: ;;#ASMSTART 9317; GFX900-NEXT: ; use s[8:10] 9318; GFX900-NEXT: ;;#ASMEND 9319; GFX900-NEXT: s_setpc_b64 s[30:31] 9320; 9321; GFX90A-LABEL: s_shuffle_v3i32_v4i32__7_3_0: 9322; GFX90A: ; %bb.0: 9323; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9324; GFX90A-NEXT: ;;#ASMSTART 9325; GFX90A-NEXT: ; def s[8:11] 9326; GFX90A-NEXT: ;;#ASMEND 9327; GFX90A-NEXT: ;;#ASMSTART 9328; GFX90A-NEXT: ; def s[4:7] 9329; GFX90A-NEXT: ;;#ASMEND 9330; GFX90A-NEXT: s_mov_b32 s8, s11 9331; GFX90A-NEXT: s_mov_b32 s9, s7 9332; GFX90A-NEXT: s_mov_b32 s10, s4 9333; GFX90A-NEXT: ;;#ASMSTART 9334; GFX90A-NEXT: ; use s[8:10] 9335; GFX90A-NEXT: ;;#ASMEND 9336; GFX90A-NEXT: s_setpc_b64 s[30:31] 9337; 9338; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_3_0: 9339; GFX940: ; %bb.0: 9340; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9341; 
GFX940-NEXT: ;;#ASMSTART 9342; GFX940-NEXT: ; def s[0:3] 9343; GFX940-NEXT: ;;#ASMEND 9344; GFX940-NEXT: ;;#ASMSTART 9345; GFX940-NEXT: ; def s[4:7] 9346; GFX940-NEXT: ;;#ASMEND 9347; GFX940-NEXT: s_mov_b32 s8, s7 9348; GFX940-NEXT: s_mov_b32 s9, s3 9349; GFX940-NEXT: s_mov_b32 s10, s0 9350; GFX940-NEXT: ;;#ASMSTART 9351; GFX940-NEXT: ; use s[8:10] 9352; GFX940-NEXT: ;;#ASMEND 9353; GFX940-NEXT: s_setpc_b64 s[30:31] 9354 %vec0 = call <4 x i32> asm "; def $0", "=s"() 9355 %vec1 = call <4 x i32> asm "; def $0", "=s"() 9356 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 3, i32 0> 9357 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 9358 ret void 9359} 9360 9361define void @s_shuffle_v3i32_v4i32__7_4_0() { 9362; GFX900-LABEL: s_shuffle_v3i32_v4i32__7_4_0: 9363; GFX900: ; %bb.0: 9364; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9365; GFX900-NEXT: ;;#ASMSTART 9366; GFX900-NEXT: ; def s[4:7] 9367; GFX900-NEXT: ;;#ASMEND 9368; GFX900-NEXT: ;;#ASMSTART 9369; GFX900-NEXT: ; def s[12:15] 9370; GFX900-NEXT: ;;#ASMEND 9371; GFX900-NEXT: s_mov_b32 s8, s15 9372; GFX900-NEXT: s_mov_b32 s9, s12 9373; GFX900-NEXT: s_mov_b32 s10, s4 9374; GFX900-NEXT: ;;#ASMSTART 9375; GFX900-NEXT: ; use s[8:10] 9376; GFX900-NEXT: ;;#ASMEND 9377; GFX900-NEXT: s_setpc_b64 s[30:31] 9378; 9379; GFX90A-LABEL: s_shuffle_v3i32_v4i32__7_4_0: 9380; GFX90A: ; %bb.0: 9381; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9382; GFX90A-NEXT: ;;#ASMSTART 9383; GFX90A-NEXT: ; def s[4:7] 9384; GFX90A-NEXT: ;;#ASMEND 9385; GFX90A-NEXT: ;;#ASMSTART 9386; GFX90A-NEXT: ; def s[12:15] 9387; GFX90A-NEXT: ;;#ASMEND 9388; GFX90A-NEXT: s_mov_b32 s8, s15 9389; GFX90A-NEXT: s_mov_b32 s9, s12 9390; GFX90A-NEXT: s_mov_b32 s10, s4 9391; GFX90A-NEXT: ;;#ASMSTART 9392; GFX90A-NEXT: ; use s[8:10] 9393; GFX90A-NEXT: ;;#ASMEND 9394; GFX90A-NEXT: s_setpc_b64 s[30:31] 9395; 9396; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_4_0: 9397; GFX940: ; %bb.0: 9398; GFX940-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) 9399; GFX940-NEXT: ;;#ASMSTART 9400; GFX940-NEXT: ; def s[0:3] 9401; GFX940-NEXT: ;;#ASMEND 9402; GFX940-NEXT: ;;#ASMSTART 9403; GFX940-NEXT: ; def s[4:7] 9404; GFX940-NEXT: ;;#ASMEND 9405; GFX940-NEXT: s_mov_b32 s8, s7 9406; GFX940-NEXT: s_mov_b32 s9, s4 9407; GFX940-NEXT: s_mov_b32 s10, s0 9408; GFX940-NEXT: ;;#ASMSTART 9409; GFX940-NEXT: ; use s[8:10] 9410; GFX940-NEXT: ;;#ASMEND 9411; GFX940-NEXT: s_setpc_b64 s[30:31] 9412 %vec0 = call <4 x i32> asm "; def $0", "=s"() 9413 %vec1 = call <4 x i32> asm "; def $0", "=s"() 9414 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 4, i32 0> 9415 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 9416 ret void 9417} 9418 9419define void @s_shuffle_v3i32_v4i32__7_5_0() { 9420; GFX900-LABEL: s_shuffle_v3i32_v4i32__7_5_0: 9421; GFX900: ; %bb.0: 9422; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9423; GFX900-NEXT: ;;#ASMSTART 9424; GFX900-NEXT: ; def s[8:11] 9425; GFX900-NEXT: ;;#ASMEND 9426; GFX900-NEXT: ;;#ASMSTART 9427; GFX900-NEXT: ; def s[4:7] 9428; GFX900-NEXT: ;;#ASMEND 9429; GFX900-NEXT: s_mov_b32 s8, s11 9430; GFX900-NEXT: s_mov_b32 s10, s4 9431; GFX900-NEXT: ;;#ASMSTART 9432; GFX900-NEXT: ; use s[8:10] 9433; GFX900-NEXT: ;;#ASMEND 9434; GFX900-NEXT: s_setpc_b64 s[30:31] 9435; 9436; GFX90A-LABEL: s_shuffle_v3i32_v4i32__7_5_0: 9437; GFX90A: ; %bb.0: 9438; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9439; GFX90A-NEXT: ;;#ASMSTART 9440; GFX90A-NEXT: ; def s[8:11] 9441; GFX90A-NEXT: ;;#ASMEND 9442; GFX90A-NEXT: ;;#ASMSTART 9443; GFX90A-NEXT: ; def s[4:7] 9444; GFX90A-NEXT: ;;#ASMEND 9445; GFX90A-NEXT: s_mov_b32 s8, s11 9446; GFX90A-NEXT: s_mov_b32 s10, s4 9447; GFX90A-NEXT: ;;#ASMSTART 9448; GFX90A-NEXT: ; use s[8:10] 9449; GFX90A-NEXT: ;;#ASMEND 9450; GFX90A-NEXT: s_setpc_b64 s[30:31] 9451; 9452; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_5_0: 9453; GFX940: ; %bb.0: 9454; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9455; 
GFX940-NEXT: ;;#ASMSTART 9456; GFX940-NEXT: ; def s[8:11] 9457; GFX940-NEXT: ;;#ASMEND 9458; GFX940-NEXT: ;;#ASMSTART 9459; GFX940-NEXT: ; def s[0:3] 9460; GFX940-NEXT: ;;#ASMEND 9461; GFX940-NEXT: s_mov_b32 s8, s11 9462; GFX940-NEXT: s_mov_b32 s10, s0 9463; GFX940-NEXT: ;;#ASMSTART 9464; GFX940-NEXT: ; use s[8:10] 9465; GFX940-NEXT: ;;#ASMEND 9466; GFX940-NEXT: s_setpc_b64 s[30:31] 9467 %vec0 = call <4 x i32> asm "; def $0", "=s"() 9468 %vec1 = call <4 x i32> asm "; def $0", "=s"() 9469 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 5, i32 0> 9470 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 9471 ret void 9472} 9473 9474define void @s_shuffle_v3i32_v4i32__7_6_0() { 9475; GFX900-LABEL: s_shuffle_v3i32_v4i32__7_6_0: 9476; GFX900: ; %bb.0: 9477; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9478; GFX900-NEXT: ;;#ASMSTART 9479; GFX900-NEXT: ; def s[8:11] 9480; GFX900-NEXT: ;;#ASMEND 9481; GFX900-NEXT: ;;#ASMSTART 9482; GFX900-NEXT: ; def s[4:7] 9483; GFX900-NEXT: ;;#ASMEND 9484; GFX900-NEXT: s_mov_b32 s8, s11 9485; GFX900-NEXT: s_mov_b32 s9, s10 9486; GFX900-NEXT: s_mov_b32 s10, s4 9487; GFX900-NEXT: ;;#ASMSTART 9488; GFX900-NEXT: ; use s[8:10] 9489; GFX900-NEXT: ;;#ASMEND 9490; GFX900-NEXT: s_setpc_b64 s[30:31] 9491; 9492; GFX90A-LABEL: s_shuffle_v3i32_v4i32__7_6_0: 9493; GFX90A: ; %bb.0: 9494; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9495; GFX90A-NEXT: ;;#ASMSTART 9496; GFX90A-NEXT: ; def s[8:11] 9497; GFX90A-NEXT: ;;#ASMEND 9498; GFX90A-NEXT: ;;#ASMSTART 9499; GFX90A-NEXT: ; def s[4:7] 9500; GFX90A-NEXT: ;;#ASMEND 9501; GFX90A-NEXT: s_mov_b32 s8, s11 9502; GFX90A-NEXT: s_mov_b32 s9, s10 9503; GFX90A-NEXT: s_mov_b32 s10, s4 9504; GFX90A-NEXT: ;;#ASMSTART 9505; GFX90A-NEXT: ; use s[8:10] 9506; GFX90A-NEXT: ;;#ASMEND 9507; GFX90A-NEXT: s_setpc_b64 s[30:31] 9508; 9509; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_6_0: 9510; GFX940: ; %bb.0: 9511; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9512; 
GFX940-NEXT: ;;#ASMSTART 9513; GFX940-NEXT: ; def s[0:3] 9514; GFX940-NEXT: ;;#ASMEND 9515; GFX940-NEXT: ;;#ASMSTART 9516; GFX940-NEXT: ; def s[4:7] 9517; GFX940-NEXT: ;;#ASMEND 9518; GFX940-NEXT: s_mov_b32 s8, s7 9519; GFX940-NEXT: s_mov_b32 s9, s6 9520; GFX940-NEXT: s_mov_b32 s10, s0 9521; GFX940-NEXT: ;;#ASMSTART 9522; GFX940-NEXT: ; use s[8:10] 9523; GFX940-NEXT: ;;#ASMEND 9524; GFX940-NEXT: s_setpc_b64 s[30:31] 9525 %vec0 = call <4 x i32> asm "; def $0", "=s"() 9526 %vec1 = call <4 x i32> asm "; def $0", "=s"() 9527 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 6, i32 0> 9528 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 9529 ret void 9530} 9531 9532define void @s_shuffle_v3i32_v4i32__u_1_1() { 9533; GFX9-LABEL: s_shuffle_v3i32_v4i32__u_1_1: 9534; GFX9: ; %bb.0: 9535; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9536; GFX9-NEXT: ;;#ASMSTART 9537; GFX9-NEXT: ; def s[8:11] 9538; GFX9-NEXT: ;;#ASMEND 9539; GFX9-NEXT: s_mov_b32 s10, s9 9540; GFX9-NEXT: ;;#ASMSTART 9541; GFX9-NEXT: ; use s[8:10] 9542; GFX9-NEXT: ;;#ASMEND 9543; GFX9-NEXT: s_setpc_b64 s[30:31] 9544 %vec0 = call <4 x i32> asm "; def $0", "=s"() 9545 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> <i32 poison, i32 1, i32 1> 9546 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 9547 ret void 9548} 9549 9550define void @s_shuffle_v3i32_v4i32__0_1_1() { 9551; GFX9-LABEL: s_shuffle_v3i32_v4i32__0_1_1: 9552; GFX9: ; %bb.0: 9553; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9554; GFX9-NEXT: ;;#ASMSTART 9555; GFX9-NEXT: ; def s[8:11] 9556; GFX9-NEXT: ;;#ASMEND 9557; GFX9-NEXT: s_mov_b32 s10, s9 9558; GFX9-NEXT: ;;#ASMSTART 9559; GFX9-NEXT: ; use s[8:10] 9560; GFX9-NEXT: ;;#ASMEND 9561; GFX9-NEXT: s_setpc_b64 s[30:31] 9562 %vec0 = call <4 x i32> asm "; def $0", "=s"() 9563 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> <i32 0, i32 1, i32 1> 9564 call void asm sideeffect "; use $0", 
"{s[8:10]}"(<3 x i32> %shuf) 9565 ret void 9566} 9567 9568define void @s_shuffle_v3i32_v4i32__1_1_1() { 9569; GFX9-LABEL: s_shuffle_v3i32_v4i32__1_1_1: 9570; GFX9: ; %bb.0: 9571; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9572; GFX9-NEXT: ;;#ASMSTART 9573; GFX9-NEXT: ; def s[8:11] 9574; GFX9-NEXT: ;;#ASMEND 9575; GFX9-NEXT: s_mov_b32 s8, s9 9576; GFX9-NEXT: s_mov_b32 s10, s9 9577; GFX9-NEXT: ;;#ASMSTART 9578; GFX9-NEXT: ; use s[8:10] 9579; GFX9-NEXT: ;;#ASMEND 9580; GFX9-NEXT: s_setpc_b64 s[30:31] 9581 %vec0 = call <4 x i32> asm "; def $0", "=s"() 9582 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> <i32 1, i32 1, i32 1> 9583 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 9584 ret void 9585} 9586 9587define void @s_shuffle_v3i32_v4i32__2_1_1() { 9588; GFX9-LABEL: s_shuffle_v3i32_v4i32__2_1_1: 9589; GFX9: ; %bb.0: 9590; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9591; GFX9-NEXT: ;;#ASMSTART 9592; GFX9-NEXT: ; def s[8:11] 9593; GFX9-NEXT: ;;#ASMEND 9594; GFX9-NEXT: s_mov_b32 s8, s10 9595; GFX9-NEXT: s_mov_b32 s10, s9 9596; GFX9-NEXT: ;;#ASMSTART 9597; GFX9-NEXT: ; use s[8:10] 9598; GFX9-NEXT: ;;#ASMEND 9599; GFX9-NEXT: s_setpc_b64 s[30:31] 9600 %vec0 = call <4 x i32> asm "; def $0", "=s"() 9601 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> <i32 2, i32 1, i32 1> 9602 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 9603 ret void 9604} 9605 9606define void @s_shuffle_v3i32_v4i32__3_1_1() { 9607; GFX9-LABEL: s_shuffle_v3i32_v4i32__3_1_1: 9608; GFX9: ; %bb.0: 9609; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9610; GFX9-NEXT: ;;#ASMSTART 9611; GFX9-NEXT: ; def s[8:11] 9612; GFX9-NEXT: ;;#ASMEND 9613; GFX9-NEXT: s_mov_b32 s8, s11 9614; GFX9-NEXT: s_mov_b32 s10, s9 9615; GFX9-NEXT: ;;#ASMSTART 9616; GFX9-NEXT: ; use s[8:10] 9617; GFX9-NEXT: ;;#ASMEND 9618; GFX9-NEXT: s_setpc_b64 s[30:31] 9619 %vec0 = call <4 x i32> asm "; def $0", "=s"() 9620 %shuf = shufflevector <4 x i32> 
%vec0, <4 x i32> poison, <3 x i32> <i32 3, i32 1, i32 1> 9621 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 9622 ret void 9623} 9624 9625define void @s_shuffle_v3i32_v4i32__4_1_1() { 9626; GFX9-LABEL: s_shuffle_v3i32_v4i32__4_1_1: 9627; GFX9: ; %bb.0: 9628; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9629; GFX9-NEXT: ;;#ASMSTART 9630; GFX9-NEXT: ; def s[8:11] 9631; GFX9-NEXT: ;;#ASMEND 9632; GFX9-NEXT: s_mov_b32 s10, s9 9633; GFX9-NEXT: ;;#ASMSTART 9634; GFX9-NEXT: ; use s[8:10] 9635; GFX9-NEXT: ;;#ASMEND 9636; GFX9-NEXT: s_setpc_b64 s[30:31] 9637 %vec0 = call <4 x i32> asm "; def $0", "=s"() 9638 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> <i32 4, i32 1, i32 1> 9639 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 9640 ret void 9641} 9642 9643define void @s_shuffle_v3i32_v4i32__5_1_1() { 9644; GFX900-LABEL: s_shuffle_v3i32_v4i32__5_1_1: 9645; GFX900: ; %bb.0: 9646; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9647; GFX900-NEXT: ;;#ASMSTART 9648; GFX900-NEXT: ; def s[8:11] 9649; GFX900-NEXT: ;;#ASMEND 9650; GFX900-NEXT: ;;#ASMSTART 9651; GFX900-NEXT: ; def s[4:7] 9652; GFX900-NEXT: ;;#ASMEND 9653; GFX900-NEXT: s_mov_b32 s8, s5 9654; GFX900-NEXT: s_mov_b32 s10, s9 9655; GFX900-NEXT: ;;#ASMSTART 9656; GFX900-NEXT: ; use s[8:10] 9657; GFX900-NEXT: ;;#ASMEND 9658; GFX900-NEXT: s_setpc_b64 s[30:31] 9659; 9660; GFX90A-LABEL: s_shuffle_v3i32_v4i32__5_1_1: 9661; GFX90A: ; %bb.0: 9662; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9663; GFX90A-NEXT: ;;#ASMSTART 9664; GFX90A-NEXT: ; def s[8:11] 9665; GFX90A-NEXT: ;;#ASMEND 9666; GFX90A-NEXT: ;;#ASMSTART 9667; GFX90A-NEXT: ; def s[4:7] 9668; GFX90A-NEXT: ;;#ASMEND 9669; GFX90A-NEXT: s_mov_b32 s8, s5 9670; GFX90A-NEXT: s_mov_b32 s10, s9 9671; GFX90A-NEXT: ;;#ASMSTART 9672; GFX90A-NEXT: ; use s[8:10] 9673; GFX90A-NEXT: ;;#ASMEND 9674; GFX90A-NEXT: s_setpc_b64 s[30:31] 9675; 9676; GFX940-LABEL: s_shuffle_v3i32_v4i32__5_1_1: 9677; GFX940: ; %bb.0: 
9678; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9679; GFX940-NEXT: ;;#ASMSTART 9680; GFX940-NEXT: ; def s[8:11] 9681; GFX940-NEXT: ;;#ASMEND 9682; GFX940-NEXT: ;;#ASMSTART 9683; GFX940-NEXT: ; def s[0:3] 9684; GFX940-NEXT: ;;#ASMEND 9685; GFX940-NEXT: s_mov_b32 s8, s1 9686; GFX940-NEXT: s_mov_b32 s10, s9 9687; GFX940-NEXT: ;;#ASMSTART 9688; GFX940-NEXT: ; use s[8:10] 9689; GFX940-NEXT: ;;#ASMEND 9690; GFX940-NEXT: s_setpc_b64 s[30:31] 9691 %vec0 = call <4 x i32> asm "; def $0", "=s"() 9692 %vec1 = call <4 x i32> asm "; def $0", "=s"() 9693 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 5, i32 1, i32 1> 9694 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 9695 ret void 9696} 9697 9698define void @s_shuffle_v3i32_v4i32__6_1_1() { 9699; GFX900-LABEL: s_shuffle_v3i32_v4i32__6_1_1: 9700; GFX900: ; %bb.0: 9701; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9702; GFX900-NEXT: ;;#ASMSTART 9703; GFX900-NEXT: ; def s[8:11] 9704; GFX900-NEXT: ;;#ASMEND 9705; GFX900-NEXT: ;;#ASMSTART 9706; GFX900-NEXT: ; def s[4:7] 9707; GFX900-NEXT: ;;#ASMEND 9708; GFX900-NEXT: s_mov_b32 s8, s6 9709; GFX900-NEXT: s_mov_b32 s10, s9 9710; GFX900-NEXT: ;;#ASMSTART 9711; GFX900-NEXT: ; use s[8:10] 9712; GFX900-NEXT: ;;#ASMEND 9713; GFX900-NEXT: s_setpc_b64 s[30:31] 9714; 9715; GFX90A-LABEL: s_shuffle_v3i32_v4i32__6_1_1: 9716; GFX90A: ; %bb.0: 9717; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9718; GFX90A-NEXT: ;;#ASMSTART 9719; GFX90A-NEXT: ; def s[8:11] 9720; GFX90A-NEXT: ;;#ASMEND 9721; GFX90A-NEXT: ;;#ASMSTART 9722; GFX90A-NEXT: ; def s[4:7] 9723; GFX90A-NEXT: ;;#ASMEND 9724; GFX90A-NEXT: s_mov_b32 s8, s6 9725; GFX90A-NEXT: s_mov_b32 s10, s9 9726; GFX90A-NEXT: ;;#ASMSTART 9727; GFX90A-NEXT: ; use s[8:10] 9728; GFX90A-NEXT: ;;#ASMEND 9729; GFX90A-NEXT: s_setpc_b64 s[30:31] 9730; 9731; GFX940-LABEL: s_shuffle_v3i32_v4i32__6_1_1: 9732; GFX940: ; %bb.0: 9733; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9734; GFX940-NEXT: 
;;#ASMSTART 9735; GFX940-NEXT: ; def s[8:11] 9736; GFX940-NEXT: ;;#ASMEND 9737; GFX940-NEXT: ;;#ASMSTART 9738; GFX940-NEXT: ; def s[0:3] 9739; GFX940-NEXT: ;;#ASMEND 9740; GFX940-NEXT: s_mov_b32 s8, s2 9741; GFX940-NEXT: s_mov_b32 s10, s9 9742; GFX940-NEXT: ;;#ASMSTART 9743; GFX940-NEXT: ; use s[8:10] 9744; GFX940-NEXT: ;;#ASMEND 9745; GFX940-NEXT: s_setpc_b64 s[30:31] 9746 %vec0 = call <4 x i32> asm "; def $0", "=s"() 9747 %vec1 = call <4 x i32> asm "; def $0", "=s"() 9748 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 6, i32 1, i32 1> 9749 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 9750 ret void 9751} 9752 9753define void @s_shuffle_v3i32_v4i32__7_1_1() { 9754; GFX900-LABEL: s_shuffle_v3i32_v4i32__7_1_1: 9755; GFX900: ; %bb.0: 9756; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9757; GFX900-NEXT: ;;#ASMSTART 9758; GFX900-NEXT: ; def s[8:11] 9759; GFX900-NEXT: ;;#ASMEND 9760; GFX900-NEXT: ;;#ASMSTART 9761; GFX900-NEXT: ; def s[4:7] 9762; GFX900-NEXT: ;;#ASMEND 9763; GFX900-NEXT: s_mov_b32 s8, s7 9764; GFX900-NEXT: s_mov_b32 s10, s9 9765; GFX900-NEXT: ;;#ASMSTART 9766; GFX900-NEXT: ; use s[8:10] 9767; GFX900-NEXT: ;;#ASMEND 9768; GFX900-NEXT: s_setpc_b64 s[30:31] 9769; 9770; GFX90A-LABEL: s_shuffle_v3i32_v4i32__7_1_1: 9771; GFX90A: ; %bb.0: 9772; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9773; GFX90A-NEXT: ;;#ASMSTART 9774; GFX90A-NEXT: ; def s[8:11] 9775; GFX90A-NEXT: ;;#ASMEND 9776; GFX90A-NEXT: ;;#ASMSTART 9777; GFX90A-NEXT: ; def s[4:7] 9778; GFX90A-NEXT: ;;#ASMEND 9779; GFX90A-NEXT: s_mov_b32 s8, s7 9780; GFX90A-NEXT: s_mov_b32 s10, s9 9781; GFX90A-NEXT: ;;#ASMSTART 9782; GFX90A-NEXT: ; use s[8:10] 9783; GFX90A-NEXT: ;;#ASMEND 9784; GFX90A-NEXT: s_setpc_b64 s[30:31] 9785; 9786; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_1_1: 9787; GFX940: ; %bb.0: 9788; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9789; GFX940-NEXT: ;;#ASMSTART 9790; GFX940-NEXT: ; def s[8:11] 9791; GFX940-NEXT: ;;#ASMEND 
9792; GFX940-NEXT: ;;#ASMSTART 9793; GFX940-NEXT: ; def s[0:3] 9794; GFX940-NEXT: ;;#ASMEND 9795; GFX940-NEXT: s_mov_b32 s8, s3 9796; GFX940-NEXT: s_mov_b32 s10, s9 9797; GFX940-NEXT: ;;#ASMSTART 9798; GFX940-NEXT: ; use s[8:10] 9799; GFX940-NEXT: ;;#ASMEND 9800; GFX940-NEXT: s_setpc_b64 s[30:31] 9801 %vec0 = call <4 x i32> asm "; def $0", "=s"() 9802 %vec1 = call <4 x i32> asm "; def $0", "=s"() 9803 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 1, i32 1> 9804 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 9805 ret void 9806} 9807 9808define void @s_shuffle_v3i32_v4i32__7_u_1() { 9809; GFX900-LABEL: s_shuffle_v3i32_v4i32__7_u_1: 9810; GFX900: ; %bb.0: 9811; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9812; GFX900-NEXT: ;;#ASMSTART 9813; GFX900-NEXT: ; def s[8:11] 9814; GFX900-NEXT: ;;#ASMEND 9815; GFX900-NEXT: ;;#ASMSTART 9816; GFX900-NEXT: ; def s[4:7] 9817; GFX900-NEXT: ;;#ASMEND 9818; GFX900-NEXT: s_mov_b32 s8, s11 9819; GFX900-NEXT: s_mov_b32 s10, s5 9820; GFX900-NEXT: ;;#ASMSTART 9821; GFX900-NEXT: ; use s[8:10] 9822; GFX900-NEXT: ;;#ASMEND 9823; GFX900-NEXT: s_setpc_b64 s[30:31] 9824; 9825; GFX90A-LABEL: s_shuffle_v3i32_v4i32__7_u_1: 9826; GFX90A: ; %bb.0: 9827; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9828; GFX90A-NEXT: ;;#ASMSTART 9829; GFX90A-NEXT: ; def s[8:11] 9830; GFX90A-NEXT: ;;#ASMEND 9831; GFX90A-NEXT: ;;#ASMSTART 9832; GFX90A-NEXT: ; def s[4:7] 9833; GFX90A-NEXT: ;;#ASMEND 9834; GFX90A-NEXT: s_mov_b32 s8, s11 9835; GFX90A-NEXT: s_mov_b32 s10, s5 9836; GFX90A-NEXT: ;;#ASMSTART 9837; GFX90A-NEXT: ; use s[8:10] 9838; GFX90A-NEXT: ;;#ASMEND 9839; GFX90A-NEXT: s_setpc_b64 s[30:31] 9840; 9841; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_u_1: 9842; GFX940: ; %bb.0: 9843; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9844; GFX940-NEXT: ;;#ASMSTART 9845; GFX940-NEXT: ; def s[0:3] 9846; GFX940-NEXT: ;;#ASMEND 9847; GFX940-NEXT: ;;#ASMSTART 9848; GFX940-NEXT: ; def s[4:7] 9849; 
GFX940-NEXT: ;;#ASMEND 9850; GFX940-NEXT: s_mov_b32 s8, s7 9851; GFX940-NEXT: s_mov_b32 s10, s1 9852; GFX940-NEXT: ;;#ASMSTART 9853; GFX940-NEXT: ; use s[8:10] 9854; GFX940-NEXT: ;;#ASMEND 9855; GFX940-NEXT: s_setpc_b64 s[30:31] 9856 %vec0 = call <4 x i32> asm "; def $0", "=s"() 9857 %vec1 = call <4 x i32> asm "; def $0", "=s"() 9858 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 poison, i32 1> 9859 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 9860 ret void 9861} 9862 9863define void @s_shuffle_v3i32_v4i32__7_0_1() { 9864; GFX900-LABEL: s_shuffle_v3i32_v4i32__7_0_1: 9865; GFX900: ; %bb.0: 9866; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9867; GFX900-NEXT: ;;#ASMSTART 9868; GFX900-NEXT: ; def s[8:11] 9869; GFX900-NEXT: ;;#ASMEND 9870; GFX900-NEXT: ;;#ASMSTART 9871; GFX900-NEXT: ; def s[4:7] 9872; GFX900-NEXT: ;;#ASMEND 9873; GFX900-NEXT: s_mov_b32 s8, s11 9874; GFX900-NEXT: s_mov_b32 s9, s4 9875; GFX900-NEXT: s_mov_b32 s10, s5 9876; GFX900-NEXT: ;;#ASMSTART 9877; GFX900-NEXT: ; use s[8:10] 9878; GFX900-NEXT: ;;#ASMEND 9879; GFX900-NEXT: s_setpc_b64 s[30:31] 9880; 9881; GFX90A-LABEL: s_shuffle_v3i32_v4i32__7_0_1: 9882; GFX90A: ; %bb.0: 9883; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9884; GFX90A-NEXT: ;;#ASMSTART 9885; GFX90A-NEXT: ; def s[8:11] 9886; GFX90A-NEXT: ;;#ASMEND 9887; GFX90A-NEXT: ;;#ASMSTART 9888; GFX90A-NEXT: ; def s[4:7] 9889; GFX90A-NEXT: ;;#ASMEND 9890; GFX90A-NEXT: s_mov_b32 s8, s11 9891; GFX90A-NEXT: s_mov_b32 s9, s4 9892; GFX90A-NEXT: s_mov_b32 s10, s5 9893; GFX90A-NEXT: ;;#ASMSTART 9894; GFX90A-NEXT: ; use s[8:10] 9895; GFX90A-NEXT: ;;#ASMEND 9896; GFX90A-NEXT: s_setpc_b64 s[30:31] 9897; 9898; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_0_1: 9899; GFX940: ; %bb.0: 9900; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9901; GFX940-NEXT: ;;#ASMSTART 9902; GFX940-NEXT: ; def s[0:3] 9903; GFX940-NEXT: ;;#ASMEND 9904; GFX940-NEXT: ;;#ASMSTART 9905; GFX940-NEXT: ; def s[4:7] 9906; 
GFX940-NEXT: ;;#ASMEND 9907; GFX940-NEXT: s_mov_b32 s8, s7 9908; GFX940-NEXT: s_mov_b32 s9, s0 9909; GFX940-NEXT: s_mov_b32 s10, s1 9910; GFX940-NEXT: ;;#ASMSTART 9911; GFX940-NEXT: ; use s[8:10] 9912; GFX940-NEXT: ;;#ASMEND 9913; GFX940-NEXT: s_setpc_b64 s[30:31] 9914 %vec0 = call <4 x i32> asm "; def $0", "=s"() 9915 %vec1 = call <4 x i32> asm "; def $0", "=s"() 9916 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 0, i32 1> 9917 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 9918 ret void 9919} 9920 9921define void @s_shuffle_v3i32_v4i32__7_2_1() { 9922; GFX900-LABEL: s_shuffle_v3i32_v4i32__7_2_1: 9923; GFX900: ; %bb.0: 9924; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9925; GFX900-NEXT: ;;#ASMSTART 9926; GFX900-NEXT: ; def s[8:11] 9927; GFX900-NEXT: ;;#ASMEND 9928; GFX900-NEXT: ;;#ASMSTART 9929; GFX900-NEXT: ; def s[4:7] 9930; GFX900-NEXT: ;;#ASMEND 9931; GFX900-NEXT: s_mov_b32 s8, s11 9932; GFX900-NEXT: s_mov_b32 s9, s6 9933; GFX900-NEXT: s_mov_b32 s10, s5 9934; GFX900-NEXT: ;;#ASMSTART 9935; GFX900-NEXT: ; use s[8:10] 9936; GFX900-NEXT: ;;#ASMEND 9937; GFX900-NEXT: s_setpc_b64 s[30:31] 9938; 9939; GFX90A-LABEL: s_shuffle_v3i32_v4i32__7_2_1: 9940; GFX90A: ; %bb.0: 9941; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9942; GFX90A-NEXT: ;;#ASMSTART 9943; GFX90A-NEXT: ; def s[8:11] 9944; GFX90A-NEXT: ;;#ASMEND 9945; GFX90A-NEXT: ;;#ASMSTART 9946; GFX90A-NEXT: ; def s[4:7] 9947; GFX90A-NEXT: ;;#ASMEND 9948; GFX90A-NEXT: s_mov_b32 s8, s11 9949; GFX90A-NEXT: s_mov_b32 s9, s6 9950; GFX90A-NEXT: s_mov_b32 s10, s5 9951; GFX90A-NEXT: ;;#ASMSTART 9952; GFX90A-NEXT: ; use s[8:10] 9953; GFX90A-NEXT: ;;#ASMEND 9954; GFX90A-NEXT: s_setpc_b64 s[30:31] 9955; 9956; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_2_1: 9957; GFX940: ; %bb.0: 9958; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9959; GFX940-NEXT: ;;#ASMSTART 9960; GFX940-NEXT: ; def s[0:3] 9961; GFX940-NEXT: ;;#ASMEND 9962; GFX940-NEXT: ;;#ASMSTART 9963; 
GFX940-NEXT: ; def s[4:7] 9964; GFX940-NEXT: ;;#ASMEND 9965; GFX940-NEXT: s_mov_b32 s8, s7 9966; GFX940-NEXT: s_mov_b32 s9, s2 9967; GFX940-NEXT: s_mov_b32 s10, s1 9968; GFX940-NEXT: ;;#ASMSTART 9969; GFX940-NEXT: ; use s[8:10] 9970; GFX940-NEXT: ;;#ASMEND 9971; GFX940-NEXT: s_setpc_b64 s[30:31] 9972 %vec0 = call <4 x i32> asm "; def $0", "=s"() 9973 %vec1 = call <4 x i32> asm "; def $0", "=s"() 9974 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 2, i32 1> 9975 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 9976 ret void 9977} 9978 9979define void @s_shuffle_v3i32_v4i32__7_3_1() { 9980; GFX900-LABEL: s_shuffle_v3i32_v4i32__7_3_1: 9981; GFX900: ; %bb.0: 9982; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9983; GFX900-NEXT: ;;#ASMSTART 9984; GFX900-NEXT: ; def s[8:11] 9985; GFX900-NEXT: ;;#ASMEND 9986; GFX900-NEXT: ;;#ASMSTART 9987; GFX900-NEXT: ; def s[4:7] 9988; GFX900-NEXT: ;;#ASMEND 9989; GFX900-NEXT: s_mov_b32 s8, s11 9990; GFX900-NEXT: s_mov_b32 s9, s7 9991; GFX900-NEXT: s_mov_b32 s10, s5 9992; GFX900-NEXT: ;;#ASMSTART 9993; GFX900-NEXT: ; use s[8:10] 9994; GFX900-NEXT: ;;#ASMEND 9995; GFX900-NEXT: s_setpc_b64 s[30:31] 9996; 9997; GFX90A-LABEL: s_shuffle_v3i32_v4i32__7_3_1: 9998; GFX90A: ; %bb.0: 9999; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10000; GFX90A-NEXT: ;;#ASMSTART 10001; GFX90A-NEXT: ; def s[8:11] 10002; GFX90A-NEXT: ;;#ASMEND 10003; GFX90A-NEXT: ;;#ASMSTART 10004; GFX90A-NEXT: ; def s[4:7] 10005; GFX90A-NEXT: ;;#ASMEND 10006; GFX90A-NEXT: s_mov_b32 s8, s11 10007; GFX90A-NEXT: s_mov_b32 s9, s7 10008; GFX90A-NEXT: s_mov_b32 s10, s5 10009; GFX90A-NEXT: ;;#ASMSTART 10010; GFX90A-NEXT: ; use s[8:10] 10011; GFX90A-NEXT: ;;#ASMEND 10012; GFX90A-NEXT: s_setpc_b64 s[30:31] 10013; 10014; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_3_1: 10015; GFX940: ; %bb.0: 10016; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10017; GFX940-NEXT: ;;#ASMSTART 10018; GFX940-NEXT: ; def s[0:3] 10019; 
GFX940-NEXT: ;;#ASMEND 10020; GFX940-NEXT: ;;#ASMSTART 10021; GFX940-NEXT: ; def s[4:7] 10022; GFX940-NEXT: ;;#ASMEND 10023; GFX940-NEXT: s_mov_b32 s8, s7 10024; GFX940-NEXT: s_mov_b32 s9, s3 10025; GFX940-NEXT: s_mov_b32 s10, s1 10026; GFX940-NEXT: ;;#ASMSTART 10027; GFX940-NEXT: ; use s[8:10] 10028; GFX940-NEXT: ;;#ASMEND 10029; GFX940-NEXT: s_setpc_b64 s[30:31] 10030 %vec0 = call <4 x i32> asm "; def $0", "=s"() 10031 %vec1 = call <4 x i32> asm "; def $0", "=s"() 10032 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 3, i32 1> 10033 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 10034 ret void 10035} 10036 10037define void @s_shuffle_v3i32_v4i32__7_4_1() { 10038; GFX900-LABEL: s_shuffle_v3i32_v4i32__7_4_1: 10039; GFX900: ; %bb.0: 10040; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10041; GFX900-NEXT: ;;#ASMSTART 10042; GFX900-NEXT: ; def s[4:7] 10043; GFX900-NEXT: ;;#ASMEND 10044; GFX900-NEXT: ;;#ASMSTART 10045; GFX900-NEXT: ; def s[12:15] 10046; GFX900-NEXT: ;;#ASMEND 10047; GFX900-NEXT: s_mov_b32 s8, s15 10048; GFX900-NEXT: s_mov_b32 s9, s12 10049; GFX900-NEXT: s_mov_b32 s10, s5 10050; GFX900-NEXT: ;;#ASMSTART 10051; GFX900-NEXT: ; use s[8:10] 10052; GFX900-NEXT: ;;#ASMEND 10053; GFX900-NEXT: s_setpc_b64 s[30:31] 10054; 10055; GFX90A-LABEL: s_shuffle_v3i32_v4i32__7_4_1: 10056; GFX90A: ; %bb.0: 10057; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10058; GFX90A-NEXT: ;;#ASMSTART 10059; GFX90A-NEXT: ; def s[4:7] 10060; GFX90A-NEXT: ;;#ASMEND 10061; GFX90A-NEXT: ;;#ASMSTART 10062; GFX90A-NEXT: ; def s[12:15] 10063; GFX90A-NEXT: ;;#ASMEND 10064; GFX90A-NEXT: s_mov_b32 s8, s15 10065; GFX90A-NEXT: s_mov_b32 s9, s12 10066; GFX90A-NEXT: s_mov_b32 s10, s5 10067; GFX90A-NEXT: ;;#ASMSTART 10068; GFX90A-NEXT: ; use s[8:10] 10069; GFX90A-NEXT: ;;#ASMEND 10070; GFX90A-NEXT: s_setpc_b64 s[30:31] 10071; 10072; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_4_1: 10073; GFX940: ; %bb.0: 10074; GFX940-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) 10075; GFX940-NEXT: ;;#ASMSTART 10076; GFX940-NEXT: ; def s[0:3] 10077; GFX940-NEXT: ;;#ASMEND 10078; GFX940-NEXT: ;;#ASMSTART 10079; GFX940-NEXT: ; def s[4:7] 10080; GFX940-NEXT: ;;#ASMEND 10081; GFX940-NEXT: s_mov_b32 s8, s7 10082; GFX940-NEXT: s_mov_b32 s9, s4 10083; GFX940-NEXT: s_mov_b32 s10, s1 10084; GFX940-NEXT: ;;#ASMSTART 10085; GFX940-NEXT: ; use s[8:10] 10086; GFX940-NEXT: ;;#ASMEND 10087; GFX940-NEXT: s_setpc_b64 s[30:31] 10088 %vec0 = call <4 x i32> asm "; def $0", "=s"() 10089 %vec1 = call <4 x i32> asm "; def $0", "=s"() 10090 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 4, i32 1> 10091 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 10092 ret void 10093} 10094 10095define void @s_shuffle_v3i32_v4i32__7_5_1() { 10096; GFX900-LABEL: s_shuffle_v3i32_v4i32__7_5_1: 10097; GFX900: ; %bb.0: 10098; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10099; GFX900-NEXT: ;;#ASMSTART 10100; GFX900-NEXT: ; def s[8:11] 10101; GFX900-NEXT: ;;#ASMEND 10102; GFX900-NEXT: ;;#ASMSTART 10103; GFX900-NEXT: ; def s[4:7] 10104; GFX900-NEXT: ;;#ASMEND 10105; GFX900-NEXT: s_mov_b32 s8, s11 10106; GFX900-NEXT: s_mov_b32 s10, s5 10107; GFX900-NEXT: ;;#ASMSTART 10108; GFX900-NEXT: ; use s[8:10] 10109; GFX900-NEXT: ;;#ASMEND 10110; GFX900-NEXT: s_setpc_b64 s[30:31] 10111; 10112; GFX90A-LABEL: s_shuffle_v3i32_v4i32__7_5_1: 10113; GFX90A: ; %bb.0: 10114; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10115; GFX90A-NEXT: ;;#ASMSTART 10116; GFX90A-NEXT: ; def s[8:11] 10117; GFX90A-NEXT: ;;#ASMEND 10118; GFX90A-NEXT: ;;#ASMSTART 10119; GFX90A-NEXT: ; def s[4:7] 10120; GFX90A-NEXT: ;;#ASMEND 10121; GFX90A-NEXT: s_mov_b32 s8, s11 10122; GFX90A-NEXT: s_mov_b32 s10, s5 10123; GFX90A-NEXT: ;;#ASMSTART 10124; GFX90A-NEXT: ; use s[8:10] 10125; GFX90A-NEXT: ;;#ASMEND 10126; GFX90A-NEXT: s_setpc_b64 s[30:31] 10127; 10128; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_5_1: 10129; GFX940: ; %bb.0: 10130; GFX940-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10131; GFX940-NEXT: ;;#ASMSTART 10132; GFX940-NEXT: ; def s[8:11] 10133; GFX940-NEXT: ;;#ASMEND 10134; GFX940-NEXT: ;;#ASMSTART 10135; GFX940-NEXT: ; def s[0:3] 10136; GFX940-NEXT: ;;#ASMEND 10137; GFX940-NEXT: s_mov_b32 s8, s11 10138; GFX940-NEXT: s_mov_b32 s10, s1 10139; GFX940-NEXT: ;;#ASMSTART 10140; GFX940-NEXT: ; use s[8:10] 10141; GFX940-NEXT: ;;#ASMEND 10142; GFX940-NEXT: s_setpc_b64 s[30:31] 10143 %vec0 = call <4 x i32> asm "; def $0", "=s"() 10144 %vec1 = call <4 x i32> asm "; def $0", "=s"() 10145 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 5, i32 1> 10146 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 10147 ret void 10148} 10149 10150define void @s_shuffle_v3i32_v4i32__7_6_1() { 10151; GFX900-LABEL: s_shuffle_v3i32_v4i32__7_6_1: 10152; GFX900: ; %bb.0: 10153; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10154; GFX900-NEXT: ;;#ASMSTART 10155; GFX900-NEXT: ; def s[8:11] 10156; GFX900-NEXT: ;;#ASMEND 10157; GFX900-NEXT: ;;#ASMSTART 10158; GFX900-NEXT: ; def s[4:7] 10159; GFX900-NEXT: ;;#ASMEND 10160; GFX900-NEXT: s_mov_b32 s8, s11 10161; GFX900-NEXT: s_mov_b32 s9, s10 10162; GFX900-NEXT: s_mov_b32 s10, s5 10163; GFX900-NEXT: ;;#ASMSTART 10164; GFX900-NEXT: ; use s[8:10] 10165; GFX900-NEXT: ;;#ASMEND 10166; GFX900-NEXT: s_setpc_b64 s[30:31] 10167; 10168; GFX90A-LABEL: s_shuffle_v3i32_v4i32__7_6_1: 10169; GFX90A: ; %bb.0: 10170; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10171; GFX90A-NEXT: ;;#ASMSTART 10172; GFX90A-NEXT: ; def s[8:11] 10173; GFX90A-NEXT: ;;#ASMEND 10174; GFX90A-NEXT: ;;#ASMSTART 10175; GFX90A-NEXT: ; def s[4:7] 10176; GFX90A-NEXT: ;;#ASMEND 10177; GFX90A-NEXT: s_mov_b32 s8, s11 10178; GFX90A-NEXT: s_mov_b32 s9, s10 10179; GFX90A-NEXT: s_mov_b32 s10, s5 10180; GFX90A-NEXT: ;;#ASMSTART 10181; GFX90A-NEXT: ; use s[8:10] 10182; GFX90A-NEXT: ;;#ASMEND 10183; GFX90A-NEXT: s_setpc_b64 s[30:31] 10184; 10185; GFX940-LABEL: 
s_shuffle_v3i32_v4i32__7_6_1: 10186; GFX940: ; %bb.0: 10187; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10188; GFX940-NEXT: ;;#ASMSTART 10189; GFX940-NEXT: ; def s[0:3] 10190; GFX940-NEXT: ;;#ASMEND 10191; GFX940-NEXT: ;;#ASMSTART 10192; GFX940-NEXT: ; def s[4:7] 10193; GFX940-NEXT: ;;#ASMEND 10194; GFX940-NEXT: s_mov_b32 s8, s7 10195; GFX940-NEXT: s_mov_b32 s9, s6 10196; GFX940-NEXT: s_mov_b32 s10, s1 10197; GFX940-NEXT: ;;#ASMSTART 10198; GFX940-NEXT: ; use s[8:10] 10199; GFX940-NEXT: ;;#ASMEND 10200; GFX940-NEXT: s_setpc_b64 s[30:31] 10201 %vec0 = call <4 x i32> asm "; def $0", "=s"() 10202 %vec1 = call <4 x i32> asm "; def $0", "=s"() 10203 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 6, i32 1> 10204 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 10205 ret void 10206} 10207 10208define void @s_shuffle_v3i32_v4i32__u_2_2() { 10209; GFX9-LABEL: s_shuffle_v3i32_v4i32__u_2_2: 10210; GFX9: ; %bb.0: 10211; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10212; GFX9-NEXT: ;;#ASMSTART 10213; GFX9-NEXT: ; def s[8:11] 10214; GFX9-NEXT: ;;#ASMEND 10215; GFX9-NEXT: s_mov_b32 s9, s10 10216; GFX9-NEXT: ;;#ASMSTART 10217; GFX9-NEXT: ; use s[8:10] 10218; GFX9-NEXT: ;;#ASMEND 10219; GFX9-NEXT: s_setpc_b64 s[30:31] 10220 %vec0 = call <4 x i32> asm "; def $0", "=s"() 10221 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> <i32 poison, i32 2, i32 2> 10222 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 10223 ret void 10224} 10225 10226define void @s_shuffle_v3i32_v4i32__0_2_2() { 10227; GFX9-LABEL: s_shuffle_v3i32_v4i32__0_2_2: 10228; GFX9: ; %bb.0: 10229; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10230; GFX9-NEXT: ;;#ASMSTART 10231; GFX9-NEXT: ; def s[8:11] 10232; GFX9-NEXT: ;;#ASMEND 10233; GFX9-NEXT: s_mov_b32 s9, s10 10234; GFX9-NEXT: ;;#ASMSTART 10235; GFX9-NEXT: ; use s[8:10] 10236; GFX9-NEXT: ;;#ASMEND 10237; GFX9-NEXT: s_setpc_b64 s[30:31] 10238 %vec0 = call <4 x 
i32> asm "; def $0", "=s"() 10239 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> <i32 0, i32 2, i32 2> 10240 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 10241 ret void 10242} 10243 10244define void @s_shuffle_v3i32_v4i32__1_2_2() { 10245; GFX9-LABEL: s_shuffle_v3i32_v4i32__1_2_2: 10246; GFX9: ; %bb.0: 10247; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10248; GFX9-NEXT: ;;#ASMSTART 10249; GFX9-NEXT: ; def s[8:11] 10250; GFX9-NEXT: ;;#ASMEND 10251; GFX9-NEXT: s_mov_b32 s8, s9 10252; GFX9-NEXT: s_mov_b32 s9, s10 10253; GFX9-NEXT: ;;#ASMSTART 10254; GFX9-NEXT: ; use s[8:10] 10255; GFX9-NEXT: ;;#ASMEND 10256; GFX9-NEXT: s_setpc_b64 s[30:31] 10257 %vec0 = call <4 x i32> asm "; def $0", "=s"() 10258 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> <i32 1, i32 2, i32 2> 10259 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 10260 ret void 10261} 10262 10263define void @s_shuffle_v3i32_v4i32__2_2_2() { 10264; GFX9-LABEL: s_shuffle_v3i32_v4i32__2_2_2: 10265; GFX9: ; %bb.0: 10266; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10267; GFX9-NEXT: ;;#ASMSTART 10268; GFX9-NEXT: ; def s[8:11] 10269; GFX9-NEXT: ;;#ASMEND 10270; GFX9-NEXT: s_mov_b32 s8, s10 10271; GFX9-NEXT: s_mov_b32 s9, s10 10272; GFX9-NEXT: ;;#ASMSTART 10273; GFX9-NEXT: ; use s[8:10] 10274; GFX9-NEXT: ;;#ASMEND 10275; GFX9-NEXT: s_setpc_b64 s[30:31] 10276 %vec0 = call <4 x i32> asm "; def $0", "=s"() 10277 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> <i32 2, i32 2, i32 2> 10278 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 10279 ret void 10280} 10281 10282define void @s_shuffle_v3i32_v4i32__3_2_2() { 10283; GFX9-LABEL: s_shuffle_v3i32_v4i32__3_2_2: 10284; GFX9: ; %bb.0: 10285; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10286; GFX9-NEXT: ;;#ASMSTART 10287; GFX9-NEXT: ; def s[8:11] 10288; GFX9-NEXT: ;;#ASMEND 10289; GFX9-NEXT: s_mov_b32 s8, s11 10290; GFX9-NEXT: s_mov_b32 s9, s10 
10291; GFX9-NEXT: ;;#ASMSTART 10292; GFX9-NEXT: ; use s[8:10] 10293; GFX9-NEXT: ;;#ASMEND 10294; GFX9-NEXT: s_setpc_b64 s[30:31] 10295 %vec0 = call <4 x i32> asm "; def $0", "=s"() 10296 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> <i32 3, i32 2, i32 2> 10297 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 10298 ret void 10299} 10300 10301define void @s_shuffle_v3i32_v4i32__4_2_2() { 10302; GFX9-LABEL: s_shuffle_v3i32_v4i32__4_2_2: 10303; GFX9: ; %bb.0: 10304; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10305; GFX9-NEXT: ;;#ASMSTART 10306; GFX9-NEXT: ; def s[8:11] 10307; GFX9-NEXT: ;;#ASMEND 10308; GFX9-NEXT: s_mov_b32 s9, s10 10309; GFX9-NEXT: ;;#ASMSTART 10310; GFX9-NEXT: ; use s[8:10] 10311; GFX9-NEXT: ;;#ASMEND 10312; GFX9-NEXT: s_setpc_b64 s[30:31] 10313 %vec0 = call <4 x i32> asm "; def $0", "=s"() 10314 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> <i32 4, i32 2, i32 2> 10315 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 10316 ret void 10317} 10318 10319define void @s_shuffle_v3i32_v4i32__5_2_2() { 10320; GFX900-LABEL: s_shuffle_v3i32_v4i32__5_2_2: 10321; GFX900: ; %bb.0: 10322; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10323; GFX900-NEXT: ;;#ASMSTART 10324; GFX900-NEXT: ; def s[8:11] 10325; GFX900-NEXT: ;;#ASMEND 10326; GFX900-NEXT: ;;#ASMSTART 10327; GFX900-NEXT: ; def s[4:7] 10328; GFX900-NEXT: ;;#ASMEND 10329; GFX900-NEXT: s_mov_b32 s8, s5 10330; GFX900-NEXT: s_mov_b32 s9, s10 10331; GFX900-NEXT: ;;#ASMSTART 10332; GFX900-NEXT: ; use s[8:10] 10333; GFX900-NEXT: ;;#ASMEND 10334; GFX900-NEXT: s_setpc_b64 s[30:31] 10335; 10336; GFX90A-LABEL: s_shuffle_v3i32_v4i32__5_2_2: 10337; GFX90A: ; %bb.0: 10338; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10339; GFX90A-NEXT: ;;#ASMSTART 10340; GFX90A-NEXT: ; def s[8:11] 10341; GFX90A-NEXT: ;;#ASMEND 10342; GFX90A-NEXT: ;;#ASMSTART 10343; GFX90A-NEXT: ; def s[4:7] 10344; GFX90A-NEXT: ;;#ASMEND 10345; GFX90A-NEXT: 
s_mov_b32 s8, s5 10346; GFX90A-NEXT: s_mov_b32 s9, s10 10347; GFX90A-NEXT: ;;#ASMSTART 10348; GFX90A-NEXT: ; use s[8:10] 10349; GFX90A-NEXT: ;;#ASMEND 10350; GFX90A-NEXT: s_setpc_b64 s[30:31] 10351; 10352; GFX940-LABEL: s_shuffle_v3i32_v4i32__5_2_2: 10353; GFX940: ; %bb.0: 10354; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10355; GFX940-NEXT: ;;#ASMSTART 10356; GFX940-NEXT: ; def s[8:11] 10357; GFX940-NEXT: ;;#ASMEND 10358; GFX940-NEXT: ;;#ASMSTART 10359; GFX940-NEXT: ; def s[0:3] 10360; GFX940-NEXT: ;;#ASMEND 10361; GFX940-NEXT: s_mov_b32 s8, s1 10362; GFX940-NEXT: s_mov_b32 s9, s10 10363; GFX940-NEXT: ;;#ASMSTART 10364; GFX940-NEXT: ; use s[8:10] 10365; GFX940-NEXT: ;;#ASMEND 10366; GFX940-NEXT: s_setpc_b64 s[30:31] 10367 %vec0 = call <4 x i32> asm "; def $0", "=s"() 10368 %vec1 = call <4 x i32> asm "; def $0", "=s"() 10369 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 5, i32 2, i32 2> 10370 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 10371 ret void 10372} 10373 10374define void @s_shuffle_v3i32_v4i32__6_2_2() { 10375; GFX900-LABEL: s_shuffle_v3i32_v4i32__6_2_2: 10376; GFX900: ; %bb.0: 10377; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10378; GFX900-NEXT: ;;#ASMSTART 10379; GFX900-NEXT: ; def s[8:11] 10380; GFX900-NEXT: ;;#ASMEND 10381; GFX900-NEXT: ;;#ASMSTART 10382; GFX900-NEXT: ; def s[4:7] 10383; GFX900-NEXT: ;;#ASMEND 10384; GFX900-NEXT: s_mov_b32 s8, s6 10385; GFX900-NEXT: s_mov_b32 s9, s10 10386; GFX900-NEXT: ;;#ASMSTART 10387; GFX900-NEXT: ; use s[8:10] 10388; GFX900-NEXT: ;;#ASMEND 10389; GFX900-NEXT: s_setpc_b64 s[30:31] 10390; 10391; GFX90A-LABEL: s_shuffle_v3i32_v4i32__6_2_2: 10392; GFX90A: ; %bb.0: 10393; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10394; GFX90A-NEXT: ;;#ASMSTART 10395; GFX90A-NEXT: ; def s[8:11] 10396; GFX90A-NEXT: ;;#ASMEND 10397; GFX90A-NEXT: ;;#ASMSTART 10398; GFX90A-NEXT: ; def s[4:7] 10399; GFX90A-NEXT: ;;#ASMEND 10400; GFX90A-NEXT: s_mov_b32 s8, s6 
10401; GFX90A-NEXT: s_mov_b32 s9, s10 10402; GFX90A-NEXT: ;;#ASMSTART 10403; GFX90A-NEXT: ; use s[8:10] 10404; GFX90A-NEXT: ;;#ASMEND 10405; GFX90A-NEXT: s_setpc_b64 s[30:31] 10406; 10407; GFX940-LABEL: s_shuffle_v3i32_v4i32__6_2_2: 10408; GFX940: ; %bb.0: 10409; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10410; GFX940-NEXT: ;;#ASMSTART 10411; GFX940-NEXT: ; def s[8:11] 10412; GFX940-NEXT: ;;#ASMEND 10413; GFX940-NEXT: ;;#ASMSTART 10414; GFX940-NEXT: ; def s[0:3] 10415; GFX940-NEXT: ;;#ASMEND 10416; GFX940-NEXT: s_mov_b32 s8, s2 10417; GFX940-NEXT: s_mov_b32 s9, s10 10418; GFX940-NEXT: ;;#ASMSTART 10419; GFX940-NEXT: ; use s[8:10] 10420; GFX940-NEXT: ;;#ASMEND 10421; GFX940-NEXT: s_setpc_b64 s[30:31] 10422 %vec0 = call <4 x i32> asm "; def $0", "=s"() 10423 %vec1 = call <4 x i32> asm "; def $0", "=s"() 10424 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 6, i32 2, i32 2> 10425 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 10426 ret void 10427} 10428 10429define void @s_shuffle_v3i32_v4i32__7_2_2() { 10430; GFX900-LABEL: s_shuffle_v3i32_v4i32__7_2_2: 10431; GFX900: ; %bb.0: 10432; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10433; GFX900-NEXT: ;;#ASMSTART 10434; GFX900-NEXT: ; def s[8:11] 10435; GFX900-NEXT: ;;#ASMEND 10436; GFX900-NEXT: ;;#ASMSTART 10437; GFX900-NEXT: ; def s[4:7] 10438; GFX900-NEXT: ;;#ASMEND 10439; GFX900-NEXT: s_mov_b32 s8, s7 10440; GFX900-NEXT: s_mov_b32 s9, s10 10441; GFX900-NEXT: ;;#ASMSTART 10442; GFX900-NEXT: ; use s[8:10] 10443; GFX900-NEXT: ;;#ASMEND 10444; GFX900-NEXT: s_setpc_b64 s[30:31] 10445; 10446; GFX90A-LABEL: s_shuffle_v3i32_v4i32__7_2_2: 10447; GFX90A: ; %bb.0: 10448; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10449; GFX90A-NEXT: ;;#ASMSTART 10450; GFX90A-NEXT: ; def s[8:11] 10451; GFX90A-NEXT: ;;#ASMEND 10452; GFX90A-NEXT: ;;#ASMSTART 10453; GFX90A-NEXT: ; def s[4:7] 10454; GFX90A-NEXT: ;;#ASMEND 10455; GFX90A-NEXT: s_mov_b32 s8, s7 10456; GFX90A-NEXT: 
s_mov_b32 s9, s10 10457; GFX90A-NEXT: ;;#ASMSTART 10458; GFX90A-NEXT: ; use s[8:10] 10459; GFX90A-NEXT: ;;#ASMEND 10460; GFX90A-NEXT: s_setpc_b64 s[30:31] 10461; 10462; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_2_2: 10463; GFX940: ; %bb.0: 10464; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10465; GFX940-NEXT: ;;#ASMSTART 10466; GFX940-NEXT: ; def s[8:11] 10467; GFX940-NEXT: ;;#ASMEND 10468; GFX940-NEXT: ;;#ASMSTART 10469; GFX940-NEXT: ; def s[0:3] 10470; GFX940-NEXT: ;;#ASMEND 10471; GFX940-NEXT: s_mov_b32 s8, s3 10472; GFX940-NEXT: s_mov_b32 s9, s10 10473; GFX940-NEXT: ;;#ASMSTART 10474; GFX940-NEXT: ; use s[8:10] 10475; GFX940-NEXT: ;;#ASMEND 10476; GFX940-NEXT: s_setpc_b64 s[30:31] 10477 %vec0 = call <4 x i32> asm "; def $0", "=s"() 10478 %vec1 = call <4 x i32> asm "; def $0", "=s"() 10479 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 2, i32 2> 10480 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 10481 ret void 10482} 10483 10484define void @s_shuffle_v3i32_v4i32__7_u_2() { 10485; GFX900-LABEL: s_shuffle_v3i32_v4i32__7_u_2: 10486; GFX900: ; %bb.0: 10487; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10488; GFX900-NEXT: ;;#ASMSTART 10489; GFX900-NEXT: ; def s[8:11] 10490; GFX900-NEXT: ;;#ASMEND 10491; GFX900-NEXT: ;;#ASMSTART 10492; GFX900-NEXT: ; def s[4:7] 10493; GFX900-NEXT: ;;#ASMEND 10494; GFX900-NEXT: s_mov_b32 s8, s7 10495; GFX900-NEXT: ;;#ASMSTART 10496; GFX900-NEXT: ; use s[8:10] 10497; GFX900-NEXT: ;;#ASMEND 10498; GFX900-NEXT: s_setpc_b64 s[30:31] 10499; 10500; GFX90A-LABEL: s_shuffle_v3i32_v4i32__7_u_2: 10501; GFX90A: ; %bb.0: 10502; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10503; GFX90A-NEXT: ;;#ASMSTART 10504; GFX90A-NEXT: ; def s[8:11] 10505; GFX90A-NEXT: ;;#ASMEND 10506; GFX90A-NEXT: ;;#ASMSTART 10507; GFX90A-NEXT: ; def s[4:7] 10508; GFX90A-NEXT: ;;#ASMEND 10509; GFX90A-NEXT: s_mov_b32 s8, s7 10510; GFX90A-NEXT: ;;#ASMSTART 10511; GFX90A-NEXT: ; use s[8:10] 10512; 
GFX90A-NEXT: ;;#ASMEND 10513; GFX90A-NEXT: s_setpc_b64 s[30:31] 10514; 10515; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_u_2: 10516; GFX940: ; %bb.0: 10517; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10518; GFX940-NEXT: ;;#ASMSTART 10519; GFX940-NEXT: ; def s[8:11] 10520; GFX940-NEXT: ;;#ASMEND 10521; GFX940-NEXT: ;;#ASMSTART 10522; GFX940-NEXT: ; def s[0:3] 10523; GFX940-NEXT: ;;#ASMEND 10524; GFX940-NEXT: s_mov_b32 s8, s3 10525; GFX940-NEXT: ;;#ASMSTART 10526; GFX940-NEXT: ; use s[8:10] 10527; GFX940-NEXT: ;;#ASMEND 10528; GFX940-NEXT: s_setpc_b64 s[30:31] 10529 %vec0 = call <4 x i32> asm "; def $0", "=s"() 10530 %vec1 = call <4 x i32> asm "; def $0", "=s"() 10531 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 poison, i32 2> 10532 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 10533 ret void 10534} 10535 10536define void @s_shuffle_v3i32_v4i32__7_0_2() { 10537; GFX900-LABEL: s_shuffle_v3i32_v4i32__7_0_2: 10538; GFX900: ; %bb.0: 10539; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10540; GFX900-NEXT: ;;#ASMSTART 10541; GFX900-NEXT: ; def s[8:11] 10542; GFX900-NEXT: ;;#ASMEND 10543; GFX900-NEXT: ;;#ASMSTART 10544; GFX900-NEXT: ; def s[4:7] 10545; GFX900-NEXT: ;;#ASMEND 10546; GFX900-NEXT: s_mov_b32 s8, s11 10547; GFX900-NEXT: s_mov_b32 s9, s4 10548; GFX900-NEXT: s_mov_b32 s10, s6 10549; GFX900-NEXT: ;;#ASMSTART 10550; GFX900-NEXT: ; use s[8:10] 10551; GFX900-NEXT: ;;#ASMEND 10552; GFX900-NEXT: s_setpc_b64 s[30:31] 10553; 10554; GFX90A-LABEL: s_shuffle_v3i32_v4i32__7_0_2: 10555; GFX90A: ; %bb.0: 10556; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10557; GFX90A-NEXT: ;;#ASMSTART 10558; GFX90A-NEXT: ; def s[8:11] 10559; GFX90A-NEXT: ;;#ASMEND 10560; GFX90A-NEXT: ;;#ASMSTART 10561; GFX90A-NEXT: ; def s[4:7] 10562; GFX90A-NEXT: ;;#ASMEND 10563; GFX90A-NEXT: s_mov_b32 s8, s11 10564; GFX90A-NEXT: s_mov_b32 s9, s4 10565; GFX90A-NEXT: s_mov_b32 s10, s6 10566; GFX90A-NEXT: ;;#ASMSTART 10567; GFX90A-NEXT: 
; use s[8:10] 10568; GFX90A-NEXT: ;;#ASMEND 10569; GFX90A-NEXT: s_setpc_b64 s[30:31] 10570; 10571; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_0_2: 10572; GFX940: ; %bb.0: 10573; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10574; GFX940-NEXT: ;;#ASMSTART 10575; GFX940-NEXT: ; def s[0:3] 10576; GFX940-NEXT: ;;#ASMEND 10577; GFX940-NEXT: ;;#ASMSTART 10578; GFX940-NEXT: ; def s[4:7] 10579; GFX940-NEXT: ;;#ASMEND 10580; GFX940-NEXT: s_mov_b32 s8, s7 10581; GFX940-NEXT: s_mov_b32 s9, s0 10582; GFX940-NEXT: s_mov_b32 s10, s2 10583; GFX940-NEXT: ;;#ASMSTART 10584; GFX940-NEXT: ; use s[8:10] 10585; GFX940-NEXT: ;;#ASMEND 10586; GFX940-NEXT: s_setpc_b64 s[30:31] 10587 %vec0 = call <4 x i32> asm "; def $0", "=s"() 10588 %vec1 = call <4 x i32> asm "; def $0", "=s"() 10589 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 0, i32 2> 10590 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 10591 ret void 10592} 10593 10594define void @s_shuffle_v3i32_v4i32__7_1_2() { 10595; GFX900-LABEL: s_shuffle_v3i32_v4i32__7_1_2: 10596; GFX900: ; %bb.0: 10597; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10598; GFX900-NEXT: ;;#ASMSTART 10599; GFX900-NEXT: ; def s[8:11] 10600; GFX900-NEXT: ;;#ASMEND 10601; GFX900-NEXT: ;;#ASMSTART 10602; GFX900-NEXT: ; def s[4:7] 10603; GFX900-NEXT: ;;#ASMEND 10604; GFX900-NEXT: s_mov_b32 s8, s7 10605; GFX900-NEXT: ;;#ASMSTART 10606; GFX900-NEXT: ; use s[8:10] 10607; GFX900-NEXT: ;;#ASMEND 10608; GFX900-NEXT: s_setpc_b64 s[30:31] 10609; 10610; GFX90A-LABEL: s_shuffle_v3i32_v4i32__7_1_2: 10611; GFX90A: ; %bb.0: 10612; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10613; GFX90A-NEXT: ;;#ASMSTART 10614; GFX90A-NEXT: ; def s[8:11] 10615; GFX90A-NEXT: ;;#ASMEND 10616; GFX90A-NEXT: ;;#ASMSTART 10617; GFX90A-NEXT: ; def s[4:7] 10618; GFX90A-NEXT: ;;#ASMEND 10619; GFX90A-NEXT: s_mov_b32 s8, s7 10620; GFX90A-NEXT: ;;#ASMSTART 10621; GFX90A-NEXT: ; use s[8:10] 10622; GFX90A-NEXT: ;;#ASMEND 10623; 
GFX90A-NEXT: s_setpc_b64 s[30:31] 10624; 10625; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_1_2: 10626; GFX940: ; %bb.0: 10627; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10628; GFX940-NEXT: ;;#ASMSTART 10629; GFX940-NEXT: ; def s[8:11] 10630; GFX940-NEXT: ;;#ASMEND 10631; GFX940-NEXT: ;;#ASMSTART 10632; GFX940-NEXT: ; def s[0:3] 10633; GFX940-NEXT: ;;#ASMEND 10634; GFX940-NEXT: s_mov_b32 s8, s3 10635; GFX940-NEXT: ;;#ASMSTART 10636; GFX940-NEXT: ; use s[8:10] 10637; GFX940-NEXT: ;;#ASMEND 10638; GFX940-NEXT: s_setpc_b64 s[30:31] 10639 %vec0 = call <4 x i32> asm "; def $0", "=s"() 10640 %vec1 = call <4 x i32> asm "; def $0", "=s"() 10641 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 1, i32 2> 10642 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 10643 ret void 10644} 10645 10646define void @s_shuffle_v3i32_v4i32__7_3_2() { 10647; GFX900-LABEL: s_shuffle_v3i32_v4i32__7_3_2: 10648; GFX900: ; %bb.0: 10649; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10650; GFX900-NEXT: ;;#ASMSTART 10651; GFX900-NEXT: ; def s[8:11] 10652; GFX900-NEXT: ;;#ASMEND 10653; GFX900-NEXT: ;;#ASMSTART 10654; GFX900-NEXT: ; def s[4:7] 10655; GFX900-NEXT: ;;#ASMEND 10656; GFX900-NEXT: s_mov_b32 s8, s7 10657; GFX900-NEXT: s_mov_b32 s9, s11 10658; GFX900-NEXT: ;;#ASMSTART 10659; GFX900-NEXT: ; use s[8:10] 10660; GFX900-NEXT: ;;#ASMEND 10661; GFX900-NEXT: s_setpc_b64 s[30:31] 10662; 10663; GFX90A-LABEL: s_shuffle_v3i32_v4i32__7_3_2: 10664; GFX90A: ; %bb.0: 10665; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10666; GFX90A-NEXT: ;;#ASMSTART 10667; GFX90A-NEXT: ; def s[8:11] 10668; GFX90A-NEXT: ;;#ASMEND 10669; GFX90A-NEXT: ;;#ASMSTART 10670; GFX90A-NEXT: ; def s[4:7] 10671; GFX90A-NEXT: ;;#ASMEND 10672; GFX90A-NEXT: s_mov_b32 s8, s7 10673; GFX90A-NEXT: s_mov_b32 s9, s11 10674; GFX90A-NEXT: ;;#ASMSTART 10675; GFX90A-NEXT: ; use s[8:10] 10676; GFX90A-NEXT: ;;#ASMEND 10677; GFX90A-NEXT: s_setpc_b64 s[30:31] 10678; 10679; 
GFX940-LABEL: s_shuffle_v3i32_v4i32__7_3_2: 10680; GFX940: ; %bb.0: 10681; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10682; GFX940-NEXT: ;;#ASMSTART 10683; GFX940-NEXT: ; def s[8:11] 10684; GFX940-NEXT: ;;#ASMEND 10685; GFX940-NEXT: ;;#ASMSTART 10686; GFX940-NEXT: ; def s[0:3] 10687; GFX940-NEXT: ;;#ASMEND 10688; GFX940-NEXT: s_mov_b32 s8, s3 10689; GFX940-NEXT: s_mov_b32 s9, s11 10690; GFX940-NEXT: ;;#ASMSTART 10691; GFX940-NEXT: ; use s[8:10] 10692; GFX940-NEXT: ;;#ASMEND 10693; GFX940-NEXT: s_setpc_b64 s[30:31] 10694 %vec0 = call <4 x i32> asm "; def $0", "=s"() 10695 %vec1 = call <4 x i32> asm "; def $0", "=s"() 10696 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 3, i32 2> 10697 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 10698 ret void 10699} 10700 10701define void @s_shuffle_v3i32_v4i32__7_4_2() { 10702; GFX900-LABEL: s_shuffle_v3i32_v4i32__7_4_2: 10703; GFX900: ; %bb.0: 10704; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10705; GFX900-NEXT: ;;#ASMSTART 10706; GFX900-NEXT: ; def s[8:11] 10707; GFX900-NEXT: ;;#ASMEND 10708; GFX900-NEXT: ;;#ASMSTART 10709; GFX900-NEXT: ; def s[4:7] 10710; GFX900-NEXT: ;;#ASMEND 10711; GFX900-NEXT: s_mov_b32 s8, s7 10712; GFX900-NEXT: s_mov_b32 s9, s4 10713; GFX900-NEXT: ;;#ASMSTART 10714; GFX900-NEXT: ; use s[8:10] 10715; GFX900-NEXT: ;;#ASMEND 10716; GFX900-NEXT: s_setpc_b64 s[30:31] 10717; 10718; GFX90A-LABEL: s_shuffle_v3i32_v4i32__7_4_2: 10719; GFX90A: ; %bb.0: 10720; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10721; GFX90A-NEXT: ;;#ASMSTART 10722; GFX90A-NEXT: ; def s[8:11] 10723; GFX90A-NEXT: ;;#ASMEND 10724; GFX90A-NEXT: ;;#ASMSTART 10725; GFX90A-NEXT: ; def s[4:7] 10726; GFX90A-NEXT: ;;#ASMEND 10727; GFX90A-NEXT: s_mov_b32 s8, s7 10728; GFX90A-NEXT: s_mov_b32 s9, s4 10729; GFX90A-NEXT: ;;#ASMSTART 10730; GFX90A-NEXT: ; use s[8:10] 10731; GFX90A-NEXT: ;;#ASMEND 10732; GFX90A-NEXT: s_setpc_b64 s[30:31] 10733; 10734; GFX940-LABEL: 
s_shuffle_v3i32_v4i32__7_4_2: 10735; GFX940: ; %bb.0: 10736; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10737; GFX940-NEXT: ;;#ASMSTART 10738; GFX940-NEXT: ; def s[8:11] 10739; GFX940-NEXT: ;;#ASMEND 10740; GFX940-NEXT: ;;#ASMSTART 10741; GFX940-NEXT: ; def s[0:3] 10742; GFX940-NEXT: ;;#ASMEND 10743; GFX940-NEXT: s_mov_b32 s8, s3 10744; GFX940-NEXT: s_mov_b32 s9, s0 10745; GFX940-NEXT: ;;#ASMSTART 10746; GFX940-NEXT: ; use s[8:10] 10747; GFX940-NEXT: ;;#ASMEND 10748; GFX940-NEXT: s_setpc_b64 s[30:31] 10749 %vec0 = call <4 x i32> asm "; def $0", "=s"() 10750 %vec1 = call <4 x i32> asm "; def $0", "=s"() 10751 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 4, i32 2> 10752 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 10753 ret void 10754} 10755 10756define void @s_shuffle_v3i32_v4i32__7_5_2() { 10757; GFX900-LABEL: s_shuffle_v3i32_v4i32__7_5_2: 10758; GFX900: ; %bb.0: 10759; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10760; GFX900-NEXT: ;;#ASMSTART 10761; GFX900-NEXT: ; def s[8:11] 10762; GFX900-NEXT: ;;#ASMEND 10763; GFX900-NEXT: ;;#ASMSTART 10764; GFX900-NEXT: ; def s[4:7] 10765; GFX900-NEXT: ;;#ASMEND 10766; GFX900-NEXT: s_mov_b32 s8, s11 10767; GFX900-NEXT: s_mov_b32 s10, s6 10768; GFX900-NEXT: ;;#ASMSTART 10769; GFX900-NEXT: ; use s[8:10] 10770; GFX900-NEXT: ;;#ASMEND 10771; GFX900-NEXT: s_setpc_b64 s[30:31] 10772; 10773; GFX90A-LABEL: s_shuffle_v3i32_v4i32__7_5_2: 10774; GFX90A: ; %bb.0: 10775; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10776; GFX90A-NEXT: ;;#ASMSTART 10777; GFX90A-NEXT: ; def s[8:11] 10778; GFX90A-NEXT: ;;#ASMEND 10779; GFX90A-NEXT: ;;#ASMSTART 10780; GFX90A-NEXT: ; def s[4:7] 10781; GFX90A-NEXT: ;;#ASMEND 10782; GFX90A-NEXT: s_mov_b32 s8, s11 10783; GFX90A-NEXT: s_mov_b32 s10, s6 10784; GFX90A-NEXT: ;;#ASMSTART 10785; GFX90A-NEXT: ; use s[8:10] 10786; GFX90A-NEXT: ;;#ASMEND 10787; GFX90A-NEXT: s_setpc_b64 s[30:31] 10788; 10789; GFX940-LABEL: 
s_shuffle_v3i32_v4i32__7_5_2: 10790; GFX940: ; %bb.0: 10791; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10792; GFX940-NEXT: ;;#ASMSTART 10793; GFX940-NEXT: ; def s[8:11] 10794; GFX940-NEXT: ;;#ASMEND 10795; GFX940-NEXT: ;;#ASMSTART 10796; GFX940-NEXT: ; def s[0:3] 10797; GFX940-NEXT: ;;#ASMEND 10798; GFX940-NEXT: s_mov_b32 s8, s11 10799; GFX940-NEXT: s_mov_b32 s10, s2 10800; GFX940-NEXT: ;;#ASMSTART 10801; GFX940-NEXT: ; use s[8:10] 10802; GFX940-NEXT: ;;#ASMEND 10803; GFX940-NEXT: s_setpc_b64 s[30:31] 10804 %vec0 = call <4 x i32> asm "; def $0", "=s"() 10805 %vec1 = call <4 x i32> asm "; def $0", "=s"() 10806 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 5, i32 2> 10807 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 10808 ret void 10809} 10810 10811define void @s_shuffle_v3i32_v4i32__7_6_2() { 10812; GFX900-LABEL: s_shuffle_v3i32_v4i32__7_6_2: 10813; GFX900: ; %bb.0: 10814; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10815; GFX900-NEXT: ;;#ASMSTART 10816; GFX900-NEXT: ; def s[8:11] 10817; GFX900-NEXT: ;;#ASMEND 10818; GFX900-NEXT: ;;#ASMSTART 10819; GFX900-NEXT: ; def s[4:7] 10820; GFX900-NEXT: ;;#ASMEND 10821; GFX900-NEXT: s_mov_b32 s8, s7 10822; GFX900-NEXT: s_mov_b32 s9, s6 10823; GFX900-NEXT: ;;#ASMSTART 10824; GFX900-NEXT: ; use s[8:10] 10825; GFX900-NEXT: ;;#ASMEND 10826; GFX900-NEXT: s_setpc_b64 s[30:31] 10827; 10828; GFX90A-LABEL: s_shuffle_v3i32_v4i32__7_6_2: 10829; GFX90A: ; %bb.0: 10830; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10831; GFX90A-NEXT: ;;#ASMSTART 10832; GFX90A-NEXT: ; def s[8:11] 10833; GFX90A-NEXT: ;;#ASMEND 10834; GFX90A-NEXT: ;;#ASMSTART 10835; GFX90A-NEXT: ; def s[4:7] 10836; GFX90A-NEXT: ;;#ASMEND 10837; GFX90A-NEXT: s_mov_b32 s8, s7 10838; GFX90A-NEXT: s_mov_b32 s9, s6 10839; GFX90A-NEXT: ;;#ASMSTART 10840; GFX90A-NEXT: ; use s[8:10] 10841; GFX90A-NEXT: ;;#ASMEND 10842; GFX90A-NEXT: s_setpc_b64 s[30:31] 10843; 10844; GFX940-LABEL: 
s_shuffle_v3i32_v4i32__7_6_2: 10845; GFX940: ; %bb.0: 10846; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10847; GFX940-NEXT: ;;#ASMSTART 10848; GFX940-NEXT: ; def s[8:11] 10849; GFX940-NEXT: ;;#ASMEND 10850; GFX940-NEXT: ;;#ASMSTART 10851; GFX940-NEXT: ; def s[0:3] 10852; GFX940-NEXT: ;;#ASMEND 10853; GFX940-NEXT: s_mov_b32 s8, s3 10854; GFX940-NEXT: s_mov_b32 s9, s2 10855; GFX940-NEXT: ;;#ASMSTART 10856; GFX940-NEXT: ; use s[8:10] 10857; GFX940-NEXT: ;;#ASMEND 10858; GFX940-NEXT: s_setpc_b64 s[30:31] 10859 %vec0 = call <4 x i32> asm "; def $0", "=s"() 10860 %vec1 = call <4 x i32> asm "; def $0", "=s"() 10861 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 6, i32 2> 10862 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 10863 ret void 10864} 10865 10866define void @s_shuffle_v3i32_v4i32__u_3_3() { 10867; GFX900-LABEL: s_shuffle_v3i32_v4i32__u_3_3: 10868; GFX900: ; %bb.0: 10869; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10870; GFX900-NEXT: ;;#ASMSTART 10871; GFX900-NEXT: ; def s[4:7] 10872; GFX900-NEXT: ;;#ASMEND 10873; GFX900-NEXT: s_mov_b32 s9, s7 10874; GFX900-NEXT: s_mov_b32 s10, s7 10875; GFX900-NEXT: ;;#ASMSTART 10876; GFX900-NEXT: ; use s[8:10] 10877; GFX900-NEXT: ;;#ASMEND 10878; GFX900-NEXT: s_setpc_b64 s[30:31] 10879; 10880; GFX90A-LABEL: s_shuffle_v3i32_v4i32__u_3_3: 10881; GFX90A: ; %bb.0: 10882; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10883; GFX90A-NEXT: ;;#ASMSTART 10884; GFX90A-NEXT: ; def s[4:7] 10885; GFX90A-NEXT: ;;#ASMEND 10886; GFX90A-NEXT: s_mov_b32 s9, s7 10887; GFX90A-NEXT: s_mov_b32 s10, s7 10888; GFX90A-NEXT: ;;#ASMSTART 10889; GFX90A-NEXT: ; use s[8:10] 10890; GFX90A-NEXT: ;;#ASMEND 10891; GFX90A-NEXT: s_setpc_b64 s[30:31] 10892; 10893; GFX940-LABEL: s_shuffle_v3i32_v4i32__u_3_3: 10894; GFX940: ; %bb.0: 10895; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10896; GFX940-NEXT: ;;#ASMSTART 10897; GFX940-NEXT: ; def s[0:3] 10898; GFX940-NEXT: ;;#ASMEND 10899; 
GFX940-NEXT: s_mov_b32 s9, s3 10900; GFX940-NEXT: s_mov_b32 s10, s3 10901; GFX940-NEXT: ;;#ASMSTART 10902; GFX940-NEXT: ; use s[8:10] 10903; GFX940-NEXT: ;;#ASMEND 10904; GFX940-NEXT: s_setpc_b64 s[30:31] 10905 %vec0 = call <4 x i32> asm "; def $0", "=s"() 10906 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> <i32 poison, i32 3, i32 3> 10907 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 10908 ret void 10909} 10910 10911define void @s_shuffle_v3i32_v4i32__0_3_3() { 10912; GFX9-LABEL: s_shuffle_v3i32_v4i32__0_3_3: 10913; GFX9: ; %bb.0: 10914; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10915; GFX9-NEXT: ;;#ASMSTART 10916; GFX9-NEXT: ; def s[8:11] 10917; GFX9-NEXT: ;;#ASMEND 10918; GFX9-NEXT: s_mov_b32 s9, s11 10919; GFX9-NEXT: s_mov_b32 s10, s11 10920; GFX9-NEXT: ;;#ASMSTART 10921; GFX9-NEXT: ; use s[8:10] 10922; GFX9-NEXT: ;;#ASMEND 10923; GFX9-NEXT: s_setpc_b64 s[30:31] 10924 %vec0 = call <4 x i32> asm "; def $0", "=s"() 10925 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> <i32 0, i32 3, i32 3> 10926 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 10927 ret void 10928} 10929 10930define void @s_shuffle_v3i32_v4i32__1_3_3() { 10931; GFX900-LABEL: s_shuffle_v3i32_v4i32__1_3_3: 10932; GFX900: ; %bb.0: 10933; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10934; GFX900-NEXT: ;;#ASMSTART 10935; GFX900-NEXT: ; def s[4:7] 10936; GFX900-NEXT: ;;#ASMEND 10937; GFX900-NEXT: s_mov_b32 s8, s5 10938; GFX900-NEXT: s_mov_b32 s9, s7 10939; GFX900-NEXT: s_mov_b32 s10, s7 10940; GFX900-NEXT: ;;#ASMSTART 10941; GFX900-NEXT: ; use s[8:10] 10942; GFX900-NEXT: ;;#ASMEND 10943; GFX900-NEXT: s_setpc_b64 s[30:31] 10944; 10945; GFX90A-LABEL: s_shuffle_v3i32_v4i32__1_3_3: 10946; GFX90A: ; %bb.0: 10947; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10948; GFX90A-NEXT: ;;#ASMSTART 10949; GFX90A-NEXT: ; def s[4:7] 10950; GFX90A-NEXT: ;;#ASMEND 10951; GFX90A-NEXT: s_mov_b32 s8, s5 10952; GFX90A-NEXT: 
s_mov_b32 s9, s7 10953; GFX90A-NEXT: s_mov_b32 s10, s7 10954; GFX90A-NEXT: ;;#ASMSTART 10955; GFX90A-NEXT: ; use s[8:10] 10956; GFX90A-NEXT: ;;#ASMEND 10957; GFX90A-NEXT: s_setpc_b64 s[30:31] 10958; 10959; GFX940-LABEL: s_shuffle_v3i32_v4i32__1_3_3: 10960; GFX940: ; %bb.0: 10961; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10962; GFX940-NEXT: ;;#ASMSTART 10963; GFX940-NEXT: ; def s[0:3] 10964; GFX940-NEXT: ;;#ASMEND 10965; GFX940-NEXT: s_mov_b32 s8, s1 10966; GFX940-NEXT: s_mov_b32 s9, s3 10967; GFX940-NEXT: s_mov_b32 s10, s3 10968; GFX940-NEXT: ;;#ASMSTART 10969; GFX940-NEXT: ; use s[8:10] 10970; GFX940-NEXT: ;;#ASMEND 10971; GFX940-NEXT: s_setpc_b64 s[30:31] 10972 %vec0 = call <4 x i32> asm "; def $0", "=s"() 10973 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> <i32 1, i32 3, i32 3> 10974 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 10975 ret void 10976} 10977 10978define void @s_shuffle_v3i32_v4i32__2_3_3() { 10979; GFX900-LABEL: s_shuffle_v3i32_v4i32__2_3_3: 10980; GFX900: ; %bb.0: 10981; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10982; GFX900-NEXT: ;;#ASMSTART 10983; GFX900-NEXT: ; def s[4:7] 10984; GFX900-NEXT: ;;#ASMEND 10985; GFX900-NEXT: s_mov_b32 s8, s6 10986; GFX900-NEXT: s_mov_b32 s9, s7 10987; GFX900-NEXT: s_mov_b32 s10, s7 10988; GFX900-NEXT: ;;#ASMSTART 10989; GFX900-NEXT: ; use s[8:10] 10990; GFX900-NEXT: ;;#ASMEND 10991; GFX900-NEXT: s_setpc_b64 s[30:31] 10992; 10993; GFX90A-LABEL: s_shuffle_v3i32_v4i32__2_3_3: 10994; GFX90A: ; %bb.0: 10995; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10996; GFX90A-NEXT: ;;#ASMSTART 10997; GFX90A-NEXT: ; def s[4:7] 10998; GFX90A-NEXT: ;;#ASMEND 10999; GFX90A-NEXT: s_mov_b32 s8, s6 11000; GFX90A-NEXT: s_mov_b32 s9, s7 11001; GFX90A-NEXT: s_mov_b32 s10, s7 11002; GFX90A-NEXT: ;;#ASMSTART 11003; GFX90A-NEXT: ; use s[8:10] 11004; GFX90A-NEXT: ;;#ASMEND 11005; GFX90A-NEXT: s_setpc_b64 s[30:31] 11006; 11007; GFX940-LABEL: s_shuffle_v3i32_v4i32__2_3_3: 
11008; GFX940: ; %bb.0: 11009; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11010; GFX940-NEXT: ;;#ASMSTART 11011; GFX940-NEXT: ; def s[0:3] 11012; GFX940-NEXT: ;;#ASMEND 11013; GFX940-NEXT: s_mov_b32 s8, s2 11014; GFX940-NEXT: s_mov_b32 s9, s3 11015; GFX940-NEXT: s_mov_b32 s10, s3 11016; GFX940-NEXT: ;;#ASMSTART 11017; GFX940-NEXT: ; use s[8:10] 11018; GFX940-NEXT: ;;#ASMEND 11019; GFX940-NEXT: s_setpc_b64 s[30:31] 11020 %vec0 = call <4 x i32> asm "; def $0", "=s"() 11021 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> <i32 2, i32 3, i32 3> 11022 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 11023 ret void 11024} 11025 11026define void @s_shuffle_v3i32_v4i32__3_3_3() { 11027; GFX900-LABEL: s_shuffle_v3i32_v4i32__3_3_3: 11028; GFX900: ; %bb.0: 11029; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11030; GFX900-NEXT: ;;#ASMSTART 11031; GFX900-NEXT: ; def s[4:7] 11032; GFX900-NEXT: ;;#ASMEND 11033; GFX900-NEXT: s_mov_b32 s8, s7 11034; GFX900-NEXT: s_mov_b32 s9, s7 11035; GFX900-NEXT: s_mov_b32 s10, s7 11036; GFX900-NEXT: ;;#ASMSTART 11037; GFX900-NEXT: ; use s[8:10] 11038; GFX900-NEXT: ;;#ASMEND 11039; GFX900-NEXT: s_setpc_b64 s[30:31] 11040; 11041; GFX90A-LABEL: s_shuffle_v3i32_v4i32__3_3_3: 11042; GFX90A: ; %bb.0: 11043; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11044; GFX90A-NEXT: ;;#ASMSTART 11045; GFX90A-NEXT: ; def s[4:7] 11046; GFX90A-NEXT: ;;#ASMEND 11047; GFX90A-NEXT: s_mov_b32 s8, s7 11048; GFX90A-NEXT: s_mov_b32 s9, s7 11049; GFX90A-NEXT: s_mov_b32 s10, s7 11050; GFX90A-NEXT: ;;#ASMSTART 11051; GFX90A-NEXT: ; use s[8:10] 11052; GFX90A-NEXT: ;;#ASMEND 11053; GFX90A-NEXT: s_setpc_b64 s[30:31] 11054; 11055; GFX940-LABEL: s_shuffle_v3i32_v4i32__3_3_3: 11056; GFX940: ; %bb.0: 11057; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11058; GFX940-NEXT: ;;#ASMSTART 11059; GFX940-NEXT: ; def s[0:3] 11060; GFX940-NEXT: ;;#ASMEND 11061; GFX940-NEXT: s_mov_b32 s8, s3 11062; GFX940-NEXT: s_mov_b32 s9, 
s3 11063; GFX940-NEXT: s_mov_b32 s10, s3 11064; GFX940-NEXT: ;;#ASMSTART 11065; GFX940-NEXT: ; use s[8:10] 11066; GFX940-NEXT: ;;#ASMEND 11067; GFX940-NEXT: s_setpc_b64 s[30:31] 11068 %vec0 = call <4 x i32> asm "; def $0", "=s"() 11069 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> <i32 3, i32 3, i32 3> 11070 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 11071 ret void 11072} 11073 11074define void @s_shuffle_v3i32_v4i32__4_3_3() { 11075; GFX900-LABEL: s_shuffle_v3i32_v4i32__4_3_3: 11076; GFX900: ; %bb.0: 11077; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11078; GFX900-NEXT: ;;#ASMSTART 11079; GFX900-NEXT: ; def s[4:7] 11080; GFX900-NEXT: ;;#ASMEND 11081; GFX900-NEXT: s_mov_b32 s9, s7 11082; GFX900-NEXT: s_mov_b32 s10, s7 11083; GFX900-NEXT: ;;#ASMSTART 11084; GFX900-NEXT: ; use s[8:10] 11085; GFX900-NEXT: ;;#ASMEND 11086; GFX900-NEXT: s_setpc_b64 s[30:31] 11087; 11088; GFX90A-LABEL: s_shuffle_v3i32_v4i32__4_3_3: 11089; GFX90A: ; %bb.0: 11090; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11091; GFX90A-NEXT: ;;#ASMSTART 11092; GFX90A-NEXT: ; def s[4:7] 11093; GFX90A-NEXT: ;;#ASMEND 11094; GFX90A-NEXT: s_mov_b32 s9, s7 11095; GFX90A-NEXT: s_mov_b32 s10, s7 11096; GFX90A-NEXT: ;;#ASMSTART 11097; GFX90A-NEXT: ; use s[8:10] 11098; GFX90A-NEXT: ;;#ASMEND 11099; GFX90A-NEXT: s_setpc_b64 s[30:31] 11100; 11101; GFX940-LABEL: s_shuffle_v3i32_v4i32__4_3_3: 11102; GFX940: ; %bb.0: 11103; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11104; GFX940-NEXT: ;;#ASMSTART 11105; GFX940-NEXT: ; def s[0:3] 11106; GFX940-NEXT: ;;#ASMEND 11107; GFX940-NEXT: s_mov_b32 s9, s3 11108; GFX940-NEXT: s_mov_b32 s10, s3 11109; GFX940-NEXT: ;;#ASMSTART 11110; GFX940-NEXT: ; use s[8:10] 11111; GFX940-NEXT: ;;#ASMEND 11112; GFX940-NEXT: s_setpc_b64 s[30:31] 11113 %vec0 = call <4 x i32> asm "; def $0", "=s"() 11114 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> <i32 4, i32 3, i32 3> 11115 call void asm sideeffect "; 
use $0", "{s[8:10]}"(<3 x i32> %shuf) 11116 ret void 11117} 11118 11119define void @s_shuffle_v3i32_v4i32__5_3_3() { 11120; GFX900-LABEL: s_shuffle_v3i32_v4i32__5_3_3: 11121; GFX900: ; %bb.0: 11122; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11123; GFX900-NEXT: ;;#ASMSTART 11124; GFX900-NEXT: ; def s[8:11] 11125; GFX900-NEXT: ;;#ASMEND 11126; GFX900-NEXT: ;;#ASMSTART 11127; GFX900-NEXT: ; def s[4:7] 11128; GFX900-NEXT: ;;#ASMEND 11129; GFX900-NEXT: s_mov_b32 s8, s9 11130; GFX900-NEXT: s_mov_b32 s9, s7 11131; GFX900-NEXT: s_mov_b32 s10, s7 11132; GFX900-NEXT: ;;#ASMSTART 11133; GFX900-NEXT: ; use s[8:10] 11134; GFX900-NEXT: ;;#ASMEND 11135; GFX900-NEXT: s_setpc_b64 s[30:31] 11136; 11137; GFX90A-LABEL: s_shuffle_v3i32_v4i32__5_3_3: 11138; GFX90A: ; %bb.0: 11139; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11140; GFX90A-NEXT: ;;#ASMSTART 11141; GFX90A-NEXT: ; def s[8:11] 11142; GFX90A-NEXT: ;;#ASMEND 11143; GFX90A-NEXT: ;;#ASMSTART 11144; GFX90A-NEXT: ; def s[4:7] 11145; GFX90A-NEXT: ;;#ASMEND 11146; GFX90A-NEXT: s_mov_b32 s8, s9 11147; GFX90A-NEXT: s_mov_b32 s9, s7 11148; GFX90A-NEXT: s_mov_b32 s10, s7 11149; GFX90A-NEXT: ;;#ASMSTART 11150; GFX90A-NEXT: ; use s[8:10] 11151; GFX90A-NEXT: ;;#ASMEND 11152; GFX90A-NEXT: s_setpc_b64 s[30:31] 11153; 11154; GFX940-LABEL: s_shuffle_v3i32_v4i32__5_3_3: 11155; GFX940: ; %bb.0: 11156; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11157; GFX940-NEXT: ;;#ASMSTART 11158; GFX940-NEXT: ; def s[0:3] 11159; GFX940-NEXT: ;;#ASMEND 11160; GFX940-NEXT: ;;#ASMSTART 11161; GFX940-NEXT: ; def s[4:7] 11162; GFX940-NEXT: ;;#ASMEND 11163; GFX940-NEXT: s_mov_b32 s8, s5 11164; GFX940-NEXT: s_mov_b32 s9, s3 11165; GFX940-NEXT: s_mov_b32 s10, s3 11166; GFX940-NEXT: ;;#ASMSTART 11167; GFX940-NEXT: ; use s[8:10] 11168; GFX940-NEXT: ;;#ASMEND 11169; GFX940-NEXT: s_setpc_b64 s[30:31] 11170 %vec0 = call <4 x i32> asm "; def $0", "=s"() 11171 %vec1 = call <4 x i32> asm "; def $0", "=s"() 11172 %shuf = shufflevector <4 x i32> 
%vec0, <4 x i32> %vec1, <3 x i32> <i32 5, i32 3, i32 3> 11173 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 11174 ret void 11175} 11176 11177define void @s_shuffle_v3i32_v4i32__6_3_3() { 11178; GFX900-LABEL: s_shuffle_v3i32_v4i32__6_3_3: 11179; GFX900: ; %bb.0: 11180; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11181; GFX900-NEXT: ;;#ASMSTART 11182; GFX900-NEXT: ; def s[8:11] 11183; GFX900-NEXT: ;;#ASMEND 11184; GFX900-NEXT: ;;#ASMSTART 11185; GFX900-NEXT: ; def s[4:7] 11186; GFX900-NEXT: ;;#ASMEND 11187; GFX900-NEXT: s_mov_b32 s8, s10 11188; GFX900-NEXT: s_mov_b32 s9, s7 11189; GFX900-NEXT: s_mov_b32 s10, s7 11190; GFX900-NEXT: ;;#ASMSTART 11191; GFX900-NEXT: ; use s[8:10] 11192; GFX900-NEXT: ;;#ASMEND 11193; GFX900-NEXT: s_setpc_b64 s[30:31] 11194; 11195; GFX90A-LABEL: s_shuffle_v3i32_v4i32__6_3_3: 11196; GFX90A: ; %bb.0: 11197; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11198; GFX90A-NEXT: ;;#ASMSTART 11199; GFX90A-NEXT: ; def s[8:11] 11200; GFX90A-NEXT: ;;#ASMEND 11201; GFX90A-NEXT: ;;#ASMSTART 11202; GFX90A-NEXT: ; def s[4:7] 11203; GFX90A-NEXT: ;;#ASMEND 11204; GFX90A-NEXT: s_mov_b32 s8, s10 11205; GFX90A-NEXT: s_mov_b32 s9, s7 11206; GFX90A-NEXT: s_mov_b32 s10, s7 11207; GFX90A-NEXT: ;;#ASMSTART 11208; GFX90A-NEXT: ; use s[8:10] 11209; GFX90A-NEXT: ;;#ASMEND 11210; GFX90A-NEXT: s_setpc_b64 s[30:31] 11211; 11212; GFX940-LABEL: s_shuffle_v3i32_v4i32__6_3_3: 11213; GFX940: ; %bb.0: 11214; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11215; GFX940-NEXT: ;;#ASMSTART 11216; GFX940-NEXT: ; def s[0:3] 11217; GFX940-NEXT: ;;#ASMEND 11218; GFX940-NEXT: ;;#ASMSTART 11219; GFX940-NEXT: ; def s[4:7] 11220; GFX940-NEXT: ;;#ASMEND 11221; GFX940-NEXT: s_mov_b32 s8, s6 11222; GFX940-NEXT: s_mov_b32 s9, s3 11223; GFX940-NEXT: s_mov_b32 s10, s3 11224; GFX940-NEXT: ;;#ASMSTART 11225; GFX940-NEXT: ; use s[8:10] 11226; GFX940-NEXT: ;;#ASMEND 11227; GFX940-NEXT: s_setpc_b64 s[30:31] 11228 %vec0 = call <4 x i32> asm "; def $0", "=s"() 
11229 %vec1 = call <4 x i32> asm "; def $0", "=s"() 11230 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 6, i32 3, i32 3> 11231 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 11232 ret void 11233} 11234 11235define void @s_shuffle_v3i32_v4i32__7_3_3() { 11236; GFX900-LABEL: s_shuffle_v3i32_v4i32__7_3_3: 11237; GFX900: ; %bb.0: 11238; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11239; GFX900-NEXT: ;;#ASMSTART 11240; GFX900-NEXT: ; def s[8:11] 11241; GFX900-NEXT: ;;#ASMEND 11242; GFX900-NEXT: ;;#ASMSTART 11243; GFX900-NEXT: ; def s[4:7] 11244; GFX900-NEXT: ;;#ASMEND 11245; GFX900-NEXT: s_mov_b32 s8, s11 11246; GFX900-NEXT: s_mov_b32 s9, s7 11247; GFX900-NEXT: s_mov_b32 s10, s7 11248; GFX900-NEXT: ;;#ASMSTART 11249; GFX900-NEXT: ; use s[8:10] 11250; GFX900-NEXT: ;;#ASMEND 11251; GFX900-NEXT: s_setpc_b64 s[30:31] 11252; 11253; GFX90A-LABEL: s_shuffle_v3i32_v4i32__7_3_3: 11254; GFX90A: ; %bb.0: 11255; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11256; GFX90A-NEXT: ;;#ASMSTART 11257; GFX90A-NEXT: ; def s[8:11] 11258; GFX90A-NEXT: ;;#ASMEND 11259; GFX90A-NEXT: ;;#ASMSTART 11260; GFX90A-NEXT: ; def s[4:7] 11261; GFX90A-NEXT: ;;#ASMEND 11262; GFX90A-NEXT: s_mov_b32 s8, s11 11263; GFX90A-NEXT: s_mov_b32 s9, s7 11264; GFX90A-NEXT: s_mov_b32 s10, s7 11265; GFX90A-NEXT: ;;#ASMSTART 11266; GFX90A-NEXT: ; use s[8:10] 11267; GFX90A-NEXT: ;;#ASMEND 11268; GFX90A-NEXT: s_setpc_b64 s[30:31] 11269; 11270; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_3_3: 11271; GFX940: ; %bb.0: 11272; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11273; GFX940-NEXT: ;;#ASMSTART 11274; GFX940-NEXT: ; def s[0:3] 11275; GFX940-NEXT: ;;#ASMEND 11276; GFX940-NEXT: ;;#ASMSTART 11277; GFX940-NEXT: ; def s[4:7] 11278; GFX940-NEXT: ;;#ASMEND 11279; GFX940-NEXT: s_mov_b32 s8, s7 11280; GFX940-NEXT: s_mov_b32 s9, s3 11281; GFX940-NEXT: s_mov_b32 s10, s3 11282; GFX940-NEXT: ;;#ASMSTART 11283; GFX940-NEXT: ; use s[8:10] 11284; GFX940-NEXT: ;;#ASMEND 
11285; GFX940-NEXT: s_setpc_b64 s[30:31] 11286 %vec0 = call <4 x i32> asm "; def $0", "=s"() 11287 %vec1 = call <4 x i32> asm "; def $0", "=s"() 11288 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 3, i32 3> 11289 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 11290 ret void 11291} 11292 11293define void @s_shuffle_v3i32_v4i32__7_u_3() { 11294; GFX900-LABEL: s_shuffle_v3i32_v4i32__7_u_3: 11295; GFX900: ; %bb.0: 11296; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11297; GFX900-NEXT: ;;#ASMSTART 11298; GFX900-NEXT: ; def s[8:11] 11299; GFX900-NEXT: ;;#ASMEND 11300; GFX900-NEXT: ;;#ASMSTART 11301; GFX900-NEXT: ; def s[4:7] 11302; GFX900-NEXT: ;;#ASMEND 11303; GFX900-NEXT: s_mov_b32 s8, s11 11304; GFX900-NEXT: s_mov_b32 s10, s7 11305; GFX900-NEXT: ;;#ASMSTART 11306; GFX900-NEXT: ; use s[8:10] 11307; GFX900-NEXT: ;;#ASMEND 11308; GFX900-NEXT: s_setpc_b64 s[30:31] 11309; 11310; GFX90A-LABEL: s_shuffle_v3i32_v4i32__7_u_3: 11311; GFX90A: ; %bb.0: 11312; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11313; GFX90A-NEXT: ;;#ASMSTART 11314; GFX90A-NEXT: ; def s[8:11] 11315; GFX90A-NEXT: ;;#ASMEND 11316; GFX90A-NEXT: ;;#ASMSTART 11317; GFX90A-NEXT: ; def s[4:7] 11318; GFX90A-NEXT: ;;#ASMEND 11319; GFX90A-NEXT: s_mov_b32 s8, s11 11320; GFX90A-NEXT: s_mov_b32 s10, s7 11321; GFX90A-NEXT: ;;#ASMSTART 11322; GFX90A-NEXT: ; use s[8:10] 11323; GFX90A-NEXT: ;;#ASMEND 11324; GFX90A-NEXT: s_setpc_b64 s[30:31] 11325; 11326; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_u_3: 11327; GFX940: ; %bb.0: 11328; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11329; GFX940-NEXT: ;;#ASMSTART 11330; GFX940-NEXT: ; def s[0:3] 11331; GFX940-NEXT: ;;#ASMEND 11332; GFX940-NEXT: ;;#ASMSTART 11333; GFX940-NEXT: ; def s[4:7] 11334; GFX940-NEXT: ;;#ASMEND 11335; GFX940-NEXT: s_mov_b32 s8, s7 11336; GFX940-NEXT: s_mov_b32 s10, s3 11337; GFX940-NEXT: ;;#ASMSTART 11338; GFX940-NEXT: ; use s[8:10] 11339; GFX940-NEXT: ;;#ASMEND 11340; GFX940-NEXT: 
s_setpc_b64 s[30:31] 11341 %vec0 = call <4 x i32> asm "; def $0", "=s"() 11342 %vec1 = call <4 x i32> asm "; def $0", "=s"() 11343 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 poison, i32 3> 11344 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 11345 ret void 11346} 11347 11348define void @s_shuffle_v3i32_v4i32__7_0_3() { 11349; GFX900-LABEL: s_shuffle_v3i32_v4i32__7_0_3: 11350; GFX900: ; %bb.0: 11351; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11352; GFX900-NEXT: ;;#ASMSTART 11353; GFX900-NEXT: ; def s[8:11] 11354; GFX900-NEXT: ;;#ASMEND 11355; GFX900-NEXT: ;;#ASMSTART 11356; GFX900-NEXT: ; def s[4:7] 11357; GFX900-NEXT: ;;#ASMEND 11358; GFX900-NEXT: s_mov_b32 s8, s11 11359; GFX900-NEXT: s_mov_b32 s9, s4 11360; GFX900-NEXT: s_mov_b32 s10, s7 11361; GFX900-NEXT: ;;#ASMSTART 11362; GFX900-NEXT: ; use s[8:10] 11363; GFX900-NEXT: ;;#ASMEND 11364; GFX900-NEXT: s_setpc_b64 s[30:31] 11365; 11366; GFX90A-LABEL: s_shuffle_v3i32_v4i32__7_0_3: 11367; GFX90A: ; %bb.0: 11368; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11369; GFX90A-NEXT: ;;#ASMSTART 11370; GFX90A-NEXT: ; def s[8:11] 11371; GFX90A-NEXT: ;;#ASMEND 11372; GFX90A-NEXT: ;;#ASMSTART 11373; GFX90A-NEXT: ; def s[4:7] 11374; GFX90A-NEXT: ;;#ASMEND 11375; GFX90A-NEXT: s_mov_b32 s8, s11 11376; GFX90A-NEXT: s_mov_b32 s9, s4 11377; GFX90A-NEXT: s_mov_b32 s10, s7 11378; GFX90A-NEXT: ;;#ASMSTART 11379; GFX90A-NEXT: ; use s[8:10] 11380; GFX90A-NEXT: ;;#ASMEND 11381; GFX90A-NEXT: s_setpc_b64 s[30:31] 11382; 11383; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_0_3: 11384; GFX940: ; %bb.0: 11385; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11386; GFX940-NEXT: ;;#ASMSTART 11387; GFX940-NEXT: ; def s[0:3] 11388; GFX940-NEXT: ;;#ASMEND 11389; GFX940-NEXT: ;;#ASMSTART 11390; GFX940-NEXT: ; def s[4:7] 11391; GFX940-NEXT: ;;#ASMEND 11392; GFX940-NEXT: s_mov_b32 s8, s7 11393; GFX940-NEXT: s_mov_b32 s9, s0 11394; GFX940-NEXT: s_mov_b32 s10, s3 11395; GFX940-NEXT: 
;;#ASMSTART 11396; GFX940-NEXT: ; use s[8:10] 11397; GFX940-NEXT: ;;#ASMEND 11398; GFX940-NEXT: s_setpc_b64 s[30:31] 11399 %vec0 = call <4 x i32> asm "; def $0", "=s"() 11400 %vec1 = call <4 x i32> asm "; def $0", "=s"() 11401 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 0, i32 3> 11402 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 11403 ret void 11404} 11405 11406define void @s_shuffle_v3i32_v4i32__7_1_3() { 11407; GFX900-LABEL: s_shuffle_v3i32_v4i32__7_1_3: 11408; GFX900: ; %bb.0: 11409; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11410; GFX900-NEXT: ;;#ASMSTART 11411; GFX900-NEXT: ; def s[8:11] 11412; GFX900-NEXT: ;;#ASMEND 11413; GFX900-NEXT: ;;#ASMSTART 11414; GFX900-NEXT: ; def s[4:7] 11415; GFX900-NEXT: ;;#ASMEND 11416; GFX900-NEXT: s_mov_b32 s8, s7 11417; GFX900-NEXT: s_mov_b32 s10, s11 11418; GFX900-NEXT: ;;#ASMSTART 11419; GFX900-NEXT: ; use s[8:10] 11420; GFX900-NEXT: ;;#ASMEND 11421; GFX900-NEXT: s_setpc_b64 s[30:31] 11422; 11423; GFX90A-LABEL: s_shuffle_v3i32_v4i32__7_1_3: 11424; GFX90A: ; %bb.0: 11425; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11426; GFX90A-NEXT: ;;#ASMSTART 11427; GFX90A-NEXT: ; def s[8:11] 11428; GFX90A-NEXT: ;;#ASMEND 11429; GFX90A-NEXT: ;;#ASMSTART 11430; GFX90A-NEXT: ; def s[4:7] 11431; GFX90A-NEXT: ;;#ASMEND 11432; GFX90A-NEXT: s_mov_b32 s8, s7 11433; GFX90A-NEXT: s_mov_b32 s10, s11 11434; GFX90A-NEXT: ;;#ASMSTART 11435; GFX90A-NEXT: ; use s[8:10] 11436; GFX90A-NEXT: ;;#ASMEND 11437; GFX90A-NEXT: s_setpc_b64 s[30:31] 11438; 11439; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_1_3: 11440; GFX940: ; %bb.0: 11441; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11442; GFX940-NEXT: ;;#ASMSTART 11443; GFX940-NEXT: ; def s[8:11] 11444; GFX940-NEXT: ;;#ASMEND 11445; GFX940-NEXT: ;;#ASMSTART 11446; GFX940-NEXT: ; def s[0:3] 11447; GFX940-NEXT: ;;#ASMEND 11448; GFX940-NEXT: s_mov_b32 s8, s3 11449; GFX940-NEXT: s_mov_b32 s10, s11 11450; GFX940-NEXT: ;;#ASMSTART 11451; 
GFX940-NEXT: ; use s[8:10] 11452; GFX940-NEXT: ;;#ASMEND 11453; GFX940-NEXT: s_setpc_b64 s[30:31] 11454 %vec0 = call <4 x i32> asm "; def $0", "=s"() 11455 %vec1 = call <4 x i32> asm "; def $0", "=s"() 11456 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 1, i32 3> 11457 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 11458 ret void 11459} 11460 11461define void @s_shuffle_v3i32_v4i32__7_2_3() { 11462; GFX900-LABEL: s_shuffle_v3i32_v4i32__7_2_3: 11463; GFX900: ; %bb.0: 11464; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11465; GFX900-NEXT: ;;#ASMSTART 11466; GFX900-NEXT: ; def s[8:11] 11467; GFX900-NEXT: ;;#ASMEND 11468; GFX900-NEXT: ;;#ASMSTART 11469; GFX900-NEXT: ; def s[4:7] 11470; GFX900-NEXT: ;;#ASMEND 11471; GFX900-NEXT: s_mov_b32 s8, s11 11472; GFX900-NEXT: s_mov_b32 s9, s6 11473; GFX900-NEXT: s_mov_b32 s10, s7 11474; GFX900-NEXT: ;;#ASMSTART 11475; GFX900-NEXT: ; use s[8:10] 11476; GFX900-NEXT: ;;#ASMEND 11477; GFX900-NEXT: s_setpc_b64 s[30:31] 11478; 11479; GFX90A-LABEL: s_shuffle_v3i32_v4i32__7_2_3: 11480; GFX90A: ; %bb.0: 11481; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11482; GFX90A-NEXT: ;;#ASMSTART 11483; GFX90A-NEXT: ; def s[8:11] 11484; GFX90A-NEXT: ;;#ASMEND 11485; GFX90A-NEXT: ;;#ASMSTART 11486; GFX90A-NEXT: ; def s[4:7] 11487; GFX90A-NEXT: ;;#ASMEND 11488; GFX90A-NEXT: s_mov_b32 s8, s11 11489; GFX90A-NEXT: s_mov_b32 s9, s6 11490; GFX90A-NEXT: s_mov_b32 s10, s7 11491; GFX90A-NEXT: ;;#ASMSTART 11492; GFX90A-NEXT: ; use s[8:10] 11493; GFX90A-NEXT: ;;#ASMEND 11494; GFX90A-NEXT: s_setpc_b64 s[30:31] 11495; 11496; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_2_3: 11497; GFX940: ; %bb.0: 11498; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11499; GFX940-NEXT: ;;#ASMSTART 11500; GFX940-NEXT: ; def s[0:3] 11501; GFX940-NEXT: ;;#ASMEND 11502; GFX940-NEXT: ;;#ASMSTART 11503; GFX940-NEXT: ; def s[4:7] 11504; GFX940-NEXT: ;;#ASMEND 11505; GFX940-NEXT: s_mov_b32 s8, s7 11506; GFX940-NEXT: 
s_mov_b32 s9, s2 11507; GFX940-NEXT: s_mov_b32 s10, s3 11508; GFX940-NEXT: ;;#ASMSTART 11509; GFX940-NEXT: ; use s[8:10] 11510; GFX940-NEXT: ;;#ASMEND 11511; GFX940-NEXT: s_setpc_b64 s[30:31] 11512 %vec0 = call <4 x i32> asm "; def $0", "=s"() 11513 %vec1 = call <4 x i32> asm "; def $0", "=s"() 11514 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 2, i32 3> 11515 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 11516 ret void 11517} 11518 11519define void @s_shuffle_v3i32_v4i32__7_4_3() { 11520; GFX900-LABEL: s_shuffle_v3i32_v4i32__7_4_3: 11521; GFX900: ; %bb.0: 11522; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11523; GFX900-NEXT: ;;#ASMSTART 11524; GFX900-NEXT: ; def s[4:7] 11525; GFX900-NEXT: ;;#ASMEND 11526; GFX900-NEXT: ;;#ASMSTART 11527; GFX900-NEXT: ; def s[12:15] 11528; GFX900-NEXT: ;;#ASMEND 11529; GFX900-NEXT: s_mov_b32 s8, s15 11530; GFX900-NEXT: s_mov_b32 s9, s12 11531; GFX900-NEXT: s_mov_b32 s10, s7 11532; GFX900-NEXT: ;;#ASMSTART 11533; GFX900-NEXT: ; use s[8:10] 11534; GFX900-NEXT: ;;#ASMEND 11535; GFX900-NEXT: s_setpc_b64 s[30:31] 11536; 11537; GFX90A-LABEL: s_shuffle_v3i32_v4i32__7_4_3: 11538; GFX90A: ; %bb.0: 11539; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11540; GFX90A-NEXT: ;;#ASMSTART 11541; GFX90A-NEXT: ; def s[4:7] 11542; GFX90A-NEXT: ;;#ASMEND 11543; GFX90A-NEXT: ;;#ASMSTART 11544; GFX90A-NEXT: ; def s[12:15] 11545; GFX90A-NEXT: ;;#ASMEND 11546; GFX90A-NEXT: s_mov_b32 s8, s15 11547; GFX90A-NEXT: s_mov_b32 s9, s12 11548; GFX90A-NEXT: s_mov_b32 s10, s7 11549; GFX90A-NEXT: ;;#ASMSTART 11550; GFX90A-NEXT: ; use s[8:10] 11551; GFX90A-NEXT: ;;#ASMEND 11552; GFX90A-NEXT: s_setpc_b64 s[30:31] 11553; 11554; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_4_3: 11555; GFX940: ; %bb.0: 11556; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11557; GFX940-NEXT: ;;#ASMSTART 11558; GFX940-NEXT: ; def s[0:3] 11559; GFX940-NEXT: ;;#ASMEND 11560; GFX940-NEXT: ;;#ASMSTART 11561; GFX940-NEXT: ; def 
s[4:7] 11562; GFX940-NEXT: ;;#ASMEND 11563; GFX940-NEXT: s_mov_b32 s8, s7 11564; GFX940-NEXT: s_mov_b32 s9, s4 11565; GFX940-NEXT: s_mov_b32 s10, s3 11566; GFX940-NEXT: ;;#ASMSTART 11567; GFX940-NEXT: ; use s[8:10] 11568; GFX940-NEXT: ;;#ASMEND 11569; GFX940-NEXT: s_setpc_b64 s[30:31] 11570 %vec0 = call <4 x i32> asm "; def $0", "=s"() 11571 %vec1 = call <4 x i32> asm "; def $0", "=s"() 11572 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 4, i32 3> 11573 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 11574 ret void 11575} 11576 11577define void @s_shuffle_v3i32_v4i32__7_5_3() { 11578; GFX900-LABEL: s_shuffle_v3i32_v4i32__7_5_3: 11579; GFX900: ; %bb.0: 11580; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11581; GFX900-NEXT: ;;#ASMSTART 11582; GFX900-NEXT: ; def s[8:11] 11583; GFX900-NEXT: ;;#ASMEND 11584; GFX900-NEXT: ;;#ASMSTART 11585; GFX900-NEXT: ; def s[4:7] 11586; GFX900-NEXT: ;;#ASMEND 11587; GFX900-NEXT: s_mov_b32 s8, s11 11588; GFX900-NEXT: s_mov_b32 s10, s7 11589; GFX900-NEXT: ;;#ASMSTART 11590; GFX900-NEXT: ; use s[8:10] 11591; GFX900-NEXT: ;;#ASMEND 11592; GFX900-NEXT: s_setpc_b64 s[30:31] 11593; 11594; GFX90A-LABEL: s_shuffle_v3i32_v4i32__7_5_3: 11595; GFX90A: ; %bb.0: 11596; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11597; GFX90A-NEXT: ;;#ASMSTART 11598; GFX90A-NEXT: ; def s[8:11] 11599; GFX90A-NEXT: ;;#ASMEND 11600; GFX90A-NEXT: ;;#ASMSTART 11601; GFX90A-NEXT: ; def s[4:7] 11602; GFX90A-NEXT: ;;#ASMEND 11603; GFX90A-NEXT: s_mov_b32 s8, s11 11604; GFX90A-NEXT: s_mov_b32 s10, s7 11605; GFX90A-NEXT: ;;#ASMSTART 11606; GFX90A-NEXT: ; use s[8:10] 11607; GFX90A-NEXT: ;;#ASMEND 11608; GFX90A-NEXT: s_setpc_b64 s[30:31] 11609; 11610; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_5_3: 11611; GFX940: ; %bb.0: 11612; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11613; GFX940-NEXT: ;;#ASMSTART 11614; GFX940-NEXT: ; def s[8:11] 11615; GFX940-NEXT: ;;#ASMEND 11616; GFX940-NEXT: ;;#ASMSTART 11617; 
GFX940-NEXT: ; def s[0:3] 11618; GFX940-NEXT: ;;#ASMEND 11619; GFX940-NEXT: s_mov_b32 s8, s11 11620; GFX940-NEXT: s_mov_b32 s10, s3 11621; GFX940-NEXT: ;;#ASMSTART 11622; GFX940-NEXT: ; use s[8:10] 11623; GFX940-NEXT: ;;#ASMEND 11624; GFX940-NEXT: s_setpc_b64 s[30:31] 11625 %vec0 = call <4 x i32> asm "; def $0", "=s"() 11626 %vec1 = call <4 x i32> asm "; def $0", "=s"() 11627 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 5, i32 3> 11628 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 11629 ret void 11630} 11631 11632define void @s_shuffle_v3i32_v4i32__7_6_3() { 11633; GFX900-LABEL: s_shuffle_v3i32_v4i32__7_6_3: 11634; GFX900: ; %bb.0: 11635; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11636; GFX900-NEXT: ;;#ASMSTART 11637; GFX900-NEXT: ; def s[8:11] 11638; GFX900-NEXT: ;;#ASMEND 11639; GFX900-NEXT: ;;#ASMSTART 11640; GFX900-NEXT: ; def s[4:7] 11641; GFX900-NEXT: ;;#ASMEND 11642; GFX900-NEXT: s_mov_b32 s8, s11 11643; GFX900-NEXT: s_mov_b32 s9, s10 11644; GFX900-NEXT: s_mov_b32 s10, s7 11645; GFX900-NEXT: ;;#ASMSTART 11646; GFX900-NEXT: ; use s[8:10] 11647; GFX900-NEXT: ;;#ASMEND 11648; GFX900-NEXT: s_setpc_b64 s[30:31] 11649; 11650; GFX90A-LABEL: s_shuffle_v3i32_v4i32__7_6_3: 11651; GFX90A: ; %bb.0: 11652; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11653; GFX90A-NEXT: ;;#ASMSTART 11654; GFX90A-NEXT: ; def s[8:11] 11655; GFX90A-NEXT: ;;#ASMEND 11656; GFX90A-NEXT: ;;#ASMSTART 11657; GFX90A-NEXT: ; def s[4:7] 11658; GFX90A-NEXT: ;;#ASMEND 11659; GFX90A-NEXT: s_mov_b32 s8, s11 11660; GFX90A-NEXT: s_mov_b32 s9, s10 11661; GFX90A-NEXT: s_mov_b32 s10, s7 11662; GFX90A-NEXT: ;;#ASMSTART 11663; GFX90A-NEXT: ; use s[8:10] 11664; GFX90A-NEXT: ;;#ASMEND 11665; GFX90A-NEXT: s_setpc_b64 s[30:31] 11666; 11667; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_6_3: 11668; GFX940: ; %bb.0: 11669; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11670; GFX940-NEXT: ;;#ASMSTART 11671; GFX940-NEXT: ; def s[0:3] 11672; 
GFX940-NEXT: ;;#ASMEND 11673; GFX940-NEXT: ;;#ASMSTART 11674; GFX940-NEXT: ; def s[4:7] 11675; GFX940-NEXT: ;;#ASMEND 11676; GFX940-NEXT: s_mov_b32 s8, s7 11677; GFX940-NEXT: s_mov_b32 s9, s6 11678; GFX940-NEXT: s_mov_b32 s10, s3 11679; GFX940-NEXT: ;;#ASMSTART 11680; GFX940-NEXT: ; use s[8:10] 11681; GFX940-NEXT: ;;#ASMEND 11682; GFX940-NEXT: s_setpc_b64 s[30:31] 11683 %vec0 = call <4 x i32> asm "; def $0", "=s"() 11684 %vec1 = call <4 x i32> asm "; def $0", "=s"() 11685 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 6, i32 3> 11686 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 11687 ret void 11688} 11689 11690define void @s_shuffle_v3i32_v4i32__u_4_4() { 11691; GFX9-LABEL: s_shuffle_v3i32_v4i32__u_4_4: 11692; GFX9: ; %bb.0: 11693; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11694; GFX9-NEXT: ;;#ASMSTART 11695; GFX9-NEXT: ; use s[8:10] 11696; GFX9-NEXT: ;;#ASMEND 11697; GFX9-NEXT: s_setpc_b64 s[30:31] 11698 %vec0 = call <4 x i32> asm "; def $0", "=s"() 11699 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> <i32 poison, i32 4, i32 4> 11700 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 11701 ret void 11702} 11703 11704define void @s_shuffle_v3i32_v4i32__0_4_4() { 11705; GFX900-LABEL: s_shuffle_v3i32_v4i32__0_4_4: 11706; GFX900: ; %bb.0: 11707; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11708; GFX900-NEXT: ;;#ASMSTART 11709; GFX900-NEXT: ; def s[8:11] 11710; GFX900-NEXT: ;;#ASMEND 11711; GFX900-NEXT: ;;#ASMSTART 11712; GFX900-NEXT: ; use s[8:10] 11713; GFX900-NEXT: ;;#ASMEND 11714; GFX900-NEXT: s_setpc_b64 s[30:31] 11715; 11716; GFX90A-LABEL: s_shuffle_v3i32_v4i32__0_4_4: 11717; GFX90A: ; %bb.0: 11718; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11719; GFX90A-NEXT: ;;#ASMSTART 11720; GFX90A-NEXT: ; def s[8:11] 11721; GFX90A-NEXT: ;;#ASMEND 11722; GFX90A-NEXT: ;;#ASMSTART 11723; GFX90A-NEXT: ; use s[8:10] 11724; GFX90A-NEXT: ;;#ASMEND 11725; GFX90A-NEXT: 
s_setpc_b64 s[30:31] 11726; 11727; GFX940-LABEL: s_shuffle_v3i32_v4i32__0_4_4: 11728; GFX940: ; %bb.0: 11729; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11730; GFX940-NEXT: ;;#ASMSTART 11731; GFX940-NEXT: ; def s[8:11] 11732; GFX940-NEXT: ;;#ASMEND 11733; GFX940-NEXT: s_nop 0 11734; GFX940-NEXT: ;;#ASMSTART 11735; GFX940-NEXT: ; use s[8:10] 11736; GFX940-NEXT: ;;#ASMEND 11737; GFX940-NEXT: s_setpc_b64 s[30:31] 11738 %vec0 = call <4 x i32> asm "; def $0", "=s"() 11739 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> <i32 0, i32 4, i32 4> 11740 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 11741 ret void 11742} 11743 11744define void @s_shuffle_v3i32_v4i32__1_4_4() { 11745; GFX900-LABEL: s_shuffle_v3i32_v4i32__1_4_4: 11746; GFX900: ; %bb.0: 11747; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11748; GFX900-NEXT: ;;#ASMSTART 11749; GFX900-NEXT: ; def s[4:7] 11750; GFX900-NEXT: ;;#ASMEND 11751; GFX900-NEXT: s_mov_b32 s8, s5 11752; GFX900-NEXT: ;;#ASMSTART 11753; GFX900-NEXT: ; use s[8:10] 11754; GFX900-NEXT: ;;#ASMEND 11755; GFX900-NEXT: s_setpc_b64 s[30:31] 11756; 11757; GFX90A-LABEL: s_shuffle_v3i32_v4i32__1_4_4: 11758; GFX90A: ; %bb.0: 11759; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11760; GFX90A-NEXT: ;;#ASMSTART 11761; GFX90A-NEXT: ; def s[4:7] 11762; GFX90A-NEXT: ;;#ASMEND 11763; GFX90A-NEXT: s_mov_b32 s8, s5 11764; GFX90A-NEXT: ;;#ASMSTART 11765; GFX90A-NEXT: ; use s[8:10] 11766; GFX90A-NEXT: ;;#ASMEND 11767; GFX90A-NEXT: s_setpc_b64 s[30:31] 11768; 11769; GFX940-LABEL: s_shuffle_v3i32_v4i32__1_4_4: 11770; GFX940: ; %bb.0: 11771; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11772; GFX940-NEXT: ;;#ASMSTART 11773; GFX940-NEXT: ; def s[0:3] 11774; GFX940-NEXT: ;;#ASMEND 11775; GFX940-NEXT: s_mov_b32 s8, s1 11776; GFX940-NEXT: ;;#ASMSTART 11777; GFX940-NEXT: ; use s[8:10] 11778; GFX940-NEXT: ;;#ASMEND 11779; GFX940-NEXT: s_setpc_b64 s[30:31] 11780 %vec0 = call <4 x i32> asm "; def $0", "=s"() 
11781 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> <i32 1, i32 4, i32 4> 11782 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 11783 ret void 11784} 11785 11786define void @s_shuffle_v3i32_v4i32__2_4_4() { 11787; GFX900-LABEL: s_shuffle_v3i32_v4i32__2_4_4: 11788; GFX900: ; %bb.0: 11789; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11790; GFX900-NEXT: ;;#ASMSTART 11791; GFX900-NEXT: ; def s[4:7] 11792; GFX900-NEXT: ;;#ASMEND 11793; GFX900-NEXT: s_mov_b32 s8, s6 11794; GFX900-NEXT: ;;#ASMSTART 11795; GFX900-NEXT: ; use s[8:10] 11796; GFX900-NEXT: ;;#ASMEND 11797; GFX900-NEXT: s_setpc_b64 s[30:31] 11798; 11799; GFX90A-LABEL: s_shuffle_v3i32_v4i32__2_4_4: 11800; GFX90A: ; %bb.0: 11801; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11802; GFX90A-NEXT: ;;#ASMSTART 11803; GFX90A-NEXT: ; def s[4:7] 11804; GFX90A-NEXT: ;;#ASMEND 11805; GFX90A-NEXT: s_mov_b32 s8, s6 11806; GFX90A-NEXT: ;;#ASMSTART 11807; GFX90A-NEXT: ; use s[8:10] 11808; GFX90A-NEXT: ;;#ASMEND 11809; GFX90A-NEXT: s_setpc_b64 s[30:31] 11810; 11811; GFX940-LABEL: s_shuffle_v3i32_v4i32__2_4_4: 11812; GFX940: ; %bb.0: 11813; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11814; GFX940-NEXT: ;;#ASMSTART 11815; GFX940-NEXT: ; def s[0:3] 11816; GFX940-NEXT: ;;#ASMEND 11817; GFX940-NEXT: s_mov_b32 s8, s2 11818; GFX940-NEXT: ;;#ASMSTART 11819; GFX940-NEXT: ; use s[8:10] 11820; GFX940-NEXT: ;;#ASMEND 11821; GFX940-NEXT: s_setpc_b64 s[30:31] 11822 %vec0 = call <4 x i32> asm "; def $0", "=s"() 11823 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> <i32 2, i32 4, i32 4> 11824 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 11825 ret void 11826} 11827 11828define void @s_shuffle_v3i32_v4i32__3_4_4() { 11829; GFX900-LABEL: s_shuffle_v3i32_v4i32__3_4_4: 11830; GFX900: ; %bb.0: 11831; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11832; GFX900-NEXT: ;;#ASMSTART 11833; GFX900-NEXT: ; def s[4:7] 11834; GFX900-NEXT: ;;#ASMEND 
11835; GFX900-NEXT: s_mov_b32 s8, s7 11836; GFX900-NEXT: ;;#ASMSTART 11837; GFX900-NEXT: ; use s[8:10] 11838; GFX900-NEXT: ;;#ASMEND 11839; GFX900-NEXT: s_setpc_b64 s[30:31] 11840; 11841; GFX90A-LABEL: s_shuffle_v3i32_v4i32__3_4_4: 11842; GFX90A: ; %bb.0: 11843; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11844; GFX90A-NEXT: ;;#ASMSTART 11845; GFX90A-NEXT: ; def s[4:7] 11846; GFX90A-NEXT: ;;#ASMEND 11847; GFX90A-NEXT: s_mov_b32 s8, s7 11848; GFX90A-NEXT: ;;#ASMSTART 11849; GFX90A-NEXT: ; use s[8:10] 11850; GFX90A-NEXT: ;;#ASMEND 11851; GFX90A-NEXT: s_setpc_b64 s[30:31] 11852; 11853; GFX940-LABEL: s_shuffle_v3i32_v4i32__3_4_4: 11854; GFX940: ; %bb.0: 11855; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11856; GFX940-NEXT: ;;#ASMSTART 11857; GFX940-NEXT: ; def s[0:3] 11858; GFX940-NEXT: ;;#ASMEND 11859; GFX940-NEXT: s_mov_b32 s8, s3 11860; GFX940-NEXT: ;;#ASMSTART 11861; GFX940-NEXT: ; use s[8:10] 11862; GFX940-NEXT: ;;#ASMEND 11863; GFX940-NEXT: s_setpc_b64 s[30:31] 11864 %vec0 = call <4 x i32> asm "; def $0", "=s"() 11865 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> <i32 3, i32 4, i32 4> 11866 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 11867 ret void 11868} 11869 11870define void @s_shuffle_v3i32_v4i32__4_4_4() { 11871; GFX9-LABEL: s_shuffle_v3i32_v4i32__4_4_4: 11872; GFX9: ; %bb.0: 11873; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11874; GFX9-NEXT: ;;#ASMSTART 11875; GFX9-NEXT: ; use s[8:10] 11876; GFX9-NEXT: ;;#ASMEND 11877; GFX9-NEXT: s_setpc_b64 s[30:31] 11878 %vec0 = call <4 x i32> asm "; def $0", "=s"() 11879 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> poison, <3 x i32> <i32 4, i32 4, i32 4> 11880 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 11881 ret void 11882} 11883 11884define void @s_shuffle_v3i32_v4i32__5_4_4() { 11885; GFX900-LABEL: s_shuffle_v3i32_v4i32__5_4_4: 11886; GFX900: ; %bb.0: 11887; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11888; 
GFX900-NEXT: ;;#ASMSTART 11889; GFX900-NEXT: ; def s[4:7] 11890; GFX900-NEXT: ;;#ASMEND 11891; GFX900-NEXT: s_mov_b32 s8, s5 11892; GFX900-NEXT: s_mov_b32 s9, s4 11893; GFX900-NEXT: s_mov_b32 s10, s4 11894; GFX900-NEXT: ;;#ASMSTART 11895; GFX900-NEXT: ; use s[8:10] 11896; GFX900-NEXT: ;;#ASMEND 11897; GFX900-NEXT: s_setpc_b64 s[30:31] 11898; 11899; GFX90A-LABEL: s_shuffle_v3i32_v4i32__5_4_4: 11900; GFX90A: ; %bb.0: 11901; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11902; GFX90A-NEXT: ;;#ASMSTART 11903; GFX90A-NEXT: ; def s[4:7] 11904; GFX90A-NEXT: ;;#ASMEND 11905; GFX90A-NEXT: s_mov_b32 s8, s5 11906; GFX90A-NEXT: s_mov_b32 s9, s4 11907; GFX90A-NEXT: s_mov_b32 s10, s4 11908; GFX90A-NEXT: ;;#ASMSTART 11909; GFX90A-NEXT: ; use s[8:10] 11910; GFX90A-NEXT: ;;#ASMEND 11911; GFX90A-NEXT: s_setpc_b64 s[30:31] 11912; 11913; GFX940-LABEL: s_shuffle_v3i32_v4i32__5_4_4: 11914; GFX940: ; %bb.0: 11915; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11916; GFX940-NEXT: ;;#ASMSTART 11917; GFX940-NEXT: ; def s[0:3] 11918; GFX940-NEXT: ;;#ASMEND 11919; GFX940-NEXT: s_mov_b32 s8, s1 11920; GFX940-NEXT: s_mov_b32 s9, s0 11921; GFX940-NEXT: s_mov_b32 s10, s0 11922; GFX940-NEXT: ;;#ASMSTART 11923; GFX940-NEXT: ; use s[8:10] 11924; GFX940-NEXT: ;;#ASMEND 11925; GFX940-NEXT: s_setpc_b64 s[30:31] 11926 %vec0 = call <4 x i32> asm "; def $0", "=s"() 11927 %vec1 = call <4 x i32> asm "; def $0", "=s"() 11928 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 5, i32 4, i32 4> 11929 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 11930 ret void 11931} 11932 11933define void @s_shuffle_v3i32_v4i32__6_4_4() { 11934; GFX900-LABEL: s_shuffle_v3i32_v4i32__6_4_4: 11935; GFX900: ; %bb.0: 11936; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11937; GFX900-NEXT: ;;#ASMSTART 11938; GFX900-NEXT: ; def s[4:7] 11939; GFX900-NEXT: ;;#ASMEND 11940; GFX900-NEXT: s_mov_b32 s8, s6 11941; GFX900-NEXT: s_mov_b32 s9, s4 11942; GFX900-NEXT: s_mov_b32 s10, s4 
11943; GFX900-NEXT: ;;#ASMSTART 11944; GFX900-NEXT: ; use s[8:10] 11945; GFX900-NEXT: ;;#ASMEND 11946; GFX900-NEXT: s_setpc_b64 s[30:31] 11947; 11948; GFX90A-LABEL: s_shuffle_v3i32_v4i32__6_4_4: 11949; GFX90A: ; %bb.0: 11950; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11951; GFX90A-NEXT: ;;#ASMSTART 11952; GFX90A-NEXT: ; def s[4:7] 11953; GFX90A-NEXT: ;;#ASMEND 11954; GFX90A-NEXT: s_mov_b32 s8, s6 11955; GFX90A-NEXT: s_mov_b32 s9, s4 11956; GFX90A-NEXT: s_mov_b32 s10, s4 11957; GFX90A-NEXT: ;;#ASMSTART 11958; GFX90A-NEXT: ; use s[8:10] 11959; GFX90A-NEXT: ;;#ASMEND 11960; GFX90A-NEXT: s_setpc_b64 s[30:31] 11961; 11962; GFX940-LABEL: s_shuffle_v3i32_v4i32__6_4_4: 11963; GFX940: ; %bb.0: 11964; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11965; GFX940-NEXT: ;;#ASMSTART 11966; GFX940-NEXT: ; def s[0:3] 11967; GFX940-NEXT: ;;#ASMEND 11968; GFX940-NEXT: s_mov_b32 s8, s2 11969; GFX940-NEXT: s_mov_b32 s9, s0 11970; GFX940-NEXT: s_mov_b32 s10, s0 11971; GFX940-NEXT: ;;#ASMSTART 11972; GFX940-NEXT: ; use s[8:10] 11973; GFX940-NEXT: ;;#ASMEND 11974; GFX940-NEXT: s_setpc_b64 s[30:31] 11975 %vec0 = call <4 x i32> asm "; def $0", "=s"() 11976 %vec1 = call <4 x i32> asm "; def $0", "=s"() 11977 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 6, i32 4, i32 4> 11978 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 11979 ret void 11980} 11981 11982define void @s_shuffle_v3i32_v4i32__7_4_4() { 11983; GFX900-LABEL: s_shuffle_v3i32_v4i32__7_4_4: 11984; GFX900: ; %bb.0: 11985; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11986; GFX900-NEXT: ;;#ASMSTART 11987; GFX900-NEXT: ; def s[4:7] 11988; GFX900-NEXT: ;;#ASMEND 11989; GFX900-NEXT: s_mov_b32 s8, s7 11990; GFX900-NEXT: s_mov_b32 s9, s4 11991; GFX900-NEXT: s_mov_b32 s10, s4 11992; GFX900-NEXT: ;;#ASMSTART 11993; GFX900-NEXT: ; use s[8:10] 11994; GFX900-NEXT: ;;#ASMEND 11995; GFX900-NEXT: s_setpc_b64 s[30:31] 11996; 11997; GFX90A-LABEL: s_shuffle_v3i32_v4i32__7_4_4: 
11998; GFX90A: ; %bb.0: 11999; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12000; GFX90A-NEXT: ;;#ASMSTART 12001; GFX90A-NEXT: ; def s[4:7] 12002; GFX90A-NEXT: ;;#ASMEND 12003; GFX90A-NEXT: s_mov_b32 s8, s7 12004; GFX90A-NEXT: s_mov_b32 s9, s4 12005; GFX90A-NEXT: s_mov_b32 s10, s4 12006; GFX90A-NEXT: ;;#ASMSTART 12007; GFX90A-NEXT: ; use s[8:10] 12008; GFX90A-NEXT: ;;#ASMEND 12009; GFX90A-NEXT: s_setpc_b64 s[30:31] 12010; 12011; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_4_4: 12012; GFX940: ; %bb.0: 12013; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12014; GFX940-NEXT: ;;#ASMSTART 12015; GFX940-NEXT: ; def s[0:3] 12016; GFX940-NEXT: ;;#ASMEND 12017; GFX940-NEXT: s_mov_b32 s8, s3 12018; GFX940-NEXT: s_mov_b32 s9, s0 12019; GFX940-NEXT: s_mov_b32 s10, s0 12020; GFX940-NEXT: ;;#ASMSTART 12021; GFX940-NEXT: ; use s[8:10] 12022; GFX940-NEXT: ;;#ASMEND 12023; GFX940-NEXT: s_setpc_b64 s[30:31] 12024 %vec0 = call <4 x i32> asm "; def $0", "=s"() 12025 %vec1 = call <4 x i32> asm "; def $0", "=s"() 12026 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 4, i32 4> 12027 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 12028 ret void 12029} 12030 12031define void @s_shuffle_v3i32_v4i32__7_u_4() { 12032; GFX900-LABEL: s_shuffle_v3i32_v4i32__7_u_4: 12033; GFX900: ; %bb.0: 12034; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12035; GFX900-NEXT: ;;#ASMSTART 12036; GFX900-NEXT: ; def s[4:7] 12037; GFX900-NEXT: ;;#ASMEND 12038; GFX900-NEXT: s_mov_b32 s8, s7 12039; GFX900-NEXT: s_mov_b32 s10, s4 12040; GFX900-NEXT: ;;#ASMSTART 12041; GFX900-NEXT: ; use s[8:10] 12042; GFX900-NEXT: ;;#ASMEND 12043; GFX900-NEXT: s_setpc_b64 s[30:31] 12044; 12045; GFX90A-LABEL: s_shuffle_v3i32_v4i32__7_u_4: 12046; GFX90A: ; %bb.0: 12047; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12048; GFX90A-NEXT: ;;#ASMSTART 12049; GFX90A-NEXT: ; def s[4:7] 12050; GFX90A-NEXT: ;;#ASMEND 12051; GFX90A-NEXT: s_mov_b32 s8, s7 12052; GFX90A-NEXT: 
s_mov_b32 s10, s4 12053; GFX90A-NEXT: ;;#ASMSTART 12054; GFX90A-NEXT: ; use s[8:10] 12055; GFX90A-NEXT: ;;#ASMEND 12056; GFX90A-NEXT: s_setpc_b64 s[30:31] 12057; 12058; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_u_4: 12059; GFX940: ; %bb.0: 12060; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12061; GFX940-NEXT: ;;#ASMSTART 12062; GFX940-NEXT: ; def s[0:3] 12063; GFX940-NEXT: ;;#ASMEND 12064; GFX940-NEXT: s_mov_b32 s8, s3 12065; GFX940-NEXT: s_mov_b32 s10, s0 12066; GFX940-NEXT: ;;#ASMSTART 12067; GFX940-NEXT: ; use s[8:10] 12068; GFX940-NEXT: ;;#ASMEND 12069; GFX940-NEXT: s_setpc_b64 s[30:31] 12070 %vec0 = call <4 x i32> asm "; def $0", "=s"() 12071 %vec1 = call <4 x i32> asm "; def $0", "=s"() 12072 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 poison, i32 4> 12073 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 12074 ret void 12075} 12076 12077define void @s_shuffle_v3i32_v4i32__7_0_4() { 12078; GFX900-LABEL: s_shuffle_v3i32_v4i32__7_0_4: 12079; GFX900: ; %bb.0: 12080; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12081; GFX900-NEXT: ;;#ASMSTART 12082; GFX900-NEXT: ; def s[4:7] 12083; GFX900-NEXT: ;;#ASMEND 12084; GFX900-NEXT: ;;#ASMSTART 12085; GFX900-NEXT: ; def s[12:15] 12086; GFX900-NEXT: ;;#ASMEND 12087; GFX900-NEXT: s_mov_b32 s8, s15 12088; GFX900-NEXT: s_mov_b32 s9, s4 12089; GFX900-NEXT: s_mov_b32 s10, s12 12090; GFX900-NEXT: ;;#ASMSTART 12091; GFX900-NEXT: ; use s[8:10] 12092; GFX900-NEXT: ;;#ASMEND 12093; GFX900-NEXT: s_setpc_b64 s[30:31] 12094; 12095; GFX90A-LABEL: s_shuffle_v3i32_v4i32__7_0_4: 12096; GFX90A: ; %bb.0: 12097; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12098; GFX90A-NEXT: ;;#ASMSTART 12099; GFX90A-NEXT: ; def s[4:7] 12100; GFX90A-NEXT: ;;#ASMEND 12101; GFX90A-NEXT: ;;#ASMSTART 12102; GFX90A-NEXT: ; def s[12:15] 12103; GFX90A-NEXT: ;;#ASMEND 12104; GFX90A-NEXT: s_mov_b32 s8, s15 12105; GFX90A-NEXT: s_mov_b32 s9, s4 12106; GFX90A-NEXT: s_mov_b32 s10, s12 12107; 
GFX90A-NEXT: ;;#ASMSTART 12108; GFX90A-NEXT: ; use s[8:10] 12109; GFX90A-NEXT: ;;#ASMEND 12110; GFX90A-NEXT: s_setpc_b64 s[30:31] 12111; 12112; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_0_4: 12113; GFX940: ; %bb.0: 12114; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12115; GFX940-NEXT: ;;#ASMSTART 12116; GFX940-NEXT: ; def s[0:3] 12117; GFX940-NEXT: ;;#ASMEND 12118; GFX940-NEXT: ;;#ASMSTART 12119; GFX940-NEXT: ; def s[4:7] 12120; GFX940-NEXT: ;;#ASMEND 12121; GFX940-NEXT: s_mov_b32 s8, s7 12122; GFX940-NEXT: s_mov_b32 s9, s0 12123; GFX940-NEXT: s_mov_b32 s10, s4 12124; GFX940-NEXT: ;;#ASMSTART 12125; GFX940-NEXT: ; use s[8:10] 12126; GFX940-NEXT: ;;#ASMEND 12127; GFX940-NEXT: s_setpc_b64 s[30:31] 12128 %vec0 = call <4 x i32> asm "; def $0", "=s"() 12129 %vec1 = call <4 x i32> asm "; def $0", "=s"() 12130 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 0, i32 4> 12131 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 12132 ret void 12133} 12134 12135define void @s_shuffle_v3i32_v4i32__7_1_4() { 12136; GFX900-LABEL: s_shuffle_v3i32_v4i32__7_1_4: 12137; GFX900: ; %bb.0: 12138; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12139; GFX900-NEXT: ;;#ASMSTART 12140; GFX900-NEXT: ; def s[8:11] 12141; GFX900-NEXT: ;;#ASMEND 12142; GFX900-NEXT: ;;#ASMSTART 12143; GFX900-NEXT: ; def s[4:7] 12144; GFX900-NEXT: ;;#ASMEND 12145; GFX900-NEXT: s_mov_b32 s8, s7 12146; GFX900-NEXT: s_mov_b32 s10, s4 12147; GFX900-NEXT: ;;#ASMSTART 12148; GFX900-NEXT: ; use s[8:10] 12149; GFX900-NEXT: ;;#ASMEND 12150; GFX900-NEXT: s_setpc_b64 s[30:31] 12151; 12152; GFX90A-LABEL: s_shuffle_v3i32_v4i32__7_1_4: 12153; GFX90A: ; %bb.0: 12154; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12155; GFX90A-NEXT: ;;#ASMSTART 12156; GFX90A-NEXT: ; def s[8:11] 12157; GFX90A-NEXT: ;;#ASMEND 12158; GFX90A-NEXT: ;;#ASMSTART 12159; GFX90A-NEXT: ; def s[4:7] 12160; GFX90A-NEXT: ;;#ASMEND 12161; GFX90A-NEXT: s_mov_b32 s8, s7 12162; GFX90A-NEXT: s_mov_b32 
s10, s4 12163; GFX90A-NEXT: ;;#ASMSTART 12164; GFX90A-NEXT: ; use s[8:10] 12165; GFX90A-NEXT: ;;#ASMEND 12166; GFX90A-NEXT: s_setpc_b64 s[30:31] 12167; 12168; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_1_4: 12169; GFX940: ; %bb.0: 12170; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12171; GFX940-NEXT: ;;#ASMSTART 12172; GFX940-NEXT: ; def s[8:11] 12173; GFX940-NEXT: ;;#ASMEND 12174; GFX940-NEXT: ;;#ASMSTART 12175; GFX940-NEXT: ; def s[0:3] 12176; GFX940-NEXT: ;;#ASMEND 12177; GFX940-NEXT: s_mov_b32 s8, s3 12178; GFX940-NEXT: s_mov_b32 s10, s0 12179; GFX940-NEXT: ;;#ASMSTART 12180; GFX940-NEXT: ; use s[8:10] 12181; GFX940-NEXT: ;;#ASMEND 12182; GFX940-NEXT: s_setpc_b64 s[30:31] 12183 %vec0 = call <4 x i32> asm "; def $0", "=s"() 12184 %vec1 = call <4 x i32> asm "; def $0", "=s"() 12185 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 1, i32 4> 12186 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 12187 ret void 12188} 12189 12190define void @s_shuffle_v3i32_v4i32__7_2_4() { 12191; GFX900-LABEL: s_shuffle_v3i32_v4i32__7_2_4: 12192; GFX900: ; %bb.0: 12193; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12194; GFX900-NEXT: ;;#ASMSTART 12195; GFX900-NEXT: ; def s[4:7] 12196; GFX900-NEXT: ;;#ASMEND 12197; GFX900-NEXT: ;;#ASMSTART 12198; GFX900-NEXT: ; def s[12:15] 12199; GFX900-NEXT: ;;#ASMEND 12200; GFX900-NEXT: s_mov_b32 s8, s15 12201; GFX900-NEXT: s_mov_b32 s9, s6 12202; GFX900-NEXT: s_mov_b32 s10, s12 12203; GFX900-NEXT: ;;#ASMSTART 12204; GFX900-NEXT: ; use s[8:10] 12205; GFX900-NEXT: ;;#ASMEND 12206; GFX900-NEXT: s_setpc_b64 s[30:31] 12207; 12208; GFX90A-LABEL: s_shuffle_v3i32_v4i32__7_2_4: 12209; GFX90A: ; %bb.0: 12210; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12211; GFX90A-NEXT: ;;#ASMSTART 12212; GFX90A-NEXT: ; def s[4:7] 12213; GFX90A-NEXT: ;;#ASMEND 12214; GFX90A-NEXT: ;;#ASMSTART 12215; GFX90A-NEXT: ; def s[12:15] 12216; GFX90A-NEXT: ;;#ASMEND 12217; GFX90A-NEXT: s_mov_b32 s8, s15 12218; 
GFX90A-NEXT: s_mov_b32 s9, s6 12219; GFX90A-NEXT: s_mov_b32 s10, s12 12220; GFX90A-NEXT: ;;#ASMSTART 12221; GFX90A-NEXT: ; use s[8:10] 12222; GFX90A-NEXT: ;;#ASMEND 12223; GFX90A-NEXT: s_setpc_b64 s[30:31] 12224; 12225; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_2_4: 12226; GFX940: ; %bb.0: 12227; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12228; GFX940-NEXT: ;;#ASMSTART 12229; GFX940-NEXT: ; def s[0:3] 12230; GFX940-NEXT: ;;#ASMEND 12231; GFX940-NEXT: ;;#ASMSTART 12232; GFX940-NEXT: ; def s[4:7] 12233; GFX940-NEXT: ;;#ASMEND 12234; GFX940-NEXT: s_mov_b32 s8, s7 12235; GFX940-NEXT: s_mov_b32 s9, s2 12236; GFX940-NEXT: s_mov_b32 s10, s4 12237; GFX940-NEXT: ;;#ASMSTART 12238; GFX940-NEXT: ; use s[8:10] 12239; GFX940-NEXT: ;;#ASMEND 12240; GFX940-NEXT: s_setpc_b64 s[30:31] 12241 %vec0 = call <4 x i32> asm "; def $0", "=s"() 12242 %vec1 = call <4 x i32> asm "; def $0", "=s"() 12243 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 2, i32 4> 12244 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 12245 ret void 12246} 12247 12248define void @s_shuffle_v3i32_v4i32__7_3_4() { 12249; GFX900-LABEL: s_shuffle_v3i32_v4i32__7_3_4: 12250; GFX900: ; %bb.0: 12251; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12252; GFX900-NEXT: ;;#ASMSTART 12253; GFX900-NEXT: ; def s[4:7] 12254; GFX900-NEXT: ;;#ASMEND 12255; GFX900-NEXT: ;;#ASMSTART 12256; GFX900-NEXT: ; def s[12:15] 12257; GFX900-NEXT: ;;#ASMEND 12258; GFX900-NEXT: s_mov_b32 s8, s15 12259; GFX900-NEXT: s_mov_b32 s9, s7 12260; GFX900-NEXT: s_mov_b32 s10, s12 12261; GFX900-NEXT: ;;#ASMSTART 12262; GFX900-NEXT: ; use s[8:10] 12263; GFX900-NEXT: ;;#ASMEND 12264; GFX900-NEXT: s_setpc_b64 s[30:31] 12265; 12266; GFX90A-LABEL: s_shuffle_v3i32_v4i32__7_3_4: 12267; GFX90A: ; %bb.0: 12268; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12269; GFX90A-NEXT: ;;#ASMSTART 12270; GFX90A-NEXT: ; def s[4:7] 12271; GFX90A-NEXT: ;;#ASMEND 12272; GFX90A-NEXT: ;;#ASMSTART 12273; 
GFX90A-NEXT: ; def s[12:15] 12274; GFX90A-NEXT: ;;#ASMEND 12275; GFX90A-NEXT: s_mov_b32 s8, s15 12276; GFX90A-NEXT: s_mov_b32 s9, s7 12277; GFX90A-NEXT: s_mov_b32 s10, s12 12278; GFX90A-NEXT: ;;#ASMSTART 12279; GFX90A-NEXT: ; use s[8:10] 12280; GFX90A-NEXT: ;;#ASMEND 12281; GFX90A-NEXT: s_setpc_b64 s[30:31] 12282; 12283; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_3_4: 12284; GFX940: ; %bb.0: 12285; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12286; GFX940-NEXT: ;;#ASMSTART 12287; GFX940-NEXT: ; def s[0:3] 12288; GFX940-NEXT: ;;#ASMEND 12289; GFX940-NEXT: ;;#ASMSTART 12290; GFX940-NEXT: ; def s[4:7] 12291; GFX940-NEXT: ;;#ASMEND 12292; GFX940-NEXT: s_mov_b32 s8, s7 12293; GFX940-NEXT: s_mov_b32 s9, s3 12294; GFX940-NEXT: s_mov_b32 s10, s4 12295; GFX940-NEXT: ;;#ASMSTART 12296; GFX940-NEXT: ; use s[8:10] 12297; GFX940-NEXT: ;;#ASMEND 12298; GFX940-NEXT: s_setpc_b64 s[30:31] 12299 %vec0 = call <4 x i32> asm "; def $0", "=s"() 12300 %vec1 = call <4 x i32> asm "; def $0", "=s"() 12301 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 3, i32 4> 12302 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 12303 ret void 12304} 12305 12306define void @s_shuffle_v3i32_v4i32__7_5_4() { 12307; GFX900-LABEL: s_shuffle_v3i32_v4i32__7_5_4: 12308; GFX900: ; %bb.0: 12309; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12310; GFX900-NEXT: ;;#ASMSTART 12311; GFX900-NEXT: ; def s[4:7] 12312; GFX900-NEXT: ;;#ASMEND 12313; GFX900-NEXT: s_mov_b32 s8, s7 12314; GFX900-NEXT: s_mov_b32 s9, s5 12315; GFX900-NEXT: s_mov_b32 s10, s4 12316; GFX900-NEXT: ;;#ASMSTART 12317; GFX900-NEXT: ; use s[8:10] 12318; GFX900-NEXT: ;;#ASMEND 12319; GFX900-NEXT: s_setpc_b64 s[30:31] 12320; 12321; GFX90A-LABEL: s_shuffle_v3i32_v4i32__7_5_4: 12322; GFX90A: ; %bb.0: 12323; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12324; GFX90A-NEXT: ;;#ASMSTART 12325; GFX90A-NEXT: ; def s[4:7] 12326; GFX90A-NEXT: ;;#ASMEND 12327; GFX90A-NEXT: s_mov_b32 s8, s7 12328; 
GFX90A-NEXT: s_mov_b32 s9, s5 12329; GFX90A-NEXT: s_mov_b32 s10, s4 12330; GFX90A-NEXT: ;;#ASMSTART 12331; GFX90A-NEXT: ; use s[8:10] 12332; GFX90A-NEXT: ;;#ASMEND 12333; GFX90A-NEXT: s_setpc_b64 s[30:31] 12334; 12335; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_5_4: 12336; GFX940: ; %bb.0: 12337; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12338; GFX940-NEXT: ;;#ASMSTART 12339; GFX940-NEXT: ; def s[0:3] 12340; GFX940-NEXT: ;;#ASMEND 12341; GFX940-NEXT: s_mov_b32 s8, s3 12342; GFX940-NEXT: s_mov_b32 s9, s1 12343; GFX940-NEXT: s_mov_b32 s10, s0 12344; GFX940-NEXT: ;;#ASMSTART 12345; GFX940-NEXT: ; use s[8:10] 12346; GFX940-NEXT: ;;#ASMEND 12347; GFX940-NEXT: s_setpc_b64 s[30:31] 12348 %vec0 = call <4 x i32> asm "; def $0", "=s"() 12349 %vec1 = call <4 x i32> asm "; def $0", "=s"() 12350 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 5, i32 4> 12351 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 12352 ret void 12353} 12354 12355define void @s_shuffle_v3i32_v4i32__7_6_4() { 12356; GFX900-LABEL: s_shuffle_v3i32_v4i32__7_6_4: 12357; GFX900: ; %bb.0: 12358; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12359; GFX900-NEXT: ;;#ASMSTART 12360; GFX900-NEXT: ; def s[4:7] 12361; GFX900-NEXT: ;;#ASMEND 12362; GFX900-NEXT: s_mov_b32 s8, s7 12363; GFX900-NEXT: s_mov_b32 s9, s6 12364; GFX900-NEXT: s_mov_b32 s10, s4 12365; GFX900-NEXT: ;;#ASMSTART 12366; GFX900-NEXT: ; use s[8:10] 12367; GFX900-NEXT: ;;#ASMEND 12368; GFX900-NEXT: s_setpc_b64 s[30:31] 12369; 12370; GFX90A-LABEL: s_shuffle_v3i32_v4i32__7_6_4: 12371; GFX90A: ; %bb.0: 12372; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12373; GFX90A-NEXT: ;;#ASMSTART 12374; GFX90A-NEXT: ; def s[4:7] 12375; GFX90A-NEXT: ;;#ASMEND 12376; GFX90A-NEXT: s_mov_b32 s8, s7 12377; GFX90A-NEXT: s_mov_b32 s9, s6 12378; GFX90A-NEXT: s_mov_b32 s10, s4 12379; GFX90A-NEXT: ;;#ASMSTART 12380; GFX90A-NEXT: ; use s[8:10] 12381; GFX90A-NEXT: ;;#ASMEND 12382; GFX90A-NEXT: s_setpc_b64 
s[30:31] 12383; 12384; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_6_4: 12385; GFX940: ; %bb.0: 12386; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12387; GFX940-NEXT: ;;#ASMSTART 12388; GFX940-NEXT: ; def s[0:3] 12389; GFX940-NEXT: ;;#ASMEND 12390; GFX940-NEXT: s_mov_b32 s8, s3 12391; GFX940-NEXT: s_mov_b32 s9, s2 12392; GFX940-NEXT: s_mov_b32 s10, s0 12393; GFX940-NEXT: ;;#ASMSTART 12394; GFX940-NEXT: ; use s[8:10] 12395; GFX940-NEXT: ;;#ASMEND 12396; GFX940-NEXT: s_setpc_b64 s[30:31] 12397 %vec0 = call <4 x i32> asm "; def $0", "=s"() 12398 %vec1 = call <4 x i32> asm "; def $0", "=s"() 12399 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 6, i32 4> 12400 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 12401 ret void 12402} 12403 12404define void @s_shuffle_v3i32_v4i32__u_5_5() { 12405; GFX9-LABEL: s_shuffle_v3i32_v4i32__u_5_5: 12406; GFX9: ; %bb.0: 12407; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12408; GFX9-NEXT: ;;#ASMSTART 12409; GFX9-NEXT: ; def s[8:11] 12410; GFX9-NEXT: ;;#ASMEND 12411; GFX9-NEXT: s_mov_b32 s10, s9 12412; GFX9-NEXT: ;;#ASMSTART 12413; GFX9-NEXT: ; use s[8:10] 12414; GFX9-NEXT: ;;#ASMEND 12415; GFX9-NEXT: s_setpc_b64 s[30:31] 12416 %vec0 = call <4 x i32> asm "; def $0", "=s"() 12417 %vec1 = call <4 x i32> asm "; def $0", "=s"() 12418 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 poison, i32 5, i32 5> 12419 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 12420 ret void 12421} 12422 12423define void @s_shuffle_v3i32_v4i32__0_5_5() { 12424; GFX900-LABEL: s_shuffle_v3i32_v4i32__0_5_5: 12425; GFX900: ; %bb.0: 12426; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12427; GFX900-NEXT: ;;#ASMSTART 12428; GFX900-NEXT: ; def s[8:11] 12429; GFX900-NEXT: ;;#ASMEND 12430; GFX900-NEXT: ;;#ASMSTART 12431; GFX900-NEXT: ; def s[4:7] 12432; GFX900-NEXT: ;;#ASMEND 12433; GFX900-NEXT: s_mov_b32 s9, s5 12434; GFX900-NEXT: s_mov_b32 s10, s5 12435; 
GFX900-NEXT: ;;#ASMSTART 12436; GFX900-NEXT: ; use s[8:10] 12437; GFX900-NEXT: ;;#ASMEND 12438; GFX900-NEXT: s_setpc_b64 s[30:31] 12439; 12440; GFX90A-LABEL: s_shuffle_v3i32_v4i32__0_5_5: 12441; GFX90A: ; %bb.0: 12442; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12443; GFX90A-NEXT: ;;#ASMSTART 12444; GFX90A-NEXT: ; def s[8:11] 12445; GFX90A-NEXT: ;;#ASMEND 12446; GFX90A-NEXT: ;;#ASMSTART 12447; GFX90A-NEXT: ; def s[4:7] 12448; GFX90A-NEXT: ;;#ASMEND 12449; GFX90A-NEXT: s_mov_b32 s9, s5 12450; GFX90A-NEXT: s_mov_b32 s10, s5 12451; GFX90A-NEXT: ;;#ASMSTART 12452; GFX90A-NEXT: ; use s[8:10] 12453; GFX90A-NEXT: ;;#ASMEND 12454; GFX90A-NEXT: s_setpc_b64 s[30:31] 12455; 12456; GFX940-LABEL: s_shuffle_v3i32_v4i32__0_5_5: 12457; GFX940: ; %bb.0: 12458; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12459; GFX940-NEXT: ;;#ASMSTART 12460; GFX940-NEXT: ; def s[8:11] 12461; GFX940-NEXT: ;;#ASMEND 12462; GFX940-NEXT: ;;#ASMSTART 12463; GFX940-NEXT: ; def s[0:3] 12464; GFX940-NEXT: ;;#ASMEND 12465; GFX940-NEXT: s_mov_b32 s9, s1 12466; GFX940-NEXT: s_mov_b32 s10, s1 12467; GFX940-NEXT: ;;#ASMSTART 12468; GFX940-NEXT: ; use s[8:10] 12469; GFX940-NEXT: ;;#ASMEND 12470; GFX940-NEXT: s_setpc_b64 s[30:31] 12471 %vec0 = call <4 x i32> asm "; def $0", "=s"() 12472 %vec1 = call <4 x i32> asm "; def $0", "=s"() 12473 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 0, i32 5, i32 5> 12474 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 12475 ret void 12476} 12477 12478define void @s_shuffle_v3i32_v4i32__1_5_5() { 12479; GFX900-LABEL: s_shuffle_v3i32_v4i32__1_5_5: 12480; GFX900: ; %bb.0: 12481; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12482; GFX900-NEXT: ;;#ASMSTART 12483; GFX900-NEXT: ; def s[8:11] 12484; GFX900-NEXT: ;;#ASMEND 12485; GFX900-NEXT: ;;#ASMSTART 12486; GFX900-NEXT: ; def s[4:7] 12487; GFX900-NEXT: ;;#ASMEND 12488; GFX900-NEXT: s_mov_b32 s8, s5 12489; GFX900-NEXT: s_mov_b32 s10, s9 12490; GFX900-NEXT: 
;;#ASMSTART 12491; GFX900-NEXT: ; use s[8:10] 12492; GFX900-NEXT: ;;#ASMEND 12493; GFX900-NEXT: s_setpc_b64 s[30:31] 12494; 12495; GFX90A-LABEL: s_shuffle_v3i32_v4i32__1_5_5: 12496; GFX90A: ; %bb.0: 12497; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12498; GFX90A-NEXT: ;;#ASMSTART 12499; GFX90A-NEXT: ; def s[8:11] 12500; GFX90A-NEXT: ;;#ASMEND 12501; GFX90A-NEXT: ;;#ASMSTART 12502; GFX90A-NEXT: ; def s[4:7] 12503; GFX90A-NEXT: ;;#ASMEND 12504; GFX90A-NEXT: s_mov_b32 s8, s5 12505; GFX90A-NEXT: s_mov_b32 s10, s9 12506; GFX90A-NEXT: ;;#ASMSTART 12507; GFX90A-NEXT: ; use s[8:10] 12508; GFX90A-NEXT: ;;#ASMEND 12509; GFX90A-NEXT: s_setpc_b64 s[30:31] 12510; 12511; GFX940-LABEL: s_shuffle_v3i32_v4i32__1_5_5: 12512; GFX940: ; %bb.0: 12513; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12514; GFX940-NEXT: ;;#ASMSTART 12515; GFX940-NEXT: ; def s[8:11] 12516; GFX940-NEXT: ;;#ASMEND 12517; GFX940-NEXT: ;;#ASMSTART 12518; GFX940-NEXT: ; def s[0:3] 12519; GFX940-NEXT: ;;#ASMEND 12520; GFX940-NEXT: s_mov_b32 s8, s1 12521; GFX940-NEXT: s_mov_b32 s10, s9 12522; GFX940-NEXT: ;;#ASMSTART 12523; GFX940-NEXT: ; use s[8:10] 12524; GFX940-NEXT: ;;#ASMEND 12525; GFX940-NEXT: s_setpc_b64 s[30:31] 12526 %vec0 = call <4 x i32> asm "; def $0", "=s"() 12527 %vec1 = call <4 x i32> asm "; def $0", "=s"() 12528 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 1, i32 5, i32 5> 12529 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 12530 ret void 12531} 12532 12533define void @s_shuffle_v3i32_v4i32__2_5_5() { 12534; GFX900-LABEL: s_shuffle_v3i32_v4i32__2_5_5: 12535; GFX900: ; %bb.0: 12536; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12537; GFX900-NEXT: ;;#ASMSTART 12538; GFX900-NEXT: ; def s[8:11] 12539; GFX900-NEXT: ;;#ASMEND 12540; GFX900-NEXT: ;;#ASMSTART 12541; GFX900-NEXT: ; def s[4:7] 12542; GFX900-NEXT: ;;#ASMEND 12543; GFX900-NEXT: s_mov_b32 s8, s6 12544; GFX900-NEXT: s_mov_b32 s10, s9 12545; GFX900-NEXT: ;;#ASMSTART 12546; 
GFX900-NEXT: ; use s[8:10] 12547; GFX900-NEXT: ;;#ASMEND 12548; GFX900-NEXT: s_setpc_b64 s[30:31] 12549; 12550; GFX90A-LABEL: s_shuffle_v3i32_v4i32__2_5_5: 12551; GFX90A: ; %bb.0: 12552; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12553; GFX90A-NEXT: ;;#ASMSTART 12554; GFX90A-NEXT: ; def s[8:11] 12555; GFX90A-NEXT: ;;#ASMEND 12556; GFX90A-NEXT: ;;#ASMSTART 12557; GFX90A-NEXT: ; def s[4:7] 12558; GFX90A-NEXT: ;;#ASMEND 12559; GFX90A-NEXT: s_mov_b32 s8, s6 12560; GFX90A-NEXT: s_mov_b32 s10, s9 12561; GFX90A-NEXT: ;;#ASMSTART 12562; GFX90A-NEXT: ; use s[8:10] 12563; GFX90A-NEXT: ;;#ASMEND 12564; GFX90A-NEXT: s_setpc_b64 s[30:31] 12565; 12566; GFX940-LABEL: s_shuffle_v3i32_v4i32__2_5_5: 12567; GFX940: ; %bb.0: 12568; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12569; GFX940-NEXT: ;;#ASMSTART 12570; GFX940-NEXT: ; def s[8:11] 12571; GFX940-NEXT: ;;#ASMEND 12572; GFX940-NEXT: ;;#ASMSTART 12573; GFX940-NEXT: ; def s[0:3] 12574; GFX940-NEXT: ;;#ASMEND 12575; GFX940-NEXT: s_mov_b32 s8, s2 12576; GFX940-NEXT: s_mov_b32 s10, s9 12577; GFX940-NEXT: ;;#ASMSTART 12578; GFX940-NEXT: ; use s[8:10] 12579; GFX940-NEXT: ;;#ASMEND 12580; GFX940-NEXT: s_setpc_b64 s[30:31] 12581 %vec0 = call <4 x i32> asm "; def $0", "=s"() 12582 %vec1 = call <4 x i32> asm "; def $0", "=s"() 12583 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 2, i32 5, i32 5> 12584 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 12585 ret void 12586} 12587 12588define void @s_shuffle_v3i32_v4i32__3_5_5() { 12589; GFX900-LABEL: s_shuffle_v3i32_v4i32__3_5_5: 12590; GFX900: ; %bb.0: 12591; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12592; GFX900-NEXT: ;;#ASMSTART 12593; GFX900-NEXT: ; def s[8:11] 12594; GFX900-NEXT: ;;#ASMEND 12595; GFX900-NEXT: ;;#ASMSTART 12596; GFX900-NEXT: ; def s[4:7] 12597; GFX900-NEXT: ;;#ASMEND 12598; GFX900-NEXT: s_mov_b32 s8, s7 12599; GFX900-NEXT: s_mov_b32 s10, s9 12600; GFX900-NEXT: ;;#ASMSTART 12601; GFX900-NEXT: ; use 
s[8:10] 12602; GFX900-NEXT: ;;#ASMEND 12603; GFX900-NEXT: s_setpc_b64 s[30:31] 12604; 12605; GFX90A-LABEL: s_shuffle_v3i32_v4i32__3_5_5: 12606; GFX90A: ; %bb.0: 12607; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12608; GFX90A-NEXT: ;;#ASMSTART 12609; GFX90A-NEXT: ; def s[8:11] 12610; GFX90A-NEXT: ;;#ASMEND 12611; GFX90A-NEXT: ;;#ASMSTART 12612; GFX90A-NEXT: ; def s[4:7] 12613; GFX90A-NEXT: ;;#ASMEND 12614; GFX90A-NEXT: s_mov_b32 s8, s7 12615; GFX90A-NEXT: s_mov_b32 s10, s9 12616; GFX90A-NEXT: ;;#ASMSTART 12617; GFX90A-NEXT: ; use s[8:10] 12618; GFX90A-NEXT: ;;#ASMEND 12619; GFX90A-NEXT: s_setpc_b64 s[30:31] 12620; 12621; GFX940-LABEL: s_shuffle_v3i32_v4i32__3_5_5: 12622; GFX940: ; %bb.0: 12623; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12624; GFX940-NEXT: ;;#ASMSTART 12625; GFX940-NEXT: ; def s[8:11] 12626; GFX940-NEXT: ;;#ASMEND 12627; GFX940-NEXT: ;;#ASMSTART 12628; GFX940-NEXT: ; def s[0:3] 12629; GFX940-NEXT: ;;#ASMEND 12630; GFX940-NEXT: s_mov_b32 s8, s3 12631; GFX940-NEXT: s_mov_b32 s10, s9 12632; GFX940-NEXT: ;;#ASMSTART 12633; GFX940-NEXT: ; use s[8:10] 12634; GFX940-NEXT: ;;#ASMEND 12635; GFX940-NEXT: s_setpc_b64 s[30:31] 12636 %vec0 = call <4 x i32> asm "; def $0", "=s"() 12637 %vec1 = call <4 x i32> asm "; def $0", "=s"() 12638 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 3, i32 5, i32 5> 12639 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 12640 ret void 12641} 12642 12643define void @s_shuffle_v3i32_v4i32__4_5_5() { 12644; GFX9-LABEL: s_shuffle_v3i32_v4i32__4_5_5: 12645; GFX9: ; %bb.0: 12646; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12647; GFX9-NEXT: ;;#ASMSTART 12648; GFX9-NEXT: ; def s[8:11] 12649; GFX9-NEXT: ;;#ASMEND 12650; GFX9-NEXT: s_mov_b32 s10, s9 12651; GFX9-NEXT: ;;#ASMSTART 12652; GFX9-NEXT: ; use s[8:10] 12653; GFX9-NEXT: ;;#ASMEND 12654; GFX9-NEXT: s_setpc_b64 s[30:31] 12655 %vec0 = call <4 x i32> asm "; def $0", "=s"() 12656 %vec1 = call <4 x i32> asm "; def $0", 
"=s"() 12657 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 4, i32 5, i32 5> 12658 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 12659 ret void 12660} 12661 12662define void @s_shuffle_v3i32_v4i32__5_5_5() { 12663; GFX9-LABEL: s_shuffle_v3i32_v4i32__5_5_5: 12664; GFX9: ; %bb.0: 12665; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12666; GFX9-NEXT: ;;#ASMSTART 12667; GFX9-NEXT: ; def s[8:11] 12668; GFX9-NEXT: ;;#ASMEND 12669; GFX9-NEXT: s_mov_b32 s8, s9 12670; GFX9-NEXT: s_mov_b32 s10, s9 12671; GFX9-NEXT: ;;#ASMSTART 12672; GFX9-NEXT: ; use s[8:10] 12673; GFX9-NEXT: ;;#ASMEND 12674; GFX9-NEXT: s_setpc_b64 s[30:31] 12675 %vec0 = call <4 x i32> asm "; def $0", "=s"() 12676 %vec1 = call <4 x i32> asm "; def $0", "=s"() 12677 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 5, i32 5, i32 5> 12678 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 12679 ret void 12680} 12681 12682define void @s_shuffle_v3i32_v4i32__6_5_5() { 12683; GFX9-LABEL: s_shuffle_v3i32_v4i32__6_5_5: 12684; GFX9: ; %bb.0: 12685; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12686; GFX9-NEXT: ;;#ASMSTART 12687; GFX9-NEXT: ; def s[8:11] 12688; GFX9-NEXT: ;;#ASMEND 12689; GFX9-NEXT: s_mov_b32 s8, s10 12690; GFX9-NEXT: s_mov_b32 s10, s9 12691; GFX9-NEXT: ;;#ASMSTART 12692; GFX9-NEXT: ; use s[8:10] 12693; GFX9-NEXT: ;;#ASMEND 12694; GFX9-NEXT: s_setpc_b64 s[30:31] 12695 %vec0 = call <4 x i32> asm "; def $0", "=s"() 12696 %vec1 = call <4 x i32> asm "; def $0", "=s"() 12697 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 6, i32 5, i32 5> 12698 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 12699 ret void 12700} 12701 12702define void @s_shuffle_v3i32_v4i32__7_5_5() { 12703; GFX9-LABEL: s_shuffle_v3i32_v4i32__7_5_5: 12704; GFX9: ; %bb.0: 12705; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12706; GFX9-NEXT: ;;#ASMSTART 12707; GFX9-NEXT: ; def s[8:11] 12708; GFX9-NEXT: 
;;#ASMEND 12709; GFX9-NEXT: s_mov_b32 s8, s11 12710; GFX9-NEXT: s_mov_b32 s10, s9 12711; GFX9-NEXT: ;;#ASMSTART 12712; GFX9-NEXT: ; use s[8:10] 12713; GFX9-NEXT: ;;#ASMEND 12714; GFX9-NEXT: s_setpc_b64 s[30:31] 12715 %vec0 = call <4 x i32> asm "; def $0", "=s"() 12716 %vec1 = call <4 x i32> asm "; def $0", "=s"() 12717 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 5, i32 5> 12718 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 12719 ret void 12720} 12721 12722define void @s_shuffle_v3i32_v4i32__7_u_5() { 12723; GFX900-LABEL: s_shuffle_v3i32_v4i32__7_u_5: 12724; GFX900: ; %bb.0: 12725; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12726; GFX900-NEXT: ;;#ASMSTART 12727; GFX900-NEXT: ; def s[4:7] 12728; GFX900-NEXT: ;;#ASMEND 12729; GFX900-NEXT: s_mov_b32 s8, s7 12730; GFX900-NEXT: s_mov_b32 s10, s5 12731; GFX900-NEXT: ;;#ASMSTART 12732; GFX900-NEXT: ; use s[8:10] 12733; GFX900-NEXT: ;;#ASMEND 12734; GFX900-NEXT: s_setpc_b64 s[30:31] 12735; 12736; GFX90A-LABEL: s_shuffle_v3i32_v4i32__7_u_5: 12737; GFX90A: ; %bb.0: 12738; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12739; GFX90A-NEXT: ;;#ASMSTART 12740; GFX90A-NEXT: ; def s[4:7] 12741; GFX90A-NEXT: ;;#ASMEND 12742; GFX90A-NEXT: s_mov_b32 s8, s7 12743; GFX90A-NEXT: s_mov_b32 s10, s5 12744; GFX90A-NEXT: ;;#ASMSTART 12745; GFX90A-NEXT: ; use s[8:10] 12746; GFX90A-NEXT: ;;#ASMEND 12747; GFX90A-NEXT: s_setpc_b64 s[30:31] 12748; 12749; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_u_5: 12750; GFX940: ; %bb.0: 12751; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12752; GFX940-NEXT: ;;#ASMSTART 12753; GFX940-NEXT: ; def s[0:3] 12754; GFX940-NEXT: ;;#ASMEND 12755; GFX940-NEXT: s_mov_b32 s8, s3 12756; GFX940-NEXT: s_mov_b32 s10, s1 12757; GFX940-NEXT: ;;#ASMSTART 12758; GFX940-NEXT: ; use s[8:10] 12759; GFX940-NEXT: ;;#ASMEND 12760; GFX940-NEXT: s_setpc_b64 s[30:31] 12761 %vec0 = call <4 x i32> asm "; def $0", "=s"() 12762 %vec1 = call <4 x i32> asm "; def $0", 
"=s"() 12763 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 poison, i32 5> 12764 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 12765 ret void 12766} 12767 12768define void @s_shuffle_v3i32_v4i32__7_0_5() { 12769; GFX900-LABEL: s_shuffle_v3i32_v4i32__7_0_5: 12770; GFX900: ; %bb.0: 12771; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12772; GFX900-NEXT: ;;#ASMSTART 12773; GFX900-NEXT: ; def s[4:7] 12774; GFX900-NEXT: ;;#ASMEND 12775; GFX900-NEXT: ;;#ASMSTART 12776; GFX900-NEXT: ; def s[12:15] 12777; GFX900-NEXT: ;;#ASMEND 12778; GFX900-NEXT: s_mov_b32 s8, s15 12779; GFX900-NEXT: s_mov_b32 s9, s4 12780; GFX900-NEXT: s_mov_b32 s10, s13 12781; GFX900-NEXT: ;;#ASMSTART 12782; GFX900-NEXT: ; use s[8:10] 12783; GFX900-NEXT: ;;#ASMEND 12784; GFX900-NEXT: s_setpc_b64 s[30:31] 12785; 12786; GFX90A-LABEL: s_shuffle_v3i32_v4i32__7_0_5: 12787; GFX90A: ; %bb.0: 12788; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12789; GFX90A-NEXT: ;;#ASMSTART 12790; GFX90A-NEXT: ; def s[4:7] 12791; GFX90A-NEXT: ;;#ASMEND 12792; GFX90A-NEXT: ;;#ASMSTART 12793; GFX90A-NEXT: ; def s[12:15] 12794; GFX90A-NEXT: ;;#ASMEND 12795; GFX90A-NEXT: s_mov_b32 s8, s15 12796; GFX90A-NEXT: s_mov_b32 s9, s4 12797; GFX90A-NEXT: s_mov_b32 s10, s13 12798; GFX90A-NEXT: ;;#ASMSTART 12799; GFX90A-NEXT: ; use s[8:10] 12800; GFX90A-NEXT: ;;#ASMEND 12801; GFX90A-NEXT: s_setpc_b64 s[30:31] 12802; 12803; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_0_5: 12804; GFX940: ; %bb.0: 12805; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12806; GFX940-NEXT: ;;#ASMSTART 12807; GFX940-NEXT: ; def s[0:3] 12808; GFX940-NEXT: ;;#ASMEND 12809; GFX940-NEXT: ;;#ASMSTART 12810; GFX940-NEXT: ; def s[4:7] 12811; GFX940-NEXT: ;;#ASMEND 12812; GFX940-NEXT: s_mov_b32 s8, s7 12813; GFX940-NEXT: s_mov_b32 s9, s0 12814; GFX940-NEXT: s_mov_b32 s10, s5 12815; GFX940-NEXT: ;;#ASMSTART 12816; GFX940-NEXT: ; use s[8:10] 12817; GFX940-NEXT: ;;#ASMEND 12818; GFX940-NEXT: s_setpc_b64 
s[30:31] 12819 %vec0 = call <4 x i32> asm "; def $0", "=s"() 12820 %vec1 = call <4 x i32> asm "; def $0", "=s"() 12821 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 0, i32 5> 12822 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 12823 ret void 12824} 12825 12826define void @s_shuffle_v3i32_v4i32__7_1_5() { 12827; GFX900-LABEL: s_shuffle_v3i32_v4i32__7_1_5: 12828; GFX900: ; %bb.0: 12829; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12830; GFX900-NEXT: ;;#ASMSTART 12831; GFX900-NEXT: ; def s[8:11] 12832; GFX900-NEXT: ;;#ASMEND 12833; GFX900-NEXT: ;;#ASMSTART 12834; GFX900-NEXT: ; def s[4:7] 12835; GFX900-NEXT: ;;#ASMEND 12836; GFX900-NEXT: s_mov_b32 s8, s7 12837; GFX900-NEXT: s_mov_b32 s10, s5 12838; GFX900-NEXT: ;;#ASMSTART 12839; GFX900-NEXT: ; use s[8:10] 12840; GFX900-NEXT: ;;#ASMEND 12841; GFX900-NEXT: s_setpc_b64 s[30:31] 12842; 12843; GFX90A-LABEL: s_shuffle_v3i32_v4i32__7_1_5: 12844; GFX90A: ; %bb.0: 12845; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12846; GFX90A-NEXT: ;;#ASMSTART 12847; GFX90A-NEXT: ; def s[8:11] 12848; GFX90A-NEXT: ;;#ASMEND 12849; GFX90A-NEXT: ;;#ASMSTART 12850; GFX90A-NEXT: ; def s[4:7] 12851; GFX90A-NEXT: ;;#ASMEND 12852; GFX90A-NEXT: s_mov_b32 s8, s7 12853; GFX90A-NEXT: s_mov_b32 s10, s5 12854; GFX90A-NEXT: ;;#ASMSTART 12855; GFX90A-NEXT: ; use s[8:10] 12856; GFX90A-NEXT: ;;#ASMEND 12857; GFX90A-NEXT: s_setpc_b64 s[30:31] 12858; 12859; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_1_5: 12860; GFX940: ; %bb.0: 12861; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12862; GFX940-NEXT: ;;#ASMSTART 12863; GFX940-NEXT: ; def s[8:11] 12864; GFX940-NEXT: ;;#ASMEND 12865; GFX940-NEXT: ;;#ASMSTART 12866; GFX940-NEXT: ; def s[0:3] 12867; GFX940-NEXT: ;;#ASMEND 12868; GFX940-NEXT: s_mov_b32 s8, s3 12869; GFX940-NEXT: s_mov_b32 s10, s1 12870; GFX940-NEXT: ;;#ASMSTART 12871; GFX940-NEXT: ; use s[8:10] 12872; GFX940-NEXT: ;;#ASMEND 12873; GFX940-NEXT: s_setpc_b64 s[30:31] 12874 %vec0 = 
call <4 x i32> asm "; def $0", "=s"() 12875 %vec1 = call <4 x i32> asm "; def $0", "=s"() 12876 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 1, i32 5> 12877 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 12878 ret void 12879} 12880 12881define void @s_shuffle_v3i32_v4i32__7_2_5() { 12882; GFX900-LABEL: s_shuffle_v3i32_v4i32__7_2_5: 12883; GFX900: ; %bb.0: 12884; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12885; GFX900-NEXT: ;;#ASMSTART 12886; GFX900-NEXT: ; def s[4:7] 12887; GFX900-NEXT: ;;#ASMEND 12888; GFX900-NEXT: ;;#ASMSTART 12889; GFX900-NEXT: ; def s[12:15] 12890; GFX900-NEXT: ;;#ASMEND 12891; GFX900-NEXT: s_mov_b32 s8, s15 12892; GFX900-NEXT: s_mov_b32 s9, s6 12893; GFX900-NEXT: s_mov_b32 s10, s13 12894; GFX900-NEXT: ;;#ASMSTART 12895; GFX900-NEXT: ; use s[8:10] 12896; GFX900-NEXT: ;;#ASMEND 12897; GFX900-NEXT: s_setpc_b64 s[30:31] 12898; 12899; GFX90A-LABEL: s_shuffle_v3i32_v4i32__7_2_5: 12900; GFX90A: ; %bb.0: 12901; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12902; GFX90A-NEXT: ;;#ASMSTART 12903; GFX90A-NEXT: ; def s[4:7] 12904; GFX90A-NEXT: ;;#ASMEND 12905; GFX90A-NEXT: ;;#ASMSTART 12906; GFX90A-NEXT: ; def s[12:15] 12907; GFX90A-NEXT: ;;#ASMEND 12908; GFX90A-NEXT: s_mov_b32 s8, s15 12909; GFX90A-NEXT: s_mov_b32 s9, s6 12910; GFX90A-NEXT: s_mov_b32 s10, s13 12911; GFX90A-NEXT: ;;#ASMSTART 12912; GFX90A-NEXT: ; use s[8:10] 12913; GFX90A-NEXT: ;;#ASMEND 12914; GFX90A-NEXT: s_setpc_b64 s[30:31] 12915; 12916; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_2_5: 12917; GFX940: ; %bb.0: 12918; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12919; GFX940-NEXT: ;;#ASMSTART 12920; GFX940-NEXT: ; def s[0:3] 12921; GFX940-NEXT: ;;#ASMEND 12922; GFX940-NEXT: ;;#ASMSTART 12923; GFX940-NEXT: ; def s[4:7] 12924; GFX940-NEXT: ;;#ASMEND 12925; GFX940-NEXT: s_mov_b32 s8, s7 12926; GFX940-NEXT: s_mov_b32 s9, s2 12927; GFX940-NEXT: s_mov_b32 s10, s5 12928; GFX940-NEXT: ;;#ASMSTART 12929; GFX940-NEXT: ; use 
s[8:10] 12930; GFX940-NEXT: ;;#ASMEND 12931; GFX940-NEXT: s_setpc_b64 s[30:31] 12932 %vec0 = call <4 x i32> asm "; def $0", "=s"() 12933 %vec1 = call <4 x i32> asm "; def $0", "=s"() 12934 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 2, i32 5> 12935 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 12936 ret void 12937} 12938 12939define void @s_shuffle_v3i32_v4i32__7_3_5() { 12940; GFX900-LABEL: s_shuffle_v3i32_v4i32__7_3_5: 12941; GFX900: ; %bb.0: 12942; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12943; GFX900-NEXT: ;;#ASMSTART 12944; GFX900-NEXT: ; def s[4:7] 12945; GFX900-NEXT: ;;#ASMEND 12946; GFX900-NEXT: ;;#ASMSTART 12947; GFX900-NEXT: ; def s[12:15] 12948; GFX900-NEXT: ;;#ASMEND 12949; GFX900-NEXT: s_mov_b32 s8, s15 12950; GFX900-NEXT: s_mov_b32 s9, s7 12951; GFX900-NEXT: s_mov_b32 s10, s13 12952; GFX900-NEXT: ;;#ASMSTART 12953; GFX900-NEXT: ; use s[8:10] 12954; GFX900-NEXT: ;;#ASMEND 12955; GFX900-NEXT: s_setpc_b64 s[30:31] 12956; 12957; GFX90A-LABEL: s_shuffle_v3i32_v4i32__7_3_5: 12958; GFX90A: ; %bb.0: 12959; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12960; GFX90A-NEXT: ;;#ASMSTART 12961; GFX90A-NEXT: ; def s[4:7] 12962; GFX90A-NEXT: ;;#ASMEND 12963; GFX90A-NEXT: ;;#ASMSTART 12964; GFX90A-NEXT: ; def s[12:15] 12965; GFX90A-NEXT: ;;#ASMEND 12966; GFX90A-NEXT: s_mov_b32 s8, s15 12967; GFX90A-NEXT: s_mov_b32 s9, s7 12968; GFX90A-NEXT: s_mov_b32 s10, s13 12969; GFX90A-NEXT: ;;#ASMSTART 12970; GFX90A-NEXT: ; use s[8:10] 12971; GFX90A-NEXT: ;;#ASMEND 12972; GFX90A-NEXT: s_setpc_b64 s[30:31] 12973; 12974; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_3_5: 12975; GFX940: ; %bb.0: 12976; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12977; GFX940-NEXT: ;;#ASMSTART 12978; GFX940-NEXT: ; def s[0:3] 12979; GFX940-NEXT: ;;#ASMEND 12980; GFX940-NEXT: ;;#ASMSTART 12981; GFX940-NEXT: ; def s[4:7] 12982; GFX940-NEXT: ;;#ASMEND 12983; GFX940-NEXT: s_mov_b32 s8, s7 12984; GFX940-NEXT: s_mov_b32 s9, s3 
12985; GFX940-NEXT: s_mov_b32 s10, s5 12986; GFX940-NEXT: ;;#ASMSTART 12987; GFX940-NEXT: ; use s[8:10] 12988; GFX940-NEXT: ;;#ASMEND 12989; GFX940-NEXT: s_setpc_b64 s[30:31] 12990 %vec0 = call <4 x i32> asm "; def $0", "=s"() 12991 %vec1 = call <4 x i32> asm "; def $0", "=s"() 12992 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 3, i32 5> 12993 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 12994 ret void 12995} 12996 12997define void @s_shuffle_v3i32_v4i32__7_4_5() { 12998; GFX900-LABEL: s_shuffle_v3i32_v4i32__7_4_5: 12999; GFX900: ; %bb.0: 13000; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13001; GFX900-NEXT: ;;#ASMSTART 13002; GFX900-NEXT: ; def s[4:7] 13003; GFX900-NEXT: ;;#ASMEND 13004; GFX900-NEXT: s_mov_b32 s8, s7 13005; GFX900-NEXT: s_mov_b32 s9, s4 13006; GFX900-NEXT: s_mov_b32 s10, s5 13007; GFX900-NEXT: ;;#ASMSTART 13008; GFX900-NEXT: ; use s[8:10] 13009; GFX900-NEXT: ;;#ASMEND 13010; GFX900-NEXT: s_setpc_b64 s[30:31] 13011; 13012; GFX90A-LABEL: s_shuffle_v3i32_v4i32__7_4_5: 13013; GFX90A: ; %bb.0: 13014; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13015; GFX90A-NEXT: ;;#ASMSTART 13016; GFX90A-NEXT: ; def s[4:7] 13017; GFX90A-NEXT: ;;#ASMEND 13018; GFX90A-NEXT: s_mov_b32 s8, s7 13019; GFX90A-NEXT: s_mov_b32 s9, s4 13020; GFX90A-NEXT: s_mov_b32 s10, s5 13021; GFX90A-NEXT: ;;#ASMSTART 13022; GFX90A-NEXT: ; use s[8:10] 13023; GFX90A-NEXT: ;;#ASMEND 13024; GFX90A-NEXT: s_setpc_b64 s[30:31] 13025; 13026; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_4_5: 13027; GFX940: ; %bb.0: 13028; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13029; GFX940-NEXT: ;;#ASMSTART 13030; GFX940-NEXT: ; def s[0:3] 13031; GFX940-NEXT: ;;#ASMEND 13032; GFX940-NEXT: s_mov_b32 s8, s3 13033; GFX940-NEXT: s_mov_b32 s9, s0 13034; GFX940-NEXT: s_mov_b32 s10, s1 13035; GFX940-NEXT: ;;#ASMSTART 13036; GFX940-NEXT: ; use s[8:10] 13037; GFX940-NEXT: ;;#ASMEND 13038; GFX940-NEXT: s_setpc_b64 s[30:31] 13039 %vec0 = call <4 x 
i32> asm "; def $0", "=s"() 13040 %vec1 = call <4 x i32> asm "; def $0", "=s"() 13041 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 4, i32 5> 13042 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 13043 ret void 13044} 13045 13046define void @s_shuffle_v3i32_v4i32__7_6_5() { 13047; GFX900-LABEL: s_shuffle_v3i32_v4i32__7_6_5: 13048; GFX900: ; %bb.0: 13049; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13050; GFX900-NEXT: ;;#ASMSTART 13051; GFX900-NEXT: ; def s[4:7] 13052; GFX900-NEXT: ;;#ASMEND 13053; GFX900-NEXT: s_mov_b32 s8, s7 13054; GFX900-NEXT: s_mov_b32 s9, s6 13055; GFX900-NEXT: s_mov_b32 s10, s5 13056; GFX900-NEXT: ;;#ASMSTART 13057; GFX900-NEXT: ; use s[8:10] 13058; GFX900-NEXT: ;;#ASMEND 13059; GFX900-NEXT: s_setpc_b64 s[30:31] 13060; 13061; GFX90A-LABEL: s_shuffle_v3i32_v4i32__7_6_5: 13062; GFX90A: ; %bb.0: 13063; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13064; GFX90A-NEXT: ;;#ASMSTART 13065; GFX90A-NEXT: ; def s[4:7] 13066; GFX90A-NEXT: ;;#ASMEND 13067; GFX90A-NEXT: s_mov_b32 s8, s7 13068; GFX90A-NEXT: s_mov_b32 s9, s6 13069; GFX90A-NEXT: s_mov_b32 s10, s5 13070; GFX90A-NEXT: ;;#ASMSTART 13071; GFX90A-NEXT: ; use s[8:10] 13072; GFX90A-NEXT: ;;#ASMEND 13073; GFX90A-NEXT: s_setpc_b64 s[30:31] 13074; 13075; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_6_5: 13076; GFX940: ; %bb.0: 13077; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13078; GFX940-NEXT: ;;#ASMSTART 13079; GFX940-NEXT: ; def s[0:3] 13080; GFX940-NEXT: ;;#ASMEND 13081; GFX940-NEXT: s_mov_b32 s8, s3 13082; GFX940-NEXT: s_mov_b32 s9, s2 13083; GFX940-NEXT: s_mov_b32 s10, s1 13084; GFX940-NEXT: ;;#ASMSTART 13085; GFX940-NEXT: ; use s[8:10] 13086; GFX940-NEXT: ;;#ASMEND 13087; GFX940-NEXT: s_setpc_b64 s[30:31] 13088 %vec0 = call <4 x i32> asm "; def $0", "=s"() 13089 %vec1 = call <4 x i32> asm "; def $0", "=s"() 13090 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 6, i32 5> 13091 call void asm 
sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 13092 ret void 13093} 13094 13095define void @s_shuffle_v3i32_v4i32__u_6_6() { 13096; GFX9-LABEL: s_shuffle_v3i32_v4i32__u_6_6: 13097; GFX9: ; %bb.0: 13098; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13099; GFX9-NEXT: ;;#ASMSTART 13100; GFX9-NEXT: ; def s[8:11] 13101; GFX9-NEXT: ;;#ASMEND 13102; GFX9-NEXT: s_mov_b32 s9, s10 13103; GFX9-NEXT: ;;#ASMSTART 13104; GFX9-NEXT: ; use s[8:10] 13105; GFX9-NEXT: ;;#ASMEND 13106; GFX9-NEXT: s_setpc_b64 s[30:31] 13107 %vec0 = call <4 x i32> asm "; def $0", "=s"() 13108 %vec1 = call <4 x i32> asm "; def $0", "=s"() 13109 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 poison, i32 6, i32 6> 13110 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 13111 ret void 13112} 13113 13114define void @s_shuffle_v3i32_v4i32__0_6_6() { 13115; GFX900-LABEL: s_shuffle_v3i32_v4i32__0_6_6: 13116; GFX900: ; %bb.0: 13117; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13118; GFX900-NEXT: ;;#ASMSTART 13119; GFX900-NEXT: ; def s[8:11] 13120; GFX900-NEXT: ;;#ASMEND 13121; GFX900-NEXT: ;;#ASMSTART 13122; GFX900-NEXT: ; def s[4:7] 13123; GFX900-NEXT: ;;#ASMEND 13124; GFX900-NEXT: s_mov_b32 s9, s6 13125; GFX900-NEXT: s_mov_b32 s10, s6 13126; GFX900-NEXT: ;;#ASMSTART 13127; GFX900-NEXT: ; use s[8:10] 13128; GFX900-NEXT: ;;#ASMEND 13129; GFX900-NEXT: s_setpc_b64 s[30:31] 13130; 13131; GFX90A-LABEL: s_shuffle_v3i32_v4i32__0_6_6: 13132; GFX90A: ; %bb.0: 13133; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13134; GFX90A-NEXT: ;;#ASMSTART 13135; GFX90A-NEXT: ; def s[8:11] 13136; GFX90A-NEXT: ;;#ASMEND 13137; GFX90A-NEXT: ;;#ASMSTART 13138; GFX90A-NEXT: ; def s[4:7] 13139; GFX90A-NEXT: ;;#ASMEND 13140; GFX90A-NEXT: s_mov_b32 s9, s6 13141; GFX90A-NEXT: s_mov_b32 s10, s6 13142; GFX90A-NEXT: ;;#ASMSTART 13143; GFX90A-NEXT: ; use s[8:10] 13144; GFX90A-NEXT: ;;#ASMEND 13145; GFX90A-NEXT: s_setpc_b64 s[30:31] 13146; 13147; GFX940-LABEL: 
s_shuffle_v3i32_v4i32__0_6_6: 13148; GFX940: ; %bb.0: 13149; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13150; GFX940-NEXT: ;;#ASMSTART 13151; GFX940-NEXT: ; def s[8:11] 13152; GFX940-NEXT: ;;#ASMEND 13153; GFX940-NEXT: ;;#ASMSTART 13154; GFX940-NEXT: ; def s[0:3] 13155; GFX940-NEXT: ;;#ASMEND 13156; GFX940-NEXT: s_mov_b32 s9, s2 13157; GFX940-NEXT: s_mov_b32 s10, s2 13158; GFX940-NEXT: ;;#ASMSTART 13159; GFX940-NEXT: ; use s[8:10] 13160; GFX940-NEXT: ;;#ASMEND 13161; GFX940-NEXT: s_setpc_b64 s[30:31] 13162 %vec0 = call <4 x i32> asm "; def $0", "=s"() 13163 %vec1 = call <4 x i32> asm "; def $0", "=s"() 13164 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 0, i32 6, i32 6> 13165 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 13166 ret void 13167} 13168 13169define void @s_shuffle_v3i32_v4i32__1_6_6() { 13170; GFX900-LABEL: s_shuffle_v3i32_v4i32__1_6_6: 13171; GFX900: ; %bb.0: 13172; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13173; GFX900-NEXT: ;;#ASMSTART 13174; GFX900-NEXT: ; def s[8:11] 13175; GFX900-NEXT: ;;#ASMEND 13176; GFX900-NEXT: ;;#ASMSTART 13177; GFX900-NEXT: ; def s[4:7] 13178; GFX900-NEXT: ;;#ASMEND 13179; GFX900-NEXT: s_mov_b32 s8, s5 13180; GFX900-NEXT: s_mov_b32 s9, s10 13181; GFX900-NEXT: ;;#ASMSTART 13182; GFX900-NEXT: ; use s[8:10] 13183; GFX900-NEXT: ;;#ASMEND 13184; GFX900-NEXT: s_setpc_b64 s[30:31] 13185; 13186; GFX90A-LABEL: s_shuffle_v3i32_v4i32__1_6_6: 13187; GFX90A: ; %bb.0: 13188; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13189; GFX90A-NEXT: ;;#ASMSTART 13190; GFX90A-NEXT: ; def s[8:11] 13191; GFX90A-NEXT: ;;#ASMEND 13192; GFX90A-NEXT: ;;#ASMSTART 13193; GFX90A-NEXT: ; def s[4:7] 13194; GFX90A-NEXT: ;;#ASMEND 13195; GFX90A-NEXT: s_mov_b32 s8, s5 13196; GFX90A-NEXT: s_mov_b32 s9, s10 13197; GFX90A-NEXT: ;;#ASMSTART 13198; GFX90A-NEXT: ; use s[8:10] 13199; GFX90A-NEXT: ;;#ASMEND 13200; GFX90A-NEXT: s_setpc_b64 s[30:31] 13201; 13202; GFX940-LABEL: 
s_shuffle_v3i32_v4i32__1_6_6: 13203; GFX940: ; %bb.0: 13204; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13205; GFX940-NEXT: ;;#ASMSTART 13206; GFX940-NEXT: ; def s[8:11] 13207; GFX940-NEXT: ;;#ASMEND 13208; GFX940-NEXT: ;;#ASMSTART 13209; GFX940-NEXT: ; def s[0:3] 13210; GFX940-NEXT: ;;#ASMEND 13211; GFX940-NEXT: s_mov_b32 s8, s1 13212; GFX940-NEXT: s_mov_b32 s9, s10 13213; GFX940-NEXT: ;;#ASMSTART 13214; GFX940-NEXT: ; use s[8:10] 13215; GFX940-NEXT: ;;#ASMEND 13216; GFX940-NEXT: s_setpc_b64 s[30:31] 13217 %vec0 = call <4 x i32> asm "; def $0", "=s"() 13218 %vec1 = call <4 x i32> asm "; def $0", "=s"() 13219 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 1, i32 6, i32 6> 13220 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 13221 ret void 13222} 13223 13224define void @s_shuffle_v3i32_v4i32__2_6_6() { 13225; GFX900-LABEL: s_shuffle_v3i32_v4i32__2_6_6: 13226; GFX900: ; %bb.0: 13227; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13228; GFX900-NEXT: ;;#ASMSTART 13229; GFX900-NEXT: ; def s[8:11] 13230; GFX900-NEXT: ;;#ASMEND 13231; GFX900-NEXT: ;;#ASMSTART 13232; GFX900-NEXT: ; def s[4:7] 13233; GFX900-NEXT: ;;#ASMEND 13234; GFX900-NEXT: s_mov_b32 s8, s6 13235; GFX900-NEXT: s_mov_b32 s9, s10 13236; GFX900-NEXT: ;;#ASMSTART 13237; GFX900-NEXT: ; use s[8:10] 13238; GFX900-NEXT: ;;#ASMEND 13239; GFX900-NEXT: s_setpc_b64 s[30:31] 13240; 13241; GFX90A-LABEL: s_shuffle_v3i32_v4i32__2_6_6: 13242; GFX90A: ; %bb.0: 13243; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13244; GFX90A-NEXT: ;;#ASMSTART 13245; GFX90A-NEXT: ; def s[8:11] 13246; GFX90A-NEXT: ;;#ASMEND 13247; GFX90A-NEXT: ;;#ASMSTART 13248; GFX90A-NEXT: ; def s[4:7] 13249; GFX90A-NEXT: ;;#ASMEND 13250; GFX90A-NEXT: s_mov_b32 s8, s6 13251; GFX90A-NEXT: s_mov_b32 s9, s10 13252; GFX90A-NEXT: ;;#ASMSTART 13253; GFX90A-NEXT: ; use s[8:10] 13254; GFX90A-NEXT: ;;#ASMEND 13255; GFX90A-NEXT: s_setpc_b64 s[30:31] 13256; 13257; GFX940-LABEL: 
s_shuffle_v3i32_v4i32__2_6_6: 13258; GFX940: ; %bb.0: 13259; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13260; GFX940-NEXT: ;;#ASMSTART 13261; GFX940-NEXT: ; def s[8:11] 13262; GFX940-NEXT: ;;#ASMEND 13263; GFX940-NEXT: ;;#ASMSTART 13264; GFX940-NEXT: ; def s[0:3] 13265; GFX940-NEXT: ;;#ASMEND 13266; GFX940-NEXT: s_mov_b32 s8, s2 13267; GFX940-NEXT: s_mov_b32 s9, s10 13268; GFX940-NEXT: ;;#ASMSTART 13269; GFX940-NEXT: ; use s[8:10] 13270; GFX940-NEXT: ;;#ASMEND 13271; GFX940-NEXT: s_setpc_b64 s[30:31] 13272 %vec0 = call <4 x i32> asm "; def $0", "=s"() 13273 %vec1 = call <4 x i32> asm "; def $0", "=s"() 13274 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 2, i32 6, i32 6> 13275 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 13276 ret void 13277} 13278 13279define void @s_shuffle_v3i32_v4i32__3_6_6() { 13280; GFX900-LABEL: s_shuffle_v3i32_v4i32__3_6_6: 13281; GFX900: ; %bb.0: 13282; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13283; GFX900-NEXT: ;;#ASMSTART 13284; GFX900-NEXT: ; def s[8:11] 13285; GFX900-NEXT: ;;#ASMEND 13286; GFX900-NEXT: ;;#ASMSTART 13287; GFX900-NEXT: ; def s[4:7] 13288; GFX900-NEXT: ;;#ASMEND 13289; GFX900-NEXT: s_mov_b32 s8, s7 13290; GFX900-NEXT: s_mov_b32 s9, s10 13291; GFX900-NEXT: ;;#ASMSTART 13292; GFX900-NEXT: ; use s[8:10] 13293; GFX900-NEXT: ;;#ASMEND 13294; GFX900-NEXT: s_setpc_b64 s[30:31] 13295; 13296; GFX90A-LABEL: s_shuffle_v3i32_v4i32__3_6_6: 13297; GFX90A: ; %bb.0: 13298; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13299; GFX90A-NEXT: ;;#ASMSTART 13300; GFX90A-NEXT: ; def s[8:11] 13301; GFX90A-NEXT: ;;#ASMEND 13302; GFX90A-NEXT: ;;#ASMSTART 13303; GFX90A-NEXT: ; def s[4:7] 13304; GFX90A-NEXT: ;;#ASMEND 13305; GFX90A-NEXT: s_mov_b32 s8, s7 13306; GFX90A-NEXT: s_mov_b32 s9, s10 13307; GFX90A-NEXT: ;;#ASMSTART 13308; GFX90A-NEXT: ; use s[8:10] 13309; GFX90A-NEXT: ;;#ASMEND 13310; GFX90A-NEXT: s_setpc_b64 s[30:31] 13311; 13312; GFX940-LABEL: 
s_shuffle_v3i32_v4i32__3_6_6: 13313; GFX940: ; %bb.0: 13314; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13315; GFX940-NEXT: ;;#ASMSTART 13316; GFX940-NEXT: ; def s[8:11] 13317; GFX940-NEXT: ;;#ASMEND 13318; GFX940-NEXT: ;;#ASMSTART 13319; GFX940-NEXT: ; def s[0:3] 13320; GFX940-NEXT: ;;#ASMEND 13321; GFX940-NEXT: s_mov_b32 s8, s3 13322; GFX940-NEXT: s_mov_b32 s9, s10 13323; GFX940-NEXT: ;;#ASMSTART 13324; GFX940-NEXT: ; use s[8:10] 13325; GFX940-NEXT: ;;#ASMEND 13326; GFX940-NEXT: s_setpc_b64 s[30:31] 13327 %vec0 = call <4 x i32> asm "; def $0", "=s"() 13328 %vec1 = call <4 x i32> asm "; def $0", "=s"() 13329 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 3, i32 6, i32 6> 13330 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 13331 ret void 13332} 13333 13334define void @s_shuffle_v3i32_v4i32__4_6_6() { 13335; GFX9-LABEL: s_shuffle_v3i32_v4i32__4_6_6: 13336; GFX9: ; %bb.0: 13337; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13338; GFX9-NEXT: ;;#ASMSTART 13339; GFX9-NEXT: ; def s[8:11] 13340; GFX9-NEXT: ;;#ASMEND 13341; GFX9-NEXT: s_mov_b32 s9, s10 13342; GFX9-NEXT: ;;#ASMSTART 13343; GFX9-NEXT: ; use s[8:10] 13344; GFX9-NEXT: ;;#ASMEND 13345; GFX9-NEXT: s_setpc_b64 s[30:31] 13346 %vec0 = call <4 x i32> asm "; def $0", "=s"() 13347 %vec1 = call <4 x i32> asm "; def $0", "=s"() 13348 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 4, i32 6, i32 6> 13349 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 13350 ret void 13351} 13352 13353define void @s_shuffle_v3i32_v4i32__5_6_6() { 13354; GFX9-LABEL: s_shuffle_v3i32_v4i32__5_6_6: 13355; GFX9: ; %bb.0: 13356; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13357; GFX9-NEXT: ;;#ASMSTART 13358; GFX9-NEXT: ; def s[8:11] 13359; GFX9-NEXT: ;;#ASMEND 13360; GFX9-NEXT: s_mov_b32 s8, s9 13361; GFX9-NEXT: s_mov_b32 s9, s10 13362; GFX9-NEXT: ;;#ASMSTART 13363; GFX9-NEXT: ; use s[8:10] 13364; GFX9-NEXT: ;;#ASMEND 13365; GFX9-NEXT: 
s_setpc_b64 s[30:31] 13366 %vec0 = call <4 x i32> asm "; def $0", "=s"() 13367 %vec1 = call <4 x i32> asm "; def $0", "=s"() 13368 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 5, i32 6, i32 6> 13369 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 13370 ret void 13371} 13372 13373define void @s_shuffle_v3i32_v4i32__6_6_6() { 13374; GFX9-LABEL: s_shuffle_v3i32_v4i32__6_6_6: 13375; GFX9: ; %bb.0: 13376; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13377; GFX9-NEXT: ;;#ASMSTART 13378; GFX9-NEXT: ; def s[8:11] 13379; GFX9-NEXT: ;;#ASMEND 13380; GFX9-NEXT: s_mov_b32 s8, s10 13381; GFX9-NEXT: s_mov_b32 s9, s10 13382; GFX9-NEXT: ;;#ASMSTART 13383; GFX9-NEXT: ; use s[8:10] 13384; GFX9-NEXT: ;;#ASMEND 13385; GFX9-NEXT: s_setpc_b64 s[30:31] 13386 %vec0 = call <4 x i32> asm "; def $0", "=s"() 13387 %vec1 = call <4 x i32> asm "; def $0", "=s"() 13388 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 6, i32 6, i32 6> 13389 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 13390 ret void 13391} 13392 13393define void @s_shuffle_v3i32_v4i32__7_6_6() { 13394; GFX9-LABEL: s_shuffle_v3i32_v4i32__7_6_6: 13395; GFX9: ; %bb.0: 13396; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13397; GFX9-NEXT: ;;#ASMSTART 13398; GFX9-NEXT: ; def s[8:11] 13399; GFX9-NEXT: ;;#ASMEND 13400; GFX9-NEXT: s_mov_b32 s8, s11 13401; GFX9-NEXT: s_mov_b32 s9, s10 13402; GFX9-NEXT: ;;#ASMSTART 13403; GFX9-NEXT: ; use s[8:10] 13404; GFX9-NEXT: ;;#ASMEND 13405; GFX9-NEXT: s_setpc_b64 s[30:31] 13406 %vec0 = call <4 x i32> asm "; def $0", "=s"() 13407 %vec1 = call <4 x i32> asm "; def $0", "=s"() 13408 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 6, i32 6> 13409 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 13410 ret void 13411} 13412 13413define void @s_shuffle_v3i32_v4i32__7_u_6() { 13414; GFX9-LABEL: s_shuffle_v3i32_v4i32__7_u_6: 13415; GFX9: ; %bb.0: 13416; GFX9-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13417; GFX9-NEXT: ;;#ASMSTART 13418; GFX9-NEXT: ; def s[8:11] 13419; GFX9-NEXT: ;;#ASMEND 13420; GFX9-NEXT: s_mov_b32 s8, s11 13421; GFX9-NEXT: ;;#ASMSTART 13422; GFX9-NEXT: ; use s[8:10] 13423; GFX9-NEXT: ;;#ASMEND 13424; GFX9-NEXT: s_setpc_b64 s[30:31] 13425 %vec0 = call <4 x i32> asm "; def $0", "=s"() 13426 %vec1 = call <4 x i32> asm "; def $0", "=s"() 13427 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 poison, i32 6> 13428 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 13429 ret void 13430} 13431 13432define void @s_shuffle_v3i32_v4i32__7_0_6() { 13433; GFX900-LABEL: s_shuffle_v3i32_v4i32__7_0_6: 13434; GFX900: ; %bb.0: 13435; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13436; GFX900-NEXT: ;;#ASMSTART 13437; GFX900-NEXT: ; def s[8:11] 13438; GFX900-NEXT: ;;#ASMEND 13439; GFX900-NEXT: ;;#ASMSTART 13440; GFX900-NEXT: ; def s[4:7] 13441; GFX900-NEXT: ;;#ASMEND 13442; GFX900-NEXT: s_mov_b32 s8, s11 13443; GFX900-NEXT: s_mov_b32 s9, s4 13444; GFX900-NEXT: ;;#ASMSTART 13445; GFX900-NEXT: ; use s[8:10] 13446; GFX900-NEXT: ;;#ASMEND 13447; GFX900-NEXT: s_setpc_b64 s[30:31] 13448; 13449; GFX90A-LABEL: s_shuffle_v3i32_v4i32__7_0_6: 13450; GFX90A: ; %bb.0: 13451; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13452; GFX90A-NEXT: ;;#ASMSTART 13453; GFX90A-NEXT: ; def s[8:11] 13454; GFX90A-NEXT: ;;#ASMEND 13455; GFX90A-NEXT: ;;#ASMSTART 13456; GFX90A-NEXT: ; def s[4:7] 13457; GFX90A-NEXT: ;;#ASMEND 13458; GFX90A-NEXT: s_mov_b32 s8, s11 13459; GFX90A-NEXT: s_mov_b32 s9, s4 13460; GFX90A-NEXT: ;;#ASMSTART 13461; GFX90A-NEXT: ; use s[8:10] 13462; GFX90A-NEXT: ;;#ASMEND 13463; GFX90A-NEXT: s_setpc_b64 s[30:31] 13464; 13465; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_0_6: 13466; GFX940: ; %bb.0: 13467; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13468; GFX940-NEXT: ;;#ASMSTART 13469; GFX940-NEXT: ; def s[8:11] 13470; GFX940-NEXT: ;;#ASMEND 13471; GFX940-NEXT: 
;;#ASMSTART 13472; GFX940-NEXT: ; def s[0:3] 13473; GFX940-NEXT: ;;#ASMEND 13474; GFX940-NEXT: s_mov_b32 s8, s11 13475; GFX940-NEXT: s_mov_b32 s9, s0 13476; GFX940-NEXT: ;;#ASMSTART 13477; GFX940-NEXT: ; use s[8:10] 13478; GFX940-NEXT: ;;#ASMEND 13479; GFX940-NEXT: s_setpc_b64 s[30:31] 13480 %vec0 = call <4 x i32> asm "; def $0", "=s"() 13481 %vec1 = call <4 x i32> asm "; def $0", "=s"() 13482 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 0, i32 6> 13483 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 13484 ret void 13485} 13486 13487define void @s_shuffle_v3i32_v4i32__7_1_6() { 13488; GFX900-LABEL: s_shuffle_v3i32_v4i32__7_1_6: 13489; GFX900: ; %bb.0: 13490; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13491; GFX900-NEXT: ;;#ASMSTART 13492; GFX900-NEXT: ; def s[8:11] 13493; GFX900-NEXT: ;;#ASMEND 13494; GFX900-NEXT: ;;#ASMSTART 13495; GFX900-NEXT: ; def s[4:7] 13496; GFX900-NEXT: ;;#ASMEND 13497; GFX900-NEXT: s_mov_b32 s8, s7 13498; GFX900-NEXT: s_mov_b32 s10, s6 13499; GFX900-NEXT: ;;#ASMSTART 13500; GFX900-NEXT: ; use s[8:10] 13501; GFX900-NEXT: ;;#ASMEND 13502; GFX900-NEXT: s_setpc_b64 s[30:31] 13503; 13504; GFX90A-LABEL: s_shuffle_v3i32_v4i32__7_1_6: 13505; GFX90A: ; %bb.0: 13506; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13507; GFX90A-NEXT: ;;#ASMSTART 13508; GFX90A-NEXT: ; def s[8:11] 13509; GFX90A-NEXT: ;;#ASMEND 13510; GFX90A-NEXT: ;;#ASMSTART 13511; GFX90A-NEXT: ; def s[4:7] 13512; GFX90A-NEXT: ;;#ASMEND 13513; GFX90A-NEXT: s_mov_b32 s8, s7 13514; GFX90A-NEXT: s_mov_b32 s10, s6 13515; GFX90A-NEXT: ;;#ASMSTART 13516; GFX90A-NEXT: ; use s[8:10] 13517; GFX90A-NEXT: ;;#ASMEND 13518; GFX90A-NEXT: s_setpc_b64 s[30:31] 13519; 13520; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_1_6: 13521; GFX940: ; %bb.0: 13522; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13523; GFX940-NEXT: ;;#ASMSTART 13524; GFX940-NEXT: ; def s[8:11] 13525; GFX940-NEXT: ;;#ASMEND 13526; GFX940-NEXT: ;;#ASMSTART 13527; 
GFX940-NEXT: ; def s[0:3] 13528; GFX940-NEXT: ;;#ASMEND 13529; GFX940-NEXT: s_mov_b32 s8, s3 13530; GFX940-NEXT: s_mov_b32 s10, s2 13531; GFX940-NEXT: ;;#ASMSTART 13532; GFX940-NEXT: ; use s[8:10] 13533; GFX940-NEXT: ;;#ASMEND 13534; GFX940-NEXT: s_setpc_b64 s[30:31] 13535 %vec0 = call <4 x i32> asm "; def $0", "=s"() 13536 %vec1 = call <4 x i32> asm "; def $0", "=s"() 13537 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 1, i32 6> 13538 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 13539 ret void 13540} 13541 13542define void @s_shuffle_v3i32_v4i32__7_2_6() { 13543; GFX900-LABEL: s_shuffle_v3i32_v4i32__7_2_6: 13544; GFX900: ; %bb.0: 13545; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13546; GFX900-NEXT: ;;#ASMSTART 13547; GFX900-NEXT: ; def s[8:11] 13548; GFX900-NEXT: ;;#ASMEND 13549; GFX900-NEXT: ;;#ASMSTART 13550; GFX900-NEXT: ; def s[4:7] 13551; GFX900-NEXT: ;;#ASMEND 13552; GFX900-NEXT: s_mov_b32 s8, s11 13553; GFX900-NEXT: s_mov_b32 s9, s6 13554; GFX900-NEXT: ;;#ASMSTART 13555; GFX900-NEXT: ; use s[8:10] 13556; GFX900-NEXT: ;;#ASMEND 13557; GFX900-NEXT: s_setpc_b64 s[30:31] 13558; 13559; GFX90A-LABEL: s_shuffle_v3i32_v4i32__7_2_6: 13560; GFX90A: ; %bb.0: 13561; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13562; GFX90A-NEXT: ;;#ASMSTART 13563; GFX90A-NEXT: ; def s[8:11] 13564; GFX90A-NEXT: ;;#ASMEND 13565; GFX90A-NEXT: ;;#ASMSTART 13566; GFX90A-NEXT: ; def s[4:7] 13567; GFX90A-NEXT: ;;#ASMEND 13568; GFX90A-NEXT: s_mov_b32 s8, s11 13569; GFX90A-NEXT: s_mov_b32 s9, s6 13570; GFX90A-NEXT: ;;#ASMSTART 13571; GFX90A-NEXT: ; use s[8:10] 13572; GFX90A-NEXT: ;;#ASMEND 13573; GFX90A-NEXT: s_setpc_b64 s[30:31] 13574; 13575; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_2_6: 13576; GFX940: ; %bb.0: 13577; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13578; GFX940-NEXT: ;;#ASMSTART 13579; GFX940-NEXT: ; def s[8:11] 13580; GFX940-NEXT: ;;#ASMEND 13581; GFX940-NEXT: ;;#ASMSTART 13582; GFX940-NEXT: ; def 
s[0:3] 13583; GFX940-NEXT: ;;#ASMEND 13584; GFX940-NEXT: s_mov_b32 s8, s11 13585; GFX940-NEXT: s_mov_b32 s9, s2 13586; GFX940-NEXT: ;;#ASMSTART 13587; GFX940-NEXT: ; use s[8:10] 13588; GFX940-NEXT: ;;#ASMEND 13589; GFX940-NEXT: s_setpc_b64 s[30:31] 13590 %vec0 = call <4 x i32> asm "; def $0", "=s"() 13591 %vec1 = call <4 x i32> asm "; def $0", "=s"() 13592 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 2, i32 6> 13593 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 13594 ret void 13595} 13596 13597define void @s_shuffle_v3i32_v4i32__7_3_6() { 13598; GFX900-LABEL: s_shuffle_v3i32_v4i32__7_3_6: 13599; GFX900: ; %bb.0: 13600; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13601; GFX900-NEXT: ;;#ASMSTART 13602; GFX900-NEXT: ; def s[8:11] 13603; GFX900-NEXT: ;;#ASMEND 13604; GFX900-NEXT: ;;#ASMSTART 13605; GFX900-NEXT: ; def s[4:7] 13606; GFX900-NEXT: ;;#ASMEND 13607; GFX900-NEXT: s_mov_b32 s8, s11 13608; GFX900-NEXT: s_mov_b32 s9, s7 13609; GFX900-NEXT: ;;#ASMSTART 13610; GFX900-NEXT: ; use s[8:10] 13611; GFX900-NEXT: ;;#ASMEND 13612; GFX900-NEXT: s_setpc_b64 s[30:31] 13613; 13614; GFX90A-LABEL: s_shuffle_v3i32_v4i32__7_3_6: 13615; GFX90A: ; %bb.0: 13616; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13617; GFX90A-NEXT: ;;#ASMSTART 13618; GFX90A-NEXT: ; def s[8:11] 13619; GFX90A-NEXT: ;;#ASMEND 13620; GFX90A-NEXT: ;;#ASMSTART 13621; GFX90A-NEXT: ; def s[4:7] 13622; GFX90A-NEXT: ;;#ASMEND 13623; GFX90A-NEXT: s_mov_b32 s8, s11 13624; GFX90A-NEXT: s_mov_b32 s9, s7 13625; GFX90A-NEXT: ;;#ASMSTART 13626; GFX90A-NEXT: ; use s[8:10] 13627; GFX90A-NEXT: ;;#ASMEND 13628; GFX90A-NEXT: s_setpc_b64 s[30:31] 13629; 13630; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_3_6: 13631; GFX940: ; %bb.0: 13632; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13633; GFX940-NEXT: ;;#ASMSTART 13634; GFX940-NEXT: ; def s[8:11] 13635; GFX940-NEXT: ;;#ASMEND 13636; GFX940-NEXT: ;;#ASMSTART 13637; GFX940-NEXT: ; def s[0:3] 13638; 
GFX940-NEXT: ;;#ASMEND 13639; GFX940-NEXT: s_mov_b32 s8, s11 13640; GFX940-NEXT: s_mov_b32 s9, s3 13641; GFX940-NEXT: ;;#ASMSTART 13642; GFX940-NEXT: ; use s[8:10] 13643; GFX940-NEXT: ;;#ASMEND 13644; GFX940-NEXT: s_setpc_b64 s[30:31] 13645 %vec0 = call <4 x i32> asm "; def $0", "=s"() 13646 %vec1 = call <4 x i32> asm "; def $0", "=s"() 13647 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 3, i32 6> 13648 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 13649 ret void 13650} 13651 13652define void @s_shuffle_v3i32_v4i32__7_4_6() { 13653; GFX900-LABEL: s_shuffle_v3i32_v4i32__7_4_6: 13654; GFX900: ; %bb.0: 13655; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13656; GFX900-NEXT: ;;#ASMSTART 13657; GFX900-NEXT: ; def s[4:7] 13658; GFX900-NEXT: ;;#ASMEND 13659; GFX900-NEXT: s_mov_b32 s8, s7 13660; GFX900-NEXT: s_mov_b32 s9, s4 13661; GFX900-NEXT: s_mov_b32 s10, s6 13662; GFX900-NEXT: ;;#ASMSTART 13663; GFX900-NEXT: ; use s[8:10] 13664; GFX900-NEXT: ;;#ASMEND 13665; GFX900-NEXT: s_setpc_b64 s[30:31] 13666; 13667; GFX90A-LABEL: s_shuffle_v3i32_v4i32__7_4_6: 13668; GFX90A: ; %bb.0: 13669; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13670; GFX90A-NEXT: ;;#ASMSTART 13671; GFX90A-NEXT: ; def s[4:7] 13672; GFX90A-NEXT: ;;#ASMEND 13673; GFX90A-NEXT: s_mov_b32 s8, s7 13674; GFX90A-NEXT: s_mov_b32 s9, s4 13675; GFX90A-NEXT: s_mov_b32 s10, s6 13676; GFX90A-NEXT: ;;#ASMSTART 13677; GFX90A-NEXT: ; use s[8:10] 13678; GFX90A-NEXT: ;;#ASMEND 13679; GFX90A-NEXT: s_setpc_b64 s[30:31] 13680; 13681; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_4_6: 13682; GFX940: ; %bb.0: 13683; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13684; GFX940-NEXT: ;;#ASMSTART 13685; GFX940-NEXT: ; def s[0:3] 13686; GFX940-NEXT: ;;#ASMEND 13687; GFX940-NEXT: s_mov_b32 s8, s3 13688; GFX940-NEXT: s_mov_b32 s9, s0 13689; GFX940-NEXT: s_mov_b32 s10, s2 13690; GFX940-NEXT: ;;#ASMSTART 13691; GFX940-NEXT: ; use s[8:10] 13692; GFX940-NEXT: ;;#ASMEND 
13693; GFX940-NEXT: s_setpc_b64 s[30:31] 13694 %vec0 = call <4 x i32> asm "; def $0", "=s"() 13695 %vec1 = call <4 x i32> asm "; def $0", "=s"() 13696 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 4, i32 6> 13697 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 13698 ret void 13699} 13700 13701define void @s_shuffle_v3i32_v4i32__7_5_6() { 13702; GFX9-LABEL: s_shuffle_v3i32_v4i32__7_5_6: 13703; GFX9: ; %bb.0: 13704; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13705; GFX9-NEXT: ;;#ASMSTART 13706; GFX9-NEXT: ; def s[8:11] 13707; GFX9-NEXT: ;;#ASMEND 13708; GFX9-NEXT: s_mov_b32 s8, s11 13709; GFX9-NEXT: ;;#ASMSTART 13710; GFX9-NEXT: ; use s[8:10] 13711; GFX9-NEXT: ;;#ASMEND 13712; GFX9-NEXT: s_setpc_b64 s[30:31] 13713 %vec0 = call <4 x i32> asm "; def $0", "=s"() 13714 %vec1 = call <4 x i32> asm "; def $0", "=s"() 13715 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 5, i32 6> 13716 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 13717 ret void 13718} 13719 13720define void @s_shuffle_v3i32_v4i32__u_7_7() { 13721; GFX900-LABEL: s_shuffle_v3i32_v4i32__u_7_7: 13722; GFX900: ; %bb.0: 13723; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13724; GFX900-NEXT: ;;#ASMSTART 13725; GFX900-NEXT: ; def s[4:7] 13726; GFX900-NEXT: ;;#ASMEND 13727; GFX900-NEXT: s_mov_b32 s9, s7 13728; GFX900-NEXT: s_mov_b32 s10, s7 13729; GFX900-NEXT: ;;#ASMSTART 13730; GFX900-NEXT: ; use s[8:10] 13731; GFX900-NEXT: ;;#ASMEND 13732; GFX900-NEXT: s_setpc_b64 s[30:31] 13733; 13734; GFX90A-LABEL: s_shuffle_v3i32_v4i32__u_7_7: 13735; GFX90A: ; %bb.0: 13736; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13737; GFX90A-NEXT: ;;#ASMSTART 13738; GFX90A-NEXT: ; def s[4:7] 13739; GFX90A-NEXT: ;;#ASMEND 13740; GFX90A-NEXT: s_mov_b32 s9, s7 13741; GFX90A-NEXT: s_mov_b32 s10, s7 13742; GFX90A-NEXT: ;;#ASMSTART 13743; GFX90A-NEXT: ; use s[8:10] 13744; GFX90A-NEXT: ;;#ASMEND 13745; GFX90A-NEXT: 
s_setpc_b64 s[30:31] 13746; 13747; GFX940-LABEL: s_shuffle_v3i32_v4i32__u_7_7: 13748; GFX940: ; %bb.0: 13749; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13750; GFX940-NEXT: ;;#ASMSTART 13751; GFX940-NEXT: ; def s[0:3] 13752; GFX940-NEXT: ;;#ASMEND 13753; GFX940-NEXT: s_mov_b32 s9, s3 13754; GFX940-NEXT: s_mov_b32 s10, s3 13755; GFX940-NEXT: ;;#ASMSTART 13756; GFX940-NEXT: ; use s[8:10] 13757; GFX940-NEXT: ;;#ASMEND 13758; GFX940-NEXT: s_setpc_b64 s[30:31] 13759 %vec0 = call <4 x i32> asm "; def $0", "=s"() 13760 %vec1 = call <4 x i32> asm "; def $0", "=s"() 13761 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 poison, i32 7, i32 7> 13762 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 13763 ret void 13764} 13765 13766define void @s_shuffle_v3i32_v4i32__0_7_7() { 13767; GFX900-LABEL: s_shuffle_v3i32_v4i32__0_7_7: 13768; GFX900: ; %bb.0: 13769; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13770; GFX900-NEXT: ;;#ASMSTART 13771; GFX900-NEXT: ; def s[8:11] 13772; GFX900-NEXT: ;;#ASMEND 13773; GFX900-NEXT: ;;#ASMSTART 13774; GFX900-NEXT: ; def s[4:7] 13775; GFX900-NEXT: ;;#ASMEND 13776; GFX900-NEXT: s_mov_b32 s9, s7 13777; GFX900-NEXT: s_mov_b32 s10, s7 13778; GFX900-NEXT: ;;#ASMSTART 13779; GFX900-NEXT: ; use s[8:10] 13780; GFX900-NEXT: ;;#ASMEND 13781; GFX900-NEXT: s_setpc_b64 s[30:31] 13782; 13783; GFX90A-LABEL: s_shuffle_v3i32_v4i32__0_7_7: 13784; GFX90A: ; %bb.0: 13785; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13786; GFX90A-NEXT: ;;#ASMSTART 13787; GFX90A-NEXT: ; def s[8:11] 13788; GFX90A-NEXT: ;;#ASMEND 13789; GFX90A-NEXT: ;;#ASMSTART 13790; GFX90A-NEXT: ; def s[4:7] 13791; GFX90A-NEXT: ;;#ASMEND 13792; GFX90A-NEXT: s_mov_b32 s9, s7 13793; GFX90A-NEXT: s_mov_b32 s10, s7 13794; GFX90A-NEXT: ;;#ASMSTART 13795; GFX90A-NEXT: ; use s[8:10] 13796; GFX90A-NEXT: ;;#ASMEND 13797; GFX90A-NEXT: s_setpc_b64 s[30:31] 13798; 13799; GFX940-LABEL: s_shuffle_v3i32_v4i32__0_7_7: 13800; GFX940: ; %bb.0: 13801; 
GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13802; GFX940-NEXT: ;;#ASMSTART 13803; GFX940-NEXT: ; def s[8:11] 13804; GFX940-NEXT: ;;#ASMEND 13805; GFX940-NEXT: ;;#ASMSTART 13806; GFX940-NEXT: ; def s[0:3] 13807; GFX940-NEXT: ;;#ASMEND 13808; GFX940-NEXT: s_mov_b32 s9, s3 13809; GFX940-NEXT: s_mov_b32 s10, s3 13810; GFX940-NEXT: ;;#ASMSTART 13811; GFX940-NEXT: ; use s[8:10] 13812; GFX940-NEXT: ;;#ASMEND 13813; GFX940-NEXT: s_setpc_b64 s[30:31] 13814 %vec0 = call <4 x i32> asm "; def $0", "=s"() 13815 %vec1 = call <4 x i32> asm "; def $0", "=s"() 13816 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 0, i32 7, i32 7> 13817 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 13818 ret void 13819} 13820 13821define void @s_shuffle_v3i32_v4i32__1_7_7() { 13822; GFX900-LABEL: s_shuffle_v3i32_v4i32__1_7_7: 13823; GFX900: ; %bb.0: 13824; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13825; GFX900-NEXT: ;;#ASMSTART 13826; GFX900-NEXT: ; def s[8:11] 13827; GFX900-NEXT: ;;#ASMEND 13828; GFX900-NEXT: ;;#ASMSTART 13829; GFX900-NEXT: ; def s[4:7] 13830; GFX900-NEXT: ;;#ASMEND 13831; GFX900-NEXT: s_mov_b32 s8, s5 13832; GFX900-NEXT: s_mov_b32 s9, s11 13833; GFX900-NEXT: s_mov_b32 s10, s11 13834; GFX900-NEXT: ;;#ASMSTART 13835; GFX900-NEXT: ; use s[8:10] 13836; GFX900-NEXT: ;;#ASMEND 13837; GFX900-NEXT: s_setpc_b64 s[30:31] 13838; 13839; GFX90A-LABEL: s_shuffle_v3i32_v4i32__1_7_7: 13840; GFX90A: ; %bb.0: 13841; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13842; GFX90A-NEXT: ;;#ASMSTART 13843; GFX90A-NEXT: ; def s[8:11] 13844; GFX90A-NEXT: ;;#ASMEND 13845; GFX90A-NEXT: ;;#ASMSTART 13846; GFX90A-NEXT: ; def s[4:7] 13847; GFX90A-NEXT: ;;#ASMEND 13848; GFX90A-NEXT: s_mov_b32 s8, s5 13849; GFX90A-NEXT: s_mov_b32 s9, s11 13850; GFX90A-NEXT: s_mov_b32 s10, s11 13851; GFX90A-NEXT: ;;#ASMSTART 13852; GFX90A-NEXT: ; use s[8:10] 13853; GFX90A-NEXT: ;;#ASMEND 13854; GFX90A-NEXT: s_setpc_b64 s[30:31] 13855; 13856; GFX940-LABEL: 
s_shuffle_v3i32_v4i32__1_7_7: 13857; GFX940: ; %bb.0: 13858; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13859; GFX940-NEXT: ;;#ASMSTART 13860; GFX940-NEXT: ; def s[0:3] 13861; GFX940-NEXT: ;;#ASMEND 13862; GFX940-NEXT: ;;#ASMSTART 13863; GFX940-NEXT: ; def s[4:7] 13864; GFX940-NEXT: ;;#ASMEND 13865; GFX940-NEXT: s_mov_b32 s8, s1 13866; GFX940-NEXT: s_mov_b32 s9, s7 13867; GFX940-NEXT: s_mov_b32 s10, s7 13868; GFX940-NEXT: ;;#ASMSTART 13869; GFX940-NEXT: ; use s[8:10] 13870; GFX940-NEXT: ;;#ASMEND 13871; GFX940-NEXT: s_setpc_b64 s[30:31] 13872 %vec0 = call <4 x i32> asm "; def $0", "=s"() 13873 %vec1 = call <4 x i32> asm "; def $0", "=s"() 13874 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 1, i32 7, i32 7> 13875 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 13876 ret void 13877} 13878 13879define void @s_shuffle_v3i32_v4i32__2_7_7() { 13880; GFX900-LABEL: s_shuffle_v3i32_v4i32__2_7_7: 13881; GFX900: ; %bb.0: 13882; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13883; GFX900-NEXT: ;;#ASMSTART 13884; GFX900-NEXT: ; def s[8:11] 13885; GFX900-NEXT: ;;#ASMEND 13886; GFX900-NEXT: ;;#ASMSTART 13887; GFX900-NEXT: ; def s[4:7] 13888; GFX900-NEXT: ;;#ASMEND 13889; GFX900-NEXT: s_mov_b32 s8, s6 13890; GFX900-NEXT: s_mov_b32 s9, s11 13891; GFX900-NEXT: s_mov_b32 s10, s11 13892; GFX900-NEXT: ;;#ASMSTART 13893; GFX900-NEXT: ; use s[8:10] 13894; GFX900-NEXT: ;;#ASMEND 13895; GFX900-NEXT: s_setpc_b64 s[30:31] 13896; 13897; GFX90A-LABEL: s_shuffle_v3i32_v4i32__2_7_7: 13898; GFX90A: ; %bb.0: 13899; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13900; GFX90A-NEXT: ;;#ASMSTART 13901; GFX90A-NEXT: ; def s[8:11] 13902; GFX90A-NEXT: ;;#ASMEND 13903; GFX90A-NEXT: ;;#ASMSTART 13904; GFX90A-NEXT: ; def s[4:7] 13905; GFX90A-NEXT: ;;#ASMEND 13906; GFX90A-NEXT: s_mov_b32 s8, s6 13907; GFX90A-NEXT: s_mov_b32 s9, s11 13908; GFX90A-NEXT: s_mov_b32 s10, s11 13909; GFX90A-NEXT: ;;#ASMSTART 13910; GFX90A-NEXT: ; use s[8:10] 13911; 
GFX90A-NEXT: ;;#ASMEND 13912; GFX90A-NEXT: s_setpc_b64 s[30:31] 13913; 13914; GFX940-LABEL: s_shuffle_v3i32_v4i32__2_7_7: 13915; GFX940: ; %bb.0: 13916; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13917; GFX940-NEXT: ;;#ASMSTART 13918; GFX940-NEXT: ; def s[0:3] 13919; GFX940-NEXT: ;;#ASMEND 13920; GFX940-NEXT: ;;#ASMSTART 13921; GFX940-NEXT: ; def s[4:7] 13922; GFX940-NEXT: ;;#ASMEND 13923; GFX940-NEXT: s_mov_b32 s8, s2 13924; GFX940-NEXT: s_mov_b32 s9, s7 13925; GFX940-NEXT: s_mov_b32 s10, s7 13926; GFX940-NEXT: ;;#ASMSTART 13927; GFX940-NEXT: ; use s[8:10] 13928; GFX940-NEXT: ;;#ASMEND 13929; GFX940-NEXT: s_setpc_b64 s[30:31] 13930 %vec0 = call <4 x i32> asm "; def $0", "=s"() 13931 %vec1 = call <4 x i32> asm "; def $0", "=s"() 13932 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 2, i32 7, i32 7> 13933 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 13934 ret void 13935} 13936 13937define void @s_shuffle_v3i32_v4i32__3_7_7() { 13938; GFX900-LABEL: s_shuffle_v3i32_v4i32__3_7_7: 13939; GFX900: ; %bb.0: 13940; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13941; GFX900-NEXT: ;;#ASMSTART 13942; GFX900-NEXT: ; def s[8:11] 13943; GFX900-NEXT: ;;#ASMEND 13944; GFX900-NEXT: ;;#ASMSTART 13945; GFX900-NEXT: ; def s[4:7] 13946; GFX900-NEXT: ;;#ASMEND 13947; GFX900-NEXT: s_mov_b32 s8, s7 13948; GFX900-NEXT: s_mov_b32 s9, s11 13949; GFX900-NEXT: s_mov_b32 s10, s11 13950; GFX900-NEXT: ;;#ASMSTART 13951; GFX900-NEXT: ; use s[8:10] 13952; GFX900-NEXT: ;;#ASMEND 13953; GFX900-NEXT: s_setpc_b64 s[30:31] 13954; 13955; GFX90A-LABEL: s_shuffle_v3i32_v4i32__3_7_7: 13956; GFX90A: ; %bb.0: 13957; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13958; GFX90A-NEXT: ;;#ASMSTART 13959; GFX90A-NEXT: ; def s[8:11] 13960; GFX90A-NEXT: ;;#ASMEND 13961; GFX90A-NEXT: ;;#ASMSTART 13962; GFX90A-NEXT: ; def s[4:7] 13963; GFX90A-NEXT: ;;#ASMEND 13964; GFX90A-NEXT: s_mov_b32 s8, s7 13965; GFX90A-NEXT: s_mov_b32 s9, s11 13966; GFX90A-NEXT: 
s_mov_b32 s10, s11 13967; GFX90A-NEXT: ;;#ASMSTART 13968; GFX90A-NEXT: ; use s[8:10] 13969; GFX90A-NEXT: ;;#ASMEND 13970; GFX90A-NEXT: s_setpc_b64 s[30:31] 13971; 13972; GFX940-LABEL: s_shuffle_v3i32_v4i32__3_7_7: 13973; GFX940: ; %bb.0: 13974; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13975; GFX940-NEXT: ;;#ASMSTART 13976; GFX940-NEXT: ; def s[0:3] 13977; GFX940-NEXT: ;;#ASMEND 13978; GFX940-NEXT: ;;#ASMSTART 13979; GFX940-NEXT: ; def s[4:7] 13980; GFX940-NEXT: ;;#ASMEND 13981; GFX940-NEXT: s_mov_b32 s8, s3 13982; GFX940-NEXT: s_mov_b32 s9, s7 13983; GFX940-NEXT: s_mov_b32 s10, s7 13984; GFX940-NEXT: ;;#ASMSTART 13985; GFX940-NEXT: ; use s[8:10] 13986; GFX940-NEXT: ;;#ASMEND 13987; GFX940-NEXT: s_setpc_b64 s[30:31] 13988 %vec0 = call <4 x i32> asm "; def $0", "=s"() 13989 %vec1 = call <4 x i32> asm "; def $0", "=s"() 13990 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 3, i32 7, i32 7> 13991 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 13992 ret void 13993} 13994 13995define void @s_shuffle_v3i32_v4i32__4_7_7() { 13996; GFX9-LABEL: s_shuffle_v3i32_v4i32__4_7_7: 13997; GFX9: ; %bb.0: 13998; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13999; GFX9-NEXT: ;;#ASMSTART 14000; GFX9-NEXT: ; def s[8:11] 14001; GFX9-NEXT: ;;#ASMEND 14002; GFX9-NEXT: s_mov_b32 s9, s11 14003; GFX9-NEXT: s_mov_b32 s10, s11 14004; GFX9-NEXT: ;;#ASMSTART 14005; GFX9-NEXT: ; use s[8:10] 14006; GFX9-NEXT: ;;#ASMEND 14007; GFX9-NEXT: s_setpc_b64 s[30:31] 14008 %vec0 = call <4 x i32> asm "; def $0", "=s"() 14009 %vec1 = call <4 x i32> asm "; def $0", "=s"() 14010 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 4, i32 7, i32 7> 14011 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 14012 ret void 14013} 14014 14015define void @s_shuffle_v3i32_v4i32__5_7_7() { 14016; GFX900-LABEL: s_shuffle_v3i32_v4i32__5_7_7: 14017; GFX900: ; %bb.0: 14018; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14019; 
GFX900-NEXT: ;;#ASMSTART 14020; GFX900-NEXT: ; def s[4:7] 14021; GFX900-NEXT: ;;#ASMEND 14022; GFX900-NEXT: s_mov_b32 s8, s5 14023; GFX900-NEXT: s_mov_b32 s9, s7 14024; GFX900-NEXT: s_mov_b32 s10, s7 14025; GFX900-NEXT: ;;#ASMSTART 14026; GFX900-NEXT: ; use s[8:10] 14027; GFX900-NEXT: ;;#ASMEND 14028; GFX900-NEXT: s_setpc_b64 s[30:31] 14029; 14030; GFX90A-LABEL: s_shuffle_v3i32_v4i32__5_7_7: 14031; GFX90A: ; %bb.0: 14032; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14033; GFX90A-NEXT: ;;#ASMSTART 14034; GFX90A-NEXT: ; def s[4:7] 14035; GFX90A-NEXT: ;;#ASMEND 14036; GFX90A-NEXT: s_mov_b32 s8, s5 14037; GFX90A-NEXT: s_mov_b32 s9, s7 14038; GFX90A-NEXT: s_mov_b32 s10, s7 14039; GFX90A-NEXT: ;;#ASMSTART 14040; GFX90A-NEXT: ; use s[8:10] 14041; GFX90A-NEXT: ;;#ASMEND 14042; GFX90A-NEXT: s_setpc_b64 s[30:31] 14043; 14044; GFX940-LABEL: s_shuffle_v3i32_v4i32__5_7_7: 14045; GFX940: ; %bb.0: 14046; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14047; GFX940-NEXT: ;;#ASMSTART 14048; GFX940-NEXT: ; def s[0:3] 14049; GFX940-NEXT: ;;#ASMEND 14050; GFX940-NEXT: s_mov_b32 s8, s1 14051; GFX940-NEXT: s_mov_b32 s9, s3 14052; GFX940-NEXT: s_mov_b32 s10, s3 14053; GFX940-NEXT: ;;#ASMSTART 14054; GFX940-NEXT: ; use s[8:10] 14055; GFX940-NEXT: ;;#ASMEND 14056; GFX940-NEXT: s_setpc_b64 s[30:31] 14057 %vec0 = call <4 x i32> asm "; def $0", "=s"() 14058 %vec1 = call <4 x i32> asm "; def $0", "=s"() 14059 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 5, i32 7, i32 7> 14060 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 14061 ret void 14062} 14063 14064define void @s_shuffle_v3i32_v4i32__6_7_7() { 14065; GFX900-LABEL: s_shuffle_v3i32_v4i32__6_7_7: 14066; GFX900: ; %bb.0: 14067; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14068; GFX900-NEXT: ;;#ASMSTART 14069; GFX900-NEXT: ; def s[4:7] 14070; GFX900-NEXT: ;;#ASMEND 14071; GFX900-NEXT: s_mov_b32 s8, s6 14072; GFX900-NEXT: s_mov_b32 s9, s7 14073; GFX900-NEXT: s_mov_b32 s10, s7 
14074; GFX900-NEXT: ;;#ASMSTART 14075; GFX900-NEXT: ; use s[8:10] 14076; GFX900-NEXT: ;;#ASMEND 14077; GFX900-NEXT: s_setpc_b64 s[30:31] 14078; 14079; GFX90A-LABEL: s_shuffle_v3i32_v4i32__6_7_7: 14080; GFX90A: ; %bb.0: 14081; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14082; GFX90A-NEXT: ;;#ASMSTART 14083; GFX90A-NEXT: ; def s[4:7] 14084; GFX90A-NEXT: ;;#ASMEND 14085; GFX90A-NEXT: s_mov_b32 s8, s6 14086; GFX90A-NEXT: s_mov_b32 s9, s7 14087; GFX90A-NEXT: s_mov_b32 s10, s7 14088; GFX90A-NEXT: ;;#ASMSTART 14089; GFX90A-NEXT: ; use s[8:10] 14090; GFX90A-NEXT: ;;#ASMEND 14091; GFX90A-NEXT: s_setpc_b64 s[30:31] 14092; 14093; GFX940-LABEL: s_shuffle_v3i32_v4i32__6_7_7: 14094; GFX940: ; %bb.0: 14095; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14096; GFX940-NEXT: ;;#ASMSTART 14097; GFX940-NEXT: ; def s[0:3] 14098; GFX940-NEXT: ;;#ASMEND 14099; GFX940-NEXT: s_mov_b32 s8, s2 14100; GFX940-NEXT: s_mov_b32 s9, s3 14101; GFX940-NEXT: s_mov_b32 s10, s3 14102; GFX940-NEXT: ;;#ASMSTART 14103; GFX940-NEXT: ; use s[8:10] 14104; GFX940-NEXT: ;;#ASMEND 14105; GFX940-NEXT: s_setpc_b64 s[30:31] 14106 %vec0 = call <4 x i32> asm "; def $0", "=s"() 14107 %vec1 = call <4 x i32> asm "; def $0", "=s"() 14108 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 6, i32 7, i32 7> 14109 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 14110 ret void 14111} 14112 14113define void @s_shuffle_v3i32_v4i32__7_u_7() { 14114; GFX900-LABEL: s_shuffle_v3i32_v4i32__7_u_7: 14115; GFX900: ; %bb.0: 14116; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14117; GFX900-NEXT: ;;#ASMSTART 14118; GFX900-NEXT: ; def s[4:7] 14119; GFX900-NEXT: ;;#ASMEND 14120; GFX900-NEXT: s_mov_b32 s8, s7 14121; GFX900-NEXT: s_mov_b32 s10, s7 14122; GFX900-NEXT: ;;#ASMSTART 14123; GFX900-NEXT: ; use s[8:10] 14124; GFX900-NEXT: ;;#ASMEND 14125; GFX900-NEXT: s_setpc_b64 s[30:31] 14126; 14127; GFX90A-LABEL: s_shuffle_v3i32_v4i32__7_u_7: 14128; GFX90A: ; %bb.0: 14129; 
GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14130; GFX90A-NEXT: ;;#ASMSTART 14131; GFX90A-NEXT: ; def s[4:7] 14132; GFX90A-NEXT: ;;#ASMEND 14133; GFX90A-NEXT: s_mov_b32 s8, s7 14134; GFX90A-NEXT: s_mov_b32 s10, s7 14135; GFX90A-NEXT: ;;#ASMSTART 14136; GFX90A-NEXT: ; use s[8:10] 14137; GFX90A-NEXT: ;;#ASMEND 14138; GFX90A-NEXT: s_setpc_b64 s[30:31] 14139; 14140; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_u_7: 14141; GFX940: ; %bb.0: 14142; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14143; GFX940-NEXT: ;;#ASMSTART 14144; GFX940-NEXT: ; def s[0:3] 14145; GFX940-NEXT: ;;#ASMEND 14146; GFX940-NEXT: s_mov_b32 s8, s3 14147; GFX940-NEXT: s_mov_b32 s10, s3 14148; GFX940-NEXT: ;;#ASMSTART 14149; GFX940-NEXT: ; use s[8:10] 14150; GFX940-NEXT: ;;#ASMEND 14151; GFX940-NEXT: s_setpc_b64 s[30:31] 14152 %vec0 = call <4 x i32> asm "; def $0", "=s"() 14153 %vec1 = call <4 x i32> asm "; def $0", "=s"() 14154 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 poison, i32 7> 14155 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 14156 ret void 14157} 14158 14159define void @s_shuffle_v3i32_v4i32__7_0_7() { 14160; GFX900-LABEL: s_shuffle_v3i32_v4i32__7_0_7: 14161; GFX900: ; %bb.0: 14162; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14163; GFX900-NEXT: ;;#ASMSTART 14164; GFX900-NEXT: ; def s[8:11] 14165; GFX900-NEXT: ;;#ASMEND 14166; GFX900-NEXT: ;;#ASMSTART 14167; GFX900-NEXT: ; def s[4:7] 14168; GFX900-NEXT: ;;#ASMEND 14169; GFX900-NEXT: s_mov_b32 s8, s11 14170; GFX900-NEXT: s_mov_b32 s9, s4 14171; GFX900-NEXT: s_mov_b32 s10, s11 14172; GFX900-NEXT: ;;#ASMSTART 14173; GFX900-NEXT: ; use s[8:10] 14174; GFX900-NEXT: ;;#ASMEND 14175; GFX900-NEXT: s_setpc_b64 s[30:31] 14176; 14177; GFX90A-LABEL: s_shuffle_v3i32_v4i32__7_0_7: 14178; GFX90A: ; %bb.0: 14179; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14180; GFX90A-NEXT: ;;#ASMSTART 14181; GFX90A-NEXT: ; def s[8:11] 14182; GFX90A-NEXT: ;;#ASMEND 14183; GFX90A-NEXT: 
;;#ASMSTART 14184; GFX90A-NEXT: ; def s[4:7] 14185; GFX90A-NEXT: ;;#ASMEND 14186; GFX90A-NEXT: s_mov_b32 s8, s11 14187; GFX90A-NEXT: s_mov_b32 s9, s4 14188; GFX90A-NEXT: s_mov_b32 s10, s11 14189; GFX90A-NEXT: ;;#ASMSTART 14190; GFX90A-NEXT: ; use s[8:10] 14191; GFX90A-NEXT: ;;#ASMEND 14192; GFX90A-NEXT: s_setpc_b64 s[30:31] 14193; 14194; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_0_7: 14195; GFX940: ; %bb.0: 14196; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14197; GFX940-NEXT: ;;#ASMSTART 14198; GFX940-NEXT: ; def s[0:3] 14199; GFX940-NEXT: ;;#ASMEND 14200; GFX940-NEXT: ;;#ASMSTART 14201; GFX940-NEXT: ; def s[4:7] 14202; GFX940-NEXT: ;;#ASMEND 14203; GFX940-NEXT: s_mov_b32 s8, s7 14204; GFX940-NEXT: s_mov_b32 s9, s0 14205; GFX940-NEXT: s_mov_b32 s10, s7 14206; GFX940-NEXT: ;;#ASMSTART 14207; GFX940-NEXT: ; use s[8:10] 14208; GFX940-NEXT: ;;#ASMEND 14209; GFX940-NEXT: s_setpc_b64 s[30:31] 14210 %vec0 = call <4 x i32> asm "; def $0", "=s"() 14211 %vec1 = call <4 x i32> asm "; def $0", "=s"() 14212 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 0, i32 7> 14213 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 14214 ret void 14215} 14216 14217define void @s_shuffle_v3i32_v4i32__7_1_7() { 14218; GFX900-LABEL: s_shuffle_v3i32_v4i32__7_1_7: 14219; GFX900: ; %bb.0: 14220; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14221; GFX900-NEXT: ;;#ASMSTART 14222; GFX900-NEXT: ; def s[8:11] 14223; GFX900-NEXT: ;;#ASMEND 14224; GFX900-NEXT: ;;#ASMSTART 14225; GFX900-NEXT: ; def s[4:7] 14226; GFX900-NEXT: ;;#ASMEND 14227; GFX900-NEXT: s_mov_b32 s8, s7 14228; GFX900-NEXT: s_mov_b32 s10, s7 14229; GFX900-NEXT: ;;#ASMSTART 14230; GFX900-NEXT: ; use s[8:10] 14231; GFX900-NEXT: ;;#ASMEND 14232; GFX900-NEXT: s_setpc_b64 s[30:31] 14233; 14234; GFX90A-LABEL: s_shuffle_v3i32_v4i32__7_1_7: 14235; GFX90A: ; %bb.0: 14236; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14237; GFX90A-NEXT: ;;#ASMSTART 14238; GFX90A-NEXT: ; def 
s[8:11] 14239; GFX90A-NEXT: ;;#ASMEND 14240; GFX90A-NEXT: ;;#ASMSTART 14241; GFX90A-NEXT: ; def s[4:7] 14242; GFX90A-NEXT: ;;#ASMEND 14243; GFX90A-NEXT: s_mov_b32 s8, s7 14244; GFX90A-NEXT: s_mov_b32 s10, s7 14245; GFX90A-NEXT: ;;#ASMSTART 14246; GFX90A-NEXT: ; use s[8:10] 14247; GFX90A-NEXT: ;;#ASMEND 14248; GFX90A-NEXT: s_setpc_b64 s[30:31] 14249; 14250; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_1_7: 14251; GFX940: ; %bb.0: 14252; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14253; GFX940-NEXT: ;;#ASMSTART 14254; GFX940-NEXT: ; def s[8:11] 14255; GFX940-NEXT: ;;#ASMEND 14256; GFX940-NEXT: ;;#ASMSTART 14257; GFX940-NEXT: ; def s[0:3] 14258; GFX940-NEXT: ;;#ASMEND 14259; GFX940-NEXT: s_mov_b32 s8, s3 14260; GFX940-NEXT: s_mov_b32 s10, s3 14261; GFX940-NEXT: ;;#ASMSTART 14262; GFX940-NEXT: ; use s[8:10] 14263; GFX940-NEXT: ;;#ASMEND 14264; GFX940-NEXT: s_setpc_b64 s[30:31] 14265 %vec0 = call <4 x i32> asm "; def $0", "=s"() 14266 %vec1 = call <4 x i32> asm "; def $0", "=s"() 14267 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 1, i32 7> 14268 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 14269 ret void 14270} 14271 14272define void @s_shuffle_v3i32_v4i32__7_2_7() { 14273; GFX900-LABEL: s_shuffle_v3i32_v4i32__7_2_7: 14274; GFX900: ; %bb.0: 14275; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14276; GFX900-NEXT: ;;#ASMSTART 14277; GFX900-NEXT: ; def s[8:11] 14278; GFX900-NEXT: ;;#ASMEND 14279; GFX900-NEXT: ;;#ASMSTART 14280; GFX900-NEXT: ; def s[4:7] 14281; GFX900-NEXT: ;;#ASMEND 14282; GFX900-NEXT: s_mov_b32 s8, s11 14283; GFX900-NEXT: s_mov_b32 s9, s6 14284; GFX900-NEXT: s_mov_b32 s10, s11 14285; GFX900-NEXT: ;;#ASMSTART 14286; GFX900-NEXT: ; use s[8:10] 14287; GFX900-NEXT: ;;#ASMEND 14288; GFX900-NEXT: s_setpc_b64 s[30:31] 14289; 14290; GFX90A-LABEL: s_shuffle_v3i32_v4i32__7_2_7: 14291; GFX90A: ; %bb.0: 14292; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14293; GFX90A-NEXT: ;;#ASMSTART 14294; 
GFX90A-NEXT: ; def s[8:11] 14295; GFX90A-NEXT: ;;#ASMEND 14296; GFX90A-NEXT: ;;#ASMSTART 14297; GFX90A-NEXT: ; def s[4:7] 14298; GFX90A-NEXT: ;;#ASMEND 14299; GFX90A-NEXT: s_mov_b32 s8, s11 14300; GFX90A-NEXT: s_mov_b32 s9, s6 14301; GFX90A-NEXT: s_mov_b32 s10, s11 14302; GFX90A-NEXT: ;;#ASMSTART 14303; GFX90A-NEXT: ; use s[8:10] 14304; GFX90A-NEXT: ;;#ASMEND 14305; GFX90A-NEXT: s_setpc_b64 s[30:31] 14306; 14307; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_2_7: 14308; GFX940: ; %bb.0: 14309; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14310; GFX940-NEXT: ;;#ASMSTART 14311; GFX940-NEXT: ; def s[0:3] 14312; GFX940-NEXT: ;;#ASMEND 14313; GFX940-NEXT: ;;#ASMSTART 14314; GFX940-NEXT: ; def s[4:7] 14315; GFX940-NEXT: ;;#ASMEND 14316; GFX940-NEXT: s_mov_b32 s8, s7 14317; GFX940-NEXT: s_mov_b32 s9, s2 14318; GFX940-NEXT: s_mov_b32 s10, s7 14319; GFX940-NEXT: ;;#ASMSTART 14320; GFX940-NEXT: ; use s[8:10] 14321; GFX940-NEXT: ;;#ASMEND 14322; GFX940-NEXT: s_setpc_b64 s[30:31] 14323 %vec0 = call <4 x i32> asm "; def $0", "=s"() 14324 %vec1 = call <4 x i32> asm "; def $0", "=s"() 14325 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 2, i32 7> 14326 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 14327 ret void 14328} 14329 14330define void @s_shuffle_v3i32_v4i32__7_3_7() { 14331; GFX900-LABEL: s_shuffle_v3i32_v4i32__7_3_7: 14332; GFX900: ; %bb.0: 14333; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14334; GFX900-NEXT: ;;#ASMSTART 14335; GFX900-NEXT: ; def s[8:11] 14336; GFX900-NEXT: ;;#ASMEND 14337; GFX900-NEXT: ;;#ASMSTART 14338; GFX900-NEXT: ; def s[4:7] 14339; GFX900-NEXT: ;;#ASMEND 14340; GFX900-NEXT: s_mov_b32 s8, s11 14341; GFX900-NEXT: s_mov_b32 s9, s7 14342; GFX900-NEXT: s_mov_b32 s10, s11 14343; GFX900-NEXT: ;;#ASMSTART 14344; GFX900-NEXT: ; use s[8:10] 14345; GFX900-NEXT: ;;#ASMEND 14346; GFX900-NEXT: s_setpc_b64 s[30:31] 14347; 14348; GFX90A-LABEL: s_shuffle_v3i32_v4i32__7_3_7: 14349; GFX90A: ; %bb.0: 14350; 
GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14351; GFX90A-NEXT: ;;#ASMSTART 14352; GFX90A-NEXT: ; def s[8:11] 14353; GFX90A-NEXT: ;;#ASMEND 14354; GFX90A-NEXT: ;;#ASMSTART 14355; GFX90A-NEXT: ; def s[4:7] 14356; GFX90A-NEXT: ;;#ASMEND 14357; GFX90A-NEXT: s_mov_b32 s8, s11 14358; GFX90A-NEXT: s_mov_b32 s9, s7 14359; GFX90A-NEXT: s_mov_b32 s10, s11 14360; GFX90A-NEXT: ;;#ASMSTART 14361; GFX90A-NEXT: ; use s[8:10] 14362; GFX90A-NEXT: ;;#ASMEND 14363; GFX90A-NEXT: s_setpc_b64 s[30:31] 14364; 14365; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_3_7: 14366; GFX940: ; %bb.0: 14367; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14368; GFX940-NEXT: ;;#ASMSTART 14369; GFX940-NEXT: ; def s[0:3] 14370; GFX940-NEXT: ;;#ASMEND 14371; GFX940-NEXT: ;;#ASMSTART 14372; GFX940-NEXT: ; def s[4:7] 14373; GFX940-NEXT: ;;#ASMEND 14374; GFX940-NEXT: s_mov_b32 s8, s7 14375; GFX940-NEXT: s_mov_b32 s9, s3 14376; GFX940-NEXT: s_mov_b32 s10, s7 14377; GFX940-NEXT: ;;#ASMSTART 14378; GFX940-NEXT: ; use s[8:10] 14379; GFX940-NEXT: ;;#ASMEND 14380; GFX940-NEXT: s_setpc_b64 s[30:31] 14381 %vec0 = call <4 x i32> asm "; def $0", "=s"() 14382 %vec1 = call <4 x i32> asm "; def $0", "=s"() 14383 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 3, i32 7> 14384 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 14385 ret void 14386} 14387 14388define void @s_shuffle_v3i32_v4i32__7_4_7() { 14389; GFX900-LABEL: s_shuffle_v3i32_v4i32__7_4_7: 14390; GFX900: ; %bb.0: 14391; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14392; GFX900-NEXT: ;;#ASMSTART 14393; GFX900-NEXT: ; def s[4:7] 14394; GFX900-NEXT: ;;#ASMEND 14395; GFX900-NEXT: s_mov_b32 s8, s7 14396; GFX900-NEXT: s_mov_b32 s9, s4 14397; GFX900-NEXT: s_mov_b32 s10, s7 14398; GFX900-NEXT: ;;#ASMSTART 14399; GFX900-NEXT: ; use s[8:10] 14400; GFX900-NEXT: ;;#ASMEND 14401; GFX900-NEXT: s_setpc_b64 s[30:31] 14402; 14403; GFX90A-LABEL: s_shuffle_v3i32_v4i32__7_4_7: 14404; GFX90A: ; %bb.0: 14405; 
GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14406; GFX90A-NEXT: ;;#ASMSTART 14407; GFX90A-NEXT: ; def s[4:7] 14408; GFX90A-NEXT: ;;#ASMEND 14409; GFX90A-NEXT: s_mov_b32 s8, s7 14410; GFX90A-NEXT: s_mov_b32 s9, s4 14411; GFX90A-NEXT: s_mov_b32 s10, s7 14412; GFX90A-NEXT: ;;#ASMSTART 14413; GFX90A-NEXT: ; use s[8:10] 14414; GFX90A-NEXT: ;;#ASMEND 14415; GFX90A-NEXT: s_setpc_b64 s[30:31] 14416; 14417; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_4_7: 14418; GFX940: ; %bb.0: 14419; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14420; GFX940-NEXT: ;;#ASMSTART 14421; GFX940-NEXT: ; def s[0:3] 14422; GFX940-NEXT: ;;#ASMEND 14423; GFX940-NEXT: s_mov_b32 s8, s3 14424; GFX940-NEXT: s_mov_b32 s9, s0 14425; GFX940-NEXT: s_mov_b32 s10, s3 14426; GFX940-NEXT: ;;#ASMSTART 14427; GFX940-NEXT: ; use s[8:10] 14428; GFX940-NEXT: ;;#ASMEND 14429; GFX940-NEXT: s_setpc_b64 s[30:31] 14430 %vec0 = call <4 x i32> asm "; def $0", "=s"() 14431 %vec1 = call <4 x i32> asm "; def $0", "=s"() 14432 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 4, i32 7> 14433 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 14434 ret void 14435} 14436 14437define void @s_shuffle_v3i32_v4i32__7_5_7() { 14438; GFX9-LABEL: s_shuffle_v3i32_v4i32__7_5_7: 14439; GFX9: ; %bb.0: 14440; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14441; GFX9-NEXT: ;;#ASMSTART 14442; GFX9-NEXT: ; def s[8:11] 14443; GFX9-NEXT: ;;#ASMEND 14444; GFX9-NEXT: s_mov_b32 s8, s11 14445; GFX9-NEXT: s_mov_b32 s10, s11 14446; GFX9-NEXT: ;;#ASMSTART 14447; GFX9-NEXT: ; use s[8:10] 14448; GFX9-NEXT: ;;#ASMEND 14449; GFX9-NEXT: s_setpc_b64 s[30:31] 14450 %vec0 = call <4 x i32> asm "; def $0", "=s"() 14451 %vec1 = call <4 x i32> asm "; def $0", "=s"() 14452 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 5, i32 7> 14453 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 14454 ret void 14455} 14456 14457define void @s_shuffle_v3i32_v4i32__7_6_7() 
{ 14458; GFX900-LABEL: s_shuffle_v3i32_v4i32__7_6_7: 14459; GFX900: ; %bb.0: 14460; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14461; GFX900-NEXT: ;;#ASMSTART 14462; GFX900-NEXT: ; def s[4:7] 14463; GFX900-NEXT: ;;#ASMEND 14464; GFX900-NEXT: s_mov_b32 s8, s7 14465; GFX900-NEXT: s_mov_b32 s9, s6 14466; GFX900-NEXT: s_mov_b32 s10, s7 14467; GFX900-NEXT: ;;#ASMSTART 14468; GFX900-NEXT: ; use s[8:10] 14469; GFX900-NEXT: ;;#ASMEND 14470; GFX900-NEXT: s_setpc_b64 s[30:31] 14471; 14472; GFX90A-LABEL: s_shuffle_v3i32_v4i32__7_6_7: 14473; GFX90A: ; %bb.0: 14474; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14475; GFX90A-NEXT: ;;#ASMSTART 14476; GFX90A-NEXT: ; def s[4:7] 14477; GFX90A-NEXT: ;;#ASMEND 14478; GFX90A-NEXT: s_mov_b32 s8, s7 14479; GFX90A-NEXT: s_mov_b32 s9, s6 14480; GFX90A-NEXT: s_mov_b32 s10, s7 14481; GFX90A-NEXT: ;;#ASMSTART 14482; GFX90A-NEXT: ; use s[8:10] 14483; GFX90A-NEXT: ;;#ASMEND 14484; GFX90A-NEXT: s_setpc_b64 s[30:31] 14485; 14486; GFX940-LABEL: s_shuffle_v3i32_v4i32__7_6_7: 14487; GFX940: ; %bb.0: 14488; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14489; GFX940-NEXT: ;;#ASMSTART 14490; GFX940-NEXT: ; def s[0:3] 14491; GFX940-NEXT: ;;#ASMEND 14492; GFX940-NEXT: s_mov_b32 s8, s3 14493; GFX940-NEXT: s_mov_b32 s9, s2 14494; GFX940-NEXT: s_mov_b32 s10, s3 14495; GFX940-NEXT: ;;#ASMSTART 14496; GFX940-NEXT: ; use s[8:10] 14497; GFX940-NEXT: ;;#ASMEND 14498; GFX940-NEXT: s_setpc_b64 s[30:31] 14499 %vec0 = call <4 x i32> asm "; def $0", "=s"() 14500 %vec1 = call <4 x i32> asm "; def $0", "=s"() 14501 %shuf = shufflevector <4 x i32> %vec0, <4 x i32> %vec1, <3 x i32> <i32 7, i32 6, i32 7> 14502 call void asm sideeffect "; use $0", "{s[8:10]}"(<3 x i32> %shuf) 14503 ret void 14504} 14505;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: 14506; GFX90APLUS: {{.*}} 14507